lollms-client 0.31.1__py3-none-any.whl → 0.32.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lollms-client might be problematic.

@@ -102,12 +102,65 @@ class PythonLlamaCppBinding(LollmsLLMBinding):
 
         self.model: Optional[Llama] = None
         self.grammar: Optional[LlamaGrammar] = None
+        self.available_models: Dict[str, str] = {}
+
+        # Scan for available models on initialization
+        self._scan_for_models()
+
+        # If a model_path is provided, it can be a short name or a full path.
+        # If not, auto-select the first available model.
+        if not self.model_path and self.available_models:
+            self.model_path = next(iter(self.available_models.keys()))
+            ASCIIColors.info(f"No model specified. Auto-selecting first found model: '{self.model_path}'")
+
+        # Resolve the full path to the model
+        resolved_model_path = self.model_path
+        if self.model_path in self.available_models:
+            resolved_model_path = self.available_models[self.model_path]
+        elif not os.path.exists(self.model_path):
+            if self.available_models:
+                raise FileNotFoundError(f"Model '{self.model_path}' not found. Available models: {list(self.available_models.keys())}")
+            else:
+                raise FileNotFoundError(f"Model file '{self.model_path}' not found, and no other GGUF models were discovered in search paths.")
 
         # Resolve and load grammar if specified
        self._load_grammar_from_config()
 
         # Attempt to load the model
-        self.load_model(self.model_path)
+        self.load_model(resolved_model_path)
+
+    def _scan_for_models(self):
+        """
+        Scans for GGUF models in the specified LoLLMs paths and populates the available_models dictionary.
+        """
+        self.available_models = {}
+
+        personal_path = self.lollms_paths.get('personal_models_path', '')
+        zoo_path = self.lollms_paths.get('models_zoo_path', '')
+
+        scan_paths = []
+        if personal_path and os.path.isdir(personal_path):
+            scan_paths.append(personal_path)
+        if zoo_path and os.path.isdir(zoo_path) and zoo_path not in scan_paths:
+            scan_paths.append(zoo_path)
+
+        ASCIIColors.info(f"Scanning for GGUF models in: {scan_paths}")
+        for scan_path in scan_paths:
+            for root, _, files in os.walk(scan_path):
+                for file in files:
+                    if file.endswith(".gguf"):
+                        full_path = os.path.join(root, file)
+                        # Use relative path from scan_path as the unique "short name"
+                        short_name = os.path.relpath(full_path, scan_path)
+                        # In case of name conflict, personal_path takes precedence
+                        if short_name not in self.available_models:
+                            self.available_models[short_name] = full_path
+
+        if self.available_models:
+            ASCIIColors.green(f"Discovered {len(self.available_models)} GGUF models.")
+        else:
+            ASCIIColors.warning("No GGUF models found in specified search paths.")
+
 
     def _load_grammar_from_config(self):
         grammar_file_path = self.llama_config.get("grammar_file")
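
With this change, the constructor's model_path can be a short name relative to a scanned models directory, a full path, or None for auto-selection. A minimal sketch of the resulting call (not from the package); the directory layout and model name are invented for illustration:

    # Hedged sketch; "models/" and "Mistral-7B/model.gguf" are hypothetical.
    binding = PythonLlamaCppBinding(
        model_path="Mistral-7B/model.gguf",            # short name, resolved via available_models
        config={"n_gpu_layers": 0, "n_ctx": 1024},
        lollms_paths={"personal_models_path": "models"},
    )
    # model_path=None auto-selects the first discovered GGUF; an unknown name
    # raises FileNotFoundError that lists the available short names.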
@@ -126,28 +179,14 @@ class PythonLlamaCppBinding(LollmsLLMBinding):
             ASCIIColors.warning(f"Grammar file not found: {full_grammar_path}")
 
     def load_model(self, model_path: str) -> bool:
+        """
+        Loads the GGUF model from the given full path.
+        """
+        if not os.path.exists(model_path):
+            raise FileNotFoundError(f"Cannot load model. File not found at: {model_path}")
+
+        ASCIIColors.info(f"Attempting to load GGUF model from: {model_path}")
         self.model_path = model_path
-        resolved_model_path = self.model_path
-        if not os.path.exists(resolved_model_path):
-            models_base_path = self.lollms_paths.get('personal_models_path', self.lollms_paths.get('models_zoo_path'))
-            if models_base_path:
-                # Assuming model_path might be relative to a binding-specific folder within models_base_path
-                # e.g. models_zoo_path/llamacpp/model_name.gguf
-                # Or it could be directly models_zoo_path/model_name.gguf
-                potential_path_direct = os.path.join(models_base_path, self.model_path)
-                potential_path_binding_specific = os.path.join(models_base_path, self.binding_name.lower(), self.model_path)
-
-                if os.path.exists(potential_path_direct):
-                    resolved_model_path = potential_path_direct
-                elif os.path.exists(potential_path_binding_specific):
-                    resolved_model_path = potential_path_binding_specific
-                else:
-                    raise FileNotFoundError(f"Model file '{self.model_path}' not found directly or in model paths: '{potential_path_direct}', '{potential_path_binding_specific}'")
-            else:
-                raise FileNotFoundError(f"Model file not found: {self.model_path}")
-
-        ASCIIColors.info(f"Attempting to load GGUF model from: {resolved_model_path}")
-        self.model_path = resolved_model_path # Update to resolved path
 
         llama_constructor_keys = [
             "n_gpu_layers", "main_gpu", "tensor_split", "vocab_only", "use_mmap", "use_mlock",
@@ -331,12 +370,14 @@ class PythonLlamaCppBinding(LollmsLLMBinding):
     def tokenize(self, text: str) -> List[int]:
         if not self.model:
             ASCIIColors.warning("Llama.cpp model not loaded. Tokenization fallback to tiktoken.")
+            import tiktoken
             return tiktoken.model.encoding_for_model("gpt-3.5-turbo").encode(text)
         return self.model.tokenize(text.encode("utf-8"), add_bos=False, special=False)
 
     def detokenize(self, tokens: List[int]) -> str:
         if not self.model:
             ASCIIColors.warning("Llama.cpp model not loaded. Detokenization fallback to tiktoken.")
+            import tiktoken
             return tiktoken.model.encoding_for_model("gpt-3.5-turbo").decode(tokens)
         try:
             return self.model.detokenize(tokens).decode("utf-8", errors="ignore")
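
These hunks (and the matching one in count_tokens below) defer the tiktoken import to the fallback branch, so the dependency is only needed when no GGUF model is loaded. A small sketch of what that fallback computes, assuming tiktoken is installed; the top-level tiktoken.encoding_for_model is equivalent to the tiktoken.model.encoding_for_model call used in the binding:

    # Hedged sketch of the tiktoken fallback path.
    import tiktoken

    enc = tiktoken.encoding_for_model("gpt-3.5-turbo")   # cl100k_base encoding
    tokens = enc.encode("Hello, Llama.cpp world!")
    print(len(tokens), enc.decode(tokens))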
@@ -346,13 +387,14 @@ class PythonLlamaCppBinding(LollmsLLMBinding):
     def count_tokens(self, text: str) -> int:
         if not self.model:
             ASCIIColors.warning("Llama.cpp model not loaded. Token count fallback to tiktoken.")
+            import tiktoken
             return len(tiktoken.model.encoding_for_model("gpt-3.5-turbo").encode(text))
         return len(self.tokenize(text))
 
     def embed(self, text: str, **kwargs) -> List[float]:
         if not self.model:
             raise Exception("Llama.cpp model not loaded.")
-        if not self.llama_config.get("embedding"): # or not self.model.params.embedding:
+        if not self.llama_config.get("embedding"):
             raise Exception("Embedding support was not enabled when loading the model (set 'embedding: true' in config).")
         try:
             return self.model.embed(text)
@@ -366,7 +408,7 @@ class PythonLlamaCppBinding(LollmsLLMBinding):
                 "error": "Model not loaded or failed to load."
             }
 
-        is_llava_model = "llava" in self.model_path.lower() or \
+        is_llava_model = "llava" in os.path.basename(self.model_path).lower() or \
                          (self.llama_config.get("chat_format", "").startswith("llava") and \
                           self.llama_config.get("clip_model_path") is not None)
 
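Checking only the file name avoids a false positive when a parent directory happens to contain "llava". A two-line illustration (the path is invented):

    # Illustration only; the path is hypothetical.
    path = "/home/user/llava_collection/mistral-7b-q4.gguf"
    "llava" in path.lower()                        # True  -> old check misclassified the model
    "llava" in os.path.basename(path).lower()      # False -> new check inspects the file name only
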
@@ -380,17 +422,42 @@ class PythonLlamaCppBinding(LollmsLLMBinding):
             "config": self.llama_config
         }
 
-    def listModels(self) -> List[Dict[str, str]]: # type: ignore
-        # This method is more for server-based bindings. For LlamaCpp, it describes the loaded model.
-        # It could be extended to scan lollms_paths for GGUF files.
-        if self.model:
-            return [{
-                'model_name': os.path.basename(self.model_path), 'path': self.model_path, 'loaded': True,
-                'n_ctx': str(self.model.context_params.n_ctx),
+    def listModels(self, force_rescan: bool = False) -> List[Dict[str, str]]: # type: ignore
+        """
+        Lists available GGUF models.
+
+        Args:
+            force_rescan: If True, rescans the model directories.
+
+        Returns:
+            A list of dictionaries, each representing a found model.
+        """
+        if force_rescan or not self.available_models:
+            self._scan_for_models()
+
+        model_list = []
+        for short_name, full_path in self.available_models.items():
+            is_loaded = (self.model is not None and self.model_path == full_path)
+            model_entry = {
+                'model_name': short_name,
+                'path': full_path,
+                'loaded': is_loaded
+            }
+            if is_loaded:
+                model_entry.update({
+                    'n_ctx': str(self.model.context_params.n_ctx),
+                    'n_gpu_layers': str(self.llama_config.get("n_gpu_layers", "N/A")),
+                })
+            model_list.append(model_entry)
+
+        if not model_list and self.model_path: # Case where a direct path was loaded but not in scan paths
+            return [{
+                'model_name': os.path.basename(self.model_path), 'path': self.model_path, 'loaded': self.model is not None,
+                'n_ctx': str(self.model.context_params.n_ctx) if self.model else "N/A",
                 'n_gpu_layers': str(self.llama_config.get("n_gpu_layers","N/A")),
             }]
-        return [{'model_name': os.path.basename(self.model_path) if self.model_path else "Not specified",
-                 'path': self.model_path, 'loaded': False, 'error': "Model not loaded."}]
+        return model_list
+
 
     def unload_model(self):
         if self.model:
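
A hedged example of the shape listModels() now returns when two models are discovered and one of them is loaded; the names, paths, and values below are invented for illustration (output as printed with json.dumps):

    [
        {"model_name": "dummy_model_A.gguf", "path": "/models/dummy_model_A.gguf",
         "loaded": true, "n_ctx": "1024", "n_gpu_layers": "0"},
        {"model_name": "subfolder/dummy_model_B.gguf", "path": "/models/subfolder/dummy_model_B.gguf",
         "loaded": false}
    ]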
@@ -412,47 +479,39 @@ if __name__ == '__main__':
    ASCIIColors.yellow("Testing PythonLlamaCppBinding...")
 
     # --- IMPORTANT: Configure model path ---
-    # Replace with the ACTUAL PATH to your GGUF model file.
-    # e.g., gguf_model_path = "C:/Models/Mistral-7B-Instruct-v0.2-Q4_K_M.gguf"
-    # If this path is not found, a dummy GGUF will be created for basic tests.
-    gguf_model_path = "model.gguf" # <<< REPLACE THIS OR ENSURE 'model.gguf' EXISTS
-
-    # --- LLaVA Test Configuration (Optional) ---
-    # To test LLaVA, set this to your LLaVA GGUF model path
-    llava_test_model_path = None # e.g., "path/to/your/llava-v1.6-mistral-7b.Q4_K_M.gguf"
-    # And the corresponding mmproj (clip model) GGUF path
-    llava_test_clip_model_path = None # e.g., "path/to/your/mmproj-mistral7b-f16.gguf"
-    # And set the chat format for LLaVA
-    llava_chat_format = "llava-1-6" # or "llava-1-5" depending on your model
-
-    # Attempt to create a dummy GGUF if specified path doesn't exist (for placeholder testing)
-    is_dummy_model = False
-    if not os.path.exists(gguf_model_path):
-        ASCIIColors.warning(f"Model path '{gguf_model_path}' not found.")
-        ASCIIColors.warning("Creating a tiny dummy GGUF file ('dummy_model.gguf') for placeholder testing.")
-        ASCIIColors.warning("This dummy file WILL NOT WORK for actual inference.")
-        try:
-            with open("dummy_model.gguf", "wb") as f: # Minimal valid GGUF structure
-                f.write(b"GGUF\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00")
-                key, val = "tokenizer.ggml.model", "llama"
-                f.write(len(key).to_bytes(8,'little')+key.encode()+b"\x02\x00\x00\x00\x00\x00\x00\x00"+len(val).to_bytes(8,'little')+val.encode())
-            gguf_model_path = "dummy_model.gguf"
-            is_dummy_model = True
-            ASCIIColors.info(f"Using dummy GGUF: {gguf_model_path}. Real inference tests will fail or be skipped.")
-        except Exception as e_dummy:
-            ASCIIColors.error(f"Could not create dummy GGUF: {e_dummy}. Please set a valid GGUF model path.")
-            sys.exit(1)
+    # The binding will now auto-discover models.
+    # To test auto-selection, set gguf_model_path = None
+    # To test loading a specific model, set its "short name" here, e.g., "Mistral-7B/model.gguf"
+    gguf_model_path = None # <<< SET TO A SHORT NAME, FULL PATH, OR None FOR AUTO-SELECTION
+
+    # --- Test Setup ---
+    # Create a dummy model directory for the test
+    mock_models_dir = "test_models_dir"
+    os.makedirs(os.path.join(mock_models_dir, "subfolder"), exist_ok=True)
+
+    dummy_gguf_content = b"GGUF\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00"
+    dummy_gguf_content += b"\x14\x00\x00\x00\x00\x00\x00\x00" + b"tokenizer.ggml.model"
+    dummy_gguf_content += b"\x02\x00\x00\x00\x00\x00\x00\x00" + b"\x05\x00\x00\x00\x00\x00\x00\x00" + b"llama"
+
+    dummy_model_path1 = os.path.join(mock_models_dir, "dummy_model_A.gguf")
+    dummy_model_path2 = os.path.join(mock_models_dir, "subfolder", "dummy_model_B.gguf")
+
+    try:
+        with open(dummy_model_path1, "wb") as f: f.write(dummy_gguf_content)
+        with open(dummy_model_path2, "wb") as f: f.write(dummy_gguf_content)
+        ASCIIColors.info("Created dummy GGUF files for testing.")
+        # If no specific model path is given, the test will use auto-selection on these dummy files.
+        # This will fail at the INFERENCE stage but test the discovery and loading logic.
+        is_dummy_model = True
+    except Exception as e_dummy:
+        ASCIIColors.error(f"Could not create dummy GGUF files: {e_dummy}. Test may fail.")
+        is_dummy_model = False # Assume a real model path is set
 
     binding_config = {
-        "n_gpu_layers": 0, # 0 for CPU, -1 for all possible layers to GPU, or specific number
-        "n_ctx": 1024, # Short context for testing
-        "seed": 1234,
-        "embedding": True, # Enable embedding generation for the test
-        "verbose": False, # Set to True for detailed llama.cpp logs
-        # "grammar_file": "json.gbnf" # Example for grammar test
+        "n_gpu_layers": 0, "n_ctx": 1024, "seed": 1234, "embedding": True, "verbose": False,
     }
 
-    mock_lollms_paths = { "personal_models_path": ".", "grammars_path": "grammars_test" }
+    mock_lollms_paths = { "personal_models_path": mock_models_dir, "grammars_path": "grammars_test" }
     if not os.path.exists(mock_lollms_paths["grammars_path"]):
         os.makedirs(mock_lollms_paths["grammars_path"], exist_ok=True)
         with open(os.path.join(mock_lollms_paths["grammars_path"], "test_grammar.gbnf"), "w") as f:
@@ -461,17 +520,27 @@
     active_binding = None
     try:
         ASCIIColors.cyan("\n--- Initializing PythonLlamaCppBinding ---")
+        # Initialize with the specified path (or None for auto-selection)
         active_binding = PythonLlamaCppBinding(model_path=gguf_model_path, config=binding_config, lollms_paths=mock_lollms_paths)
-        ASCIIColors.green(f"Binding initialized. Model: {active_binding.model_path}")
-        ASCIIColors.info(f"Model Info: {json.dumps(active_binding.get_model_info(), indent=2)}")
+
+        ASCIIColors.green(f"Binding initialized. Loaded model: {os.path.basename(active_binding.model_path)}")
+        ASCIIColors.info(f"Full path: {active_binding.model_path}")
+
+        # --- List Models ---
+        ASCIIColors.cyan("\n--- Listing Models (force_rescan=True) ---")
+        model_list = active_binding.listModels(force_rescan=True)
+        print(json.dumps(model_list, indent=2))
+        assert len(model_list) == 2, "Model discovery failed to find all dummy models."
+        assert any(m['loaded'] for m in model_list), "listModels did not correctly report a loaded model."
+
 
         if is_dummy_model:
-            ASCIIColors.warning("\nRUNNING WITH DUMMY MODEL. MOST FUNCTIONALITY TESTS WILL BE SKIPPED.")
+            ASCIIColors.warning("\nRUNNING WITH DUMMY MODEL. INFERENCE TESTS WILL BE SKIPPED.")
+            ASCIIColors.info("Discovery, loading, and listing tests passed.")
         else:
-            # --- List Models ---
-            ASCIIColors.cyan("\n--- Listing Models ---")
-            print(json.dumps(active_binding.listModels(), indent=2))
-
+            # These tests will only run if you provided a path to a REAL model
+            ASCIIColors.info("\nRUNNING WITH REAL MODEL. PROCEEDING WITH INFERENCE TESTS...")
+
             # --- Tokenize/Detokenize ---
             ASCIIColors.cyan("\n--- Tokenize/Detokenize ---")
             sample_text = "Hello, Llama.cpp world! This is a test sentence."
@@ -508,71 +577,13 @@ class PythonLlamaCppBinding(LollmsLLMBinding):
             if isinstance(result, str): ASCIIColors.green(f"Full streamed text: {result}")
             else: ASCIIColors.error(f"Streaming generation failed: {result}")
 
-            # --- Text Generation with Grammar ---
-            ASCIIColors.cyan("\n--- Text Generation with Grammar ---")
-            generated_grammar_text = active_binding.generate_text(
-                "Output a greeting:", n_predict=5, stream=False, use_chat_format=False, # Grammar often better with raw completion
-                grammar=os.path.join(mock_lollms_paths["grammars_path"], "test_grammar.gbnf")
-            )
-            if isinstance(generated_grammar_text, str):
-                ASCIIColors.green(f"Generated text with grammar: '{generated_grammar_text.strip()}'")
-                assert generated_grammar_text.strip().lower() in ["hello", "world"], "Grammar constraint failed!"
-            else: ASCIIColors.error(f"Grammar generation failed: {generated_grammar_text}")
-
-            # --- Embeddings ---
-            if binding_config.get("embedding"):
-                ASCIIColors.cyan("\n--- Embeddings ---")
-                embedding_text = "This is a test for embeddings."
-                try:
-                    embedding_vector = active_binding.embed(embedding_text)
-                    ASCIIColors.green(f"Embedding for '{embedding_text}' (first 3 dims): {embedding_vector[:3]}...")
-                    ASCIIColors.info(f"Embedding vector dimension: {len(embedding_vector)}")
-                except Exception as e_emb: ASCIIColors.warning(f"Could not get embedding: {e_emb}")
-            else: ASCIIColors.yellow("\n--- Embeddings Skipped (embedding: false in config) ---")
-
-            # --- LLaVA Test (if configured and real model is LLaVA) ---
-            if not is_dummy_model and llava_test_model_path and os.path.exists(llava_test_model_path) and \
-               llava_test_clip_model_path and os.path.exists(llava_test_clip_model_path) and \
-               active_binding and active_binding.model_path.lower() == llava_test_model_path.lower():
-
-                ASCIIColors.cyan("\n--- LLaVA Vision Test ---")
-                # This assumes the 'active_binding' was ALREADY loaded with the LLaVA model
-                # and its specific config (clip_model_path, chat_format="llava-1-x").
-                # If not, you'd need to unload and reload/reinitialize the binding for LLaVA.
-                if not (active_binding.llama_config.get("chat_format","").startswith("llava") and \
-                        active_binding.llama_config.get("clip_model_path")):
-                    ASCIIColors.warning("Current binding not configured for LLaVA. Skipping LLaVA test.")
-                    ASCIIColors.warning("To test LLaVA, ensure gguf_model_path points to LLaVA model and config includes 'chat_format' and 'clip_model_path'.")
-                else:
-                    dummy_image_path = "dummy_llava_image.png"
-                    try:
-                        from PIL import Image, ImageDraw
-                        img = Image.new('RGB', (200, 80), color = ('cyan'))
-                        d = ImageDraw.Draw(img); d.text((10,20), "LLaVA Test", fill=('black'))
-                        img.save(dummy_image_path)
-                        ASCIIColors.info(f"Created dummy image for LLaVA: {dummy_image_path}")
-
-                        llava_prompt = "What do you see in this image?"
-                        llava_response = active_binding.generate_text(
-                            prompt=llava_prompt, images=[dummy_image_path], n_predict=50, stream=False, use_chat_format=True
-                        )
-                        if isinstance(llava_response, str): ASCIIColors.green(f"LLaVA response: {llava_response}")
-                        else: ASCIIColors.error(f"LLaVA generation failed: {llava_response}")
-                    except ImportError: ASCIIColors.warning("Pillow not found. Cannot create dummy image for LLaVA.")
-                    except Exception as e_llava: ASCIIColors.error(f"LLaVA test error: {e_llava}"); trace_exception(e_llava)
-                    finally:
-                        if os.path.exists(dummy_image_path): os.remove(dummy_image_path)
-            elif not is_dummy_model and llava_test_model_path: # If LLaVA test paths are set but model isn't LLaVA
-                ASCIIColors.yellow(f"LLaVA test paths are set, but current model '{active_binding.model_path if active_binding else 'N/A'}' is not '{llava_test_model_path}'.")
-                ASCIIColors.yellow("Skipping LLaVA-specific test section. To run, set main gguf_model_path to LLaVA model and configure LLaVA params.")
-
-
     except ImportError as e_imp:
         ASCIIColors.error(f"Import error: {e_imp}. Llama-cpp-python might not be installed/configured correctly.")
     except FileNotFoundError as e_fnf:
-        ASCIIColors.error(f"Model file error: {e_fnf}. Ensure GGUF model path is correct.")
+        ASCIIColors.error(f"Model file error: {e_fnf}. Ensure GGUF model path is correct or models are in the right directory.")
     except RuntimeError as e_rt:
         ASCIIColors.error(f"Runtime error (often model load failure or llama.cpp issue): {e_rt}")
+        if is_dummy_model: ASCIIColors.yellow("This error is expected when using a dummy model for loading.")
         trace_exception(e_rt)
     except Exception as e_main:
         ASCIIColors.error(f"An unexpected error occurred: {e_main}")
@@ -583,8 +594,11 @@
             active_binding.unload_model()
             ASCIIColors.green("Model unloaded.")
 
-        if is_dummy_model and os.path.exists("dummy_model.gguf"):
-            os.remove("dummy_model.gguf")
+        # Cleanup dummy files and directories
+        if os.path.exists(mock_models_dir):
+            import shutil
+            shutil.rmtree(mock_models_dir)
+            ASCIIColors.info(f"Cleaned up dummy model directory: {mock_models_dir}")
 
         test_grammar_file = os.path.join(mock_lollms_paths["grammars_path"], "test_grammar.gbnf")
         if os.path.exists(test_grammar_file): os.remove(test_grammar_file)
@@ -65,8 +65,8 @@ class LollmsClient():
                  # General Parameters (mostly defaults for LLM generation)
                  service_key: Optional[str] = None, # Shared service key/client_id
                  verify_ssl_certificate: bool = True,
-                 ctx_size: Optional[int] = 8192,
-                 n_predict: Optional[int] = 4096,
+                 ctx_size: Optional[int|None] = None,
+                 n_predict: Optional[int|None] = None,
                  stream: bool = False,
                  temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
                  top_k: int = 40, # Ollama default is 40
@@ -277,7 +277,7 @@ class LollmsClient():
             available = self.binding_manager.get_available_bindings()
             raise ValueError(f"Failed to update LLM binding: {binding_name}. Available: {available}")
 
-    def get_ctx_size(self, model_name=None):
+    def get_ctx_size(self, model_name:str|None=None):
         if self.binding:
             ctx_size = self.binding.get_ctx_size(model_name)
             return ctx_size if ctx_size else self.default_ctx_size
@@ -524,11 +524,23 @@ class LollmsClient():
             Union[str, dict]: Generated text or error dictionary if failed.
         """
         if self.binding:
+
+            ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size if self.default_ctx_size else None
+            if ctx_size is None:
+                ctx_size = self.binding.get_ctx_size()
+            if ctx_size is None:
+                ctx_size = 1024*8 # 1028*8= 8192 tokens, a common default for many models
+            nb_input_tokens = self.count_tokens(prompt)+ (sum([self.count_image_tokens(image) for image in images]) if images else 0)
+            if kwargs.get("debug", False):
+                ASCIIColors.magenta(f"Generating text using these parameters:")
+                ASCIIColors.magenta(f"ctx_size : {ctx_size}")
+                ASCIIColors.magenta(f"nb_input_tokens : {nb_input_tokens}")
+
             return self.binding.generate_text(
                 prompt=prompt,
                 images=images,
                 system_prompt=system_prompt,
-                n_predict=n_predict if n_predict is not None else self.default_n_predict,
+                n_predict=n_predict if n_predict else self.default_n_predict if self.default_n_predict else ctx_size - nb_input_tokens,
                 stream=stream if stream is not None else self.default_stream,
                 temperature=temperature if temperature is not None else self.default_temperature,
                 top_k=top_k if top_k is not None else self.default_top_k,
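
In short, the effective context size is now resolved as: explicit ctx_size argument, then the client default, then the binding's own value, then 8192; when no n_predict is given anywhere, the remaining context is used. A hedged numeric illustration (values invented):

    # Hedged illustration of the fallback chain; numbers are invented.
    ctx_size = 8192                          # argument -> client default -> binding -> 8192
    nb_input_tokens = 1500                   # prompt tokens + image tokens
    n_predict = ctx_size - nb_input_tokens   # 6692, used only when no explicit or default n_predict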
@@ -3025,7 +3037,7 @@ Provide the final aggregated answer in {output_format} format, directly addressi
 
         if len(tokens) <= chunk_size_tokens:
             if streaming_callback:
-                streaming_callback("Text is short enough for a single process.", MSG_TYPE.MSG_TYPE_STEP, {"progress": 0})
+                streaming_callback("Text is short enough for a single pass.", MSG_TYPE.MSG_TYPE_STEP, {"progress": 0})
             system_prompt = ("You are a content processor expert.\n"
                              "You perform tasks on the content as requested by the user.\n\n"
                              "--- Content ---\n"
@@ -1738,6 +1738,8 @@ class LollmsDiscussion:
             "description": "JSON object as title of the discussion."
         }
         infos = self.lollmsClient.generate_structured_content(prompt = prompt, system_prompt=system_prompt, schema = title_generation_schema)
+        if infos is None or "title" not in infos:
+            raise ValueError("Title generation failed or returned invalid data.")
         discussion_title = infos["title"]
         new_metadata = (self.metadata or {}).copy()
         new_metadata['title'] = discussion_title
@@ -154,7 +154,7 @@ class LollmsLLMBinding(ABC):
         """
         pass
 
-    def get_ctx_size(self, model_name=None):
+    def get_ctx_size(self, model_name:str|None=None):
         # if model_name is none use current model name
         return None
 
@@ -0,0 +1,84 @@
+# File name: lollms_mcp_security.py
+# Author: parisneo
+
+"""
+This script defines a custom token verifier for MCP (Model Context Protocol) using an introspection endpoint.
+The verifier queries the authorization server to check if a given token is valid. It is agnostic about how tokens are created.
+
+Key components:
+- MyTokenInfo class: Extends AccessToken and includes additional fields like user_id and username.
+- IntrospectionTokenVerifier class: Implements the logic to verify tokens by making HTTP requests to an introspection endpoint.
+- token_info_context: A context variable to store token information for easy access.
+
+The script also includes an example of how to use these components within a FastMCP instance, setting up authentication and authorization settings.
+
+Dependencies:
+- mcp.server.auth.provider
+- httpx
+- os
+- contextvars
+
+Environment Variables:
+- AUTHORIZATION_SERVER_URL: The URL of the authorization server. Default is 'http://localhost:9642'.
+"""
+
+from mcp.server.auth.provider import AccessToken, TokenVerifier
+import httpx
+import os
+from contextvars import ContextVar
+
+AUTHORIZATION_SERVER_URL = os.environ.get("AUTHORIZATION_SERVER_URL", "http://localhost:9642")
+
+class MyTokenInfo(AccessToken):
+    user_id: int | None = None
+    username: str | None = None
+
+token_info_context: ContextVar[MyTokenInfo | None] = ContextVar("token_info_context", default=None)
+
+# This is our set of valid API keys. In a real app, you'd check a database.
+class IntrospectionTokenVerifier(TokenVerifier):
+    """
+    This verifier asks the authorization server if a token is valid.
+    It is completely agnostic about how tokens are created.
+    """
+    async def verify_token(self, token: str) -> AccessToken:
+        # Make a secure HTTP call to your /introspect endpoint
+        async with httpx.AsyncClient() as client:
+            try:
+                response = await client.post(
+                    f"{AUTHORIZATION_SERVER_URL}/api/auth/introspect",
+                    data={"token": token}
+                )
+                response.raise_for_status() # Raise an exception for 4xx/5xx errors
+            except httpx.RequestError as e:
+                print(f"ERROR: Could not connect to introspection endpoint: {e}")
+                return AccessToken(active=False, token="", client_id="", scopes=[])
+
+        # Create a TokenInfo object directly from the JSON response
+        token_info_dict = response.json()
+        token_info_dict["token"] = token
+        token_info_dict["client_id"] = str(token_info_dict.get("user_id"))
+        token_info_dict["scopes"] = []
+        token_info = MyTokenInfo(**token_info_dict)
+        token_info_context.set(token_info)
+        return MyTokenInfo(**token_info_dict)
+
+# To recover the user information, just use token_info = token_info_context.get()
+# Example use
+# resource_server_url=f"http://localhost:{port}"
+# mcp = FastMCP(
+#     name="MyMCPServer",
+#     host=host,
+#     port=port,
+#     log_level=log_level,
+#     # 1. This tells MCP to use our class for authentication.
+#     token_verifier=IntrospectionTokenVerifier(),
+#     # 2. This tells MCP to protect all tools by default.
+#     auth=AuthSettings(
+#         # The URL of the server that issues tokens
+#         issuer_url=AUTHORIZATION_SERVER_URL,
+#         # The URL of the MCP server itself
+#         resource_server_url=resource_server_url, # The port of the MCP server
+#         required_scopes=[] # Requires valid authentication
+#     )
+# )
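
A hedged usage sketch (not part of the package) showing how a FastMCP tool could read the verified token info through token_info_context. It assumes an mcp = FastMCP(...) instance configured as in the commented example above; the tool name and return value are invented:

    # Hedged sketch; assumes the FastMCP/AuthSettings setup from the example above.
    from lollms_client.lollms_mcp_security import token_info_context

    @mcp.tool()
    async def whoami() -> str:
        token_info = token_info_context.get()   # set by IntrospectionTokenVerifier.verify_token
        if token_info is None:
            return "anonymous"
        return f"user {token_info.username} (id={token_info.user_id})"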
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lollms_client
-Version: 0.31.1
+Version: 0.32.1
 Summary: A client library for LoLLMs generate endpoint
 Author-email: ParisNeo <parisneoai@gmail.com>
 License: Apache Software License
@@ -29,13 +29,14 @@ examples/mcp_examples/openai_mcp.py,sha256=7IEnPGPXZgYZyiES_VaUbQ6viQjenpcUxGiHE
 examples/mcp_examples/run_remote_mcp_example_v2.py,sha256=bbNn93NO_lKcFzfIsdvJJijGx2ePFTYfknofqZxMuRM,14626
 examples/mcp_examples/run_standard_mcp_example.py,sha256=GSZpaACPf3mDPsjA8esBQVUsIi7owI39ca5avsmvCxA,9419
 examples/test_local_models/local_chat.py,sha256=slakja2zaHOEAUsn2tn_VmI4kLx6luLBrPqAeaNsix8,456
-lollms_client/__init__.py,sha256=JktSbTe0t4EQVVLvNRScoCv9YSjDkFpblIwHDeE_-CE,1147
+lollms_client/__init__.py,sha256=5paEgQrICKx3_3wtk257FCKOwolkgmfFPJPwc9aYkWY,1147
 lollms_client/lollms_config.py,sha256=goEseDwDxYJf3WkYJ4IrLXwg3Tfw73CXV2Avg45M_hE,21876
-lollms_client/lollms_core.py,sha256=eOO92EFcs1akOXfRBV2QD5kpDmJyiDZP90g75jHgl6w,176520
-lollms_client/lollms_discussion.py,sha256=vaBJ9LJumTUgi2550toNOnEOYMN412OvPicMn8CNi64,85306
+lollms_client/lollms_core.py,sha256=gDhpB62AluEmbVFvPm7vdnZgP2hGBymDLun57K1jrOM,177352
+lollms_client/lollms_discussion.py,sha256=TT-AIKMdEuNNBjrWgMLcww8z6vIETO0J3cnoQgb9dhU,85448
 lollms_client/lollms_js_analyzer.py,sha256=01zUvuO2F_lnUe_0NLxe1MF5aHE1hO8RZi48mNPv-aw,8361
-lollms_client/lollms_llm_binding.py,sha256=_r5_bZfasJQlI84EfH_sKlVMlOuiIgMXL6wYznRT_GM,15526
+lollms_client/lollms_llm_binding.py,sha256=3x5Y_RYZJmMDJkYZp1saLSOnwXszqSmedAEiijsjZlk,15535
 lollms_client/lollms_mcp_binding.py,sha256=0rK9HQCBEGryNc8ApBmtOlhKE1Yfn7X7xIQssXxS2Zc,8933
+lollms_client/lollms_mcp_security.py,sha256=FhVTDhSBjksGEZnopVnjFmEF5dv7D8bBTqoaj4BiF0E,3562
 lollms_client/lollms_personality.py,sha256=O-9nqZhazcITOkxjT24ENTxTmIoZLgqIsQ9WtWs0Id0,8719
 lollms_client/lollms_python_analyzer.py,sha256=7gf1fdYgXCOkPUkBAPNmr6S-66hMH4_KonOMsADASxc,10246
 lollms_client/lollms_stt_binding.py,sha256=jAUhLouEhh2hmm1bK76ianfw_6B59EHfY3FmLv6DU-g,5111
@@ -53,15 +54,15 @@ lollms_client/llm_bindings/grok/__init__.py,sha256=5tIf3348RgAEaSp6FdG-LM9N8R7aR
 lollms_client/llm_bindings/groq/__init__.py,sha256=zyWKM78qHwSt5g0Bb8Njj7Jy8CYuLMyplx2maOKFFpg,12218
 lollms_client/llm_bindings/hugging_face_inference_api/__init__.py,sha256=PxgeRqT8dpa9GZoXwtSncy9AUgAN2cDKrvp_nbaWq0E,14027
 lollms_client/llm_bindings/litellm/__init__.py,sha256=pNkwyRPeENvTM4CDh6Pj3kQfxHfhX2pvXhGJDjKjp30,12340
-lollms_client/llm_bindings/llamacpp/__init__.py,sha256=4cotP3cYhiA0501UnGVljlEBBVatNyfIyrZsHUPJk24,63878
+lollms_client/llm_bindings/llamacpp/__init__.py,sha256=uNqOoxFYnsgrYb-lVXQ0QrENWTJC5Np5NMTXfOAYoko,72800
 lollms_client/llm_bindings/lollms/__init__.py,sha256=scGHEKzlGX5fw2XwefVicsf28GrwgN3wU5nl4EPJ_Sk,24424
 lollms_client/llm_bindings/lollms_webui/__init__.py,sha256=Thoq3PJR2e03Y2Kd_FBb-DULJK0zT5-2ID1YIJLcPlw,17864
 lollms_client/llm_bindings/mistral/__init__.py,sha256=624Gr462yBh52ttHFOapKgJOn8zZ1vZcTEcC3i4FYt8,12750
-lollms_client/llm_bindings/ollama/__init__.py,sha256=_plx8cO3Bl9igmIEvTkJ6tkZ2imHS_L76hCHdJAGIhQ,36851
+lollms_client/llm_bindings/ollama/__init__.py,sha256=dXKHIeQCS9pz5AS07GF1eWj3ieWiz3aFOtxOX7yojbs,41314
 lollms_client/llm_bindings/open_router/__init__.py,sha256=v91BpNcuQCbbA6r82gbgMP8UYhSrJUMOf4UtOzEo18Q,13235
-lollms_client/llm_bindings/openai/__init__.py,sha256=kLG0-FyLMoSbEay1hcK46XjEBaLbFE3U51lUjAZ8HoI,25663
+lollms_client/llm_bindings/openai/__init__.py,sha256=Z0zNTfBgBGwkwArN375kBt4otrUTI_84pHgVuyuDy0c,26253
 lollms_client/llm_bindings/openllm/__init__.py,sha256=xv2XDhJNCYe6NPnWBboDs24AQ1VJBOzsTuMcmuQ6xYY,29864
-lollms_client/llm_bindings/pythonllamacpp/__init__.py,sha256=7dM42TCGKh0eV0njNL1tc9cInhyvBRIXzN3dcy12Gl0,33551
+lollms_client/llm_bindings/pythonllamacpp/__init__.py,sha256=VNa6NXe7bY44Oev8r6q5XlQbxqWU2nBV62SFlsPPH78,31810
 lollms_client/llm_bindings/tensor_rt/__init__.py,sha256=nPaNhGRd-bsG0UlYwcEqjd_UagCMEf5VEbBUW-GWu6A,32203
 lollms_client/llm_bindings/transformers/__init__.py,sha256=9LkqEC5bp1zHgyeGEcPQ3_uqvEAEf_B4p9DztcBaC5w,37211
 lollms_client/llm_bindings/vllm/__init__.py,sha256=2NqeeqYWXNq1aNicdcAwN9DaoL4gq96GZ7hsKErfC6c,32187
@@ -92,9 +93,9 @@ lollms_client/tts_bindings/piper_tts/__init__.py,sha256=0IEWG4zH3_sOkSb9WbZzkeV5
 lollms_client/tts_bindings/xtts/__init__.py,sha256=FgcdUH06X6ZR806WQe5ixaYx0QoxtAcOgYo87a2qxYc,18266
 lollms_client/ttv_bindings/__init__.py,sha256=UZ8o2izQOJLQgtZ1D1cXoNST7rzqW22rL2Vufc7ddRc,3141
 lollms_client/ttv_bindings/lollms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lollms_client-0.31.1.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+lollms_client-0.32.1.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
 test/test_lollms_discussion.py,sha256=KxTsV1bPdNz8QqZd7tIof9kTWkeXLUtAMU08BQmoY6U,16829
-lollms_client-0.31.1.dist-info/METADATA,sha256=fqTGolR3AxVMxM-Wc1TX9112dp2wMEEGqQyHSS4xx14,38717
-lollms_client-0.31.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-lollms_client-0.31.1.dist-info/top_level.txt,sha256=1jIpjTnOSGEGtIW2rGAFM6tVRzgsDdMOiox_SmDH_zw,28
-lollms_client-0.31.1.dist-info/RECORD,,
+lollms_client-0.32.1.dist-info/METADATA,sha256=mw11ol3fFJpA2_sxa4olRXLkfVDeMfR_WCd63aLZdXw,38717
+lollms_client-0.32.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lollms_client-0.32.1.dist-info/top_level.txt,sha256=1jIpjTnOSGEGtIW2rGAFM6tVRzgsDdMOiox_SmDH_zw,28
+lollms_client-0.32.1.dist-info/RECORD,,