lollms-client 0.31.1__py3-none-any.whl → 0.32.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lollms-client might be problematic.
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/llamacpp/__init__.py +210 -35
- lollms_client/llm_bindings/ollama/__init__.py +88 -0
- lollms_client/llm_bindings/openai/__init__.py +372 -294
- lollms_client/llm_bindings/pythonllamacpp/__init__.py +153 -139
- lollms_client/lollms_core.py +17 -5
- lollms_client/lollms_discussion.py +2 -0
- lollms_client/lollms_llm_binding.py +1 -1
- lollms_client/lollms_mcp_security.py +84 -0
- {lollms_client-0.31.1.dist-info → lollms_client-0.32.1.dist-info}/METADATA +1 -1
- {lollms_client-0.31.1.dist-info → lollms_client-0.32.1.dist-info}/RECORD +14 -13
- {lollms_client-0.31.1.dist-info → lollms_client-0.32.1.dist-info}/WHEEL +0 -0
- {lollms_client-0.31.1.dist-info → lollms_client-0.32.1.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-0.31.1.dist-info → lollms_client-0.32.1.dist-info}/top_level.txt +0 -0
lollms_client/llm_bindings/pythonllamacpp/__init__.py
CHANGED

@@ -102,12 +102,65 @@ class PythonLlamaCppBinding(LollmsLLMBinding):
 
         self.model: Optional[Llama] = None
         self.grammar: Optional[LlamaGrammar] = None
+        self.available_models: Dict[str, str] = {}
+
+        # Scan for available models on initialization
+        self._scan_for_models()
+
+        # If a model_path is provided, it can be a short name or a full path.
+        # If not, auto-select the first available model.
+        if not self.model_path and self.available_models:
+            self.model_path = next(iter(self.available_models.keys()))
+            ASCIIColors.info(f"No model specified. Auto-selecting first found model: '{self.model_path}'")
+
+        # Resolve the full path to the model
+        resolved_model_path = self.model_path
+        if self.model_path in self.available_models:
+            resolved_model_path = self.available_models[self.model_path]
+        elif not os.path.exists(self.model_path):
+            if self.available_models:
+                raise FileNotFoundError(f"Model '{self.model_path}' not found. Available models: {list(self.available_models.keys())}")
+            else:
+                raise FileNotFoundError(f"Model file '{self.model_path}' not found, and no other GGUF models were discovered in search paths.")
 
         # Resolve and load grammar if specified
         self._load_grammar_from_config()
 
         # Attempt to load the model
-        self.load_model(
+        self.load_model(resolved_model_path)
+
+    def _scan_for_models(self):
+        """
+        Scans for GGUF models in the specified LoLLMs paths and populates the available_models dictionary.
+        """
+        self.available_models = {}
+
+        personal_path = self.lollms_paths.get('personal_models_path', '')
+        zoo_path = self.lollms_paths.get('models_zoo_path', '')
+
+        scan_paths = []
+        if personal_path and os.path.isdir(personal_path):
+            scan_paths.append(personal_path)
+        if zoo_path and os.path.isdir(zoo_path) and zoo_path not in scan_paths:
+            scan_paths.append(zoo_path)
+
+        ASCIIColors.info(f"Scanning for GGUF models in: {scan_paths}")
+        for scan_path in scan_paths:
+            for root, _, files in os.walk(scan_path):
+                for file in files:
+                    if file.endswith(".gguf"):
+                        full_path = os.path.join(root, file)
+                        # Use relative path from scan_path as the unique "short name"
+                        short_name = os.path.relpath(full_path, scan_path)
+                        # In case of name conflict, personal_path takes precedence
+                        if short_name not in self.available_models:
+                            self.available_models[short_name] = full_path
+
+        if self.available_models:
+            ASCIIColors.green(f"Discovered {len(self.available_models)} GGUF models.")
+        else:
+            ASCIIColors.warning("No GGUF models found in specified search paths.")
+
 
     def _load_grammar_from_config(self):
         grammar_file_path = self.llama_config.get("grammar_file")

@@ -126,28 +179,14 @@ class PythonLlamaCppBinding(LollmsLLMBinding):
                 ASCIIColors.warning(f"Grammar file not found: {full_grammar_path}")
 
     def load_model(self, model_path: str) -> bool:
+        """
+        Loads the GGUF model from the given full path.
+        """
+        if not os.path.exists(model_path):
+            raise FileNotFoundError(f"Cannot load model. File not found at: {model_path}")
+
+        ASCIIColors.info(f"Attempting to load GGUF model from: {model_path}")
         self.model_path = model_path
-        resolved_model_path = self.model_path
-        if not os.path.exists(resolved_model_path):
-            models_base_path = self.lollms_paths.get('personal_models_path', self.lollms_paths.get('models_zoo_path'))
-            if models_base_path:
-                # Assuming model_path might be relative to a binding-specific folder within models_base_path
-                # e.g. models_zoo_path/llamacpp/model_name.gguf
-                # Or it could be directly models_zoo_path/model_name.gguf
-                potential_path_direct = os.path.join(models_base_path, self.model_path)
-                potential_path_binding_specific = os.path.join(models_base_path, self.binding_name.lower(), self.model_path)
-
-                if os.path.exists(potential_path_direct):
-                    resolved_model_path = potential_path_direct
-                elif os.path.exists(potential_path_binding_specific):
-                    resolved_model_path = potential_path_binding_specific
-                else:
-                    raise FileNotFoundError(f"Model file '{self.model_path}' not found directly or in model paths: '{potential_path_direct}', '{potential_path_binding_specific}'")
-            else:
-                raise FileNotFoundError(f"Model file not found: {self.model_path}")
-
-        ASCIIColors.info(f"Attempting to load GGUF model from: {resolved_model_path}")
-        self.model_path = resolved_model_path # Update to resolved path
 
         llama_constructor_keys = [
             "n_gpu_layers", "main_gpu", "tensor_split", "vocab_only", "use_mmap", "use_mlock",

@@ -331,12 +370,14 @@ class PythonLlamaCppBinding(LollmsLLMBinding):
     def tokenize(self, text: str) -> List[int]:
         if not self.model:
             ASCIIColors.warning("Llama.cpp model not loaded. Tokenization fallback to tiktoken.")
+            import tiktoken
             return tiktoken.model.encoding_for_model("gpt-3.5-turbo").encode(text)
         return self.model.tokenize(text.encode("utf-8"), add_bos=False, special=False)
 
     def detokenize(self, tokens: List[int]) -> str:
         if not self.model:
             ASCIIColors.warning("Llama.cpp model not loaded. Detokenization fallback to tiktoken.")
+            import tiktoken
             return tiktoken.model.encoding_for_model("gpt-3.5-turbo").decode(tokens)
         try:
             return self.model.detokenize(tokens).decode("utf-8", errors="ignore")

@@ -346,13 +387,14 @@ class PythonLlamaCppBinding(LollmsLLMBinding):
     def count_tokens(self, text: str) -> int:
         if not self.model:
             ASCIIColors.warning("Llama.cpp model not loaded. Token count fallback to tiktoken.")
+            import tiktoken
             return len(tiktoken.model.encoding_for_model("gpt-3.5-turbo").encode(text))
         return len(self.tokenize(text))
 
     def embed(self, text: str, **kwargs) -> List[float]:
         if not self.model:
             raise Exception("Llama.cpp model not loaded.")
-        if not self.llama_config.get("embedding"):
+        if not self.llama_config.get("embedding"):
             raise Exception("Embedding support was not enabled when loading the model (set 'embedding: true' in config).")
         try:
             return self.model.embed(text)
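In the tokenize, detokenize, and count_tokens hunks above, the `import tiktoken` statement moves inside each fallback branch, so tiktoken is only imported when no GGUF model is loaded. A minimal sketch of that lazy-import fallback pattern, independent of the binding (the `encode_or_fallback` helper name is illustrative, not part of the package):

from typing import List

def encode_or_fallback(model, text: str) -> List[int]:
    # Prefer the loaded llama.cpp model's tokenizer when one is available.
    if model is not None:
        return model.tokenize(text.encode("utf-8"), add_bos=False, special=False)
    # Lazy import: tiktoken is only required when no model is loaded.
    import tiktoken
    return tiktoken.encoding_for_model("gpt-3.5-turbo").encode(text)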
@@ -366,7 +408,7 @@ class PythonLlamaCppBinding(LollmsLLMBinding):
                 "error": "Model not loaded or failed to load."
             }
 
-        is_llava_model = "llava" in self.model_path.lower() or \
+        is_llava_model = "llava" in os.path.basename(self.model_path).lower() or \
                          (self.llama_config.get("chat_format", "").startswith("llava") and \
                           self.llama_config.get("clip_model_path") is not None)
 

@@ -380,17 +422,42 @@ class PythonLlamaCppBinding(LollmsLLMBinding):
             "config": self.llama_config
         }
 
-    def listModels(self) -> List[Dict[str, str]]: # type: ignore
-
-
-
-
-
-
+    def listModels(self, force_rescan: bool = False) -> List[Dict[str, str]]: # type: ignore
+        """
+        Lists available GGUF models.
+
+        Args:
+            force_rescan: If True, rescans the model directories.
+
+        Returns:
+            A list of dictionaries, each representing a found model.
+        """
+        if force_rescan or not self.available_models:
+            self._scan_for_models()
+
+        model_list = []
+        for short_name, full_path in self.available_models.items():
+            is_loaded = (self.model is not None and self.model_path == full_path)
+            model_entry = {
+                'model_name': short_name,
+                'path': full_path,
+                'loaded': is_loaded
+            }
+            if is_loaded:
+                model_entry.update({
+                    'n_ctx': str(self.model.context_params.n_ctx),
+                    'n_gpu_layers': str(self.llama_config.get("n_gpu_layers", "N/A")),
+                })
+            model_list.append(model_entry)
+
+        if not model_list and self.model_path: # Case where a direct path was loaded but not in scan paths
+            return [{
+                'model_name': os.path.basename(self.model_path), 'path': self.model_path, 'loaded': self.model is not None,
+                'n_ctx': str(self.model.context_params.n_ctx) if self.model else "N/A",
                 'n_gpu_layers': str(self.llama_config.get("n_gpu_layers","N/A")),
             }]
-        return
-
+        return model_list
+
 
     def unload_model(self):
         if self.model:

@@ -412,47 +479,39 @@ if __name__ == '__main__':
     ASCIIColors.yellow("Testing PythonLlamaCppBinding...")
 
     # --- IMPORTANT: Configure model path ---
-    #
-    #
-    #
-    gguf_model_path =
-
-    # ---
-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    except Exception as e_dummy:
-        ASCIIColors.error(f"Could not create dummy GGUF: {e_dummy}. Please set a valid GGUF model path.")
-        sys.exit(1)
+    # The binding will now auto-discover models.
+    # To test auto-selection, set gguf_model_path = None
+    # To test loading a specific model, set its "short name" here, e.g., "Mistral-7B/model.gguf"
+    gguf_model_path = None # <<< SET TO A SHORT NAME, FULL PATH, OR None FOR AUTO-SELECTION
+
+    # --- Test Setup ---
+    # Create a dummy model directory for the test
+    mock_models_dir = "test_models_dir"
+    os.makedirs(os.path.join(mock_models_dir, "subfolder"), exist_ok=True)
+
+    dummy_gguf_content = b"GGUF\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00"
+    dummy_gguf_content += b"\x14\x00\x00\x00\x00\x00\x00\x00" + b"tokenizer.ggml.model"
+    dummy_gguf_content += b"\x02\x00\x00\x00\x00\x00\x00\x00" + b"\x05\x00\x00\x00\x00\x00\x00\x00" + b"llama"
+
+    dummy_model_path1 = os.path.join(mock_models_dir, "dummy_model_A.gguf")
+    dummy_model_path2 = os.path.join(mock_models_dir, "subfolder", "dummy_model_B.gguf")
+
+    try:
+        with open(dummy_model_path1, "wb") as f: f.write(dummy_gguf_content)
+        with open(dummy_model_path2, "wb") as f: f.write(dummy_gguf_content)
+        ASCIIColors.info("Created dummy GGUF files for testing.")
+        # If no specific model path is given, the test will use auto-selection on these dummy files.
+        # This will fail at the INFERENCE stage but test the discovery and loading logic.
+        is_dummy_model = True
+    except Exception as e_dummy:
+        ASCIIColors.error(f"Could not create dummy GGUF files: {e_dummy}. Test may fail.")
+        is_dummy_model = False # Assume a real model path is set
 
     binding_config = {
-        "n_gpu_layers": 0,
-        "n_ctx": 1024, # Short context for testing
-        "seed": 1234,
-        "embedding": True, # Enable embedding generation for the test
-        "verbose": False, # Set to True for detailed llama.cpp logs
-        # "grammar_file": "json.gbnf" # Example for grammar test
+        "n_gpu_layers": 0, "n_ctx": 1024, "seed": 1234, "embedding": True, "verbose": False,
     }
 
-    mock_lollms_paths = { "personal_models_path":
+    mock_lollms_paths = { "personal_models_path": mock_models_dir, "grammars_path": "grammars_test" }
    if not os.path.exists(mock_lollms_paths["grammars_path"]):
        os.makedirs(mock_lollms_paths["grammars_path"], exist_ok=True)
        with open(os.path.join(mock_lollms_paths["grammars_path"], "test_grammar.gbnf"), "w") as f:

@@ -461,17 +520,27 @@ if __name__ == '__main__':
    active_binding = None
    try:
        ASCIIColors.cyan("\n--- Initializing PythonLlamaCppBinding ---")
+        # Initialize with the specified path (or None for auto-selection)
        active_binding = PythonLlamaCppBinding(model_path=gguf_model_path, config=binding_config, lollms_paths=mock_lollms_paths)
-
-        ASCIIColors.
+
+        ASCIIColors.green(f"Binding initialized. Loaded model: {os.path.basename(active_binding.model_path)}")
+        ASCIIColors.info(f"Full path: {active_binding.model_path}")
+
+        # --- List Models ---
+        ASCIIColors.cyan("\n--- Listing Models (force_rescan=True) ---")
+        model_list = active_binding.listModels(force_rescan=True)
+        print(json.dumps(model_list, indent=2))
+        assert len(model_list) == 2, "Model discovery failed to find all dummy models."
+        assert any(m['loaded'] for m in model_list), "listModels did not correctly report a loaded model."
+
 
        if is_dummy_model:
-            ASCIIColors.warning("\nRUNNING WITH DUMMY MODEL.
+            ASCIIColors.warning("\nRUNNING WITH DUMMY MODEL. INFERENCE TESTS WILL BE SKIPPED.")
+            ASCIIColors.info("Discovery, loading, and listing tests passed.")
        else:
-            #
-            ASCIIColors.
-
-
+            # These tests will only run if you provided a path to a REAL model
+            ASCIIColors.info("\nRUNNING WITH REAL MODEL. PROCEEDING WITH INFERENCE TESTS...")
+
        # --- Tokenize/Detokenize ---
        ASCIIColors.cyan("\n--- Tokenize/Detokenize ---")
        sample_text = "Hello, Llama.cpp world! This is a test sentence."

@@ -508,71 +577,13 @@ if __name__ == '__main__':
        if isinstance(result, str): ASCIIColors.green(f"Full streamed text: {result}")
        else: ASCIIColors.error(f"Streaming generation failed: {result}")
 
-        # --- Text Generation with Grammar ---
-        ASCIIColors.cyan("\n--- Text Generation with Grammar ---")
-        generated_grammar_text = active_binding.generate_text(
-            "Output a greeting:", n_predict=5, stream=False, use_chat_format=False, # Grammar often better with raw completion
-            grammar=os.path.join(mock_lollms_paths["grammars_path"], "test_grammar.gbnf")
-        )
-        if isinstance(generated_grammar_text, str):
-            ASCIIColors.green(f"Generated text with grammar: '{generated_grammar_text.strip()}'")
-            assert generated_grammar_text.strip().lower() in ["hello", "world"], "Grammar constraint failed!"
-        else: ASCIIColors.error(f"Grammar generation failed: {generated_grammar_text}")
-
-        # --- Embeddings ---
-        if binding_config.get("embedding"):
-            ASCIIColors.cyan("\n--- Embeddings ---")
-            embedding_text = "This is a test for embeddings."
-            try:
-                embedding_vector = active_binding.embed(embedding_text)
-                ASCIIColors.green(f"Embedding for '{embedding_text}' (first 3 dims): {embedding_vector[:3]}...")
-                ASCIIColors.info(f"Embedding vector dimension: {len(embedding_vector)}")
-            except Exception as e_emb: ASCIIColors.warning(f"Could not get embedding: {e_emb}")
-        else: ASCIIColors.yellow("\n--- Embeddings Skipped (embedding: false in config) ---")
-
-        # --- LLaVA Test (if configured and real model is LLaVA) ---
-        if not is_dummy_model and llava_test_model_path and os.path.exists(llava_test_model_path) and \
-           llava_test_clip_model_path and os.path.exists(llava_test_clip_model_path) and \
-           active_binding and active_binding.model_path.lower() == llava_test_model_path.lower():
-
-            ASCIIColors.cyan("\n--- LLaVA Vision Test ---")
-            # This assumes the 'active_binding' was ALREADY loaded with the LLaVA model
-            # and its specific config (clip_model_path, chat_format="llava-1-x").
-            # If not, you'd need to unload and reload/reinitialize the binding for LLaVA.
-            if not (active_binding.llama_config.get("chat_format","").startswith("llava") and \
-                    active_binding.llama_config.get("clip_model_path")):
-                ASCIIColors.warning("Current binding not configured for LLaVA. Skipping LLaVA test.")
-                ASCIIColors.warning("To test LLaVA, ensure gguf_model_path points to LLaVA model and config includes 'chat_format' and 'clip_model_path'.")
-            else:
-                dummy_image_path = "dummy_llava_image.png"
-                try:
-                    from PIL import Image, ImageDraw
-                    img = Image.new('RGB', (200, 80), color = ('cyan'))
-                    d = ImageDraw.Draw(img); d.text((10,20), "LLaVA Test", fill=('black'))
-                    img.save(dummy_image_path)
-                    ASCIIColors.info(f"Created dummy image for LLaVA: {dummy_image_path}")
-
-                    llava_prompt = "What do you see in this image?"
-                    llava_response = active_binding.generate_text(
-                        prompt=llava_prompt, images=[dummy_image_path], n_predict=50, stream=False, use_chat_format=True
-                    )
-                    if isinstance(llava_response, str): ASCIIColors.green(f"LLaVA response: {llava_response}")
-                    else: ASCIIColors.error(f"LLaVA generation failed: {llava_response}")
-                except ImportError: ASCIIColors.warning("Pillow not found. Cannot create dummy image for LLaVA.")
-                except Exception as e_llava: ASCIIColors.error(f"LLaVA test error: {e_llava}"); trace_exception(e_llava)
-                finally:
-                    if os.path.exists(dummy_image_path): os.remove(dummy_image_path)
-        elif not is_dummy_model and llava_test_model_path: # If LLaVA test paths are set but model isn't LLaVA
-            ASCIIColors.yellow(f"LLaVA test paths are set, but current model '{active_binding.model_path if active_binding else 'N/A'}' is not '{llava_test_model_path}'.")
-            ASCIIColors.yellow("Skipping LLaVA-specific test section. To run, set main gguf_model_path to LLaVA model and configure LLaVA params.")
-
-
    except ImportError as e_imp:
        ASCIIColors.error(f"Import error: {e_imp}. Llama-cpp-python might not be installed/configured correctly.")
    except FileNotFoundError as e_fnf:
-        ASCIIColors.error(f"Model file error: {e_fnf}. Ensure GGUF model path is correct.")
+        ASCIIColors.error(f"Model file error: {e_fnf}. Ensure GGUF model path is correct or models are in the right directory.")
    except RuntimeError as e_rt:
        ASCIIColors.error(f"Runtime error (often model load failure or llama.cpp issue): {e_rt}")
+        if is_dummy_model: ASCIIColors.yellow("This error is expected when using a dummy model for loading.")
        trace_exception(e_rt)
    except Exception as e_main:
        ASCIIColors.error(f"An unexpected error occurred: {e_main}")

@@ -583,8 +594,11 @@ if __name__ == '__main__':
            active_binding.unload_model()
            ASCIIColors.green("Model unloaded.")
 
-
-
+        # Cleanup dummy files and directories
+        if os.path.exists(mock_models_dir):
+            import shutil
+            shutil.rmtree(mock_models_dir)
+            ASCIIColors.info(f"Cleaned up dummy model directory: {mock_models_dir}")
 
        test_grammar_file = os.path.join(mock_lollms_paths["grammars_path"], "test_grammar.gbnf")
        if os.path.exists(test_grammar_file): os.remove(test_grammar_file)
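Taken together, these pythonllamacpp changes let the binding start without an explicit model file: it walks `personal_models_path` and `models_zoo_path` for `*.gguf` files, auto-selects the first match when `model_path` is not set, and reports everything through `listModels()`. A rough usage sketch based on the constructor call and `listModels(force_rescan=True)` shown in the diff; the directory names are placeholders and the import path is inferred from the wheel layout, so treat it as an assumption:

import json
from lollms_client.llm_bindings.pythonllamacpp import PythonLlamaCppBinding

# Placeholder directories; they should contain at least one .gguf file.
lollms_paths = {"personal_models_path": "my_models", "models_zoo_path": "models_zoo"}

# model_path=None triggers auto-selection of the first discovered GGUF model.
binding = PythonLlamaCppBinding(
    model_path=None,
    config={"n_gpu_layers": 0, "n_ctx": 1024},
    lollms_paths=lollms_paths,
)

# force_rescan=True re-walks the search paths before reporting.
print(json.dumps(binding.listModels(force_rescan=True), indent=2))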
lollms_client/lollms_core.py
CHANGED
@@ -65,8 +65,8 @@ class LollmsClient():
                 # General Parameters (mostly defaults for LLM generation)
                 service_key: Optional[str] = None, # Shared service key/client_id
                 verify_ssl_certificate: bool = True,
-                 ctx_size: Optional[int] =
-                 n_predict: Optional[int] =
+                 ctx_size: Optional[int|None] = None,
+                 n_predict: Optional[int|None] = None,
                 stream: bool = False,
                 temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
                 top_k: int = 40, # Ollama default is 40

@@ -277,7 +277,7 @@ class LollmsClient():
            available = self.binding_manager.get_available_bindings()
            raise ValueError(f"Failed to update LLM binding: {binding_name}. Available: {available}")
 
-    def get_ctx_size(self, model_name=None):
+    def get_ctx_size(self, model_name:str|None=None):
        if self.binding:
            ctx_size = self.binding.get_ctx_size(model_name)
            return ctx_size if ctx_size else self.default_ctx_size

@@ -524,11 +524,23 @@ class LollmsClient():
            Union[str, dict]: Generated text or error dictionary if failed.
        """
        if self.binding:
+
+            ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size if self.default_ctx_size else None
+            if ctx_size is None:
+                ctx_size = self.binding.get_ctx_size()
+                if ctx_size is None:
+                    ctx_size = 1024*8 # 1028*8= 8192 tokens, a common default for many models
+            nb_input_tokens = self.count_tokens(prompt)+ (sum([self.count_image_tokens(image) for image in images]) if images else 0)
+            if kwargs.get("debug", False):
+                ASCIIColors.magenta(f"Generating text using these parameters:")
+                ASCIIColors.magenta(f"ctx_size : {ctx_size}")
+                ASCIIColors.magenta(f"nb_input_tokens : {nb_input_tokens}")
+
            return self.binding.generate_text(
                prompt=prompt,
                images=images,
                system_prompt=system_prompt,
-                n_predict=n_predict if n_predict
+                n_predict=n_predict if n_predict else self.default_n_predict if self.default_n_predict else ctx_size - nb_input_tokens,
                stream=stream if stream is not None else self.default_stream,
                temperature=temperature if temperature is not None else self.default_temperature,
                top_k=top_k if top_k is not None else self.default_top_k,
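The generate_text hunk above makes the effective context size and prediction budget explicit: an explicit `ctx_size` argument wins, then the client default, then the binding's `get_ctx_size()`, and finally a hard-coded 8192-token fallback, while `n_predict` defaults to whatever room remains after counting the input tokens. A small standalone sketch of that resolution order (plain helper functions written for illustration, not the library API):

from typing import Optional

def resolve_ctx_size(arg: Optional[int], client_default: Optional[int],
                     binding_ctx: Optional[int]) -> int:
    # Mirrors the fallback chain added to LollmsClient.generate_text:
    # explicit argument -> client default -> binding value -> 8192.
    if arg is not None:
        return arg
    if client_default:
        return client_default
    if binding_ctx is not None:
        return binding_ctx
    return 1024 * 8  # 8192 tokens

def resolve_n_predict(arg: Optional[int], client_default: Optional[int],
                      ctx_size: int, nb_input_tokens: int) -> int:
    # n_predict falls back to the space left in the context window.
    if arg:
        return arg
    if client_default:
        return client_default
    return ctx_size - nb_input_tokens

assert resolve_ctx_size(None, None, None) == 8192
assert resolve_n_predict(None, None, 8192, 1000) == 7192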
@@ -3025,7 +3037,7 @@ Provide the final aggregated answer in {output_format} format, directly addressi
 
        if len(tokens) <= chunk_size_tokens:
            if streaming_callback:
-                streaming_callback("Text is short enough for a single
+                streaming_callback("Text is short enough for a single pass.", MSG_TYPE.MSG_TYPE_STEP, {"progress": 0})
            system_prompt = ("You are a content processor expert.\n"
                             "You perform tasks on the content as requested by the user.\n\n"
                             "--- Content ---\n"
lollms_client/lollms_discussion.py
CHANGED

@@ -1738,6 +1738,8 @@ class LollmsDiscussion:
            "description": "JSON object as title of the discussion."
        }
        infos = self.lollmsClient.generate_structured_content(prompt = prompt, system_prompt=system_prompt, schema = title_generation_schema)
+        if infos is None or "title" not in infos:
+            raise ValueError("Title generation failed or returned invalid data.")
        discussion_title = infos["title"]
        new_metadata = (self.metadata or {}).copy()
        new_metadata['title'] = discussion_title
lollms_client/lollms_mcp_security.py
ADDED

@@ -0,0 +1,84 @@
+# File name: lollms_mcp_security.py
+# Author: parisneo
+
+"""
+This script defines a custom token verifier for MCP (Model Context Protocol) using an introspection endpoint.
+The verifier queries the authorization server to check if a given token is valid. It is agnostic about how tokens are created.
+
+Key components:
+- MyTokenInfo class: Extends AccessToken and includes additional fields like user_id and username.
+- IntrospectionTokenVerifier class: Implements the logic to verify tokens by making HTTP requests to an introspection endpoint.
+- token_info_context: A context variable to store token information for easy access.
+
+The script also includes an example of how to use these components within a FastMCP instance, setting up authentication and authorization settings.
+
+Dependencies:
+- mcp.server.auth.provider
+- httpx
+- os
+- contextvars
+
+Environment Variables:
+- AUTHORIZATION_SERVER_URL: The URL of the authorization server. Default is 'http://localhost:9642'.
+"""
+
+from mcp.server.auth.provider import AccessToken, TokenVerifier
+import httpx
+import os
+from contextvars import ContextVar
+
+AUTHORIZATION_SERVER_URL = os.environ.get("AUTHORIZATION_SERVER_URL", "http://localhost:9642")
+
+class MyTokenInfo(AccessToken):
+    user_id: int | None = None
+    username: str | None = None
+
+token_info_context: ContextVar[MyTokenInfo | None] = ContextVar("token_info_context", default=None)
+
+# This is our set of valid API keys. In a real app, you'd check a database.
+class IntrospectionTokenVerifier(TokenVerifier):
+    """
+    This verifier asks the authorization server if a token is valid.
+    It is completely agnostic about how tokens are created.
+    """
+    async def verify_token(self, token: str) -> AccessToken:
+        # Make a secure HTTP call to your /introspect endpoint
+        async with httpx.AsyncClient() as client:
+            try:
+                response = await client.post(
+                    f"{AUTHORIZATION_SERVER_URL}/api/auth/introspect",
+                    data={"token": token}
+                )
+                response.raise_for_status() # Raise an exception for 4xx/5xx errors
+            except httpx.RequestError as e:
+                print(f"ERROR: Could not connect to introspection endpoint: {e}")
+                return AccessToken(active=False, token="", client_id="", scopes=[])
+
+        # Create a TokenInfo object directly from the JSON response
+        token_info_dict = response.json()
+        token_info_dict["token"] = token
+        token_info_dict["client_id"] = str(token_info_dict.get("user_id"))
+        token_info_dict["scopes"] = []
+        token_info = MyTokenInfo(**token_info_dict)
+        token_info_context.set(token_info)
+        return MyTokenInfo(**token_info_dict)
+
+# To recover the user information, just use token_info = token_info_context.get()
+# Example use
+# resource_server_url=f"http://localhost:{port}"
+# mcp = FastMCP(
+#     name="MyMCPServer",
+#     host=host,
+#     port=port,
+#     log_level=log_level,
+#     # 1. This tells MCP to use our class for authentication.
+#     token_verifier=IntrospectionTokenVerifier(),
+#     # 2. This tells MCP to protect all tools by default.
+#     auth=AuthSettings(
+#         # The URL of the server that issues tokens
+#         issuer_url=AUTHORIZATION_SERVER_URL,
+#         # The URL of the MCP server itself
+#         resource_server_url=resource_server_url, # The port of the MCP server
+#         required_scopes=[] # Requires valid authentication
+#     )
+# )
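The new module leaves retrieval of the caller's identity to `token_info_context.get()`, as its closing comments note. A hedged sketch of wiring the verifier into a FastMCP server and reading that context variable from a tool; the `whoami` tool, the host/port values, and the FastMCP/AuthSettings import paths are illustrative assumptions rather than anything defined by lollms_client:

from mcp.server.fastmcp import FastMCP
from mcp.server.auth.settings import AuthSettings
from lollms_client.lollms_mcp_security import (
    AUTHORIZATION_SERVER_URL,
    IntrospectionTokenVerifier,
    token_info_context,
)

mcp = FastMCP(
    name="MyMCPServer",
    host="localhost",
    port=9200,  # illustrative port
    token_verifier=IntrospectionTokenVerifier(),
    auth=AuthSettings(
        issuer_url=AUTHORIZATION_SERVER_URL,
        resource_server_url="http://localhost:9200",
        required_scopes=[],
    ),
)

@mcp.tool()
def whoami() -> str:
    # The verifier stored the introspection result in the context variable.
    info = token_info_context.get()
    return f"Authenticated as {info.username}" if info else "Anonymous"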
{lollms_client-0.31.1.dist-info → lollms_client-0.32.1.dist-info}/RECORD
CHANGED

@@ -29,13 +29,14 @@ examples/mcp_examples/openai_mcp.py,sha256=7IEnPGPXZgYZyiES_VaUbQ6viQjenpcUxGiHE
 examples/mcp_examples/run_remote_mcp_example_v2.py,sha256=bbNn93NO_lKcFzfIsdvJJijGx2ePFTYfknofqZxMuRM,14626
 examples/mcp_examples/run_standard_mcp_example.py,sha256=GSZpaACPf3mDPsjA8esBQVUsIi7owI39ca5avsmvCxA,9419
 examples/test_local_models/local_chat.py,sha256=slakja2zaHOEAUsn2tn_VmI4kLx6luLBrPqAeaNsix8,456
-lollms_client/__init__.py,sha256=
+lollms_client/__init__.py,sha256=5paEgQrICKx3_3wtk257FCKOwolkgmfFPJPwc9aYkWY,1147
 lollms_client/lollms_config.py,sha256=goEseDwDxYJf3WkYJ4IrLXwg3Tfw73CXV2Avg45M_hE,21876
-lollms_client/lollms_core.py,sha256=
-lollms_client/lollms_discussion.py,sha256=
+lollms_client/lollms_core.py,sha256=gDhpB62AluEmbVFvPm7vdnZgP2hGBymDLun57K1jrOM,177352
+lollms_client/lollms_discussion.py,sha256=TT-AIKMdEuNNBjrWgMLcww8z6vIETO0J3cnoQgb9dhU,85448
 lollms_client/lollms_js_analyzer.py,sha256=01zUvuO2F_lnUe_0NLxe1MF5aHE1hO8RZi48mNPv-aw,8361
-lollms_client/lollms_llm_binding.py,sha256=
+lollms_client/lollms_llm_binding.py,sha256=3x5Y_RYZJmMDJkYZp1saLSOnwXszqSmedAEiijsjZlk,15535
 lollms_client/lollms_mcp_binding.py,sha256=0rK9HQCBEGryNc8ApBmtOlhKE1Yfn7X7xIQssXxS2Zc,8933
+lollms_client/lollms_mcp_security.py,sha256=FhVTDhSBjksGEZnopVnjFmEF5dv7D8bBTqoaj4BiF0E,3562
 lollms_client/lollms_personality.py,sha256=O-9nqZhazcITOkxjT24ENTxTmIoZLgqIsQ9WtWs0Id0,8719
 lollms_client/lollms_python_analyzer.py,sha256=7gf1fdYgXCOkPUkBAPNmr6S-66hMH4_KonOMsADASxc,10246
 lollms_client/lollms_stt_binding.py,sha256=jAUhLouEhh2hmm1bK76ianfw_6B59EHfY3FmLv6DU-g,5111

@@ -53,15 +54,15 @@ lollms_client/llm_bindings/grok/__init__.py,sha256=5tIf3348RgAEaSp6FdG-LM9N8R7aR
 lollms_client/llm_bindings/groq/__init__.py,sha256=zyWKM78qHwSt5g0Bb8Njj7Jy8CYuLMyplx2maOKFFpg,12218
 lollms_client/llm_bindings/hugging_face_inference_api/__init__.py,sha256=PxgeRqT8dpa9GZoXwtSncy9AUgAN2cDKrvp_nbaWq0E,14027
 lollms_client/llm_bindings/litellm/__init__.py,sha256=pNkwyRPeENvTM4CDh6Pj3kQfxHfhX2pvXhGJDjKjp30,12340
-lollms_client/llm_bindings/llamacpp/__init__.py,sha256=
+lollms_client/llm_bindings/llamacpp/__init__.py,sha256=uNqOoxFYnsgrYb-lVXQ0QrENWTJC5Np5NMTXfOAYoko,72800
 lollms_client/llm_bindings/lollms/__init__.py,sha256=scGHEKzlGX5fw2XwefVicsf28GrwgN3wU5nl4EPJ_Sk,24424
 lollms_client/llm_bindings/lollms_webui/__init__.py,sha256=Thoq3PJR2e03Y2Kd_FBb-DULJK0zT5-2ID1YIJLcPlw,17864
 lollms_client/llm_bindings/mistral/__init__.py,sha256=624Gr462yBh52ttHFOapKgJOn8zZ1vZcTEcC3i4FYt8,12750
-lollms_client/llm_bindings/ollama/__init__.py,sha256=
+lollms_client/llm_bindings/ollama/__init__.py,sha256=dXKHIeQCS9pz5AS07GF1eWj3ieWiz3aFOtxOX7yojbs,41314
 lollms_client/llm_bindings/open_router/__init__.py,sha256=v91BpNcuQCbbA6r82gbgMP8UYhSrJUMOf4UtOzEo18Q,13235
-lollms_client/llm_bindings/openai/__init__.py,sha256=
+lollms_client/llm_bindings/openai/__init__.py,sha256=Z0zNTfBgBGwkwArN375kBt4otrUTI_84pHgVuyuDy0c,26253
 lollms_client/llm_bindings/openllm/__init__.py,sha256=xv2XDhJNCYe6NPnWBboDs24AQ1VJBOzsTuMcmuQ6xYY,29864
-lollms_client/llm_bindings/pythonllamacpp/__init__.py,sha256=
+lollms_client/llm_bindings/pythonllamacpp/__init__.py,sha256=VNa6NXe7bY44Oev8r6q5XlQbxqWU2nBV62SFlsPPH78,31810
 lollms_client/llm_bindings/tensor_rt/__init__.py,sha256=nPaNhGRd-bsG0UlYwcEqjd_UagCMEf5VEbBUW-GWu6A,32203
 lollms_client/llm_bindings/transformers/__init__.py,sha256=9LkqEC5bp1zHgyeGEcPQ3_uqvEAEf_B4p9DztcBaC5w,37211
 lollms_client/llm_bindings/vllm/__init__.py,sha256=2NqeeqYWXNq1aNicdcAwN9DaoL4gq96GZ7hsKErfC6c,32187

@@ -92,9 +93,9 @@ lollms_client/tts_bindings/piper_tts/__init__.py,sha256=0IEWG4zH3_sOkSb9WbZzkeV5
 lollms_client/tts_bindings/xtts/__init__.py,sha256=FgcdUH06X6ZR806WQe5ixaYx0QoxtAcOgYo87a2qxYc,18266
 lollms_client/ttv_bindings/__init__.py,sha256=UZ8o2izQOJLQgtZ1D1cXoNST7rzqW22rL2Vufc7ddRc,3141
 lollms_client/ttv_bindings/lollms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lollms_client-0.
+lollms_client-0.32.1.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
 test/test_lollms_discussion.py,sha256=KxTsV1bPdNz8QqZd7tIof9kTWkeXLUtAMU08BQmoY6U,16829
-lollms_client-0.
-lollms_client-0.
-lollms_client-0.
-lollms_client-0.
+lollms_client-0.32.1.dist-info/METADATA,sha256=mw11ol3fFJpA2_sxa4olRXLkfVDeMfR_WCd63aLZdXw,38717
+lollms_client-0.32.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lollms_client-0.32.1.dist-info/top_level.txt,sha256=1jIpjTnOSGEGtIW2rGAFM6tVRzgsDdMOiox_SmDH_zw,28
+lollms_client-0.32.1.dist-info/RECORD,,

{lollms_client-0.31.1.dist-info → lollms_client-0.32.1.dist-info}/WHEEL: File without changes
{lollms_client-0.31.1.dist-info → lollms_client-0.32.1.dist-info}/licenses/LICENSE: File without changes
{lollms_client-0.31.1.dist-info → lollms_client-0.32.1.dist-info}/top_level.txt: File without changes