abstractcore-2.5.0-py3-none-any.whl → abstractcore-2.5.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/__init__.py +12 -0
- abstractcore/apps/__main__.py +8 -1
- abstractcore/apps/deepsearch.py +644 -0
- abstractcore/apps/intent.py +614 -0
- abstractcore/architectures/detection.py +250 -4
- abstractcore/assets/architecture_formats.json +14 -1
- abstractcore/assets/model_capabilities.json +583 -44
- abstractcore/compression/__init__.py +29 -0
- abstractcore/compression/analytics.py +420 -0
- abstractcore/compression/cache.py +250 -0
- abstractcore/compression/config.py +279 -0
- abstractcore/compression/exceptions.py +30 -0
- abstractcore/compression/glyph_processor.py +381 -0
- abstractcore/compression/optimizer.py +388 -0
- abstractcore/compression/orchestrator.py +380 -0
- abstractcore/compression/pil_text_renderer.py +818 -0
- abstractcore/compression/quality.py +226 -0
- abstractcore/compression/text_formatter.py +666 -0
- abstractcore/compression/vision_compressor.py +371 -0
- abstractcore/config/main.py +66 -1
- abstractcore/config/manager.py +111 -5
- abstractcore/core/session.py +105 -5
- abstractcore/events/__init__.py +1 -1
- abstractcore/media/auto_handler.py +312 -18
- abstractcore/media/handlers/local_handler.py +14 -2
- abstractcore/media/handlers/openai_handler.py +62 -3
- abstractcore/media/processors/__init__.py +11 -1
- abstractcore/media/processors/direct_pdf_processor.py +210 -0
- abstractcore/media/processors/glyph_pdf_processor.py +227 -0
- abstractcore/media/processors/image_processor.py +7 -1
- abstractcore/media/processors/text_processor.py +18 -3
- abstractcore/media/types.py +164 -7
- abstractcore/processing/__init__.py +5 -1
- abstractcore/processing/basic_deepsearch.py +2173 -0
- abstractcore/processing/basic_intent.py +690 -0
- abstractcore/providers/__init__.py +18 -0
- abstractcore/providers/anthropic_provider.py +29 -2
- abstractcore/providers/base.py +279 -6
- abstractcore/providers/huggingface_provider.py +658 -27
- abstractcore/providers/lmstudio_provider.py +52 -2
- abstractcore/providers/mlx_provider.py +103 -4
- abstractcore/providers/model_capabilities.py +352 -0
- abstractcore/providers/ollama_provider.py +44 -6
- abstractcore/providers/openai_provider.py +29 -2
- abstractcore/providers/registry.py +91 -19
- abstractcore/server/app.py +91 -81
- abstractcore/structured/handler.py +161 -1
- abstractcore/tools/common_tools.py +98 -3
- abstractcore/utils/__init__.py +4 -1
- abstractcore/utils/cli.py +114 -1
- abstractcore/utils/trace_export.py +287 -0
- abstractcore/utils/version.py +1 -1
- abstractcore/utils/vlm_token_calculator.py +655 -0
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/METADATA +140 -23
- abstractcore-2.5.3.dist-info/RECORD +107 -0
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/entry_points.txt +4 -0
- abstractcore-2.5.0.dist-info/RECORD +0 -86
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/WHEEL +0 -0
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/top_level.txt +0 -0
--- abstractcore/providers/huggingface_provider.py (2.5.0)
+++ abstractcore/providers/huggingface_provider.py (2.5.3)

@@ -8,6 +8,25 @@ import json
 from pathlib import Path
 from typing import List, Dict, Any, Optional, Union, Iterator, Type
 
+# Import config manager to respect offline-first settings
+from ..config.manager import get_config_manager
+
+# Get config instance and set offline environment variables if needed
+_config = get_config_manager()
+if _config.is_offline_first():
+    os.environ["TRANSFORMERS_OFFLINE"] = "1"
+    os.environ["HF_DATASETS_OFFLINE"] = "1"
+    os.environ["HF_HUB_OFFLINE"] = "1"
+
+# Enable MPS fallback for Apple Silicon to handle unsupported operations
+# This prevents "MPS: Unsupported Border padding mode" errors in vision models
+try:
+    import torch
+    if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
+        os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
+except ImportError:
+    pass  # torch not available, skip MPS setup
+
 try:
     from pydantic import BaseModel
     PYDANTIC_AVAILABLE = True
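The module-level gate above switches the HuggingFace libraries into cache-only mode before any model is loaded. A minimal sketch of the effect, assuming the import paths shown in the file list and an offline-first configuration:

```python
import os

# Assumed import paths, taken from the wheel's file list above.
from abstractcore.config.manager import get_config_manager
import abstractcore.providers.huggingface_provider  # noqa: F401  (importing the module applies the gate)

if get_config_manager().is_offline_first():
    # transformers / huggingface_hub now resolve models from the local cache only.
    assert os.environ["TRANSFORMERS_OFFLINE"] == "1"
    assert os.environ["HF_DATASETS_OFFLINE"] == "1"
    assert os.environ["HF_HUB_OFFLINE"] == "1"
```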
@@ -22,7 +41,7 @@ from ..events import EventType
 
 # Try to import transformers (standard HuggingFace support)
 try:
-    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+    from transformers import AutoModelForCausalLM, AutoModel, AutoTokenizer, AutoProcessor, AutoModelForImageTextToText, pipeline
     import torch
     TRANSFORMERS_AVAILABLE = True
 except ImportError:
@@ -35,16 +54,40 @@ try:
 except ImportError:
     LLAMACPP_AVAILABLE = False
 
+# Try to import Outlines (native structured output for transformers models)
+try:
+    import outlines
+    OUTLINES_AVAILABLE = True
+except ImportError:
+    OUTLINES_AVAILABLE = False
+
 # We no longer download models - cache-only approach
 # huggingface_hub not required for basic operation
 
 
+def _get_local_model_path(model_name: str) -> Optional[str]:
+    """Get local cache path for a HuggingFace model if it exists."""
+    # Use centralized configuration for cache directory
+    config = _config
+    hf_cache_dir = Path(config.config.cache.huggingface_cache_dir).expanduser()
+
+    model_cache_name = f"models--{model_name.replace('/', '--')}"
+    model_cache_path = hf_cache_dir / "hub" / model_cache_name / "snapshots"
+
+    if model_cache_path.exists():
+        snapshot_dirs = [d for d in model_cache_path.iterdir() if d.is_dir()]
+        if snapshot_dirs:
+            return str(snapshot_dirs[0])  # Return first snapshot
+    return None
+
+
 class HuggingFaceProvider(BaseProvider):
     """HuggingFace provider with dual support for transformers and GGUF models"""
 
-    def __init__(self, model: str = "
+    def __init__(self, model: str = "unsloth/Qwen3-4B-Instruct-2507-GGUF",
                  device: Optional[str] = None,
                  n_gpu_layers: Optional[int] = None,
+                 structured_output_method: str = "auto",
                  **kwargs):
 
         # Handle legacy context_size parameter with deprecation warning
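The cache lookup added above relies on the standard HuggingFace hub layout (`models--<org>--<name>/snapshots/<revision>`). A minimal sketch of the path it resolves, assuming a default `~/.cache/huggingface` cache root (the provider reads the root from its central config instead):

```python
from pathlib import Path

hf_cache_dir = Path("~/.cache/huggingface").expanduser()  # assumed default cache root

model_name = "zai-org/Glyph"  # example repo id taken from the vision-model list below
model_cache_name = f"models--{model_name.replace('/', '--')}"
snapshots = hf_cache_dir / "hub" / model_cache_name / "snapshots"

if snapshots.exists():
    revisions = [d for d in snapshots.iterdir() if d.is_dir()]
    if revisions:
        print(revisions[0])  # the provider returns the first snapshot directory it finds
```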
@@ -61,10 +104,18 @@ class HuggingFaceProvider(BaseProvider):
             kwargs["max_tokens"] = context_size
 
         super().__init__(model, **kwargs)
+        self.provider = "huggingface"
 
         # Handle timeout parameter for local models
         self._handle_timeout_parameter(kwargs)
 
+        # Structured output method: "auto", "native_outlines", "prompted"
+        # auto: Use Outlines if available (for transformers), otherwise prompted (default)
+        # native_outlines: Force Outlines (error if unavailable)
+        # prompted: Always use prompted fallback (fastest for transformers, still 100% success)
+        # Note: GGUF models always use llama-cpp-python native support regardless of this setting
+        self.structured_output_method = structured_output_method
+
         # Initialize tool handler
         self.tool_handler = UniversalToolHandler(model)
 
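A minimal usage sketch for the new constructor parameter, assuming `HuggingFaceProvider` is importable from `abstractcore.providers.huggingface_provider` as the file list suggests:

```python
# Assumed import path based on the wheel layout shown in the file list.
from abstractcore.providers.huggingface_provider import HuggingFaceProvider

# "auto" (default): use Outlines for transformers models when it is installed,
# otherwise fall back to prompted structured output.
provider = HuggingFaceProvider(
    model="unsloth/Qwen3-4B-Instruct-2507-GGUF",  # new default shown in the diff
    structured_output_method="auto",
)

# "native_outlines" insists on Outlines and errors out if the library is missing;
# "prompted" skips Outlines entirely. GGUF models ignore this setting.
```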
@@ -72,9 +123,19 @@ class HuggingFaceProvider(BaseProvider):
         self.n_gpu_layers = n_gpu_layers
         self.model_type = None  # Will be "transformers" or "gguf"
         self.device = device
+
+        # Store transformers-specific parameters
+        self.transformers_kwargs = {
+            k: v for k, v in kwargs.items()
+            if k in ['trust_remote_code', 'torch_dtype', 'device_map', 'load_in_8bit', 'load_in_4bit', 'attn_implementation']
+        }
+
+        # Store device preference for custom models
+        self.preferred_device = kwargs.get('device_map', 'auto')
 
         # Model instances
         self.tokenizer = None
+        self.processor = None  # For vision models
         self.model_instance = None
         self.pipeline = None
         self.llm = None  # For GGUF models
@@ -111,6 +172,9 @@ class HuggingFaceProvider(BaseProvider):
 
         if hasattr(self, 'tokenizer') and self.tokenizer is not None:
             self.tokenizer = None
+
+        if hasattr(self, 'processor') and self.processor is not None:
+            self.processor = None
 
         if hasattr(self, 'model') and hasattr(self, 'model') and self.model is not None:
             # For transformers models, clear the model
@@ -153,6 +217,26 @@ class HuggingFaceProvider(BaseProvider):
 
         return False
 
+    def _is_vision_model(self, model: str) -> bool:
+        """Detect if the model is a vision model that requires special handling"""
+        model_lower = model.lower()
+
+        # Known vision models that require AutoModelForImageTextToText
+        vision_models = [
+            'glyph',         # zai-org/Glyph
+            'glm-4.1v',      # GLM-4.1V variants
+            'glm4v',         # GLM4V architecture
+            'qwen-vl',       # Qwen-VL models
+            'qwen2-vl',      # Qwen2-VL models
+            'qwen2.5-vl',    # Qwen2.5-VL models
+            'llava',         # LLaVA models
+            'instructblip',  # InstructBLIP models
+            'blip2',         # BLIP2 models
+            'flamingo',      # Flamingo models
+        ]
+
+        return any(vision_keyword in model_lower for vision_keyword in vision_models)
+
     def _setup_device_transformers(self):
         """Setup device for transformers models"""
         if not TRANSFORMERS_AVAILABLE:
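The detection added above is a plain substring match on the lowercased model id. A self-contained illustration of the same idea:

```python
# Substring matching on the lowercased repo id, mirroring _is_vision_model above.
vision_keywords = ['glyph', 'glm-4.1v', 'glm4v', 'qwen2.5-vl', 'llava', 'blip2']

def looks_like_vision_model(model_id: str) -> bool:
    model_lower = model_id.lower()
    return any(keyword in model_lower for keyword in vision_keywords)

print(looks_like_vision_model("zai-org/Glyph"))                   # True  -> vision loading path
print(looks_like_vision_model("unsloth/Qwen3-4B-Instruct-2507"))  # False -> normal causal LM path
```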
@@ -200,24 +284,65 @@ class HuggingFaceProvider(BaseProvider):
     def _load_transformers_model(self):
         """Load standard HuggingFace transformers model"""
         try:
-
-            self.
+            # Check if this is a vision model that requires special handling
+            if self._is_vision_model(self.model):
+                return self._load_vision_model()
+
+            # Load tokenizer with transformers-specific parameters
+            tokenizer_kwargs = {k: v for k, v in self.transformers_kwargs.items()
+                                if k in ['trust_remote_code']}
+            # Respect offline-first configuration
+            if _config.should_force_local_files_only():
+                tokenizer_kwargs['local_files_only'] = True
+            self.tokenizer = AutoTokenizer.from_pretrained(self.model, **tokenizer_kwargs)
+
+            # Load model with all transformers-specific parameters
+            # Try AutoModelForCausalLM first, fall back to AutoModel for custom models
+            model_kwargs = self.transformers_kwargs.copy()
+            # Respect offline-first configuration
+            if _config.should_force_local_files_only():
+                model_kwargs['local_files_only'] = True
+
+            try:
+                self.model_instance = AutoModelForCausalLM.from_pretrained(self.model, **model_kwargs)
+            except ValueError as e:
+                if "Unrecognized configuration class" in str(e) or "glm4v" in str(e).lower():
+                    # Fall back to AutoModel for custom models like DeepSeek-OCR
+                    self.model_instance = AutoModel.from_pretrained(self.model, **model_kwargs)
+                else:
+                    raise
 
-            # Move to device
-            if self.device in ["cuda", "mps"]:
+            # Move to device (only if not using device_map)
+            if self.device in ["cuda", "mps"] and 'device_map' not in self.transformers_kwargs:
                 self.model_instance = self.model_instance.to(self.device)
 
-            # Create pipeline
+            # Create pipeline - handle custom models that don't support text-generation
             device_arg = 0 if self.device == "cuda" else -1
             if self.device == "mps":
                 device_arg = -1
 
-
-
-
-
-
-
+            try:
+                # Don't pass device argument if using device_map (accelerate)
+                if 'device_map' in self.transformers_kwargs:
+                    self.pipeline = pipeline(
+                        "text-generation",
+                        model=self.model_instance,
+                        tokenizer=self.tokenizer
+                    )
+                else:
+                    self.pipeline = pipeline(
+                        "text-generation",
+                        model=self.model_instance,
+                        tokenizer=self.tokenizer,
+                        device=device_arg
+                    )
+            except ValueError as e:
+                if "not supported for text-generation" in str(e) or "accelerate" in str(e):
+                    # For custom models like DeepSeek-OCR, skip pipeline creation
+                    # We'll handle generation directly through the model
+                    self.pipeline = None
+                else:
+                    raise
 
         except Exception as e:
             error_str = str(e).lower()
@@ -229,6 +354,96 @@ class HuggingFaceProvider(BaseProvider):
             else:
                 raise RuntimeError(f"Failed to load HuggingFace model {self.model}: {str(e)}")
 
+    def _load_vision_model(self):
+        """Load vision model using AutoModelForImageTextToText and AutoProcessor"""
+        try:
+            # Suppress progress bars during model loading unless in debug mode
+            import os
+            from transformers.utils import logging as transformers_logging
+
+            if not self.debug:
+                # Disable transformers progress bars
+                os.environ['TRANSFORMERS_VERBOSITY'] = 'error'
+                transformers_logging.set_verbosity_error()
+                # Disable tqdm progress bars
+                os.environ['DISABLE_TQDM'] = '1'
+
+            # Load processor for vision models (handles both text and images)
+            processor_kwargs = {k: v for k, v in self.transformers_kwargs.items()
+                                if k in ['trust_remote_code']}
+            # Enable trust_remote_code for custom architectures like GLM4V
+            processor_kwargs['trust_remote_code'] = True
+            # Set use_fast=True to avoid the slow processor warning
+            processor_kwargs['use_fast'] = True
+            # Respect offline-first configuration
+            if _config.should_force_local_files_only():
+                processor_kwargs['local_files_only'] = True
+
+            # Use local cache path if offline mode is enabled and model is cached
+            model_path = self.model
+            if _config.should_force_local_files_only():
+                local_path = _get_local_model_path(self.model)
+                if local_path:
+                    model_path = local_path
+                    processor_kwargs.pop('local_files_only', None)  # Remove since we're using local path
+                    self.logger.debug(f"Loading processor from local cache: {local_path}")
+
+            self.processor = AutoProcessor.from_pretrained(model_path, **processor_kwargs)
+
+            # Load vision model using AutoModelForImageTextToText with trust_remote_code
+            vision_kwargs = self.transformers_kwargs.copy()
+            vision_kwargs['trust_remote_code'] = True
+            # Respect offline-first configuration
+            if _config.should_force_local_files_only():
+                vision_kwargs['local_files_only'] = True
+
+            # Use local cache path if offline mode is enabled and model is cached
+            model_path = self.model
+            if _config.should_force_local_files_only():
+                local_path = _get_local_model_path(self.model)
+                if local_path:
+                    model_path = local_path
+                    vision_kwargs.pop('local_files_only', None)  # Remove since we're using local path
+                    self.logger.debug(f"Loading model from local cache: {local_path}")
+
+            self.model_instance = AutoModelForImageTextToText.from_pretrained(model_path, **vision_kwargs)
+
+            # Restore logging levels if they were suppressed
+            if not self.debug:
+                # Restore transformers logging
+                transformers_logging.set_verbosity_warning()
+                # Remove tqdm suppression
+                if 'DISABLE_TQDM' in os.environ:
+                    del os.environ['DISABLE_TQDM']
+
+            # Move to device (only if not using device_map)
+            if self.device in ["cuda", "mps"] and 'device_map' not in self.transformers_kwargs:
+                self.model_instance = self.model_instance.to(self.device)
+
+            # For vision models, we don't use the standard pipeline
+            self.pipeline = None
+
+            self.logger.info(f"Successfully loaded vision model {self.model} using AutoModelForImageTextToText")
+
+        except Exception as e:
+            error_str = str(e).lower()
+
+            # Check for transformers version issues
+            if 'glm4v' in error_str and 'does not recognize this architecture' in error_str:
+                import transformers
+                current_version = transformers.__version__
+                raise RuntimeError(
+                    f"GLM4V architecture requires transformers>=4.57.1, but you have {current_version}. "
+                    f"Please upgrade: pip install transformers>=4.57.1"
+                )
+            elif ('not found' in error_str or 'does not exist' in error_str or
+                  'not a valid model identifier' in error_str):
+                available_models = self.list_available_models()
+                error_message = format_model_error("HuggingFace", self.model, available_models)
+                raise ModelNotFoundError(error_message)
+            else:
+                raise RuntimeError(f"Failed to load HuggingFace vision model {self.model}: {str(e)}")
+
     def _find_gguf_in_cache(self, model_name: str) -> Optional[str]:
         """Find GGUF model in HuggingFace cache (cache-only, no downloading)"""
 
@@ -481,9 +696,9 @@ class HuggingFaceProvider(BaseProvider):
         """Generate response using appropriate backend"""
 
         if self.model_type == "gguf":
-            return self._generate_gguf(prompt, messages, system_prompt, tools, media, stream, **kwargs)
+            return self._generate_gguf(prompt, messages, system_prompt, tools, media, stream, response_model, **kwargs)
         else:
-            return self._generate_transformers(prompt, messages, system_prompt, tools, media, stream, **kwargs)
+            return self._generate_transformers(prompt, messages, system_prompt, tools, media, stream, response_model, **kwargs)
 
     def _generate_transformers(self,
                                prompt: str,
@@ -492,15 +707,83 @@ class HuggingFaceProvider(BaseProvider):
                                tools: Optional[List[Dict[str, Any]]] = None,
                                media: Optional[List['MediaContent']] = None,
                                stream: bool = False,
+                               response_model: Optional[Type[BaseModel]] = None,
                                **kwargs) -> Union[GenerateResponse, Iterator[GenerateResponse]]:
-        """Generate using transformers backend
+        """Generate using transformers backend with optional Outlines native structured output"""
 
         if not self.pipeline:
-
-
-
-
-            )
+            # Handle vision models that use processor instead of pipeline
+            if self.processor and hasattr(self.model_instance, 'generate'):
+                return self._generate_vision_model(prompt, messages, system_prompt, tools, media, stream, response_model, **kwargs)
+            # Handle custom models like DeepSeek-OCR that don't support standard pipelines
+            elif hasattr(self.model_instance, 'infer'):
+                return self._generate_custom_model(prompt, messages, system_prompt, tools, media, stream, response_model, **kwargs)
+            else:
+                return GenerateResponse(
+                    content="Error: Transformers model not loaded or doesn't support generation",
+                    model=self.model,
+                    finish_reason="error"
+                )
+
+        # Native structured output via Outlines (if configured and available)
+        should_use_outlines = (
+            response_model and
+            PYDANTIC_AVAILABLE and
+            not stream and
+            self.structured_output_method != "prompted"  # Skip if explicitly prompted
+        )
+
+        if should_use_outlines:
+            # Check if Outlines is required but unavailable
+            if self.structured_output_method == "native_outlines" and not OUTLINES_AVAILABLE:
+                return GenerateResponse(
+                    content="Error: structured_output_method='native_outlines' requires Outlines library. Install with: pip install abstractcore[huggingface]",
+                    model=self.model,
+                    finish_reason="error"
+                )
+
+            # Try Outlines if available (auto or native_outlines mode)
+            if OUTLINES_AVAILABLE:
+                try:
+                    # Cache Outlines model wrapper to avoid re-initialization
+                    if not hasattr(self, '_outlines_model') or self._outlines_model is None:
+                        self.logger.debug("Creating Outlines model wrapper for native structured output")
+                        self._outlines_model = outlines.from_transformers(
+                            self.model_instance,
+                            self.tokenizer
+                        )
+
+                    # Build input text (same as normal generation)
+                    input_text = self._build_input_text_transformers(prompt, messages, system_prompt, tools)
+
+                    # Create constrained generator with JSON schema
+                    self.logger.debug(f"Using Outlines native structured output for {response_model.__name__}")
+                    generator = self._outlines_model(
+                        input_text,
+                        outlines.json_schema(response_model),
+                        max_tokens=kwargs.get("max_tokens", self.max_tokens or 512)
+                    )
+
+                    # Validate and return
+                    validated_obj = response_model.model_validate(generator)
+
+                    return GenerateResponse(
+                        content=validated_obj.model_dump_json(),
+                        model=self.model,
+                        finish_reason="stop",
+                        validated_object=validated_obj
+                    )
+                except Exception as e:
+                    # If native_outlines was explicitly requested, don't fall back
+                    if self.structured_output_method == "native_outlines":
+                        return GenerateResponse(
+                            content=f"Error: Outlines native structured output failed: {str(e)}",
+                            model=self.model,
+                            finish_reason="error"
+                        )
+                    # Otherwise fall back to prompted approach
+                    self.logger.debug(f"Outlines generation failed, falling back to prompted: {e}")
+                    # Continue with normal generation below
 
         # Build input text with tool and media support
         # Handle media content first if present
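A minimal sketch of the structured-output path added above. It assumes the provider's public `generate()` entry point forwards `response_model` the same way the dispatcher hunk above does; the model id and field names are illustrative:

```python
from pydantic import BaseModel

# Assumed import path based on the wheel layout shown in the file list.
from abstractcore.providers.huggingface_provider import HuggingFaceProvider


class CityFact(BaseModel):
    city: str
    population_millions: float


provider = HuggingFaceProvider(model="Qwen/Qwen2.5-0.5B-Instruct",
                               structured_output_method="auto")

# With Outlines installed, generation is constrained to CityFact's JSON schema and
# the response carries the parsed object; otherwise the prompted fallback is used.
response = provider.generate("Give one fact about Tokyo.", response_model=CityFact)
print(response.content)           # JSON matching the schema
print(response.validated_object)  # CityFact instance when validation succeeds
```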
@@ -561,6 +844,311 @@ class HuggingFaceProvider(BaseProvider):
                 finish_reason="error"
             )
 
+    def _generate_custom_model(self,
+                               prompt: str,
+                               messages: Optional[List[Dict[str, str]]] = None,
+                               system_prompt: Optional[str] = None,
+                               tools: Optional[List[Dict[str, Any]]] = None,
+                               media: Optional[List['MediaContent']] = None,
+                               stream: bool = False,
+                               response_model: Optional[Type[BaseModel]] = None,
+                               **kwargs) -> Union[GenerateResponse, Iterator[GenerateResponse]]:
+        """Generate using custom model methods (e.g., DeepSeek-OCR's infer method)"""
+
+        import time
+        import tempfile
+        import os
+        start_time = time.time()
+
+        try:
+            # Handle media content for vision models like DeepSeek-OCR
+            if media and len(media) > 0:
+                # Use the first image for OCR
+                media_item = media[0]
+
+                # DeepSeek-OCR expects image file path
+                if hasattr(media_item, 'file_path') and media_item.file_path:
+                    image_file = str(media_item.file_path)
+                else:
+                    # If no file path, save media content to temp file
+                    from PIL import Image
+
+                    if hasattr(media_item, 'content') and media_item.content:
+                        # Handle base64 content
+                        if media_item.content_format == 'BASE64':
+                            import base64
+                            image_data = base64.b64decode(media_item.content)
+                            temp_file = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
+                            temp_file.write(image_data)
+                            temp_file.close()
+                            image_file = temp_file.name
+                        else:
+                            return GenerateResponse(
+                                content="Error: Unsupported media format for DeepSeek-OCR",
+                                model=self.model,
+                                finish_reason="error"
+                            )
+                    else:
+                        return GenerateResponse(
+                            content="Error: No valid image content found",
+                            model=self.model,
+                            finish_reason="error"
+                        )
+
+                # Use DeepSeek-OCR's infer method
+                try:
+                    # Create temporary output directory for DeepSeek-OCR
+                    temp_output_dir = tempfile.mkdtemp()
+
+                    # Patch DeepSeek-OCR for MPS/CPU compatibility if needed
+                    if self.device == "mps" or (self.device is None and hasattr(torch.backends, 'mps') and torch.backends.mps.is_available()):
+                        self._patch_deepseek_for_mps()
+
+                    result = self.model_instance.infer(
+                        self.tokenizer,
+                        prompt=prompt,
+                        image_file=image_file,
+                        output_path=temp_output_dir,  # DeepSeek-OCR requires output path
+                        base_size=1024,
+                        image_size=640,
+                        crop_mode=True,
+                        save_results=False,
+                        test_compress=False
+                    )
+
+                    # Clean up temp output directory
+                    import shutil
+                    shutil.rmtree(temp_output_dir, ignore_errors=True)
+
+                    # Clean up temp file if created
+                    if 'temp_file' in locals() and os.path.exists(image_file):
+                        os.unlink(image_file)
+
+                    # Calculate generation time
+                    gen_time = (time.time() - start_time) * 1000
+
+                    return GenerateResponse(
+                        content=result if isinstance(result, str) else str(result),
+                        model=self.model,
+                        finish_reason="stop",
+                        input_tokens=len(prompt.split()),  # Rough estimate
+                        output_tokens=len(str(result).split()) if result else 0,
+                        gen_time=gen_time
+                    )
+
+                except Exception as e:
+                    return GenerateResponse(
+                        content=f"Error during DeepSeek-OCR inference: {str(e)}",
+                        model=self.model,
+                        finish_reason="error"
+                    )
+
+            else:
+                return GenerateResponse(
+                    content="Error: DeepSeek-OCR requires image input",
+                    model=self.model,
+                    finish_reason="error"
+                )
+
+        except Exception as e:
+            return GenerateResponse(
+                content=f"Error in custom model generation: {str(e)}",
+                model=self.model,
+                finish_reason="error"
+            )
+
+    def _generate_vision_model(self,
+                               prompt: str,
+                               messages: Optional[List[Dict[str, str]]] = None,
+                               system_prompt: Optional[str] = None,
+                               tools: Optional[List[Dict[str, Any]]] = None,
+                               media: Optional[List['MediaContent']] = None,
+                               stream: bool = False,
+                               response_model: Optional[Type[BaseModel]] = None,
+                               **kwargs) -> Union[GenerateResponse, Iterator[GenerateResponse]]:
+        """Generate using vision model (Glyph, GLM-4.1V, etc.)"""
+
+        import time
+        start_time = time.time()
+
+        # Import torch safely
+        try:
+            import torch
+        except ImportError:
+            return GenerateResponse(
+                content="Error: PyTorch not available for vision model generation",
+                model=self.model,
+                finish_reason="error",
+                gen_time=0.0
+            )
+
+        try:
+            # Build messages for vision model
+            chat_messages = []
+
+            if system_prompt:
+                chat_messages.append({"role": "system", "content": system_prompt})
+
+            if messages:
+                chat_messages.extend(messages)
+
+            # Build user message with media content
+            user_content = []
+
+            # Add text content
+            if prompt:
+                user_content.append({"type": "text", "text": prompt})
+
+            # Add media content (images)
+            if media:
+                for media_item in media:
+                    if hasattr(media_item, 'file_path') and media_item.file_path:
+                        # Use file path directly
+                        user_content.append({
+                            "type": "image",
+                            "url": str(media_item.file_path)
+                        })
+                    elif hasattr(media_item, 'content') and media_item.content:
+                        # Handle base64 content
+                        if media_item.content_format == 'BASE64':
+                            # Create data URL for base64 content
+                            mime_type = getattr(media_item, 'mime_type', 'image/png')
+                            data_url = f"data:{mime_type};base64,{media_item.content}"
+                            user_content.append({
+                                "type": "image",
+                                "url": data_url
+                            })
+
+            # Add user message
+            chat_messages.append({
+                "role": "user",
+                "content": user_content
+            })
+
+            # Process messages using the processor
+            inputs = self.processor.apply_chat_template(
+                chat_messages,
+                tokenize=True,
+                add_generation_prompt=True,
+                return_dict=True,
+                return_tensors="pt"
+            ).to(self.model_instance.device)
+
+            # Generation parameters
+            generation_kwargs = {
+                "max_new_tokens": kwargs.get("max_tokens", self.max_output_tokens or 512),
+                "temperature": kwargs.get("temperature", self.temperature),
+                "do_sample": True,
+                "pad_token_id": self.processor.tokenizer.eos_token_id,
+            }
+
+            # Add seed if provided
+            seed_value = kwargs.get("seed", self.seed)
+            if seed_value is not None:
+                torch.manual_seed(seed_value)
+                if torch.cuda.is_available():
+                    torch.cuda.manual_seed_all(seed_value)
+
+            # Generate response
+            # For Apple Silicon, move inputs to CPU if MPS causes issues
+            if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
+                try:
+                    generated_ids = self.model_instance.generate(**inputs, **generation_kwargs)
+                except RuntimeError as e:
+                    if "MPS: Unsupported Border padding mode" in str(e):
+                        self.logger.warning("MPS Border padding mode error detected, falling back to CPU")
+                        # Move model and inputs to CPU
+                        cpu_model = self.model_instance.to('cpu')
+                        cpu_inputs = {k: v.to('cpu') if hasattr(v, 'to') else v for k, v in inputs.items()}
+                        generated_ids = cpu_model.generate(**cpu_inputs, **generation_kwargs)
+                        # Move model back to original device
+                        self.model_instance.to(self.model_instance.device)
+                    else:
+                        raise e
+            else:
+                generated_ids = self.model_instance.generate(**inputs, **generation_kwargs)
+
+            # Decode response
+            output_text = self.processor.decode(
+                generated_ids[0][inputs["input_ids"].shape[1]:],
+                skip_special_tokens=True
+            )
+
+            # Calculate generation time
+            gen_time = (time.time() - start_time) * 1000
+
+            # Calculate token usage
+            input_tokens = inputs["input_ids"].shape[1]
+            output_tokens = len(generated_ids[0]) - input_tokens
+
+            return GenerateResponse(
+                content=output_text.strip(),
+                model=self.model,
+                finish_reason="stop",
+                usage={
+                    "input_tokens": input_tokens,
+                    "output_tokens": output_tokens,
+                    "total_tokens": input_tokens + output_tokens,
+                    "prompt_tokens": input_tokens,
+                    "completion_tokens": output_tokens
+                },
+                gen_time=gen_time
+            )
+
+        except Exception as e:
+            gen_time = (time.time() - start_time) * 1000 if 'start_time' in locals() else 0.0
+            return GenerateResponse(
+                content=f"Error in vision model generation: {str(e)}",
+                model=self.model,
+                finish_reason="error",
+                gen_time=gen_time
+            )
+
+    def _patch_deepseek_for_mps(self):
+        """Patch DeepSeek-OCR model to work with MPS instead of CUDA"""
+        import types
+
+        def patched_infer(self, tokenizer, prompt='', image_file='', output_path='', base_size=1024, image_size=640, crop_mode=True, test_compress=False, save_results=False, eval_mode=False):
+            """Patched infer method that uses MPS instead of CUDA"""
+            import torch
+
+            # Determine the best available device
+            if torch.backends.mps.is_available():
+                device = torch.device('mps')
+            elif torch.cuda.is_available():
+                device = torch.device('cuda')
+            else:
+                device = torch.device('cpu')
+
+            # Call the original infer method but patch tensor.cuda() calls
+            original_cuda = torch.Tensor.cuda
+
+            def patched_cuda(tensor, device=None, non_blocking=False, **kwargs):
+                """Redirect .cuda() calls to the appropriate device"""
+                if device == 'mps' or (device is None and torch.backends.mps.is_available()):
+                    return tensor.to('mps', non_blocking=non_blocking)
+                elif torch.cuda.is_available():
+                    return original_cuda(tensor, device, non_blocking, **kwargs)
+                else:
+                    return tensor.to('cpu', non_blocking=non_blocking)
+
+            # Temporarily patch the cuda method
+            torch.Tensor.cuda = patched_cuda
+
+            try:
+                # Move model to the appropriate device first
+                self.to(device)
+
+                # Call original infer with device patching
+                return self._original_infer(tokenizer, prompt, image_file, output_path, base_size, image_size, crop_mode, test_compress, save_results, eval_mode)
+            finally:
+                # Restore original cuda method
+                torch.Tensor.cuda = original_cuda
+
+        # Only patch if not already patched
+        if not hasattr(self.model_instance, '_original_infer'):
+            self.model_instance._original_infer = self.model_instance.infer
+            self.model_instance.infer = types.MethodType(patched_infer, self.model_instance)
+
     def _generate_gguf(self,
                        prompt: str,
                        messages: Optional[List[Dict[str, str]]] = None,
@@ -568,6 +1156,7 @@ class HuggingFaceProvider(BaseProvider):
                        tools: Optional[List[Dict[str, Any]]] = None,
                        media: Optional[List['MediaContent']] = None,
                        stream: bool = False,
+                       response_model: Optional[Type[BaseModel]] = None,
                        **kwargs) -> Union[GenerateResponse, Iterator[GenerateResponse]]:
         """Generate using GGUF backend with llama-cpp-python"""
 
@@ -663,6 +1252,19 @@ class HuggingFaceProvider(BaseProvider):
         if seed_value is not None:
             generation_kwargs["seed"] = seed_value
 
+        # Add native structured output support (llama-cpp-python format)
+        # llama-cpp-python supports native structured outputs using the response_format parameter
+        # This provides server-side guaranteed schema compliance
+        if response_model and PYDANTIC_AVAILABLE:
+            json_schema = response_model.model_json_schema()
+            generation_kwargs["response_format"] = {
+                "type": "json_schema",
+                "json_schema": {
+                    "name": response_model.__name__,
+                    "schema": json_schema
+                }
+            }
+
         # Handle tools - both native and prompted support
         has_native_tools = False
         if tools:
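For the GGUF path, the hunk above translates a Pydantic model into the `response_format` payload that the llama-cpp-python backend consumes. A sketch of the resulting structure, using an illustrative model:

```python
from pydantic import BaseModel


class Invoice(BaseModel):
    number: str
    total: float


# Shape of the payload the provider attaches to generation_kwargs above.
response_format = {
    "type": "json_schema",
    "json_schema": {
        "name": Invoice.__name__,
        "schema": Invoice.model_json_schema(),
    },
}
print(response_format["json_schema"]["name"])  # "Invoice"
```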
@@ -858,10 +1460,13 @@ class HuggingFaceProvider(BaseProvider):
         try:
             # Set seed for deterministic generation if provided
             if seed is not None:
-
-
-
-                torch.cuda.
+                try:
+                    import torch
+                    torch.manual_seed(seed)
+                    if torch.cuda.is_available():
+                        torch.cuda.manual_seed_all(seed)
+                except ImportError:
+                    pass  # Skip seeding if torch not available
 
             # Track generation time
             start_time = time.time()
@@ -1147,8 +1752,20 @@ class HuggingFaceProvider(BaseProvider):
 
     @classmethod
     def list_available_models(cls, **kwargs) -> List[str]:
-        """
+        """
+        List available HuggingFace models from local cache (excluding MLX models).
+
+        Args:
+            **kwargs: Optional parameters including:
+                - input_capabilities: List of ModelInputCapability enums to filter by input capability
+                - output_capabilities: List of ModelOutputCapability enums to filter by output capability
+
+        Returns:
+            List of model names, optionally filtered by capabilities
+        """
         try:
+            from .model_capabilities import filter_models_by_capabilities
+
             hf_cache = Path.home() / ".cache" / "huggingface" / "hub"
             if not hf_cache.exists():
                 return []
@@ -1164,7 +1781,21 @@ class HuggingFaceProvider(BaseProvider):
                     if "mlx" not in model_name.lower():
                         models.append(model_name)
 
-
+            models = sorted(models)
+
+            # Apply new capability filtering if provided
+            input_capabilities = kwargs.get('input_capabilities')
+            output_capabilities = kwargs.get('output_capabilities')
+
+            if input_capabilities or output_capabilities:
+                models = filter_models_by_capabilities(
+                    models,
+                    input_capabilities=input_capabilities,
+                    output_capabilities=output_capabilities
+                )
+
+
+            return models
 
         except Exception:
             return []