abstractcore 2.5.2__py3-none-any.whl → 2.5.3__py3-none-any.whl
This diff shows the changes between package versions that have been publicly released to one of the supported registries, as they appear in those registries. It is provided for informational purposes only.
- abstractcore/__init__.py +12 -0
- abstractcore/architectures/detection.py +250 -4
- abstractcore/assets/architecture_formats.json +14 -1
- abstractcore/assets/model_capabilities.json +533 -10
- abstractcore/compression/__init__.py +29 -0
- abstractcore/compression/analytics.py +420 -0
- abstractcore/compression/cache.py +250 -0
- abstractcore/compression/config.py +279 -0
- abstractcore/compression/exceptions.py +30 -0
- abstractcore/compression/glyph_processor.py +381 -0
- abstractcore/compression/optimizer.py +388 -0
- abstractcore/compression/orchestrator.py +380 -0
- abstractcore/compression/pil_text_renderer.py +818 -0
- abstractcore/compression/quality.py +226 -0
- abstractcore/compression/text_formatter.py +666 -0
- abstractcore/compression/vision_compressor.py +371 -0
- abstractcore/config/main.py +64 -0
- abstractcore/config/manager.py +100 -5
- abstractcore/core/session.py +61 -6
- abstractcore/events/__init__.py +1 -1
- abstractcore/media/auto_handler.py +312 -18
- abstractcore/media/handlers/local_handler.py +14 -2
- abstractcore/media/handlers/openai_handler.py +62 -3
- abstractcore/media/processors/__init__.py +11 -1
- abstractcore/media/processors/direct_pdf_processor.py +210 -0
- abstractcore/media/processors/glyph_pdf_processor.py +227 -0
- abstractcore/media/processors/image_processor.py +7 -1
- abstractcore/media/processors/text_processor.py +18 -3
- abstractcore/media/types.py +164 -7
- abstractcore/providers/__init__.py +18 -0
- abstractcore/providers/anthropic_provider.py +28 -2
- abstractcore/providers/base.py +278 -6
- abstractcore/providers/huggingface_provider.py +563 -23
- abstractcore/providers/lmstudio_provider.py +38 -2
- abstractcore/providers/mlx_provider.py +27 -2
- abstractcore/providers/model_capabilities.py +352 -0
- abstractcore/providers/ollama_provider.py +38 -4
- abstractcore/providers/openai_provider.py +28 -2
- abstractcore/providers/registry.py +85 -13
- abstractcore/server/app.py +91 -81
- abstractcore/utils/__init__.py +4 -1
- abstractcore/utils/trace_export.py +287 -0
- abstractcore/utils/version.py +1 -1
- abstractcore/utils/vlm_token_calculator.py +655 -0
- {abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/METADATA +107 -6
- {abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/RECORD +50 -33
- {abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/WHEEL +0 -0
- {abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/entry_points.txt +0 -0
- {abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/top_level.txt +0 -0
The hunks below are from abstractcore/providers/huggingface_provider.py (+563 -23).

```diff
@@ -8,6 +8,25 @@ import json
 from pathlib import Path
 from typing import List, Dict, Any, Optional, Union, Iterator, Type
 
+# Import config manager to respect offline-first settings
+from ..config.manager import get_config_manager
+
+# Get config instance and set offline environment variables if needed
+_config = get_config_manager()
+if _config.is_offline_first():
+    os.environ["TRANSFORMERS_OFFLINE"] = "1"
+    os.environ["HF_DATASETS_OFFLINE"] = "1"
+    os.environ["HF_HUB_OFFLINE"] = "1"
+
+# Enable MPS fallback for Apple Silicon to handle unsupported operations
+# This prevents "MPS: Unsupported Border padding mode" errors in vision models
+try:
+    import torch
+    if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
+        os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
+except ImportError:
+    pass  # torch not available, skip MPS setup
+
 try:
     from pydantic import BaseModel
     PYDANTIC_AVAILABLE = True
```
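Because this block runs at import time, merely importing the provider module can switch the HuggingFace stack into cache-only mode when offline-first is enabled in the AbstractCore config. A minimal sketch of the downstream effect, assuming offline-first is turned on and the model is already cached (the model id is illustrative):

```python
import os
import abstractcore.providers.huggingface_provider  # noqa: F401  (import-time hook)

# With offline-first enabled, the import above sets these flags:
print(os.environ.get("TRANSFORMERS_OFFLINE"))  # "1"
print(os.environ.get("HF_HUB_OFFLINE"))        # "1"

from transformers import AutoTokenizer

# transformers now resolves models from ~/.cache/huggingface/hub only and
# raises an error instead of downloading if the files are not cached.
tokenizer = AutoTokenizer.from_pretrained("gpt2")  # illustrative model id
```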
```diff
@@ -22,7 +41,7 @@ from ..events import EventType
 
 # Try to import transformers (standard HuggingFace support)
 try:
-    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+    from transformers import AutoModelForCausalLM, AutoModel, AutoTokenizer, AutoProcessor, AutoModelForImageTextToText, pipeline
     import torch
     TRANSFORMERS_AVAILABLE = True
 except ImportError:
@@ -46,6 +65,22 @@ except ImportError:
     # huggingface_hub not required for basic operation
 
 
+def _get_local_model_path(model_name: str) -> Optional[str]:
+    """Get local cache path for a HuggingFace model if it exists."""
+    # Use centralized configuration for cache directory
+    config = _config
+    hf_cache_dir = Path(config.config.cache.huggingface_cache_dir).expanduser()
+
+    model_cache_name = f"models--{model_name.replace('/', '--')}"
+    model_cache_path = hf_cache_dir / "hub" / model_cache_name / "snapshots"
+
+    if model_cache_path.exists():
+        snapshot_dirs = [d for d in model_cache_path.iterdir() if d.is_dir()]
+        if snapshot_dirs:
+            return str(snapshot_dirs[0])  # Return first snapshot
+    return None
+
+
 class HuggingFaceProvider(BaseProvider):
     """HuggingFace provider with dual support for transformers and GGUF models"""
 
```
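The new `_get_local_model_path` helper resolves a repo id to a snapshot directory inside the HuggingFace hub cache rather than asking the Hub. A standalone sketch of the layout it walks (cache root and repo id are illustrative; the real code reads the root from AbstractCore's config):

```python
from pathlib import Path

cache_root = Path("~/.cache/huggingface").expanduser()  # illustrative default
repo_id = "zai-org/Glyph"                               # illustrative repo id

# Hub cache layout: <cache>/hub/models--<org>--<name>/snapshots/<revision>/...
snapshots = cache_root / "hub" / f"models--{repo_id.replace('/', '--')}" / "snapshots"
local_path = None
if snapshots.exists():
    # Like the helper above, take the first snapshot directory found.
    local_path = next((str(d) for d in snapshots.iterdir() if d.is_dir()), None)
print(local_path)
```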
```diff
@@ -88,9 +123,19 @@ class HuggingFaceProvider(BaseProvider):
         self.n_gpu_layers = n_gpu_layers
         self.model_type = None  # Will be "transformers" or "gguf"
         self.device = device
+
+        # Store transformers-specific parameters
+        self.transformers_kwargs = {
+            k: v for k, v in kwargs.items()
+            if k in ['trust_remote_code', 'torch_dtype', 'device_map', 'load_in_8bit', 'load_in_4bit', 'attn_implementation']
+        }
+
+        # Store device preference for custom models
+        self.preferred_device = kwargs.get('device_map', 'auto')
 
         # Model instances
         self.tokenizer = None
+        self.processor = None  # For vision models
         self.model_instance = None
         self.pipeline = None
         self.llm = None  # For GGUF models
```
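The constructor now whitelists the kwargs that are forwarded to transformers. A standalone sketch of that filtering (the key list is taken from the hunk above; the sample kwargs are illustrative, and the assumption is that generation parameters such as `temperature` are handled elsewhere):

```python
ALLOWED_TRANSFORMERS_KWARGS = [
    'trust_remote_code', 'torch_dtype', 'device_map',
    'load_in_8bit', 'load_in_4bit', 'attn_implementation',
]

kwargs = {"device_map": "auto", "trust_remote_code": True, "temperature": 0.7}
transformers_kwargs = {k: v for k, v in kwargs.items() if k in ALLOWED_TRANSFORMERS_KWARGS}
print(transformers_kwargs)  # {'device_map': 'auto', 'trust_remote_code': True}
```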
```diff
@@ -127,6 +172,9 @@ class HuggingFaceProvider(BaseProvider):
 
         if hasattr(self, 'tokenizer') and self.tokenizer is not None:
             self.tokenizer = None
+
+        if hasattr(self, 'processor') and self.processor is not None:
+            self.processor = None
 
         if hasattr(self, 'model') and hasattr(self, 'model') and self.model is not None:
             # For transformers models, clear the model
```
```diff
@@ -169,6 +217,26 @@ class HuggingFaceProvider(BaseProvider):
 
         return False
 
+    def _is_vision_model(self, model: str) -> bool:
+        """Detect if the model is a vision model that requires special handling"""
+        model_lower = model.lower()
+
+        # Known vision models that require AutoModelForImageTextToText
+        vision_models = [
+            'glyph',         # zai-org/Glyph
+            'glm-4.1v',      # GLM-4.1V variants
+            'glm4v',         # GLM4V architecture
+            'qwen-vl',       # Qwen-VL models
+            'qwen2-vl',      # Qwen2-VL models
+            'qwen2.5-vl',    # Qwen2.5-VL models
+            'llava',         # LLaVA models
+            'instructblip',  # InstructBLIP models
+            'blip2',         # BLIP2 models
+            'flamingo',      # Flamingo models
+        ]
+
+        return any(vision_keyword in model_lower for vision_keyword in vision_models)
+
     def _setup_device_transformers(self):
         """Setup device for transformers models"""
         if not TRANSFORMERS_AVAILABLE:
```
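Detection is a simple case-insensitive substring match against the keyword list above, so full repo ids match as long as they contain one of the keywords. A quick check of that behavior (model ids are illustrative):

```python
def is_vision_model(model: str) -> bool:
    # Mirrors the substring check added in the hunk above.
    keywords = ['glyph', 'glm-4.1v', 'glm4v', 'qwen-vl', 'qwen2-vl',
                'qwen2.5-vl', 'llava', 'instructblip', 'blip2', 'flamingo']
    return any(k in model.lower() for k in keywords)

print(is_vision_model("zai-org/Glyph"))                # True
print(is_vision_model("Qwen/Qwen2.5-VL-7B-Instruct"))  # True
print(is_vision_model("meta-llama/Llama-3.1-8B"))      # False
```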
```diff
@@ -216,24 +284,65 @@ class HuggingFaceProvider(BaseProvider):
     def _load_transformers_model(self):
         """Load standard HuggingFace transformers model"""
         try:
-
-            self.
+            # Check if this is a vision model that requires special handling
+            if self._is_vision_model(self.model):
+                return self._load_vision_model()
+
+            # Load tokenizer with transformers-specific parameters
+            tokenizer_kwargs = {k: v for k, v in self.transformers_kwargs.items()
+                                if k in ['trust_remote_code']}
+            # Respect offline-first configuration
+            if _config.should_force_local_files_only():
+                tokenizer_kwargs['local_files_only'] = True
+            self.tokenizer = AutoTokenizer.from_pretrained(self.model, **tokenizer_kwargs)
+
+            # Load model with all transformers-specific parameters
+            # Try AutoModelForCausalLM first, fall back to AutoModel for custom models
+            model_kwargs = self.transformers_kwargs.copy()
+            # Respect offline-first configuration
+            if _config.should_force_local_files_only():
+                model_kwargs['local_files_only'] = True
+
+            try:
+                self.model_instance = AutoModelForCausalLM.from_pretrained(self.model, **model_kwargs)
+            except ValueError as e:
+                if "Unrecognized configuration class" in str(e) or "glm4v" in str(e).lower():
+                    # Fall back to AutoModel for custom models like DeepSeek-OCR
+                    self.model_instance = AutoModel.from_pretrained(self.model, **model_kwargs)
+                else:
+                    raise
 
-            # Move to device
-            if self.device in ["cuda", "mps"]:
+            # Move to device (only if not using device_map)
+            if self.device in ["cuda", "mps"] and 'device_map' not in self.transformers_kwargs:
                 self.model_instance = self.model_instance.to(self.device)
 
-            # Create pipeline
+            # Create pipeline - handle custom models that don't support text-generation
             device_arg = 0 if self.device == "cuda" else -1
             if self.device == "mps":
                 device_arg = -1
 
-
-
-
-
-
-
+            try:
+                # Don't pass device argument if using device_map (accelerate)
+                if 'device_map' in self.transformers_kwargs:
+                    self.pipeline = pipeline(
+                        "text-generation",
+                        model=self.model_instance,
+                        tokenizer=self.tokenizer
+                    )
+                else:
+                    self.pipeline = pipeline(
+                        "text-generation",
+                        model=self.model_instance,
+                        tokenizer=self.tokenizer,
+                        device=device_arg
+                    )
+            except ValueError as e:
+                if "not supported for text-generation" in str(e) or "accelerate" in str(e):
+                    # For custom models like DeepSeek-OCR, skip pipeline creation
+                    # We'll handle generation directly through the model
+                    self.pipeline = None
+                else:
+                    raise
 
         except Exception as e:
             error_str = str(e).lower()
```
```diff
@@ -245,6 +354,96 @@ class HuggingFaceProvider(BaseProvider):
             else:
                 raise RuntimeError(f"Failed to load HuggingFace model {self.model}: {str(e)}")
 
+    def _load_vision_model(self):
+        """Load vision model using AutoModelForImageTextToText and AutoProcessor"""
+        try:
+            # Suppress progress bars during model loading unless in debug mode
+            import os
+            from transformers.utils import logging as transformers_logging
+
+            if not self.debug:
+                # Disable transformers progress bars
+                os.environ['TRANSFORMERS_VERBOSITY'] = 'error'
+                transformers_logging.set_verbosity_error()
+                # Disable tqdm progress bars
+                os.environ['DISABLE_TQDM'] = '1'
+
+            # Load processor for vision models (handles both text and images)
+            processor_kwargs = {k: v for k, v in self.transformers_kwargs.items()
+                                if k in ['trust_remote_code']}
+            # Enable trust_remote_code for custom architectures like GLM4V
+            processor_kwargs['trust_remote_code'] = True
+            # Set use_fast=True to avoid the slow processor warning
+            processor_kwargs['use_fast'] = True
+            # Respect offline-first configuration
+            if _config.should_force_local_files_only():
+                processor_kwargs['local_files_only'] = True
+
+            # Use local cache path if offline mode is enabled and model is cached
+            model_path = self.model
+            if _config.should_force_local_files_only():
+                local_path = _get_local_model_path(self.model)
+                if local_path:
+                    model_path = local_path
+                    processor_kwargs.pop('local_files_only', None)  # Remove since we're using local path
+                    self.logger.debug(f"Loading processor from local cache: {local_path}")
+
+            self.processor = AutoProcessor.from_pretrained(model_path, **processor_kwargs)
+
+            # Load vision model using AutoModelForImageTextToText with trust_remote_code
+            vision_kwargs = self.transformers_kwargs.copy()
+            vision_kwargs['trust_remote_code'] = True
+            # Respect offline-first configuration
+            if _config.should_force_local_files_only():
+                vision_kwargs['local_files_only'] = True
+
+            # Use local cache path if offline mode is enabled and model is cached
+            model_path = self.model
+            if _config.should_force_local_files_only():
+                local_path = _get_local_model_path(self.model)
+                if local_path:
+                    model_path = local_path
+                    vision_kwargs.pop('local_files_only', None)  # Remove since we're using local path
+                    self.logger.debug(f"Loading model from local cache: {local_path}")
+
+            self.model_instance = AutoModelForImageTextToText.from_pretrained(model_path, **vision_kwargs)
+
+            # Restore logging levels if they were suppressed
+            if not self.debug:
+                # Restore transformers logging
+                transformers_logging.set_verbosity_warning()
+                # Remove tqdm suppression
+                if 'DISABLE_TQDM' in os.environ:
+                    del os.environ['DISABLE_TQDM']
+
+            # Move to device (only if not using device_map)
+            if self.device in ["cuda", "mps"] and 'device_map' not in self.transformers_kwargs:
+                self.model_instance = self.model_instance.to(self.device)
+
+            # For vision models, we don't use the standard pipeline
+            self.pipeline = None
+
+            self.logger.info(f"Successfully loaded vision model {self.model} using AutoModelForImageTextToText")
+
+        except Exception as e:
+            error_str = str(e).lower()
+
+            # Check for transformers version issues
+            if 'glm4v' in error_str and 'does not recognize this architecture' in error_str:
+                import transformers
+                current_version = transformers.__version__
+                raise RuntimeError(
+                    f"GLM4V architecture requires transformers>=4.57.1, but you have {current_version}. "
+                    f"Please upgrade: pip install transformers>=4.57.1"
+                )
+            elif ('not found' in error_str or 'does not exist' in error_str or
+                  'not a valid model identifier' in error_str):
+                available_models = self.list_available_models()
+                error_message = format_model_error("HuggingFace", self.model, available_models)
+                raise ModelNotFoundError(error_message)
+            else:
+                raise RuntimeError(f"Failed to load HuggingFace vision model {self.model}: {str(e)}")
+
     def _find_gguf_in_cache(self, model_name: str) -> Optional[str]:
         """Find GGUF model in HuggingFace cache (cache-only, no downloading)"""
 
```
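Outside of AbstractCore, the same loading path can be exercised directly with transformers. This is a hedged sketch of what `_load_vision_model` and the generation code later in this diff boil down to (model id and image path are illustrative, and a recent transformers release that ships `AutoModelForImageTextToText` is assumed):

```python
from transformers import AutoModelForImageTextToText, AutoProcessor

model_id = "zai-org/Glyph"  # illustrative; any id matched by _is_vision_model()

processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True, use_fast=True)
model = AutoModelForImageTextToText.from_pretrained(model_id, trust_remote_code=True)

# Message format mirrors what _generate_vision_model builds further down.
messages = [{"role": "user", "content": [
    {"type": "image", "url": "page.png"},      # illustrative local image
    {"type": "text", "text": "Transcribe this page."},
]}]
inputs = processor.apply_chat_template(
    messages, tokenize=True, add_generation_prompt=True,
    return_dict=True, return_tensors="pt",
).to(model.device)

generated = model.generate(**inputs, max_new_tokens=256)
print(processor.decode(generated[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))
```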
```diff
@@ -513,11 +712,18 @@ class HuggingFaceProvider(BaseProvider):
         """Generate using transformers backend with optional Outlines native structured output"""
 
         if not self.pipeline:
-
-
-
-
-            )
+            # Handle vision models that use processor instead of pipeline
+            if self.processor and hasattr(self.model_instance, 'generate'):
+                return self._generate_vision_model(prompt, messages, system_prompt, tools, media, stream, response_model, **kwargs)
+            # Handle custom models like DeepSeek-OCR that don't support standard pipelines
+            elif hasattr(self.model_instance, 'infer'):
+                return self._generate_custom_model(prompt, messages, system_prompt, tools, media, stream, response_model, **kwargs)
+            else:
+                return GenerateResponse(
+                    content="Error: Transformers model not loaded or doesn't support generation",
+                    model=self.model,
+                    finish_reason="error"
+                )
 
         # Native structured output via Outlines (if configured and available)
         should_use_outlines = (
```
```diff
@@ -638,6 +844,311 @@ class HuggingFaceProvider(BaseProvider):
                 finish_reason="error"
             )
 
+    def _generate_custom_model(self,
+                               prompt: str,
+                               messages: Optional[List[Dict[str, str]]] = None,
+                               system_prompt: Optional[str] = None,
+                               tools: Optional[List[Dict[str, Any]]] = None,
+                               media: Optional[List['MediaContent']] = None,
+                               stream: bool = False,
+                               response_model: Optional[Type[BaseModel]] = None,
+                               **kwargs) -> Union[GenerateResponse, Iterator[GenerateResponse]]:
+        """Generate using custom model methods (e.g., DeepSeek-OCR's infer method)"""
+
+        import time
+        import tempfile
+        import os
+        start_time = time.time()
+
+        try:
+            # Handle media content for vision models like DeepSeek-OCR
+            if media and len(media) > 0:
+                # Use the first image for OCR
+                media_item = media[0]
+
+                # DeepSeek-OCR expects image file path
+                if hasattr(media_item, 'file_path') and media_item.file_path:
+                    image_file = str(media_item.file_path)
+                else:
+                    # If no file path, save media content to temp file
+                    from PIL import Image
+
+                    if hasattr(media_item, 'content') and media_item.content:
+                        # Handle base64 content
+                        if media_item.content_format == 'BASE64':
+                            import base64
+                            image_data = base64.b64decode(media_item.content)
+                            temp_file = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
+                            temp_file.write(image_data)
+                            temp_file.close()
+                            image_file = temp_file.name
+                        else:
+                            return GenerateResponse(
+                                content="Error: Unsupported media format for DeepSeek-OCR",
+                                model=self.model,
+                                finish_reason="error"
+                            )
+                    else:
+                        return GenerateResponse(
+                            content="Error: No valid image content found",
+                            model=self.model,
+                            finish_reason="error"
+                        )
+
+                # Use DeepSeek-OCR's infer method
+                try:
+                    # Create temporary output directory for DeepSeek-OCR
+                    temp_output_dir = tempfile.mkdtemp()
+
+                    # Patch DeepSeek-OCR for MPS/CPU compatibility if needed
+                    if self.device == "mps" or (self.device is None and hasattr(torch.backends, 'mps') and torch.backends.mps.is_available()):
+                        self._patch_deepseek_for_mps()
+
+                    result = self.model_instance.infer(
+                        self.tokenizer,
+                        prompt=prompt,
+                        image_file=image_file,
+                        output_path=temp_output_dir,  # DeepSeek-OCR requires output path
+                        base_size=1024,
+                        image_size=640,
+                        crop_mode=True,
+                        save_results=False,
+                        test_compress=False
+                    )
+
+                    # Clean up temp output directory
+                    import shutil
+                    shutil.rmtree(temp_output_dir, ignore_errors=True)
+
+                    # Clean up temp file if created
+                    if 'temp_file' in locals() and os.path.exists(image_file):
+                        os.unlink(image_file)
+
+                    # Calculate generation time
+                    gen_time = (time.time() - start_time) * 1000
+
+                    return GenerateResponse(
+                        content=result if isinstance(result, str) else str(result),
+                        model=self.model,
+                        finish_reason="stop",
+                        input_tokens=len(prompt.split()),  # Rough estimate
+                        output_tokens=len(str(result).split()) if result else 0,
+                        gen_time=gen_time
+                    )
+
+                except Exception as e:
+                    return GenerateResponse(
+                        content=f"Error during DeepSeek-OCR inference: {str(e)}",
+                        model=self.model,
+                        finish_reason="error"
+                    )
+            else:
+                return GenerateResponse(
+                    content="Error: DeepSeek-OCR requires image input",
+                    model=self.model,
+                    finish_reason="error"
+                )
+
+        except Exception as e:
+            return GenerateResponse(
+                content=f"Error in custom model generation: {str(e)}",
+                model=self.model,
+                finish_reason="error"
+            )
+
+    def _generate_vision_model(self,
+                               prompt: str,
+                               messages: Optional[List[Dict[str, str]]] = None,
+                               system_prompt: Optional[str] = None,
+                               tools: Optional[List[Dict[str, Any]]] = None,
+                               media: Optional[List['MediaContent']] = None,
+                               stream: bool = False,
+                               response_model: Optional[Type[BaseModel]] = None,
+                               **kwargs) -> Union[GenerateResponse, Iterator[GenerateResponse]]:
+        """Generate using vision model (Glyph, GLM-4.1V, etc.)"""
+
+        import time
+        start_time = time.time()
+
+        # Import torch safely
+        try:
+            import torch
+        except ImportError:
+            return GenerateResponse(
+                content="Error: PyTorch not available for vision model generation",
+                model=self.model,
+                finish_reason="error",
+                gen_time=0.0
+            )
+
+        try:
+            # Build messages for vision model
+            chat_messages = []
+
+            if system_prompt:
+                chat_messages.append({"role": "system", "content": system_prompt})
+
+            if messages:
+                chat_messages.extend(messages)
+
+            # Build user message with media content
+            user_content = []
+
+            # Add text content
+            if prompt:
+                user_content.append({"type": "text", "text": prompt})
+
+            # Add media content (images)
+            if media:
+                for media_item in media:
+                    if hasattr(media_item, 'file_path') and media_item.file_path:
+                        # Use file path directly
+                        user_content.append({
+                            "type": "image",
+                            "url": str(media_item.file_path)
+                        })
+                    elif hasattr(media_item, 'content') and media_item.content:
+                        # Handle base64 content
+                        if media_item.content_format == 'BASE64':
+                            # Create data URL for base64 content
+                            mime_type = getattr(media_item, 'mime_type', 'image/png')
+                            data_url = f"data:{mime_type};base64,{media_item.content}"
+                            user_content.append({
+                                "type": "image",
+                                "url": data_url
+                            })
+
+            # Add user message
+            chat_messages.append({
+                "role": "user",
+                "content": user_content
+            })
+
+            # Process messages using the processor
+            inputs = self.processor.apply_chat_template(
+                chat_messages,
+                tokenize=True,
+                add_generation_prompt=True,
+                return_dict=True,
+                return_tensors="pt"
+            ).to(self.model_instance.device)
+
+            # Generation parameters
+            generation_kwargs = {
+                "max_new_tokens": kwargs.get("max_tokens", self.max_output_tokens or 512),
+                "temperature": kwargs.get("temperature", self.temperature),
+                "do_sample": True,
+                "pad_token_id": self.processor.tokenizer.eos_token_id,
+            }
+
+            # Add seed if provided
+            seed_value = kwargs.get("seed", self.seed)
+            if seed_value is not None:
+                torch.manual_seed(seed_value)
+                if torch.cuda.is_available():
+                    torch.cuda.manual_seed_all(seed_value)
+
+            # Generate response
+            # For Apple Silicon, move inputs to CPU if MPS causes issues
+            if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
+                try:
+                    generated_ids = self.model_instance.generate(**inputs, **generation_kwargs)
+                except RuntimeError as e:
+                    if "MPS: Unsupported Border padding mode" in str(e):
+                        self.logger.warning("MPS Border padding mode error detected, falling back to CPU")
+                        # Move model and inputs to CPU
+                        cpu_model = self.model_instance.to('cpu')
+                        cpu_inputs = {k: v.to('cpu') if hasattr(v, 'to') else v for k, v in inputs.items()}
+                        generated_ids = cpu_model.generate(**cpu_inputs, **generation_kwargs)
+                        # Move model back to original device
+                        self.model_instance.to(self.model_instance.device)
+                    else:
+                        raise e
+            else:
+                generated_ids = self.model_instance.generate(**inputs, **generation_kwargs)
+
+            # Decode response
+            output_text = self.processor.decode(
+                generated_ids[0][inputs["input_ids"].shape[1]:],
+                skip_special_tokens=True
+            )
+
+            # Calculate generation time
+            gen_time = (time.time() - start_time) * 1000
+
+            # Calculate token usage
+            input_tokens = inputs["input_ids"].shape[1]
+            output_tokens = len(generated_ids[0]) - input_tokens
+
+            return GenerateResponse(
+                content=output_text.strip(),
+                model=self.model,
+                finish_reason="stop",
+                usage={
+                    "input_tokens": input_tokens,
+                    "output_tokens": output_tokens,
+                    "total_tokens": input_tokens + output_tokens,
+                    "prompt_tokens": input_tokens,
+                    "completion_tokens": output_tokens
+                },
+                gen_time=gen_time
+            )
+
+        except Exception as e:
+            gen_time = (time.time() - start_time) * 1000 if 'start_time' in locals() else 0.0
+            return GenerateResponse(
+                content=f"Error in vision model generation: {str(e)}",
+                model=self.model,
+                finish_reason="error",
+                gen_time=gen_time
+            )
+
+    def _patch_deepseek_for_mps(self):
+        """Patch DeepSeek-OCR model to work with MPS instead of CUDA"""
+        import types
+
+        def patched_infer(self, tokenizer, prompt='', image_file='', output_path='', base_size=1024, image_size=640, crop_mode=True, test_compress=False, save_results=False, eval_mode=False):
+            """Patched infer method that uses MPS instead of CUDA"""
+            import torch
+
+            # Determine the best available device
+            if torch.backends.mps.is_available():
+                device = torch.device('mps')
+            elif torch.cuda.is_available():
+                device = torch.device('cuda')
+            else:
+                device = torch.device('cpu')
+
+            # Call the original infer method but patch tensor.cuda() calls
+            original_cuda = torch.Tensor.cuda
+
+            def patched_cuda(tensor, device=None, non_blocking=False, **kwargs):
+                """Redirect .cuda() calls to the appropriate device"""
+                if device == 'mps' or (device is None and torch.backends.mps.is_available()):
+                    return tensor.to('mps', non_blocking=non_blocking)
+                elif torch.cuda.is_available():
+                    return original_cuda(tensor, device, non_blocking, **kwargs)
+                else:
+                    return tensor.to('cpu', non_blocking=non_blocking)
+
+            # Temporarily patch the cuda method
+            torch.Tensor.cuda = patched_cuda
+
+            try:
+                # Move model to the appropriate device first
+                self.to(device)
+
+                # Call original infer with device patching
+                return self._original_infer(tokenizer, prompt, image_file, output_path, base_size, image_size, crop_mode, test_compress, save_results, eval_mode)
+            finally:
+                # Restore original cuda method
+                torch.Tensor.cuda = original_cuda
+
+        # Only patch if not already patched
+        if not hasattr(self.model_instance, '_original_infer'):
+            self.model_instance._original_infer = self.model_instance.infer
+            self.model_instance.infer = types.MethodType(patched_infer, self.model_instance)
+
     def _generate_gguf(self,
                        prompt: str,
                        messages: Optional[List[Dict[str, str]]] = None,
```
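`_patch_deepseek_for_mps` works by temporarily rebinding `torch.Tensor.cuda` so that third-party model code hard-coded to call `.cuda()` lands on MPS (or CPU) instead. The same idea in isolation, as a context manager, assuming a PyTorch build recent enough to expose `torch.backends.mps`:

```python
import contextlib
import torch

@contextlib.contextmanager
def redirect_cuda_calls():
    """Temporarily send tensor.cuda() to the best available device."""
    target = "mps" if torch.backends.mps.is_available() else (
        "cuda" if torch.cuda.is_available() else "cpu")
    original_cuda = torch.Tensor.cuda

    def patched(tensor, *args, **kwargs):
        if target == "cuda":
            return original_cuda(tensor, *args, **kwargs)
        return tensor.to(target)  # redirect to mps or cpu

    torch.Tensor.cuda = patched
    try:
        yield
    finally:
        torch.Tensor.cuda = original_cuda  # always restore the real method

with redirect_cuda_calls():
    x = torch.ones(2).cuda()  # no CUDA required; ends up on mps or cpu
print(x.device)
```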
```diff
@@ -949,10 +1460,13 @@ class HuggingFaceProvider(BaseProvider):
         try:
             # Set seed for deterministic generation if provided
             if seed is not None:
-
-
-
-                torch.cuda.
+                try:
+                    import torch
+                    torch.manual_seed(seed)
+                    if torch.cuda.is_available():
+                        torch.cuda.manual_seed_all(seed)
+                except ImportError:
+                    pass  # Skip seeding if torch not available
 
             # Track generation time
             start_time = time.time()
```
```diff
@@ -1238,8 +1752,20 @@ class HuggingFaceProvider(BaseProvider):
 
     @classmethod
     def list_available_models(cls, **kwargs) -> List[str]:
-        """
+        """
+        List available HuggingFace models from local cache (excluding MLX models).
+
+        Args:
+            **kwargs: Optional parameters including:
+                - input_capabilities: List of ModelInputCapability enums to filter by input capability
+                - output_capabilities: List of ModelOutputCapability enums to filter by output capability
+
+        Returns:
+            List of model names, optionally filtered by capabilities
+        """
        try:
+            from .model_capabilities import filter_models_by_capabilities
+
             hf_cache = Path.home() / ".cache" / "huggingface" / "hub"
             if not hf_cache.exists():
                 return []
@@ -1255,7 +1781,21 @@ class HuggingFaceProvider(BaseProvider):
                 if "mlx" not in model_name.lower():
                     models.append(model_name)
 
-
+            models = sorted(models)
+
+            # Apply new capability filtering if provided
+            input_capabilities = kwargs.get('input_capabilities')
+            output_capabilities = kwargs.get('output_capabilities')
+
+            if input_capabilities or output_capabilities:
+                models = filter_models_by_capabilities(
+                    models,
+                    input_capabilities=input_capabilities,
+                    output_capabilities=output_capabilities
+                )
+
+
+            return models
 
         except Exception:
             return []
```