abstractcore 2.4.3__py3-none-any.whl → 2.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/cli/__init__.py +9 -0
- abstractcore/cli/main.py +759 -0
- abstractcore/cli/vision_config.py +491 -0
- abstractcore/media/handlers/__init__.py +16 -0
- abstractcore/media/handlers/anthropic_handler.py +326 -0
- abstractcore/media/handlers/local_handler.py +541 -0
- abstractcore/media/handlers/openai_handler.py +281 -0
- abstractcore/media/processors/__init__.py +13 -0
- abstractcore/media/processors/image_processor.py +610 -0
- abstractcore/media/processors/office_processor.py +490 -0
- abstractcore/media/processors/pdf_processor.py +485 -0
- abstractcore/media/processors/text_processor.py +557 -0
- abstractcore/media/utils/__init__.py +22 -0
- abstractcore/media/utils/image_scaler.py +306 -0
- abstractcore/providers/base.py +97 -0
- abstractcore/providers/huggingface_provider.py +17 -8
- abstractcore/utils/version.py +1 -1
- {abstractcore-2.4.3.dist-info → abstractcore-2.4.5.dist-info}/METADATA +1 -1
- {abstractcore-2.4.3.dist-info → abstractcore-2.4.5.dist-info}/RECORD +23 -9
- {abstractcore-2.4.3.dist-info → abstractcore-2.4.5.dist-info}/entry_points.txt +2 -0
- {abstractcore-2.4.3.dist-info → abstractcore-2.4.5.dist-info}/WHEEL +0 -0
- {abstractcore-2.4.3.dist-info → abstractcore-2.4.5.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.4.3.dist-info → abstractcore-2.4.5.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,491 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Vision Configuration CLI Commands
|
|
3
|
+
|
|
4
|
+
Handles CLI commands for vision fallback configuration:
|
|
5
|
+
- abstractcore --set-vision-caption
|
|
6
|
+
- abstractcore --set-vision-provider
|
|
7
|
+
- abstractcore --vision-status
|
|
8
|
+
- abstractcore --list-vision
|
|
9
|
+
- abstractcore --download-vision-model
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import os
|
|
13
|
+
import argparse
|
|
14
|
+
from typing import Optional, Dict, Any
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
def handle_vision_commands(args) -> bool:
    """
    Handle vision-related CLI commands.

    Dispatches to the matching handler for --set-vision-caption,
    --set-vision-provider, --vision-status, --list-vision,
    --download-vision-model and --configure vision.

    Returns True if a vision command was processed, False otherwise.
    """
    from ..media.vision_fallback import VisionFallbackHandler

    handler = VisionFallbackHandler()

    # getattr with a default replaces the original hasattr+attribute pairs;
    # behavior is identical for namespaces that lack the attribute.
    if getattr(args, 'set_vision_caption', None):
        return handle_set_vision_caption(handler, args.set_vision_caption)

    if getattr(args, 'set_vision_provider', None):
        provider, model = args.set_vision_provider
        return handle_set_vision_provider(handler, provider, model)

    if getattr(args, 'vision_status', False):
        return handle_vision_status(handler)

    if getattr(args, 'list_vision', False):
        return handle_list_vision(handler)

    download = getattr(args, 'download_vision_model', None)
    if download:
        # argparse nargs='?' const=True: the bare flag yields the bool True,
        # otherwise a model-name string. Use identity, not `!= True`, so the
        # sentinel check cannot be confused with value equality.
        model_name = "blip-base-caption" if download is True else download
        return handle_download_vision_model(handler, model_name)

    if getattr(args, 'configure', None) == 'vision':
        return handle_configure_vision(handler)

    return False
|
|
48
|
+
|
|
49
|
+
def handle_set_vision_caption(handler: 'VisionFallbackHandler', model: str) -> bool:
    """Handle --set-vision-caption: auto-detect the provider and configure it.

    Always returns True (the command was consumed), even on failure.
    """
    print(f"🔧 Setting vision caption model: {model}")

    # Infer the provider from the model name; bail out with guidance if ambiguous.
    provider = detect_provider_from_model(model)
    if provider is None:
        print("❌ Could not determine provider from model name.")
        print("💡 Use --set-vision-provider instead: abstractcore --set-vision-provider ollama --model qwen2.5vl:7b")
        return True

    if handler.set_vision_provider(provider, model):
        print(f"✅ Vision caption model set to {provider}/{model}")
        print("🎯 Vision fallback is now enabled for text-only models")
        print("\n💡 Test it: Use any text-only model with an image")
    else:
        print(f"❌ Failed to set vision caption model {provider}/{model}")
        print("💡 Check that the provider and model are available")

    return True
|
|
70
|
+
|
|
71
|
+
def handle_set_vision_provider(handler: 'VisionFallbackHandler', provider: str, model: str) -> bool:
    """Handle --set-vision-provider: configure an explicit provider/model pair.

    Always returns True (the command was consumed), even on failure.
    """
    print(f"🔧 Setting vision provider: {provider}/{model}")

    if handler.set_vision_provider(provider, model):
        print(f"✅ Vision provider set to {provider}/{model}")
        print("🎯 Vision fallback is now enabled for text-only models")
        print("\n💡 Test it: Use any text-only model with an image")
    else:
        print(f"❌ Failed to set vision provider {provider}/{model}")
        print("💡 Check that the provider and model are available")
        print("💡 Make sure the model supports vision capabilities")

    return True
|
|
86
|
+
|
|
87
|
+
def handle_vision_status(handler: 'VisionFallbackHandler') -> bool:
    """Handle --vision-status: print the current vision configuration report.

    Reads handler.get_status() and renders strategy, primary provider,
    fallbacks, downloaded local models and recommendations. Returns True.
    """
    print("🔍 Vision Configuration Status")
    print("=" * 50)

    status = handler.get_status()

    # Strategy line (defaults to 'unknown' when absent from the status dict).
    print(f"📋 Strategy: {status.get('strategy', 'unknown')}")

    # Primary provider may be unset.
    primary = status.get('primary_provider')
    if primary:
        icon = "✅" if primary['status'] == 'available' else "❌"
        print(f"🎯 Primary: {icon} {primary['provider']}/{primary['model']}")
    else:
        print("🎯 Primary: ❌ Not configured")

    # Fallback providers, each with its own availability marker.
    fallbacks = status.get('fallback_providers', [])
    if fallbacks:
        print("🔄 Fallbacks:")
        for entry in fallbacks:
            icon = "✅" if entry['status'] == 'available' else "❌"
            print(f"   {icon} {entry['provider']}/{entry['model']}")
    else:
        print("🔄 Fallbacks: None configured")

    # Locally downloaded caption models.
    local_models = status.get('local_models', [])
    if local_models:
        print("💾 Local Models:")
        for entry in local_models:
            print(f"   ✅ {entry['name']}")
    else:
        print("💾 Local Models: None downloaded")

    # Optional recommendations section.
    recommendations = status.get('recommendations', [])
    if recommendations:
        print("\n💡 Recommendations:")
        for tip in recommendations:
            print(f"   • {tip}")

    print("=" * 50)
    return True
|
|
136
|
+
|
|
137
|
+
def handle_list_vision(handler: 'VisionFallbackHandler') -> bool:
    """Handle --list-vision: print providers, models and usage examples.

    Purely informational -- the handler argument is accepted for a uniform
    command signature but not used. Returns True.
    """
    print("📋 Available Vision Configuration Options")
    print("=" * 60)

    print("\n🔧 PROVIDERS & MODELS")
    print("-" * 30)

    # Common vision models by provider
    provider_models = {
        "ollama": [
            "qwen2.5vl:7b - Qwen 2.5 Vision 7B (recommended)",
            "llama3.2-vision:11b - LLaMA 3.2 Vision 11B",
            "granite3.2-vision:2b - IBM Granite Vision 2B"
        ],
        "openai": [
            "gpt-4o - GPT-4 Omni (premium)",
            "gpt-4o-mini - GPT-4 Omni Mini (cost-effective)",
            "gpt-4-turbo-with-vision - GPT-4 Turbo Vision"
        ],
        "anthropic": [
            "claude-3.5-sonnet - Claude 3.5 Sonnet",
            "claude-3.5-haiku - Claude 3.5 Haiku",
            "claude-3-opus - Claude 3 Opus"
        ],
        "huggingface": [
            "unsloth/Qwen2.5-VL-7B-Instruct-GGUF - GGUF format",
        ],
        "lmstudio": [
            "qwen/qwen2.5-vl-7b - Qwen 2.5 Vision 7B",
            "google/gemma-3n-e4b - Gemma 3n Vision",
            "mistralai/magistral-small-2509 - Mistral Vision"
        ]
    }

    for provider, entries in provider_models.items():
        print(f"\n{provider.upper()}:")
        for entry in entries:
            print(f"  • {entry}")

    print("\n💾 DOWNLOADABLE MODELS")
    print("-" * 30)
    for entry in (
        "blip-base-caption (~990MB) - Basic image captioning",
        "git-base (~400MB) - Lightweight Microsoft GIT model",
        "vit-gpt2 (~500MB) - ViT + GPT-2 captioning model",
    ):
        print(f"  • {entry}")

    print("\n📖 CONFIGURATION COMMANDS")
    print("-" * 30)
    for cmd in (
        "abstractcore --set-vision-caption qwen2.5vl:7b",
        "abstractcore --set-vision-provider ollama --model qwen2.5vl:7b",
        "abstractcore --set-vision-provider openai --model gpt-4o",
        "abstractcore --download-vision-model",
        "abstractcore --download-vision-model blip-base-caption",
        "abstractcore --vision-status",
        "abstractcore --configure vision",
    ):
        print(f"  {cmd}")

    print("\n💡 QUICK START")
    print("-" * 30)
    print("  1. For local models: abstractcore --set-vision-caption qwen2.5vl:7b")
    print("  2. For cloud APIs: abstractcore --set-vision-provider openai --model gpt-4o")
    print("  3. For offline use: abstractcore --download-vision-model")

    print("=" * 60)
    return True
|
|
206
|
+
|
|
207
|
+
def handle_download_vision_model(handler: 'VisionFallbackHandler', model_name: str) -> bool:
    """Handle --download-vision-model: fetch a captioning model for offline use.

    Validates the name against a small catalog, downloads via transformers
    into handler.config.local_models_path, enables the 'two_stage' strategy
    on success, and cleans up partial downloads on failure. Returns True.
    """
    print(f"📥 Downloading vision model: {model_name}")

    # Catalog of downloadable captioning models (HuggingFace hub ids).
    catalog = {
        "blip-base-caption": {
            "url": "Salesforce/blip-image-captioning-base",
            "size": "990MB",
            "description": "Basic image captioning model"
        },
        "git-base": {
            "url": "microsoft/git-base",
            "size": "400MB",
            "description": "Lightweight Microsoft GIT model"
        },
        "vit-gpt2": {
            "url": "nlpconnect/vit-gpt2-image-captioning",
            "size": "500MB",
            "description": "ViT + GPT-2 captioning model"
        }
    }

    if model_name not in catalog:
        print(f"❌ Model '{model_name}' not available for download")
        print("\n📋 Available models:")
        for name, info in catalog.items():
            print(f"   • {name} ({info['size']}) - {info['description']}")
        return True

    info = catalog[model_name]
    print(f"📊 Model: {info['description']}")
    print(f"📦 Size: {info['size']}")
    print(f"🔗 Source: {info['url']}")

    # Probe for the transformers library before touching the filesystem.
    try:
        import transformers  # noqa: F401 -- availability check only
    except ImportError:
        print("❌ transformers library not installed")
        print("💡 Install with: pip install transformers torch")
        return True

    models_dir = Path(handler.config.local_models_path).expanduser()
    model_path = models_dir / model_name

    # Already present: just (re)enable the two-stage fallback strategy.
    if model_path.exists():
        print(f"✅ Model already downloaded at {model_path}")
        handler.config.strategy = "two_stage"
        handler._save_config(handler.config)
        print("🎯 Vision fallback enabled with local model")
        return True

    try:
        print("🔄 Downloading model...")
        models_dir.mkdir(parents=True, exist_ok=True)

        from transformers import AutoProcessor, AutoModel

        # Fetch processor and weights from the hub, then persist locally.
        processor = AutoProcessor.from_pretrained(info['url'], use_fast=False)
        model = AutoModel.from_pretrained(info['url'])
        processor.save_pretrained(str(model_path))
        model.save_pretrained(str(model_path))

        print(f"✅ Model downloaded successfully to {model_path}")

        handler.config.strategy = "two_stage"
        handler._save_config(handler.config)

        print("🎯 Vision fallback enabled with local model")
        print("\n💡 Test it: Use any text-only model with an image")

    except Exception as e:
        print(f"❌ Download failed: {e}")
        print("💡 Check internet connection and disk space")

        # Remove any partially-written model directory.
        if model_path.exists():
            import shutil
            shutil.rmtree(model_path)

    return True
|
|
297
|
+
|
|
298
|
+
def handle_configure_vision(handler: 'VisionFallbackHandler') -> bool:
    """Handle --configure vision command (interactive setup).

    Presents a numbered menu and dispatches to the matching configuration
    helper. Always returns True so the CLI treats the command as handled.
    """
    print("🔧 Interactive Vision Configuration")
    print("=" * 50)

    print("\nChoose your vision configuration strategy:")
    print("1. Use existing local model (Ollama/LMStudio)")
    print("2. Use cloud API (OpenAI/Anthropic)")
    print("3. Download lightweight local model")
    print("4. Show current status")
    print("5. Disable vision fallback")

    try:
        choice = input("\nEnter choice (1-5): ").strip()

        if choice == "1":
            return configure_local_provider(handler)
        elif choice == "2":
            return configure_cloud_provider(handler)
        elif choice == "3":
            return configure_download_model(handler)
        elif choice == "4":
            return handle_vision_status(handler)
        elif choice == "5":
            handler.disable()
            print("✅ Vision fallback disabled")
            return True
        else:
            print("❌ Invalid choice")
            return True

    except (KeyboardInterrupt, EOFError):
        # EOFError: stdin closed (piped/non-interactive use) -- treat like
        # Ctrl-C instead of crashing with an unhandled exception.
        print("\n👋 Configuration cancelled")
        return True
|
|
332
|
+
|
|
333
|
+
def configure_local_provider(handler: 'VisionFallbackHandler') -> bool:
    """Interactive configuration for local providers (ollama/lmstudio/huggingface).

    Prompts for a provider and model name, then calls
    handler.set_vision_provider. Always returns True.
    """
    print("\n🔧 Configure Local Provider")
    print("-" * 30)

    providers = ["ollama", "lmstudio", "huggingface"]
    print("Available providers:")
    for i, provider in enumerate(providers, 1):
        print(f"{i}. {provider}")

    try:
        provider_choice = input("Choose provider (1-3): ").strip()
        provider_idx = int(provider_choice) - 1

        if provider_idx < 0 or provider_idx >= len(providers):
            print("❌ Invalid provider choice")
            return True

        provider = providers[provider_idx]

        # Suggest models based on provider
        model_suggestions = {
            "ollama": ["qwen2.5vl:7b", "llama3.2-vision:11b", "granite3.2-vision:2b"],
            "lmstudio": ["qwen/qwen2.5-vl-7b", "google/gemma-3n-e4b"],
            "huggingface": ["unsloth/Qwen2.5-VL-7B-Instruct-GGUF"]
        }

        print(f"\nSuggested models for {provider}:")
        for i, model in enumerate(model_suggestions[provider], 1):
            print(f"{i}. {model}")

        model = input("Enter model name: ").strip()
        if not model:
            print("❌ Model name required")
            return True

        if handler.set_vision_provider(provider, model):
            print(f"✅ Vision provider configured: {provider}/{model}")
        else:
            print(f"❌ Failed to configure {provider}/{model}")

    except (ValueError, KeyboardInterrupt, EOFError):
        # EOFError added: a closed/piped stdin previously crashed the CLI
        # instead of being treated as a cancelled configuration.
        print("❌ Invalid input or cancelled")

    return True
|
|
379
|
+
|
|
380
|
+
def configure_cloud_provider(handler: 'VisionFallbackHandler') -> bool:
    """Interactive configuration for cloud providers (openai/anthropic).

    Prompts for a provider and model name, warns if the provider's API-key
    environment variable is unset, then calls handler.set_vision_provider.
    Always returns True.
    """
    print("\n☁️ Configure Cloud Provider")
    print("-" * 30)

    providers = ["openai", "anthropic"]
    print("Available cloud providers:")
    for i, provider in enumerate(providers, 1):
        print(f"{i}. {provider}")

    try:
        provider_choice = input("Choose provider (1-2): ").strip()
        provider_idx = int(provider_choice) - 1

        if provider_idx < 0 or provider_idx >= len(providers):
            print("❌ Invalid provider choice")
            return True

        provider = providers[provider_idx]

        # Suggest models based on provider
        model_suggestions = {
            "openai": ["gpt-4o", "gpt-4o-mini", "gpt-4-turbo-with-vision"],
            "anthropic": ["claude-3.5-sonnet", "claude-3.5-haiku", "claude-3-opus"]
        }

        print(f"\nSuggested models for {provider}:")
        for i, model in enumerate(model_suggestions[provider], 1):
            print(f"{i}. {model}")

        model = input("Enter model name: ").strip()
        if not model:
            print("❌ Model name required")
            return True

        # Warn (but do not block) when the provider's API key is missing.
        api_key_var = f"{provider.upper()}_API_KEY"
        if not os.getenv(api_key_var):
            print(f"⚠️ {api_key_var} environment variable not set")
            print(f"💡 Set it with: export {api_key_var}=your_api_key")

        if handler.set_vision_provider(provider, model):
            print(f"✅ Vision provider configured: {provider}/{model}")
        else:
            print(f"❌ Failed to configure {provider}/{model}")

    except (ValueError, KeyboardInterrupt, EOFError):
        # EOFError added: a closed/piped stdin previously crashed the CLI
        # instead of being treated as a cancelled configuration.
        print("❌ Invalid input or cancelled")

    return True
|
|
431
|
+
|
|
432
|
+
def configure_download_model(handler: 'VisionFallbackHandler') -> bool:
    """Interactive configuration for downloading models.

    Prompts for one of the downloadable captioning models and delegates to
    handle_download_vision_model. Always returns True.
    """
    print("\n📥 Download Vision Model")
    print("-" * 30)

    models = ["blip-base-caption", "git-base", "vit-gpt2"]
    print("Available models for download:")
    for i, model in enumerate(models, 1):
        print(f"{i}. {model}")

    try:
        model_choice = input("Choose model (1-3): ").strip()
        model_idx = int(model_choice) - 1

        if model_idx < 0 or model_idx >= len(models):
            print("❌ Invalid model choice")
            return True

        return handle_download_vision_model(handler, models[model_idx])

    except (ValueError, KeyboardInterrupt, EOFError):
        # EOFError added: a closed/piped stdin previously crashed the CLI
        # instead of being treated as a cancelled configuration.
        print("❌ Invalid input or cancelled")

    return True
|
|
457
|
+
|
|
458
|
+
def detect_provider_from_model(model: str) -> Optional[str]:
    """Try to detect provider from model name patterns.

    Returns one of 'ollama', 'openai', 'anthropic', 'huggingface',
    'lmstudio', or None when no known pattern matches.
    """
    name = model.lower()

    def matches(*patterns: str) -> bool:
        # Case-insensitive substring test against the lowered model name.
        return any(p in name for p in patterns)

    if matches('qwen2.5vl', 'llama3.2-vision', 'granite'):
        return "ollama"
    if matches('gpt-', 'o1-'):
        return "openai"
    if matches('claude-'):
        return "anthropic"
    if '/' in model:
        # Namespaced ids: GGUF/unsloth repos map to huggingface, the rest
        # fall back to lmstudio.
        return "huggingface" if matches('unsloth', 'gguf') else "lmstudio"

    return None
|
|
475
|
+
|
|
476
|
+
def add_vision_arguments(parser: argparse.ArgumentParser):
    """Register the vision-configuration CLI flags on *parser*.

    Flags are grouped under 'vision configuration'; --download-vision-model
    uses nargs='?' with const=True so the bare flag selects the default model.
    """
    group = parser.add_argument_group('vision configuration')

    group.add_argument(
        '--set-vision-caption', metavar='MODEL',
        help='Set vision caption model (auto-detects provider)')
    group.add_argument(
        '--set-vision-provider', nargs=2, metavar=('PROVIDER', 'MODEL'),
        help='Set vision provider and model explicitly')
    group.add_argument(
        '--vision-status', action='store_true',
        help='Show current vision configuration status')
    group.add_argument(
        '--list-vision', action='store_true',
        help='List available vision configuration options')
    group.add_argument(
        '--download-vision-model', nargs='?', const=True, metavar='MODEL',
        help='Download vision model for offline use (default: blip-base-caption)')
    group.add_argument(
        '--configure', choices=['vision'],
        help='Interactive configuration mode')
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Provider-specific media handlers.
|
|
3
|
+
|
|
4
|
+
This module contains implementations for formatting media content
|
|
5
|
+
according to each provider's specific API requirements.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .openai_handler import OpenAIMediaHandler
|
|
9
|
+
from .anthropic_handler import AnthropicMediaHandler
|
|
10
|
+
from .local_handler import LocalMediaHandler
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
'OpenAIMediaHandler',
|
|
14
|
+
'AnthropicMediaHandler',
|
|
15
|
+
'LocalMediaHandler'
|
|
16
|
+
]
|