abstractcore 2.9.1__py3-none-any.whl → 2.11.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/__init__.py +7 -27
- abstractcore/apps/extractor.py +33 -100
- abstractcore/apps/intent.py +19 -0
- abstractcore/apps/judge.py +20 -1
- abstractcore/apps/summarizer.py +20 -1
- abstractcore/architectures/detection.py +34 -1
- abstractcore/architectures/response_postprocessing.py +313 -0
- abstractcore/assets/architecture_formats.json +38 -8
- abstractcore/assets/model_capabilities.json +781 -160
- abstractcore/compression/__init__.py +1 -2
- abstractcore/compression/glyph_processor.py +6 -4
- abstractcore/config/main.py +31 -19
- abstractcore/config/manager.py +389 -11
- abstractcore/config/vision_config.py +5 -5
- abstractcore/core/interface.py +151 -3
- abstractcore/core/session.py +16 -10
- abstractcore/download.py +1 -1
- abstractcore/embeddings/manager.py +20 -6
- abstractcore/endpoint/__init__.py +2 -0
- abstractcore/endpoint/app.py +458 -0
- abstractcore/mcp/client.py +3 -1
- abstractcore/media/__init__.py +52 -17
- abstractcore/media/auto_handler.py +42 -22
- abstractcore/media/base.py +44 -1
- abstractcore/media/capabilities.py +12 -33
- abstractcore/media/enrichment.py +105 -0
- abstractcore/media/handlers/anthropic_handler.py +19 -28
- abstractcore/media/handlers/local_handler.py +124 -70
- abstractcore/media/handlers/openai_handler.py +19 -31
- abstractcore/media/processors/__init__.py +4 -2
- abstractcore/media/processors/audio_processor.py +57 -0
- abstractcore/media/processors/office_processor.py +8 -3
- abstractcore/media/processors/pdf_processor.py +46 -3
- abstractcore/media/processors/text_processor.py +22 -24
- abstractcore/media/processors/video_processor.py +58 -0
- abstractcore/media/types.py +97 -4
- abstractcore/media/utils/image_scaler.py +20 -2
- abstractcore/media/utils/video_frames.py +219 -0
- abstractcore/media/vision_fallback.py +136 -22
- abstractcore/processing/__init__.py +32 -3
- abstractcore/processing/basic_deepsearch.py +15 -10
- abstractcore/processing/basic_intent.py +3 -2
- abstractcore/processing/basic_judge.py +3 -2
- abstractcore/processing/basic_summarizer.py +1 -1
- abstractcore/providers/__init__.py +3 -1
- abstractcore/providers/anthropic_provider.py +95 -8
- abstractcore/providers/base.py +1516 -81
- abstractcore/providers/huggingface_provider.py +546 -69
- abstractcore/providers/lmstudio_provider.py +35 -923
- abstractcore/providers/mlx_provider.py +382 -35
- abstractcore/providers/model_capabilities.py +5 -1
- abstractcore/providers/ollama_provider.py +99 -15
- abstractcore/providers/openai_compatible_provider.py +406 -180
- abstractcore/providers/openai_provider.py +188 -44
- abstractcore/providers/openrouter_provider.py +76 -0
- abstractcore/providers/registry.py +61 -5
- abstractcore/providers/streaming.py +138 -33
- abstractcore/providers/vllm_provider.py +92 -817
- abstractcore/server/app.py +461 -13
- abstractcore/server/audio_endpoints.py +139 -0
- abstractcore/server/vision_endpoints.py +1319 -0
- abstractcore/structured/handler.py +316 -41
- abstractcore/tools/common_tools.py +5501 -2012
- abstractcore/tools/comms_tools.py +1641 -0
- abstractcore/tools/core.py +37 -7
- abstractcore/tools/handler.py +4 -9
- abstractcore/tools/parser.py +49 -2
- abstractcore/tools/tag_rewriter.py +2 -1
- abstractcore/tools/telegram_tdlib.py +407 -0
- abstractcore/tools/telegram_tools.py +261 -0
- abstractcore/utils/cli.py +1085 -72
- abstractcore/utils/token_utils.py +2 -0
- abstractcore/utils/truncation.py +29 -0
- abstractcore/utils/version.py +3 -4
- abstractcore/utils/vlm_token_calculator.py +12 -2
- abstractcore-2.11.2.dist-info/METADATA +562 -0
- abstractcore-2.11.2.dist-info/RECORD +133 -0
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/WHEEL +1 -1
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/entry_points.txt +1 -0
- abstractcore-2.9.1.dist-info/METADATA +0 -1190
- abstractcore-2.9.1.dist-info/RECORD +0 -119
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/top_level.txt +0 -0
abstractcore/providers/mlx_provider.py

@@ -2,6 +2,7 @@
 MLX provider implementation for Apple Silicon.
 """
 
+import json
 import time
 from typing import List, Dict, Any, Optional, Union, Iterator, Type
 
@@ -50,21 +51,297 @@ class MLXProvider(BaseProvider):
         self.tokenizer = None
         self._load_model()
 
+    def supports_prompt_cache(self) -> bool:
+        """MLX supports KV prompt caches via `mlx_lm.models.cache`."""
+        return True
+
+    def _prompt_cache_backend_create(self) -> Optional[Any]:
+        try:
+            from mlx_lm.models.cache import make_prompt_cache
+        except Exception:
+            return None
+        try:
+            return make_prompt_cache(self.llm)
+        except Exception:
+            return None
+
+    def _prompt_cache_backend_clone(self, cache_value: Any) -> Optional[Any]:
+        """Best-effort deep clone of an MLX prompt cache."""
+        if cache_value is None:
+            return None
+
+        def _clone_layer(layer: Any) -> Any:
+            if hasattr(layer, "state") and hasattr(layer.__class__, "from_state"):
+                try:
+                    return layer.__class__.from_state(layer.state())
+                except Exception:
+                    return None
+            if hasattr(layer, "copy"):
+                try:
+                    return layer.copy()
+                except Exception:
+                    return None
+            return None
+
+        # MLX-LM prompt caches are typically a list of per-layer KVCache objects.
+        if isinstance(cache_value, list):
+            cloned: List[Any] = []
+            for layer in cache_value:
+                c = _clone_layer(layer)
+                if c is None:
+                    return None
+                cloned.append(c)
+            return cloned
+
+        if isinstance(cache_value, tuple):
+            cloned_layers: List[Any] = []
+            for layer in cache_value:
+                c = _clone_layer(layer)
+                if c is None:
+                    return None
+                cloned_layers.append(c)
+            return tuple(cloned_layers)
+
+        # Fallback: single cache object.
+        return _clone_layer(cache_value)
+
+    def _prompt_cache_backend_token_count(self, cache_value: Any) -> Optional[int]:
+        if cache_value is None:
+            return 0
+        try:
+            if isinstance(cache_value, (list, tuple)):
+                for layer in cache_value:
+                    if hasattr(layer, "size"):
+                        try:
+                            s = int(layer.size())
+                        except Exception:
+                            s = None
+                        if isinstance(s, int) and s > 0:
+                            return s
+                    if hasattr(layer, "offset"):
+                        try:
+                            off = int(getattr(layer, "offset", 0))
+                        except Exception:
+                            off = 0
+                        if off > 0:
+                            return off
+                return 0
+        except Exception:
+            pass
+        return None
+
+    def _build_prompt_fragment(
+        self,
+        *,
+        prompt: str = "",
+        messages: Optional[List[Dict[str, Any]]] = None,
+        system_prompt: Optional[str] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        add_generation_prompt: bool = False,
+    ) -> str:
+        """Build a prompt fragment intended to be appended to an existing prompt_cache."""
+
+        final_system_prompt = system_prompt
+        if tools and self.tool_handler.supports_prompted:
+            include_tool_list = True
+            if final_system_prompt and "## Tools (session)" in final_system_prompt:
+                include_tool_list = False
+            tool_prompt = self.tool_handler.format_tools_prompt(tools, include_tool_list=include_tool_list)
+            if final_system_prompt:
+                final_system_prompt += f"\n\n{tool_prompt}"
+            else:
+                final_system_prompt = tool_prompt
+
+        def _as_text(val: Any) -> str:
+            if val is None:
+                return ""
+            if isinstance(val, str):
+                return val
+            try:
+                return json.dumps(val, ensure_ascii=False)
+            except Exception:
+                return str(val)
+
+        is_qwen = "qwen" in self.model.lower()
+        parts: List[str] = []
+
+        if final_system_prompt:
+            if is_qwen:
+                parts.append(f"<|im_start|>system\n{final_system_prompt}<|im_end|>\n")
+            else:
+                parts.append(f"{final_system_prompt.strip()}\n\n")
+
+        if messages:
+            for msg in messages:
+                if not isinstance(msg, dict):
+                    continue
+                role = str(msg.get("role") or "user")
+                content = _as_text(msg.get("content"))
+                if is_qwen:
+                    parts.append(f"<|im_start|>{role}\n{content}<|im_end|>\n")
+                else:
+                    parts.append(f"{role}: {content}\n")
+
+        if isinstance(prompt, str) and prompt:
+            if is_qwen:
+                parts.append(f"<|im_start|>user\n{prompt}<|im_end|>\n")
+            else:
+                parts.append(f"user: {prompt}\n")
+
+        if add_generation_prompt:
+            parts.append("<|im_start|>assistant\n" if is_qwen else "assistant:")
+
+        return "".join(parts)
+
+    def _prompt_cache_backend_append(
+        self,
+        cache_value: Any,
+        *,
+        prompt: str = "",
+        messages: Optional[List[Dict[str, Any]]] = None,
+        system_prompt: Optional[str] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        add_generation_prompt: bool = False,
+        **kwargs,
+    ) -> bool:
+        _ = kwargs
+        if cache_value is None:
+            return False
+
+        fragment = self._build_prompt_fragment(
+            prompt=str(prompt or ""),
+            messages=messages,
+            system_prompt=system_prompt,
+            tools=tools,
+            add_generation_prompt=bool(add_generation_prompt),
+        )
+        if not fragment:
+            return True
+
+        try:
+            from mlx_lm.models.cache import trim_prompt_cache
+        except Exception:
+            trim_prompt_cache = None
+
+        # Best-effort prefill: MLX-LM generates at least one token; trim it to end exactly at the fragment boundary.
+        generated = 0
+        try:
+            gen = self.stream_generate_fn(
+                self.llm,
+                self.tokenizer,
+                prompt=fragment,
+                prompt_cache=cache_value,
+                max_tokens=1,
+            )
+            for _chunk in gen:
+                generated += 1
+        except TypeError:
+            try:
+                gen = self.stream_generate_fn(
+                    self.llm,
+                    self.tokenizer,
+                    fragment,
+                    prompt_cache=cache_value,
+                    max_tokens=1,
+                )
+                for _chunk in gen:
+                    generated += 1
+            except Exception:
+                return False
+        except Exception:
+            return False
+
+        if trim_prompt_cache is not None and generated > 0:
+            try:
+                trim_prompt_cache(cache_value, generated)
+            except Exception:
+                pass
+
+        return True
+
+    def prompt_cache_set(
+        self,
+        key: str,
+        *,
+        make_default: bool = True,
+        warm_prompt: Optional[str] = None,
+        ttl_s: Optional[float] = None,
+        **kwargs,
+    ) -> bool:
+        """Create/reset a prompt cache for the given key (best-effort)."""
+        _ = kwargs
+        normalized = self._normalize_prompt_cache_key(key)
+        if normalized is None:
+            return False
+        if not super().prompt_cache_set(normalized, make_default=make_default):
+            return False
+
+        try:
+            from mlx_lm.models.cache import make_prompt_cache, trim_prompt_cache
+        except Exception:
+            return False
+
+        cache_obj = make_prompt_cache(self.llm)
+
+        # Best-effort warm: MLX-LM always generates at least 1 token, so we trim it back.
+        if isinstance(warm_prompt, str) and warm_prompt.strip():
+            try:
+                gen = self.stream_generate_fn(
+                    self.llm,
+                    self.tokenizer,
+                    prompt=warm_prompt,
+                    prompt_cache=cache_obj,
+                    max_tokens=1,
+                )
+                for _ in gen:
+                    break
+                try:
+                    trim_prompt_cache(cache_obj, 1)
+                except Exception:
+                    pass
+            except Exception:
+                pass
+
+        try:
+            self._prompt_cache_store.set(normalized, cache_obj, ttl_s=ttl_s, meta={"backend": "mlx"})
+        except Exception:
+            return False
+        return True
+
     def _load_model(self):
         """Load MLX model and tokenizer"""
         try:
             from mlx_lm import load, generate, stream_generate
-            import
+            import mlx.core as mx
             import os
             from contextlib import redirect_stdout, redirect_stderr
+            from pathlib import Path
+
+            # Upstream compatibility: mlx-lm may call `mx.metal.device_info()` which is deprecated in recent MLX.
+            # Patch the deprecated entrypoint to the supported API so the warning is fixed (not silenced).
+            try:
+                if hasattr(mx, "device_info") and hasattr(mx, "metal") and hasattr(mx.metal, "device_info"):
+                    mx.metal.device_info = mx.device_info  # type: ignore[attr-defined]
+            except Exception:
+                pass
 
             # Clean model name - remove trailing slashes that cause HuggingFace validation errors
             clean_model_name = self.model.rstrip('/')
 
+            # Prefer an existing local directory (including LM Studio's cache) over a remote HF repo id.
+            load_target: str = clean_model_name
+            explicit_path = Path(clean_model_name).expanduser()
+            if explicit_path.is_dir():
+                load_target = str(explicit_path)
+            else:
+                lmstudio_path = Path.home() / ".lmstudio" / "models" / clean_model_name
+                if lmstudio_path.is_dir():
+                    load_target = str(lmstudio_path)
+
             # Silence the "Fetching" progress bar by redirecting stdout/stderr
             with open(os.devnull, 'w') as devnull:
                 with redirect_stdout(devnull), redirect_stderr(devnull):
-                    self.llm, self.tokenizer = load(
+                    self.llm, self.tokenizer = load(load_target)
 
             self.generate_fn = generate
             self.stream_generate_fn = stream_generate
@@ -80,7 +357,7 @@ class MLXProvider(BaseProvider):
             else:
                 raise Exception(f"Failed to load MLX model {self.model}: {str(e)}")
 
-    def
+    def unload_model(self, model_name: str) -> None:
         """
         Unload the MLX model from memory.
 
@@ -180,7 +457,7 @@ class MLXProvider(BaseProvider):
         # Check if Outlines is required but unavailable
         if self.structured_output_method == "native_outlines" and not OUTLINES_AVAILABLE:
             return GenerateResponse(
-                content="Error: structured_output_method='native_outlines' requires Outlines library. Install with: pip install abstractcore[mlx]",
+                content="Error: structured_output_method='native_outlines' requires Outlines library. Install with: pip install \"abstractcore[mlx]\"",
                 model=self.model,
                 finish_reason="error"
             )
@@ -228,6 +505,7 @@ class MLXProvider(BaseProvider):
 
         # Handle media content first if present
         processed_prompt = prompt
+        media_enrichment = None
         if media:
             try:
                 from ..media.handlers import LocalMediaHandler
@@ -235,6 +513,7 @@ class MLXProvider(BaseProvider):
 
                 # Create multimodal message combining text and media
                 multimodal_message = media_handler.create_multimodal_message(prompt, media)
+                media_enrichment = getattr(media_handler, "media_enrichment", None)
 
                 # For MLX (local provider), we get text-embedded content
                 if isinstance(multimodal_message, str):
@@ -253,7 +532,7 @@ class MLXProvider(BaseProvider):
                 else:
                     processed_prompt = str(multimodal_message["content"])
             except ImportError:
-                self.logger.warning("Media processing not available. Install with: pip install abstractcore[media]")
+                self.logger.warning("Media processing not available. Install with: pip install \"abstractcore[media]\"")
             except Exception as e:
                 self.logger.warning(f"Failed to process media content: {e}")
 
@@ -263,15 +542,37 @@ class MLXProvider(BaseProvider):
         # MLX generation parameters using unified system
         generation_kwargs = self._prepare_generation_kwargs(**kwargs)
         max_tokens = self._get_provider_max_tokens_param(generation_kwargs)
-        temperature =
+        temperature = generation_kwargs.get("temperature", self.temperature)
         top_p = kwargs.get("top_p", 0.9)
-        seed_value =
+        seed_value = generation_kwargs.get("seed")
+        prompt_cache = None
+        prompt_cache_key = kwargs.get("prompt_cache_key")
+        if isinstance(prompt_cache_key, str) and prompt_cache_key.strip():
+            prompt_cache = self._prompt_cache_store.get(prompt_cache_key.strip())
+            if prompt_cache is None:
+                self.prompt_cache_set(prompt_cache_key.strip(), make_default=False)
+                prompt_cache = self._prompt_cache_store.get(prompt_cache_key.strip())
 
         try:
             if stream:
-                return self._stream_generate_with_tools(
+                return self._stream_generate_with_tools(
+                    full_prompt,
+                    max_tokens,
+                    temperature,
+                    top_p,
+                    tools,
+                    kwargs.get('tool_call_tags'),
+                    seed_value,
+                    prompt_cache,
+                )
             else:
-                response = self._single_generate(
+                response = self._single_generate(
+                    full_prompt, max_tokens, temperature, top_p, seed_value, prompt_cache
+                )
+                if media_enrichment:
+                    from ..media.enrichment import merge_enrichment_metadata
+
+                    response.metadata = merge_enrichment_metadata(response.metadata, media_enrichment)
 
                 # Handle tool execution for prompted models
                 if tools and self.tool_handler.supports_prompted and response.content:
@@ -334,7 +635,15 @@ class MLXProvider(BaseProvider):
 
         return full_prompt
 
-    def _single_generate(
+    def _single_generate(
+        self,
+        prompt: str,
+        max_tokens: int,
+        temperature: float,
+        top_p: float,
+        seed: Optional[int] = None,
+        prompt_cache: Optional[Any] = None,
+    ) -> GenerateResponse:
         """Generate single response"""
 
         # Handle seed parameter (MLX supports seed via mx.random.seed)
@@ -354,7 +663,8 @@ class MLXProvider(BaseProvider):
                 self.tokenizer,
                 prompt=prompt,
                 max_tokens=max_tokens,
-                verbose=False
+                verbose=False,
+                prompt_cache=prompt_cache,
             )
         except TypeError:
             try:
@@ -398,7 +708,16 @@ class MLXProvider(BaseProvider):
             "completion_tokens": output_tokens
         }
 
-    def _stream_generate(
+    def _stream_generate(
+        self,
+        prompt: str,
+        max_tokens: int,
+        temperature: float,
+        top_p: float,
+        tool_call_tags: Optional[str] = None,
+        seed: Optional[int] = None,
+        prompt_cache: Optional[Any] = None,
+    ) -> Iterator[GenerateResponse]:
         """Generate real streaming response using MLX stream_generate with tool tag rewriting support"""
         try:
             # Handle seed parameter (MLX supports seed via mx.random.seed)
@@ -422,7 +741,8 @@ class MLXProvider(BaseProvider):
                 self.llm,
                 self.tokenizer,
                 prompt,
-                max_tokens=max_tokens
+                max_tokens=max_tokens,
+                prompt_cache=prompt_cache,
             ):
                 # Each response has a .text attribute with the new token(s)
                 content = response.text
@@ -462,16 +782,25 @@ class MLXProvider(BaseProvider):
         return kwargs.get("max_output_tokens", self.max_output_tokens)
 
 
-    def _stream_generate_with_tools(
-
-
-
+    def _stream_generate_with_tools(
+        self,
+        full_prompt: str,
+        max_tokens: int,
+        temperature: float,
+        top_p: float,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        tool_call_tags: Optional[str] = None,
+        seed: Optional[int] = None,
+        prompt_cache: Optional[Any] = None,
+    ) -> Iterator[GenerateResponse]:
         """Stream generate with tool execution at the end"""
         collected_content = ""
 
         # Stream the response content
-        for chunk in self._stream_generate(
-
+        for chunk in self._stream_generate(
+            full_prompt, max_tokens, temperature, top_p, tool_call_tags, seed, prompt_cache
+        ):
+            collected_content += chunk.content or ""
             yield chunk
 
         # Handle tool execution if we have tools and content
@@ -498,7 +827,11 @@ class MLXProvider(BaseProvider):
     @classmethod
     def list_available_models(cls, **kwargs) -> List[str]:
         """
-        List available MLX models from
+        List available MLX models from local caches.
+
+        This includes:
+        - HuggingFace hub cache (~/.cache/huggingface/hub) for any repo containing "mlx"
+        - LM Studio cache (~/.lmstudio/models) for any org/model containing "mlx"
 
         Args:
            **kwargs: Optional parameters including:
@@ -512,22 +845,36 @@ class MLXProvider(BaseProvider):
         from .model_capabilities import filter_models_by_capabilities
 
         try:
-
-            if not hf_cache.exists():
-                return []
-
-            models = []
-            for item in hf_cache.iterdir():
-                if item.is_dir() and item.name.startswith("models--"):
-                    # Convert models--mlx-community--Qwen3-Coder-30B-A3B-Instruct-4bit to mlx-community/Qwen3-Coder-30B-A3B-Instruct-4bit
-                    model_name = item.name.replace("models--", "").replace("--", "/")
+            model_set = set()
 
-
-
-
-
-
+            hf_cache = Path.home() / ".cache" / "huggingface" / "hub"
+            if hf_cache.exists():
+                for item in hf_cache.iterdir():
+                    if item.is_dir() and item.name.startswith("models--"):
+                        # Convert models--mlx-community--Qwen3-Coder-30B-A3B-Instruct-4bit to mlx-community/Qwen3-Coder-30B-A3B-Instruct-4bit
+                        model_name = item.name.replace("models--", "").replace("--", "/")
+
+                        # Include ANY model with "mlx" in the name (case-insensitive)
+                        # This captures: mlx-community/*, */mlx-*, *-mlx-*, etc.
+                        if "mlx" in model_name.lower():
+                            model_set.add(model_name)
+
+            lmstudio_models = Path.home() / ".lmstudio" / "models"
+            if lmstudio_models.exists():
+                # LM Studio stores models under: ~/.lmstudio/models/<org>/<model>/*
+                for org_dir in lmstudio_models.iterdir():
+                    if not org_dir.is_dir():
+                        continue
+                    # These org folders are MLX by design (model names may not include "mlx")
+                    include_all_in_org = org_dir.name.lower() in {"mlx-community", "lmstudio-community"}
+                    for model_dir in org_dir.iterdir():
+                        if not model_dir.is_dir():
+                            continue
+                        model_name = f"{org_dir.name}/{model_dir.name}"
+                        if include_all_in_org or "mlx" in model_name.lower():
+                            model_set.add(model_name)
+
+            models = sorted(model_set)
 
             # Apply new capability filtering if provided
             input_capabilities = kwargs.get('input_capabilities')

abstractcore/providers/model_capabilities.py

@@ -134,7 +134,11 @@ def get_model_input_capabilities(model_name: str) -> List[ModelInputCapability]:
     if capabilities.get("audio_support", False):
         input_caps.append(ModelInputCapability.AUDIO)
 
-
+    video_mode = capabilities.get("video_input_mode")
+    if isinstance(video_mode, str) and video_mode.strip().lower() in {"frames", "native"}:
+        input_caps.append(ModelInputCapability.VIDEO)
+    elif capabilities.get("video_support", False):
+        # Backwards compatibility: legacy boolean indicates native video support.
         input_caps.append(ModelInputCapability.VIDEO)
 
     return input_caps