abstractcore 2.9.1__py3-none-any.whl → 2.11.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. abstractcore/__init__.py +7 -27
  2. abstractcore/apps/extractor.py +33 -100
  3. abstractcore/apps/intent.py +19 -0
  4. abstractcore/apps/judge.py +20 -1
  5. abstractcore/apps/summarizer.py +20 -1
  6. abstractcore/architectures/detection.py +34 -1
  7. abstractcore/architectures/response_postprocessing.py +313 -0
  8. abstractcore/assets/architecture_formats.json +38 -8
  9. abstractcore/assets/model_capabilities.json +781 -160
  10. abstractcore/compression/__init__.py +1 -2
  11. abstractcore/compression/glyph_processor.py +6 -4
  12. abstractcore/config/main.py +31 -19
  13. abstractcore/config/manager.py +389 -11
  14. abstractcore/config/vision_config.py +5 -5
  15. abstractcore/core/interface.py +151 -3
  16. abstractcore/core/session.py +16 -10
  17. abstractcore/download.py +1 -1
  18. abstractcore/embeddings/manager.py +20 -6
  19. abstractcore/endpoint/__init__.py +2 -0
  20. abstractcore/endpoint/app.py +458 -0
  21. abstractcore/mcp/client.py +3 -1
  22. abstractcore/media/__init__.py +52 -17
  23. abstractcore/media/auto_handler.py +42 -22
  24. abstractcore/media/base.py +44 -1
  25. abstractcore/media/capabilities.py +12 -33
  26. abstractcore/media/enrichment.py +105 -0
  27. abstractcore/media/handlers/anthropic_handler.py +19 -28
  28. abstractcore/media/handlers/local_handler.py +124 -70
  29. abstractcore/media/handlers/openai_handler.py +19 -31
  30. abstractcore/media/processors/__init__.py +4 -2
  31. abstractcore/media/processors/audio_processor.py +57 -0
  32. abstractcore/media/processors/office_processor.py +8 -3
  33. abstractcore/media/processors/pdf_processor.py +46 -3
  34. abstractcore/media/processors/text_processor.py +22 -24
  35. abstractcore/media/processors/video_processor.py +58 -0
  36. abstractcore/media/types.py +97 -4
  37. abstractcore/media/utils/image_scaler.py +20 -2
  38. abstractcore/media/utils/video_frames.py +219 -0
  39. abstractcore/media/vision_fallback.py +136 -22
  40. abstractcore/processing/__init__.py +32 -3
  41. abstractcore/processing/basic_deepsearch.py +15 -10
  42. abstractcore/processing/basic_intent.py +3 -2
  43. abstractcore/processing/basic_judge.py +3 -2
  44. abstractcore/processing/basic_summarizer.py +1 -1
  45. abstractcore/providers/__init__.py +3 -1
  46. abstractcore/providers/anthropic_provider.py +95 -8
  47. abstractcore/providers/base.py +1516 -81
  48. abstractcore/providers/huggingface_provider.py +546 -69
  49. abstractcore/providers/lmstudio_provider.py +35 -923
  50. abstractcore/providers/mlx_provider.py +382 -35
  51. abstractcore/providers/model_capabilities.py +5 -1
  52. abstractcore/providers/ollama_provider.py +99 -15
  53. abstractcore/providers/openai_compatible_provider.py +406 -180
  54. abstractcore/providers/openai_provider.py +188 -44
  55. abstractcore/providers/openrouter_provider.py +76 -0
  56. abstractcore/providers/registry.py +61 -5
  57. abstractcore/providers/streaming.py +138 -33
  58. abstractcore/providers/vllm_provider.py +92 -817
  59. abstractcore/server/app.py +461 -13
  60. abstractcore/server/audio_endpoints.py +139 -0
  61. abstractcore/server/vision_endpoints.py +1319 -0
  62. abstractcore/structured/handler.py +316 -41
  63. abstractcore/tools/common_tools.py +5501 -2012
  64. abstractcore/tools/comms_tools.py +1641 -0
  65. abstractcore/tools/core.py +37 -7
  66. abstractcore/tools/handler.py +4 -9
  67. abstractcore/tools/parser.py +49 -2
  68. abstractcore/tools/tag_rewriter.py +2 -1
  69. abstractcore/tools/telegram_tdlib.py +407 -0
  70. abstractcore/tools/telegram_tools.py +261 -0
  71. abstractcore/utils/cli.py +1085 -72
  72. abstractcore/utils/token_utils.py +2 -0
  73. abstractcore/utils/truncation.py +29 -0
  74. abstractcore/utils/version.py +3 -4
  75. abstractcore/utils/vlm_token_calculator.py +12 -2
  76. abstractcore-2.11.2.dist-info/METADATA +562 -0
  77. abstractcore-2.11.2.dist-info/RECORD +133 -0
  78. {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/WHEEL +1 -1
  79. {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/entry_points.txt +1 -0
  80. abstractcore-2.9.1.dist-info/METADATA +0 -1190
  81. abstractcore-2.9.1.dist-info/RECORD +0 -119
  82. {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/licenses/LICENSE +0 -0
  83. {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/top_level.txt +0 -0
abstractcore/providers/mlx_provider.py

@@ -2,6 +2,7 @@
  MLX provider implementation for Apple Silicon.
  """

+ import json
  import time
  from typing import List, Dict, Any, Optional, Union, Iterator, Type

@@ -50,21 +51,297 @@ class MLXProvider(BaseProvider):
          self.tokenizer = None
          self._load_model()

+     def supports_prompt_cache(self) -> bool:
+         """MLX supports KV prompt caches via `mlx_lm.models.cache`."""
+         return True
+
+     def _prompt_cache_backend_create(self) -> Optional[Any]:
+         try:
+             from mlx_lm.models.cache import make_prompt_cache
+         except Exception:
+             return None
+         try:
+             return make_prompt_cache(self.llm)
+         except Exception:
+             return None
+
+     def _prompt_cache_backend_clone(self, cache_value: Any) -> Optional[Any]:
+         """Best-effort deep clone of an MLX prompt cache."""
+         if cache_value is None:
+             return None
+
+         def _clone_layer(layer: Any) -> Any:
+             if hasattr(layer, "state") and hasattr(layer.__class__, "from_state"):
+                 try:
+                     return layer.__class__.from_state(layer.state())
+                 except Exception:
+                     return None
+             if hasattr(layer, "copy"):
+                 try:
+                     return layer.copy()
+                 except Exception:
+                     return None
+             return None
+
+         # MLX-LM prompt caches are typically a list of per-layer KVCache objects.
+         if isinstance(cache_value, list):
+             cloned: List[Any] = []
+             for layer in cache_value:
+                 c = _clone_layer(layer)
+                 if c is None:
+                     return None
+                 cloned.append(c)
+             return cloned
+
+         if isinstance(cache_value, tuple):
+             cloned_layers: List[Any] = []
+             for layer in cache_value:
+                 c = _clone_layer(layer)
+                 if c is None:
+                     return None
+                 cloned_layers.append(c)
+             return tuple(cloned_layers)
+
+         # Fallback: single cache object.
+         return _clone_layer(cache_value)
+
+     def _prompt_cache_backend_token_count(self, cache_value: Any) -> Optional[int]:
+         if cache_value is None:
+             return 0
+         try:
+             if isinstance(cache_value, (list, tuple)):
+                 for layer in cache_value:
+                     if hasattr(layer, "size"):
+                         try:
+                             s = int(layer.size())
+                         except Exception:
+                             s = None
+                         if isinstance(s, int) and s > 0:
+                             return s
+                     if hasattr(layer, "offset"):
+                         try:
+                             off = int(getattr(layer, "offset", 0))
+                         except Exception:
+                             off = 0
+                         if off > 0:
+                             return off
+                 return 0
+         except Exception:
+             pass
+         return None
+
+     def _build_prompt_fragment(
+         self,
+         *,
+         prompt: str = "",
+         messages: Optional[List[Dict[str, Any]]] = None,
+         system_prompt: Optional[str] = None,
+         tools: Optional[List[Dict[str, Any]]] = None,
+         add_generation_prompt: bool = False,
+     ) -> str:
+         """Build a prompt fragment intended to be appended to an existing prompt_cache."""
+
+         final_system_prompt = system_prompt
+         if tools and self.tool_handler.supports_prompted:
+             include_tool_list = True
+             if final_system_prompt and "## Tools (session)" in final_system_prompt:
+                 include_tool_list = False
+             tool_prompt = self.tool_handler.format_tools_prompt(tools, include_tool_list=include_tool_list)
+             if final_system_prompt:
+                 final_system_prompt += f"\n\n{tool_prompt}"
+             else:
+                 final_system_prompt = tool_prompt
+
+         def _as_text(val: Any) -> str:
+             if val is None:
+                 return ""
+             if isinstance(val, str):
+                 return val
+             try:
+                 return json.dumps(val, ensure_ascii=False)
+             except Exception:
+                 return str(val)
+
+         is_qwen = "qwen" in self.model.lower()
+         parts: List[str] = []
+
+         if final_system_prompt:
+             if is_qwen:
+                 parts.append(f"<|im_start|>system\n{final_system_prompt}<|im_end|>\n")
+             else:
+                 parts.append(f"{final_system_prompt.strip()}\n\n")
+
+         if messages:
+             for msg in messages:
+                 if not isinstance(msg, dict):
+                     continue
+                 role = str(msg.get("role") or "user")
+                 content = _as_text(msg.get("content"))
+                 if is_qwen:
+                     parts.append(f"<|im_start|>{role}\n{content}<|im_end|>\n")
+                 else:
+                     parts.append(f"{role}: {content}\n")
+
+         if isinstance(prompt, str) and prompt:
+             if is_qwen:
+                 parts.append(f"<|im_start|>user\n{prompt}<|im_end|>\n")
+             else:
+                 parts.append(f"user: {prompt}\n")
+
+         if add_generation_prompt:
+             parts.append("<|im_start|>assistant\n" if is_qwen else "assistant:")
+
+         return "".join(parts)
+
+     def _prompt_cache_backend_append(
+         self,
+         cache_value: Any,
+         *,
+         prompt: str = "",
+         messages: Optional[List[Dict[str, Any]]] = None,
+         system_prompt: Optional[str] = None,
+         tools: Optional[List[Dict[str, Any]]] = None,
+         add_generation_prompt: bool = False,
+         **kwargs,
+     ) -> bool:
+         _ = kwargs
+         if cache_value is None:
+             return False
+
+         fragment = self._build_prompt_fragment(
+             prompt=str(prompt or ""),
+             messages=messages,
+             system_prompt=system_prompt,
+             tools=tools,
+             add_generation_prompt=bool(add_generation_prompt),
+         )
+         if not fragment:
+             return True
+
+         try:
+             from mlx_lm.models.cache import trim_prompt_cache
+         except Exception:
+             trim_prompt_cache = None
+
+         # Best-effort prefill: MLX-LM generates at least one token; trim it to end exactly at the fragment boundary.
+         generated = 0
+         try:
+             gen = self.stream_generate_fn(
+                 self.llm,
+                 self.tokenizer,
+                 prompt=fragment,
+                 prompt_cache=cache_value,
+                 max_tokens=1,
+             )
+             for _chunk in gen:
+                 generated += 1
+         except TypeError:
+             try:
+                 gen = self.stream_generate_fn(
+                     self.llm,
+                     self.tokenizer,
+                     fragment,
+                     prompt_cache=cache_value,
+                     max_tokens=1,
+                 )
+                 for _chunk in gen:
+                     generated += 1
+             except Exception:
+                 return False
+         except Exception:
+             return False
+
+         if trim_prompt_cache is not None and generated > 0:
+             try:
+                 trim_prompt_cache(cache_value, generated)
+             except Exception:
+                 pass
+
+         return True
+
+     def prompt_cache_set(
+         self,
+         key: str,
+         *,
+         make_default: bool = True,
+         warm_prompt: Optional[str] = None,
+         ttl_s: Optional[float] = None,
+         **kwargs,
+     ) -> bool:
+         """Create/reset a prompt cache for the given key (best-effort)."""
+         _ = kwargs
+         normalized = self._normalize_prompt_cache_key(key)
+         if normalized is None:
+             return False
+         if not super().prompt_cache_set(normalized, make_default=make_default):
+             return False
+
+         try:
+             from mlx_lm.models.cache import make_prompt_cache, trim_prompt_cache
+         except Exception:
+             return False
+
+         cache_obj = make_prompt_cache(self.llm)
+
+         # Best-effort warm: MLX-LM always generates at least 1 token, so we trim it back.
+         if isinstance(warm_prompt, str) and warm_prompt.strip():
+             try:
+                 gen = self.stream_generate_fn(
+                     self.llm,
+                     self.tokenizer,
+                     prompt=warm_prompt,
+                     prompt_cache=cache_obj,
+                     max_tokens=1,
+                 )
+                 for _ in gen:
+                     break
+                 try:
+                     trim_prompt_cache(cache_obj, 1)
+                 except Exception:
+                     pass
+             except Exception:
+                 pass
+
+         try:
+             self._prompt_cache_store.set(normalized, cache_obj, ttl_s=ttl_s, meta={"backend": "mlx"})
+         except Exception:
+             return False
+         return True
+
      def _load_model(self):
          """Load MLX model and tokenizer"""
          try:
              from mlx_lm import load, generate, stream_generate
-             import sys
+             import mlx.core as mx
              import os
              from contextlib import redirect_stdout, redirect_stderr
+             from pathlib import Path
+
+             # Upstream compatibility: mlx-lm may call `mx.metal.device_info()` which is deprecated in recent MLX.
+             # Patch the deprecated entrypoint to the supported API so the warning is fixed (not silenced).
+             try:
+                 if hasattr(mx, "device_info") and hasattr(mx, "metal") and hasattr(mx.metal, "device_info"):
+                     mx.metal.device_info = mx.device_info  # type: ignore[attr-defined]
+             except Exception:
+                 pass

              # Clean model name - remove trailing slashes that cause HuggingFace validation errors
              clean_model_name = self.model.rstrip('/')

+             # Prefer an existing local directory (including LM Studio's cache) over a remote HF repo id.
+             load_target: str = clean_model_name
+             explicit_path = Path(clean_model_name).expanduser()
+             if explicit_path.is_dir():
+                 load_target = str(explicit_path)
+             else:
+                 lmstudio_path = Path.home() / ".lmstudio" / "models" / clean_model_name
+                 if lmstudio_path.is_dir():
+                     load_target = str(lmstudio_path)
+
              # Silence the "Fetching" progress bar by redirecting stdout/stderr
              with open(os.devnull, 'w') as devnull:
                  with redirect_stdout(devnull), redirect_stderr(devnull):
-                     self.llm, self.tokenizer = load(clean_model_name)
+                     self.llm, self.tokenizer = load(load_target)

              self.generate_fn = generate
              self.stream_generate_fn = stream_generate
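
The prompt-cache methods above are thin wrappers around mlx-lm's own cache primitives. A minimal standalone sketch of those primitives follows, assuming a recent mlx-lm whose stream_generate accepts a prompt_cache keyword (the hunk falls back to positional arguments when it does not); the model id is only a placeholder.

# Sketch of the mlx-lm prompt-cache primitives used by the new methods.
from mlx_lm import load, stream_generate
from mlx_lm.models.cache import make_prompt_cache, trim_prompt_cache

model, tokenizer = load("mlx-community/Example-Model-4bit")  # placeholder model id

# Create an empty per-layer KV cache and prefill it with a shared prefix.
cache = make_prompt_cache(model)
prefix = "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
generated = 0
for _chunk in stream_generate(model, tokenizer, prompt=prefix, prompt_cache=cache, max_tokens=1):
    generated += 1  # stream_generate emits at least one token during prefill

# Trim the generated token(s) so the cache ends exactly at the prefix boundary,
# mirroring what _prompt_cache_backend_append() does above.
trim_prompt_cache(cache, generated)

# Later calls reuse the warm cache, so only the new suffix is processed.
for chunk in stream_generate(
    model,
    tokenizer,
    prompt="<|im_start|>user\nHi!<|im_end|>\n<|im_start|>assistant\n",
    prompt_cache=cache,
    max_tokens=64,
):
    print(chunk.text, end="", flush=True)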
@@ -80,7 +357,7 @@ class MLXProvider(BaseProvider):
              else:
                  raise Exception(f"Failed to load MLX model {self.model}: {str(e)}")

-     def unload(self) -> None:
+     def unload_model(self, model_name: str) -> None:
          """
          Unload the MLX model from memory.

@@ -180,7 +457,7 @@
          # Check if Outlines is required but unavailable
          if self.structured_output_method == "native_outlines" and not OUTLINES_AVAILABLE:
              return GenerateResponse(
-                 content="Error: structured_output_method='native_outlines' requires Outlines library. Install with: pip install abstractcore[mlx]",
+                 content="Error: structured_output_method='native_outlines' requires Outlines library. Install with: pip install \"abstractcore[mlx]\"",
                  model=self.model,
                  finish_reason="error"
              )
@@ -228,6 +505,7 @@
 
          # Handle media content first if present
          processed_prompt = prompt
+         media_enrichment = None
          if media:
              try:
                  from ..media.handlers import LocalMediaHandler
@@ -235,6 +513,7 @@
 
                  # Create multimodal message combining text and media
                  multimodal_message = media_handler.create_multimodal_message(prompt, media)
+                 media_enrichment = getattr(media_handler, "media_enrichment", None)

                  # For MLX (local provider), we get text-embedded content
                  if isinstance(multimodal_message, str):
@@ -253,7 +532,7 @@
                  else:
                      processed_prompt = str(multimodal_message["content"])
              except ImportError:
-                 self.logger.warning("Media processing not available. Install with: pip install abstractcore[media]")
+                 self.logger.warning("Media processing not available. Install with: pip install \"abstractcore[media]\"")
              except Exception as e:
                  self.logger.warning(f"Failed to process media content: {e}")

@@ -263,15 +542,37 @@
          # MLX generation parameters using unified system
          generation_kwargs = self._prepare_generation_kwargs(**kwargs)
          max_tokens = self._get_provider_max_tokens_param(generation_kwargs)
-         temperature = kwargs.get("temperature", self.temperature)
+         temperature = generation_kwargs.get("temperature", self.temperature)
          top_p = kwargs.get("top_p", 0.9)
-         seed_value = kwargs.get("seed", self.seed)
+         seed_value = generation_kwargs.get("seed")
+         prompt_cache = None
+         prompt_cache_key = kwargs.get("prompt_cache_key")
+         if isinstance(prompt_cache_key, str) and prompt_cache_key.strip():
+             prompt_cache = self._prompt_cache_store.get(prompt_cache_key.strip())
+             if prompt_cache is None:
+                 self.prompt_cache_set(prompt_cache_key.strip(), make_default=False)
+                 prompt_cache = self._prompt_cache_store.get(prompt_cache_key.strip())

          try:
              if stream:
-                 return self._stream_generate_with_tools(full_prompt, max_tokens, temperature, top_p, tools, kwargs.get('tool_call_tags'), seed_value)
+                 return self._stream_generate_with_tools(
+                     full_prompt,
+                     max_tokens,
+                     temperature,
+                     top_p,
+                     tools,
+                     kwargs.get('tool_call_tags'),
+                     seed_value,
+                     prompt_cache,
+                 )
              else:
-                 response = self._single_generate(full_prompt, max_tokens, temperature, top_p, seed_value)
+                 response = self._single_generate(
+                     full_prompt, max_tokens, temperature, top_p, seed_value, prompt_cache
+                 )
+                 if media_enrichment:
+                     from ..media.enrichment import merge_enrichment_metadata
+
+                     response.metadata = merge_enrichment_metadata(response.metadata, media_enrichment)

                  # Handle tool execution for prompted models
                  if tools and self.tool_handler.supports_prompted and response.content:
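
On the caller side, this hunk is what makes a reusable cache addressable by name: the generation path looks up kwargs["prompt_cache_key"], creates the cache on first use via prompt_cache_set(), and threads the cached KV state into both the streaming and non-streaming branches. A hypothetical usage sketch follows; the constructor arguments and the generate() entry point are assumptions, only the prompt_cache_key kwarg comes from this hunk.

# Hypothetical caller-side sketch; constructor and entry point are assumed, not shown in this diff.
provider = MLXProvider(model="mlx-community/Example-Model-4bit")

# First call under this key creates and fills the cache.
first = provider.generate("Summarize the design document.", prompt_cache_key="doc-session")

# Later calls with the same key reuse the prefilled KV cache, so the shared
# prefix is not re-processed before generation starts.
follow_up = provider.generate("Now list the open questions.", prompt_cache_key="doc-session")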
@@ -334,7 +635,15 @@
 
          return full_prompt

-     def _single_generate(self, prompt: str, max_tokens: int, temperature: float, top_p: float, seed: Optional[int] = None) -> GenerateResponse:
+     def _single_generate(
+         self,
+         prompt: str,
+         max_tokens: int,
+         temperature: float,
+         top_p: float,
+         seed: Optional[int] = None,
+         prompt_cache: Optional[Any] = None,
+     ) -> GenerateResponse:
          """Generate single response"""

          # Handle seed parameter (MLX supports seed via mx.random.seed)
@@ -354,7 +663,8 @@
                  self.tokenizer,
                  prompt=prompt,
                  max_tokens=max_tokens,
-                 verbose=False
+                 verbose=False,
+                 prompt_cache=prompt_cache,
              )
          except TypeError:
              try:
@@ -398,7 +708,16 @@
              "completion_tokens": output_tokens
          }

-     def _stream_generate(self, prompt: str, max_tokens: int, temperature: float, top_p: float, tool_call_tags: Optional[str] = None, seed: Optional[int] = None) -> Iterator[GenerateResponse]:
+     def _stream_generate(
+         self,
+         prompt: str,
+         max_tokens: int,
+         temperature: float,
+         top_p: float,
+         tool_call_tags: Optional[str] = None,
+         seed: Optional[int] = None,
+         prompt_cache: Optional[Any] = None,
+     ) -> Iterator[GenerateResponse]:
          """Generate real streaming response using MLX stream_generate with tool tag rewriting support"""
          try:
              # Handle seed parameter (MLX supports seed via mx.random.seed)
@@ -422,7 +741,8 @@
                  self.llm,
                  self.tokenizer,
                  prompt,
-                 max_tokens=max_tokens
+                 max_tokens=max_tokens,
+                 prompt_cache=prompt_cache,
              ):
                  # Each response has a .text attribute with the new token(s)
                  content = response.text
@@ -462,16 +782,25 @@
          return kwargs.get("max_output_tokens", self.max_output_tokens)


-     def _stream_generate_with_tools(self, full_prompt: str, max_tokens: int,
-                                     temperature: float, top_p: float,
-                                     tools: Optional[List[Dict[str, Any]]] = None,
-                                     tool_call_tags: Optional[str] = None, seed: Optional[int] = None) -> Iterator[GenerateResponse]:
+     def _stream_generate_with_tools(
+         self,
+         full_prompt: str,
+         max_tokens: int,
+         temperature: float,
+         top_p: float,
+         tools: Optional[List[Dict[str, Any]]] = None,
+         tool_call_tags: Optional[str] = None,
+         seed: Optional[int] = None,
+         prompt_cache: Optional[Any] = None,
+     ) -> Iterator[GenerateResponse]:
          """Stream generate with tool execution at the end"""
          collected_content = ""

          # Stream the response content
-         for chunk in self._stream_generate(full_prompt, max_tokens, temperature, top_p, tool_call_tags, seed):
-             collected_content += chunk.content
+         for chunk in self._stream_generate(
+             full_prompt, max_tokens, temperature, top_p, tool_call_tags, seed, prompt_cache
+         ):
+             collected_content += chunk.content or ""
              yield chunk

          # Handle tool execution if we have tools and content
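
The streaming-with-tools path keeps the stream fully incremental: chunks are yielded as they arrive, the text is accumulated, and tool-call handling only runs once on the collected output. A generic sketch of that pattern follows; the callbacks are placeholders, and AbstractCore's real parsing lives in its tool handler, which is not shown in this hunk.

from typing import Callable, Iterable, Iterator

def stream_then_execute(
    chunks: Iterable[str],
    parse_tool_calls: Callable[[str], list],   # placeholder: extracts tool calls from the full text
    execute: Callable[[object], str],          # placeholder: runs one tool call
) -> Iterator[str]:
    """Yield streamed chunks, then act on tool calls found in the accumulated text."""
    collected = ""
    for chunk in chunks:
        collected += chunk or ""   # tolerate empty/None chunks, as the hunk does
        yield chunk
    for call in parse_tool_calls(collected):
        yield execute(call)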
@@ -498,7 +827,11 @@
      @classmethod
      def list_available_models(cls, **kwargs) -> List[str]:
          """
-         List available MLX models from HuggingFace cache.
+         List available MLX models from local caches.
+
+         This includes:
+         - HuggingFace hub cache (~/.cache/huggingface/hub) for any repo containing "mlx"
+         - LM Studio cache (~/.lmstudio/models) for any org/model containing "mlx"

          Args:
              **kwargs: Optional parameters including:
@@ -512,22 +845,36 @@
          from .model_capabilities import filter_models_by_capabilities

          try:
-             hf_cache = Path.home() / ".cache" / "huggingface" / "hub"
-             if not hf_cache.exists():
-                 return []
-
-             models = []
-             for item in hf_cache.iterdir():
-                 if item.is_dir() and item.name.startswith("models--"):
-                     # Convert models--mlx-community--Qwen3-Coder-30B-A3B-Instruct-4bit to mlx-community/Qwen3-Coder-30B-A3B-Instruct-4bit
-                     model_name = item.name.replace("models--", "").replace("--", "/")
+             model_set = set()

-                     # Include ANY model with "mlx" in the name (case-insensitive)
-                     # This captures: mlx-community/*, */mlx-*, *-mlx-*, etc.
-                     if "mlx" in model_name.lower():
-                         models.append(model_name)
-
-             models = sorted(models)
+             hf_cache = Path.home() / ".cache" / "huggingface" / "hub"
+             if hf_cache.exists():
+                 for item in hf_cache.iterdir():
+                     if item.is_dir() and item.name.startswith("models--"):
+                         # Convert models--mlx-community--Qwen3-Coder-30B-A3B-Instruct-4bit to mlx-community/Qwen3-Coder-30B-A3B-Instruct-4bit
+                         model_name = item.name.replace("models--", "").replace("--", "/")
+
+                         # Include ANY model with "mlx" in the name (case-insensitive)
+                         # This captures: mlx-community/*, */mlx-*, *-mlx-*, etc.
+                         if "mlx" in model_name.lower():
+                             model_set.add(model_name)
+
+             lmstudio_models = Path.home() / ".lmstudio" / "models"
+             if lmstudio_models.exists():
+                 # LM Studio stores models under: ~/.lmstudio/models/<org>/<model>/*
+                 for org_dir in lmstudio_models.iterdir():
+                     if not org_dir.is_dir():
+                         continue
+                     # These org folders are MLX by design (model names may not include "mlx")
+                     include_all_in_org = org_dir.name.lower() in {"mlx-community", "lmstudio-community"}
+                     for model_dir in org_dir.iterdir():
+                         if not model_dir.is_dir():
+                             continue
+                         model_name = f"{org_dir.name}/{model_dir.name}"
+                         if include_all_in_org or "mlx" in model_name.lower():
+                             model_set.add(model_name)
+
+             models = sorted(model_set)

              # Apply new capability filtering if provided
              input_capabilities = kwargs.get('input_capabilities')
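
For reference, the HuggingFace hub cache stores each repository as a directory named models--<org>--<name>, while LM Studio keeps plain <org>/<model> folders; the listing above merges both sources and keeps anything that looks like an MLX build. A condensed standalone sketch of the same scan follows; the paths and the "mlx" heuristic mirror the hunk, and nothing here is a new API.

from pathlib import Path

def local_mlx_models() -> list:
    """Collect MLX model ids from the HF hub cache and the LM Studio cache."""
    found = set()

    hub = Path.home() / ".cache" / "huggingface" / "hub"
    if hub.exists():
        for item in hub.iterdir():
            # models--mlx-community--SomeModel-4bit  ->  mlx-community/SomeModel-4bit
            if item.is_dir() and item.name.startswith("models--"):
                name = item.name.replace("models--", "").replace("--", "/")
                if "mlx" in name.lower():
                    found.add(name)

    lmstudio = Path.home() / ".lmstudio" / "models"
    if lmstudio.exists():
        for org in (p for p in lmstudio.iterdir() if p.is_dir()):
            org_is_mlx = org.name.lower() in {"mlx-community", "lmstudio-community"}
            for model in (p for p in org.iterdir() if p.is_dir()):
                name = f"{org.name}/{model.name}"
                if org_is_mlx or "mlx" in name.lower():
                    found.add(name)

    return sorted(found)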
abstractcore/providers/model_capabilities.py

@@ -134,7 +134,11 @@ def get_model_input_capabilities(model_name: str) -> List[ModelInputCapability]:
      if capabilities.get("audio_support", False):
          input_caps.append(ModelInputCapability.AUDIO)

-     if capabilities.get("video_support", False):
+     video_mode = capabilities.get("video_input_mode")
+     if isinstance(video_mode, str) and video_mode.strip().lower() in {"frames", "native"}:
+         input_caps.append(ModelInputCapability.VIDEO)
+     elif capabilities.get("video_support", False):
+         # Backwards compatibility: legacy boolean indicates native video support.
          input_caps.append(ModelInputCapability.VIDEO)

      return input_caps
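
The new check prefers an explicit video_input_mode string ("frames" or "native") and keeps the legacy video_support boolean as a fallback, so older capability entries still report video input. A small illustration with hypothetical capability entries follows; the field names come from the hunk, the entries themselves are made up.

def accepts_video(capabilities: dict) -> bool:
    """Mirror of the check above, applied to a raw capabilities dict."""
    mode = capabilities.get("video_input_mode")
    if isinstance(mode, str) and mode.strip().lower() in {"frames", "native"}:
        return True
    return bool(capabilities.get("video_support", False))

# Hypothetical entries in the style of model_capabilities.json:
assert accepts_video({"video_input_mode": "frames"})   # new style: frame-based video input
assert accepts_video({"video_support": True})          # legacy boolean: native video support
assert not accepts_video({"audio_support": True})      # no video capability at all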