PyPI - abstractcore - Versions diffs - 2.9.1__py3-none-any.whl → 2.11.4__py3-none-any.whl - Mend

abstractcore-2.9.1-py3-none-any.whl → abstractcore-2.11.4-py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (85) hide show

abstractcore/__init__.py +7 -27
abstractcore/apps/deepsearch.py +9 -4
abstractcore/apps/extractor.py +33 -100
abstractcore/apps/intent.py +19 -0
abstractcore/apps/judge.py +20 -1
abstractcore/apps/summarizer.py +20 -1
abstractcore/architectures/detection.py +34 -1
abstractcore/architectures/response_postprocessing.py +313 -0
abstractcore/assets/architecture_formats.json +38 -8
abstractcore/assets/model_capabilities.json +882 -160
abstractcore/compression/__init__.py +1 -2
abstractcore/compression/glyph_processor.py +6 -4
abstractcore/config/main.py +52 -20
abstractcore/config/manager.py +390 -12
abstractcore/config/vision_config.py +5 -5
abstractcore/core/interface.py +151 -3
abstractcore/core/session.py +16 -10
abstractcore/download.py +1 -1
abstractcore/embeddings/manager.py +20 -6
abstractcore/endpoint/__init__.py +2 -0
abstractcore/endpoint/app.py +458 -0
abstractcore/mcp/client.py +3 -1
abstractcore/media/__init__.py +52 -17
abstractcore/media/auto_handler.py +42 -22
abstractcore/media/base.py +44 -1
abstractcore/media/capabilities.py +12 -33
abstractcore/media/enrichment.py +105 -0
abstractcore/media/handlers/anthropic_handler.py +19 -28
abstractcore/media/handlers/local_handler.py +124 -70
abstractcore/media/handlers/openai_handler.py +19 -31
abstractcore/media/processors/__init__.py +4 -2
abstractcore/media/processors/audio_processor.py +57 -0
abstractcore/media/processors/office_processor.py +8 -3
abstractcore/media/processors/pdf_processor.py +46 -3
abstractcore/media/processors/text_processor.py +22 -24
abstractcore/media/processors/video_processor.py +58 -0
abstractcore/media/types.py +97 -4
abstractcore/media/utils/image_scaler.py +20 -2
abstractcore/media/utils/video_frames.py +219 -0
abstractcore/media/vision_fallback.py +136 -22
abstractcore/processing/__init__.py +32 -3
abstractcore/processing/basic_deepsearch.py +15 -10
abstractcore/processing/basic_intent.py +3 -2
abstractcore/processing/basic_judge.py +3 -2
abstractcore/processing/basic_summarizer.py +1 -1
abstractcore/providers/__init__.py +3 -1
abstractcore/providers/anthropic_provider.py +95 -8
abstractcore/providers/base.py +1516 -81
abstractcore/providers/huggingface_provider.py +546 -69
abstractcore/providers/lmstudio_provider.py +30 -916
abstractcore/providers/mlx_provider.py +382 -35
abstractcore/providers/model_capabilities.py +5 -1
abstractcore/providers/ollama_provider.py +99 -15
abstractcore/providers/openai_compatible_provider.py +406 -180
abstractcore/providers/openai_provider.py +188 -44
abstractcore/providers/openrouter_provider.py +76 -0
abstractcore/providers/registry.py +61 -5
abstractcore/providers/streaming.py +138 -33
abstractcore/providers/vllm_provider.py +92 -817
abstractcore/server/app.py +478 -28
abstractcore/server/audio_endpoints.py +139 -0
abstractcore/server/vision_endpoints.py +1319 -0
abstractcore/structured/handler.py +316 -41
abstractcore/tools/common_tools.py +5501 -2012
abstractcore/tools/comms_tools.py +1641 -0
abstractcore/tools/core.py +37 -7
abstractcore/tools/handler.py +4 -9
abstractcore/tools/parser.py +49 -2
abstractcore/tools/tag_rewriter.py +2 -1
abstractcore/tools/telegram_tdlib.py +407 -0
abstractcore/tools/telegram_tools.py +261 -0
abstractcore/utils/cli.py +1085 -72
abstractcore/utils/structured_logging.py +29 -8
abstractcore/utils/token_utils.py +2 -0
abstractcore/utils/truncation.py +29 -0
abstractcore/utils/version.py +3 -4
abstractcore/utils/vlm_token_calculator.py +12 -2
abstractcore-2.11.4.dist-info/METADATA +562 -0
abstractcore-2.11.4.dist-info/RECORD +133 -0
{abstractcore-2.9.1.dist-info → abstractcore-2.11.4.dist-info}/WHEEL +1 -1
{abstractcore-2.9.1.dist-info → abstractcore-2.11.4.dist-info}/entry_points.txt +1 -0
abstractcore-2.9.1.dist-info/METADATA +0 -1190
abstractcore-2.9.1.dist-info/RECORD +0 -119
{abstractcore-2.9.1.dist-info → abstractcore-2.11.4.dist-info}/licenses/LICENSE +0 -0
{abstractcore-2.9.1.dist-info → abstractcore-2.11.4.dist-info}/top_level.txt +0 -0

abstractcore/assets/model_capabilities.json CHANGED Viewed

@@ -11,7 +11,9 @@
       "source": "OpenAI official docs",
       "canonical_name": "gpt-4",
       "aliases": [],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_support": false,
+      "video_input_mode": "none"
     },
     "gpt-4-turbo": {
       "max_output_tokens": 4096,
@@ -30,7 +32,9 @@
       "aliases": [
         "gpt-4-turbo-preview"
       ],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_support": false,
+      "video_input_mode": "frames"
     },
     "gpt-4-turbo-with-vision": {
       "max_output_tokens": 4096,
@@ -50,7 +54,9 @@
         "gpt-4-turbo-vision",
         "gpt-4-vision-preview"
       ],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_support": false,
+      "video_input_mode": "frames"
     },
     "gpt-4o": {
       "max_output_tokens": 16384,
@@ -59,8 +65,8 @@
       "parallel_tools": true,
       "max_tools": -1,
       "vision_support": true,
-      "audio_support": true,
-      "video_support": true,
+      "audio_support": false,
+      "video_support": false,
       "image_resolutions": [
         "variable"
       ],
@@ -80,7 +86,8 @@
       "source": "OpenAI official docs 2025",
       "canonical_name": "gpt-4o",
       "aliases": [],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_input_mode": "frames"
     },
     "gpt-4o-long-output": {
       "max_output_tokens": 64000,
@@ -89,12 +96,14 @@
       "parallel_tools": true,
       "max_tools": -1,
       "vision_support": true,
-      "audio_support": true,
+      "audio_support": false,
       "notes": "16x output capacity variant",
       "source": "OpenAI official docs",
       "canonical_name": "gpt-4o-long-output",
       "aliases": [],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_support": false,
+      "video_input_mode": "frames"
     },
     "gpt-4o-mini": {
       "max_output_tokens": 16000,
@@ -103,11 +112,13 @@
       "parallel_tools": true,
       "max_tools": -1,
       "vision_support": true,
-      "audio_support": true,
+      "audio_support": false,
       "source": "OpenAI official docs",
       "canonical_name": "gpt-4o-mini",
       "aliases": [],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_support": false,
+      "video_input_mode": "frames"
     },
     "gpt-3.5-turbo": {
       "max_output_tokens": 4096,
@@ -120,7 +131,9 @@
       "source": "OpenAI official docs",
       "canonical_name": "gpt-3.5-turbo",
       "aliases": [],
-      "max_tokens": 16385
+      "max_tokens": 16385,
+      "video_support": false,
+      "video_input_mode": "none"
     },
     "o1": {
       "max_output_tokens": 32768,
@@ -133,7 +146,10 @@
       "source": "OpenAI official docs",
       "canonical_name": "o1",
       "aliases": [],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "o1-mini": {
       "max_output_tokens": 65536,
@@ -145,7 +161,10 @@
       "source": "OpenAI official docs",
       "canonical_name": "o1-mini",
       "aliases": [],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "o3": {
       "max_output_tokens": 32768,
@@ -159,7 +178,9 @@
       "source": "OpenAI official docs",
       "canonical_name": "o3",
       "aliases": [],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_support": false,
+      "video_input_mode": "none"
     },
     "o3-mini": {
       "max_output_tokens": 32768,
@@ -173,7 +194,9 @@
       "source": "OpenAI official docs",
       "canonical_name": "o3-mini",
       "aliases": [],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_support": false,
+      "video_input_mode": "none"
     },
     "claude-3.5-sonnet": {
       "max_output_tokens": 8192,
@@ -196,7 +219,9 @@
       "source": "Anthropic official docs",
       "canonical_name": "claude-3.5-sonnet",
       "aliases": [],
-      "max_tokens": 200000
+      "max_tokens": 200000,
+      "video_support": false,
+      "video_input_mode": "frames"
     },
     "claude-3.7-sonnet": {
       "max_output_tokens": 128000,
@@ -213,7 +238,9 @@
       "source": "Anthropic official docs",
       "canonical_name": "claude-3.7-sonnet",
       "aliases": [],
-      "max_tokens": 200000
+      "max_tokens": 200000,
+      "video_support": false,
+      "video_input_mode": "frames"
     },
     "claude-3.5-haiku": {
       "max_output_tokens": 8192,
@@ -232,7 +259,9 @@
       "aliases": [
         "claude-3-5-haiku-20241022"
       ],
-      "max_tokens": 200000
+      "max_tokens": 200000,
+      "video_support": false,
+      "video_input_mode": "frames"
     },
     "claude-3-opus": {
       "max_output_tokens": 4096,
@@ -248,7 +277,9 @@
       "source": "Anthropic official docs",
       "canonical_name": "claude-3-opus",
       "aliases": [],
-      "max_tokens": 200000
+      "max_tokens": 200000,
+      "video_support": false,
+      "video_input_mode": "frames"
     },
     "claude-3-sonnet": {
       "max_output_tokens": 4096,
@@ -264,7 +295,9 @@
       "source": "Anthropic official docs",
       "canonical_name": "claude-3-sonnet",
       "aliases": [],
-      "max_tokens": 200000
+      "max_tokens": 200000,
+      "video_support": false,
+      "video_input_mode": "frames"
     },
     "claude-3-haiku": {
       "max_output_tokens": 4096,
@@ -280,7 +313,9 @@
       "source": "Anthropic official docs",
       "canonical_name": "claude-3-haiku",
       "aliases": [],
-      "max_tokens": 200000
+      "max_tokens": 200000,
+      "video_support": false,
+      "video_input_mode": "frames"
     },
     "claude-haiku-4-5": {
       "max_output_tokens": 64000,
@@ -300,7 +335,9 @@
         "claude-haiku-4-5-20251001",
         "anthropic/claude-haiku-4-5"
       ],
-      "max_tokens": 200000
+      "max_tokens": 200000,
+      "video_support": false,
+      "video_input_mode": "frames"
     },
     "claude-4-opus": {
       "max_output_tokens": 4096,
@@ -317,7 +354,9 @@
       "source": "Anthropic official docs",
       "canonical_name": "claude-4-opus",
       "aliases": [],
-      "max_tokens": 200000
+      "max_tokens": 200000,
+      "video_support": false,
+      "video_input_mode": "frames"
     },
     "claude-4.1-opus": {
       "max_output_tokens": 4096,
@@ -334,7 +373,9 @@
       "source": "Anthropic official docs",
       "canonical_name": "claude-4.1-opus",
       "aliases": [],
-      "max_tokens": 200000
+      "max_tokens": 200000,
+      "video_support": false,
+      "video_input_mode": "frames"
     },
     "claude-4-sonnet": {
       "max_output_tokens": 8192,
@@ -351,7 +392,9 @@
       "source": "Anthropic official docs",
       "canonical_name": "claude-4-sonnet",
       "aliases": [],
-      "max_tokens": 200000
+      "max_tokens": 200000,
+      "video_support": false,
+      "video_input_mode": "frames"
     },
     "claude-4.5-sonnet": {
       "max_output_tokens": 64000,
@@ -372,7 +415,9 @@
         "claude-sonnet-4-5-20250929",
         "anthropic/claude-sonnet-4-5"
       ],
-      "max_tokens": 200000
+      "max_tokens": 200000,
+      "video_support": false,
+      "video_input_mode": "frames"
     },
     "claude-opus-4-5": {
       "max_output_tokens": 64000,
@@ -392,7 +437,110 @@
         "claude-opus-4-5-20251101",
         "anthropic/claude-opus-4-5"
       ],
-      "max_tokens": 200000
+      "max_tokens": 200000,
+      "video_support": false,
+      "video_input_mode": "frames"
+    },
+    "cogito:3b": {
+      "max_output_tokens": 4096,
+      "tool_support": "native",
+      "structured_output": "native",
+      "parallel_tools": false,
+      "vision_support": false,
+      "audio_support": false,
+      "notes": "Cogito v1 preview (Llama-based) 3B; 128k context; tool calling capable. Max output tokens not specified in model card (conservative 2048 default).",
+      "source": "DeepCogito HF model card + Ollama library",
+      "canonical_name": "cogito:3b",
+      "aliases": [
+        "cogito",
+        "cogito:3b-v1-preview-llama-q8_0",
+        "cogito-v1-preview-llama-3B",
+        "deepcogito/cogito-v1-preview-llama-3B"
+      ],
+      "max_tokens": 128000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
+    },
+    "cogito:8b": {
+      "max_output_tokens": 4096,
+      "tool_support": "native",
+      "structured_output": "native",
+      "parallel_tools": false,
+      "vision_support": false,
+      "audio_support": false,
+      "notes": "Cogito v1 preview (Llama-based) 8B; 128k context; hybrid reasoning model with standard and extended thinking modes; optimized for coding, STEM, instruction following, and tool calling.",
+      "source": "DeepCogito HF model card + Ollama library",
+      "canonical_name": "cogito:8b",
+      "aliases": [
+        "cogito:8b-v1-preview-llama",
+        "cogito-v1-preview-llama-8B",
+        "deepcogito/cogito-v1-preview-llama-8B"
+      ],
+      "max_tokens": 128000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
+    },
+    "cogito:14b": {
+      "max_output_tokens": 4096,
+      "tool_support": "native",
+      "structured_output": "native",
+      "parallel_tools": false,
+      "vision_support": false,
+      "audio_support": false,
+      "notes": "Cogito v1 preview (Qwen2-based) 14B; 128k context; hybrid reasoning model with standard and extended thinking modes; optimized for coding, STEM, instruction following, and tool calling.",
+      "source": "DeepCogito HF model card + Ollama library",
+      "canonical_name": "cogito:14b",
+      "aliases": [
+        "cogito:14b-v1-preview-qwen",
+        "cogito-v1-preview-qwen-14B",
+        "deepcogito/cogito-v1-preview-qwen-14B"
+      ],
+      "max_tokens": 128000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
+    },
+    "cogito:32b": {
+      "max_output_tokens": 4096,
+      "tool_support": "native",
+      "structured_output": "native",
+      "parallel_tools": false,
+      "vision_support": false,
+      "audio_support": false,
+      "notes": "Cogito v1 preview (Qwen-based) 32B; 128k context; hybrid reasoning model with standard and extended thinking modes; optimized for coding, STEM, instruction following, and tool calling.",
+      "source": "DeepCogito HF model card + Ollama library",
+      "canonical_name": "cogito:32b",
+      "aliases": [
+        "cogito:32b-v1-preview-qwen",
+        "cogito-v1-preview-qwen-32B",
+        "deepcogito/cogito-v1-preview-qwen-32B"
+      ],
+      "max_tokens": 128000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
+    },
+    "cogito:70b": {
+      "max_output_tokens": 4096,
+      "tool_support": "native",
+      "structured_output": "native",
+      "parallel_tools": false,
+      "vision_support": false,
+      "audio_support": false,
+      "notes": "Cogito v1 preview (Llama 3.1-based) 70B; 128k context; hybrid reasoning model with standard and extended thinking modes; optimized for coding, STEM, instruction following, and tool calling.",
+      "source": "DeepCogito HF model card + Ollama library",
+      "canonical_name": "cogito:70b",
+      "aliases": [
+        "cogito:70b-v1-preview-llama",
+        "cogito-v1-preview-llama-70B",
+        "deepcogito/cogito-v1-preview-llama-70B"
+      ],
+      "max_tokens": 128000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "llama-3.2-1b": {
       "max_output_tokens": 2048,
@@ -405,7 +553,10 @@
       "source": "Meta official docs",
       "canonical_name": "llama-3.2-1b",
       "aliases": [],
-      "max_tokens": 8192
+      "max_tokens": 8192,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "llama-3.2-3b": {
       "max_output_tokens": 2048,
@@ -418,7 +569,10 @@
       "source": "Meta official docs",
       "canonical_name": "llama-3.2-3b",
       "aliases": [],
-      "max_tokens": 8192
+      "max_tokens": 8192,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "llama-3.2-11b-vision": {
       "max_output_tokens": 2048,
@@ -434,7 +588,10 @@
       "source": "Meta official docs",
       "canonical_name": "llama-3.2-11b-vision",
       "aliases": [],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "frames"
     },
     "llama-3.3-70b": {
       "max_output_tokens": 8192,
@@ -447,7 +604,10 @@
       "source": "Meta official docs",
       "canonical_name": "llama-3.3-70b",
       "aliases": [],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "llama-3.1-8b": {
       "max_output_tokens": 8192,
@@ -460,7 +620,10 @@
       "source": "Meta official docs",
       "canonical_name": "llama-3.1-8b",
       "aliases": [],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "llama-3.1-70b": {
       "max_output_tokens": 8192,
@@ -473,7 +636,10 @@
       "source": "Meta official docs",
       "canonical_name": "llama-3.1-70b",
       "aliases": [],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "llama-3.1-405b": {
       "max_output_tokens": 8192,
@@ -486,7 +652,10 @@
       "source": "Meta official docs",
       "canonical_name": "llama-3.1-405b",
       "aliases": [],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "llama-4": {
       "max_output_tokens": 8192,
@@ -499,7 +668,29 @@
       "source": "Meta announcement",
       "canonical_name": "llama-4",
       "aliases": [],
-      "max_tokens": 10000000
+      "max_tokens": 10000000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "frames"
+    },
+    "llava-next-video-7b-hf": {
+      "max_output_tokens": 1024,
+      "tool_support": "prompted",
+      "structured_output": "prompted",
+      "parallel_tools": false,
+      "vision_support": true,
+      "video_support": true,
+      "audio_support": false,
+      "notes": "LLaVA-NeXT-Video 7B (HuggingFace Transformers video-input VLM)",
+      "source": "HuggingFace model card / Transformers docs",
+      "canonical_name": "llava-next-video-7b-hf",
+      "aliases": [
+        "llava-hf/LLaVA-NeXT-Video-7B-hf",
+        "LLaVA-NeXT-Video-7B-hf"
+      ],
+      "max_tokens": 10250,
+      "max_tools": -1,
+      "video_input_mode": "native"
     },
     "qwen2.5-0.5b": {
       "max_output_tokens": 8192,
@@ -512,7 +703,10 @@
       "source": "Alibaba official docs",
       "canonical_name": "qwen2.5-0.5b",
       "aliases": [],
-      "max_tokens": 32768
+      "max_tokens": 32768,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "qwen2.5-1.5b": {
       "max_output_tokens": 8192,
@@ -525,7 +719,10 @@
       "source": "Alibaba official docs",
       "canonical_name": "qwen2.5-1.5b",
       "aliases": [],
-      "max_tokens": 32768
+      "max_tokens": 32768,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "qwen2.5-3b": {
       "max_output_tokens": 8192,
@@ -538,7 +735,10 @@
       "source": "Alibaba official docs",
       "canonical_name": "qwen2.5-3b",
       "aliases": [],
-      "max_tokens": 32768
+      "max_tokens": 32768,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "qwen2.5-7b": {
       "max_output_tokens": 8192,
@@ -551,7 +751,10 @@
       "source": "Alibaba official docs",
       "canonical_name": "qwen2.5-7b",
       "aliases": [],
-      "max_tokens": 131072
+      "max_tokens": 131072,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "qwen2.5-14b": {
       "max_output_tokens": 8192,
@@ -564,7 +767,10 @@
       "source": "Alibaba official docs",
       "canonical_name": "qwen2.5-14b",
       "aliases": [],
-      "max_tokens": 131072
+      "max_tokens": 131072,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "qwen2.5-32b": {
       "max_output_tokens": 8192,
@@ -577,7 +783,10 @@
       "source": "Alibaba official docs",
       "canonical_name": "qwen2.5-32b",
       "aliases": [],
-      "max_tokens": 131072
+      "max_tokens": 131072,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "qwen2.5-72b": {
       "max_output_tokens": 8192,
@@ -590,7 +799,10 @@
       "source": "Alibaba official docs",
       "canonical_name": "qwen2.5-72b",
       "aliases": [],
-      "max_tokens": 131072
+      "max_tokens": 131072,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "qwen3-0.6b": {
       "max_output_tokens": 8192,
@@ -600,11 +812,15 @@
       "vision_support": false,
       "audio_support": false,
       "thinking_support": true,
+      "thinking_control": "/no_think",
       "notes": "Qwen3 base model with thinking capabilities",
       "source": "Alibaba Qwen3 technical report",
       "canonical_name": "qwen3-0.6b",
       "aliases": [],
-      "max_tokens": 32768
+      "max_tokens": 32768,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "qwen3-1.7b": {
       "max_output_tokens": 8192,
@@ -614,11 +830,15 @@
       "vision_support": false,
       "audio_support": false,
       "thinking_support": true,
+      "thinking_control": "/no_think",
       "notes": "Qwen3 1.7B model with thinking capabilities",
       "source": "Alibaba Qwen3 technical report",
       "canonical_name": "qwen3-1.7b",
       "aliases": [],
-      "max_tokens": 32768
+      "max_tokens": 32768,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "qwen3-4b": {
       "max_output_tokens": 8192,
@@ -628,11 +848,54 @@
       "vision_support": false,
       "audio_support": false,
       "thinking_support": true,
+      "thinking_control": "/no_think",
       "notes": "Qwen3 4B model with extended context via YaRN scaling",
       "source": "Alibaba Qwen3 technical report",
       "canonical_name": "qwen3-4b",
       "aliases": [],
-      "max_tokens": 131072
+      "max_tokens": 131072,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
+    },
+    "qwen3-4b-2507": {
+      "max_output_tokens": 8192,
+      "tool_support": "native",
+      "structured_output": "native",
+      "parallel_tools": false,
+      "vision_support": false,
+      "audio_support": false,
+      "thinking_support": false,
+      "notes": "Qwen3-4B-2507 non-thinking instruct variant. Supports only non-thinking mode; does not generate <think></think> blocks.",
+      "source": "LM Studio model card (Qwen/Qwen3-4B-2507) and Qwen3 2507 release notes",
+      "canonical_name": "qwen3-4b-2507",
+      "aliases": [
+        "qwen/qwen3-4b-2507"
+      ],
+      "max_tokens": 262144,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
+    },
+    "qwen3-4b-thinking-2507": {
+      "max_output_tokens": 8192,
+      "tool_support": "native",
+      "structured_output": "native",
+      "parallel_tools": false,
+      "vision_support": false,
+      "audio_support": false,
+      "thinking_support": true,
+      "thinking_output_field": "reasoning_content",
+      "notes": "Qwen3-4B-Thinking-2507 thinking-only variant. The decoded output often contains only the closing </think> tag, with the opening <think> supplied by the chat template.",
+      "source": "LM Studio model card (Qwen/Qwen3-4B-Thinking-2507) and Qwen3 Thinking 2507 docs",
+      "canonical_name": "qwen3-4b-thinking-2507",
+      "aliases": [
+        "qwen/qwen3-4b-thinking-2507"
+      ],
+      "max_tokens": 262144,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "qwen3-32b": {
       "max_output_tokens": 8192,
@@ -642,11 +905,107 @@
       "vision_support": false,
       "audio_support": false,
       "thinking_support": true,
+      "thinking_control": "/no_think",
       "notes": "Qwen3 32B model with advanced thinking capabilities",
       "source": "Alibaba Qwen3 technical report",
       "canonical_name": "qwen3-32b",
       "aliases": [],
-      "max_tokens": 131072
+      "max_tokens": 131072,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
+    },
+    "sera-32b": {
+      "max_output_tokens": 8192,
+      "tool_support": "prompted",
+      "structured_output": "prompted",
+      "parallel_tools": false,
+      "vision_support": false,
+      "audio_support": false,
+      "thinking_support": true,
+      "thinking_control": "/no_think",
+      "notes": "AllenAI SERA-32B coding agent model (Qwen3-32B-based). 32K max sequence length; tool calls are emitted in <tool_call>...</tool_call> blocks.",
+      "source": "SERA paper (sera.pdf) and AllenAI model card (huggingface.co/allenai/SERA-32B)",
+      "canonical_name": "sera-32b",
+      "aliases": [
+        "allenai/SERA-32B",
+        "SERA-32B",
+        "SERA32B",
+        "SERA_32B"
+      ],
+      "max_tokens": 32768,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
+    },
+    "sera-32b-ga": {
+      "max_output_tokens": 8192,
+      "tool_support": "prompted",
+      "structured_output": "prompted",
+      "parallel_tools": false,
+      "vision_support": false,
+      "audio_support": false,
+      "thinking_support": true,
+      "thinking_control": "/no_think",
+      "notes": "AllenAI SERA-32B-GA coding agent model (Qwen3-32B-based). 32K max sequence length; tool calls are emitted in <tool_call>...</tool_call> blocks.",
+      "source": "SERA paper (sera.pdf) and AllenAI model card (huggingface.co/allenai/SERA-32B-GA)",
+      "canonical_name": "sera-32b-ga",
+      "aliases": [
+        "allenai/SERA-32B-GA",
+        "SERA-32B-GA",
+        "SERA32BGA",
+        "SERA_32B_GA"
+      ],
+      "max_tokens": 32768,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
+    },
+    "sera-8b": {
+      "max_output_tokens": 8192,
+      "tool_support": "prompted",
+      "structured_output": "prompted",
+      "parallel_tools": false,
+      "vision_support": false,
+      "audio_support": false,
+      "thinking_support": true,
+      "thinking_control": "/no_think",
+      "notes": "AllenAI SERA-8B coding agent model (Qwen3-8B-based). 32K max sequence length; tool calls are emitted in <tool_call>...</tool_call> blocks.",
+      "source": "SERA paper (sera.pdf) and AllenAI model card (huggingface.co/allenai/SERA-8B)",
+      "canonical_name": "sera-8b",
+      "aliases": [
+        "allenai/SERA-8B",
+        "SERA-8B",
+        "SERA8B",
+        "SERA_8B"
+      ],
+      "max_tokens": 32768,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
+    },
+    "sera-8b-ga": {
+      "max_output_tokens": 8192,
+      "tool_support": "prompted",
+      "structured_output": "prompted",
+      "parallel_tools": false,
+      "vision_support": false,
+      "audio_support": false,
+      "thinking_support": true,
+      "thinking_control": "/no_think",
+      "notes": "AllenAI SERA-8B-GA coding agent model (Qwen3-8B-based). 32K max sequence length; tool calls are emitted in <tool_call>...</tool_call> blocks.",
+      "source": "SERA paper (sera.pdf) and AllenAI model card (huggingface.co/allenai/SERA-8B-GA)",
+      "canonical_name": "sera-8b-ga",
+      "aliases": [
+        "allenai/SERA-8B-GA",
+        "SERA-8B-GA",
+        "SERA8BGA",
+        "SERA_8B_GA"
+      ],
+      "max_tokens": 32768,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "qwen3-30b-a3b": {
       "max_output_tokens": 8192,
@@ -656,11 +1015,15 @@
       "vision_support": false,
       "audio_support": false,
       "thinking_support": true,
+      "thinking_control": "/no_think",
       "notes": "Qwen3 MoE model with 4-bit precision, 30B total/3B active parameters",
       "source": "Alibaba Qwen3 technical report",
       "canonical_name": "qwen3-30b-a3b",
       "aliases": [],
-      "max_tokens": 40960
+      "max_tokens": 40960,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "qwen3-30b-a3b-2507": {
       "max_output_tokens": 8192,
@@ -676,7 +1039,10 @@
       "aliases": [
         "qwen/qwen3-30b-a3b-2507"
       ],
-      "max_tokens": 262144
+      "max_tokens": 262144,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "qwen3-coder-30b": {
       "max_output_tokens": 65536,
@@ -698,7 +1064,10 @@
         "qwen3-coder-30b-a3b",
         "qwen3-coder-30b-a3b-instruct"
       ],
-      "max_tokens": 262144
+      "max_tokens": 262144,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "qwen2-vl": {
       "max_output_tokens": 8192,
@@ -713,7 +1082,10 @@
       "source": "Alibaba official docs",
       "canonical_name": "qwen2-vl",
       "aliases": [],
-      "max_tokens": 32768
+      "max_tokens": 32768,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "frames"
     },
     "qwen2.5-vl": {
       "max_output_tokens": 8192,
@@ -729,7 +1101,10 @@
       "source": "Alibaba official docs",
       "canonical_name": "qwen2.5-vl",
       "aliases": [],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "frames"
     },
     "phi-2": {
       "max_output_tokens": 2048,
@@ -741,7 +1116,10 @@
       "source": "Microsoft official docs",
       "canonical_name": "phi-2",
       "aliases": [],
-      "max_tokens": 2048
+      "max_tokens": 2048,
+      "video_support": false,
+      "max_tools": 0,
+      "video_input_mode": "none"
     },
     "phi-3-mini": {
       "max_output_tokens": 4096,
@@ -754,7 +1132,10 @@
       "source": "Microsoft official docs",
       "canonical_name": "phi-3-mini",
       "aliases": [],
-      "max_tokens": 4096
+      "max_tokens": 4096,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "phi-3-small": {
       "max_output_tokens": 8192,
@@ -766,7 +1147,10 @@
       "source": "Microsoft official docs",
       "canonical_name": "phi-3-small",
       "aliases": [],
-      "max_tokens": 8192
+      "max_tokens": 8192,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "phi-3-medium": {
       "max_output_tokens": 4096,
@@ -778,7 +1162,10 @@
       "source": "Microsoft official docs",
       "canonical_name": "phi-3-medium",
       "aliases": [],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "phi-3.5-mini": {
       "max_output_tokens": 4096,
@@ -790,7 +1177,10 @@
       "source": "Microsoft official docs",
       "canonical_name": "phi-3.5-mini",
       "aliases": [],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "phi-3.5-moe": {
       "max_output_tokens": 4096,
@@ -803,7 +1193,10 @@
       "source": "Microsoft official docs",
       "canonical_name": "phi-3.5-moe",
       "aliases": [],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "phi-3-vision": {
       "max_output_tokens": 4096,
@@ -818,7 +1211,10 @@
       "source": "Microsoft official docs",
       "canonical_name": "phi-3-vision",
       "aliases": [],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "frames"
     },
     "phi-4": {
       "max_output_tokens": 16000,
@@ -831,7 +1227,10 @@
       "source": "Microsoft official docs",
       "canonical_name": "phi-4",
       "aliases": [],
-      "max_tokens": 16000
+      "max_tokens": 16000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "mistral-7b": {
       "max_output_tokens": 8192,
@@ -843,7 +1242,10 @@
       "source": "Mistral AI docs",
       "canonical_name": "mistral-7b",
       "aliases": [],
-      "max_tokens": 8192
+      "max_tokens": 8192,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "mixtral-8x7b": {
       "max_output_tokens": 32768,
@@ -856,7 +1258,10 @@
       "source": "Mistral AI docs",
       "canonical_name": "mixtral-8x7b",
       "aliases": [],
-      "max_tokens": 32768
+      "max_tokens": 32768,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "mixtral-8x22b": {
       "max_output_tokens": 65536,
@@ -868,7 +1273,10 @@
       "source": "Mistral AI docs",
       "canonical_name": "mixtral-8x22b",
       "aliases": [],
-      "max_tokens": 65536
+      "max_tokens": 65536,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "mistral-small": {
       "max_output_tokens": 32768,
@@ -880,7 +1288,10 @@
       "source": "Mistral AI docs",
       "canonical_name": "mistral-small",
       "aliases": [],
-      "max_tokens": 32768
+      "max_tokens": 32768,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "mistral-medium": {
       "max_output_tokens": 32768,
@@ -892,7 +1303,10 @@
       "source": "Mistral AI docs",
       "canonical_name": "mistral-medium",
       "aliases": [],
-      "max_tokens": 32768
+      "max_tokens": 32768,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "mistral-large": {
       "max_output_tokens": 128000,
@@ -904,7 +1318,10 @@
       "source": "Mistral AI docs",
       "canonical_name": "mistral-large",
       "aliases": [],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "codestral": {
       "max_output_tokens": 32768,
@@ -917,7 +1334,10 @@
       "source": "Mistral AI docs",
       "canonical_name": "codestral",
       "aliases": [],
-      "max_tokens": 32768
+      "max_tokens": 32768,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "magistral-small-2509": {
       "max_output_tokens": 8192,
@@ -937,7 +1357,9 @@
       "aliases": [
         "mistralai/magistral-small-2509"
       ],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "max_tools": -1,
+      "video_input_mode": "frames"
     },
     "Qwen/Qwen3-VL-8B-Instruct-FP8": {
       "max_output_tokens": 8192,
@@ -961,7 +1383,9 @@
         "qwen3-vl-8b-fp8",
         "qwen3-vl-8b-instruct-fp8"
       ],
-      "max_tokens": 262144
+      "max_tokens": 262144,
+      "max_tools": -1,
+      "video_input_mode": "frames"
     },
     "llama3.2-vision:11b": {
       "max_output_tokens": 4096,
@@ -1012,7 +1436,9 @@
         "llama3.2-vision-11b",
         "llama-3.2-vision:11b"
       ],
-      "max_tokens": 131072
+      "max_tokens": 131072,
+      "max_tools": -1,
+      "video_input_mode": "frames"
     },
     "llama3.2-vision:70b": {
       "max_output_tokens": 4096,
@@ -1038,7 +1464,9 @@
         "llama3.2-vision-70b",
         "llama-3.2-vision:70b"
       ],
-      "max_tokens": 131072
+      "max_tokens": 131072,
+      "max_tools": -1,
+      "video_input_mode": "frames"
     },
     "llama3.2-vision:90b": {
       "max_output_tokens": 4096,
@@ -1064,7 +1492,9 @@
         "llama3.2-vision-90b",
         "llama-3.2-vision:90b"
       ],
-      "max_tokens": 131072
+      "max_tokens": 131072,
+      "max_tools": -1,
+      "video_input_mode": "frames"
     },
     "gemma-2b": {
       "max_output_tokens": 8192,
@@ -1076,7 +1506,10 @@
       "source": "Google docs",
       "canonical_name": "gemma-2b",
       "aliases": [],
-      "max_tokens": 8192
+      "max_tokens": 8192,
+      "video_support": false,
+      "max_tools": 0,
+      "video_input_mode": "none"
     },
     "gemma-7b": {
       "max_output_tokens": 8192,
@@ -1088,7 +1521,10 @@
       "source": "Google docs",
       "canonical_name": "gemma-7b",
       "aliases": [],
-      "max_tokens": 8192
+      "max_tokens": 8192,
+      "video_support": false,
+      "max_tools": 0,
+      "video_input_mode": "none"
     },
     "gemma2-9b": {
       "max_output_tokens": 8192,
@@ -1100,7 +1536,10 @@
       "source": "Google docs",
       "canonical_name": "gemma2-9b",
       "aliases": [],
-      "max_tokens": 8192
+      "max_tokens": 8192,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "gemma2-27b": {
       "max_output_tokens": 8192,
@@ -1112,7 +1551,10 @@
       "source": "Google docs",
       "canonical_name": "gemma2-27b",
       "aliases": [],
-      "max_tokens": 8192
+      "max_tokens": 8192,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "gemma3": {
       "max_output_tokens": 8192,
@@ -1125,7 +1567,10 @@
       "source": "Google docs",
       "canonical_name": "gemma3",
       "aliases": [],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "codegemma": {
       "max_output_tokens": 8192,
@@ -1138,7 +1583,10 @@
       "source": "Google docs",
       "canonical_name": "codegemma",
       "aliases": [],
-      "max_tokens": 8192
+      "max_tokens": 8192,
+      "video_support": false,
+      "max_tools": 0,
+      "video_input_mode": "none"
     },
     "paligemma": {
       "max_output_tokens": 1024,
@@ -1156,7 +1604,10 @@
       "source": "Google docs",
       "canonical_name": "paligemma",
       "aliases": [],
-      "max_tokens": 8192
+      "max_tokens": 8192,
+      "video_support": false,
+      "max_tools": 0,
+      "video_input_mode": "frames"
     },
     "glm-4": {
       "max_output_tokens": 4096,
@@ -1169,7 +1620,10 @@
       "source": "Model documentation",
       "canonical_name": "glm-4",
       "aliases": [],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "glm-4-9b": {
       "max_output_tokens": 4096,
@@ -1182,7 +1636,10 @@
       "source": "Model documentation",
       "canonical_name": "glm-4-9b",
       "aliases": [],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "glm-4-9b-0414-4bit": {
       "max_output_tokens": 4096,
@@ -1195,7 +1652,10 @@
       "source": "Model documentation",
       "canonical_name": "glm-4-9b-0414-4bit",
       "aliases": [],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "deepseek-r1": {
       "max_output_tokens": 8192,
@@ -1208,7 +1668,10 @@
       "source": "MLX community",
       "canonical_name": "deepseek-r1",
       "aliases": [],
-      "max_tokens": 32768
+      "max_tokens": 32768,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "qwen3": {
       "max_output_tokens": 8192,
@@ -1221,7 +1684,10 @@
       "source": "MLX community",
       "canonical_name": "qwen3",
       "aliases": [],
-      "max_tokens": 32768
+      "max_tokens": 32768,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "qwen3-14b": {
       "max_output_tokens": 8192,
@@ -1231,11 +1697,15 @@
       "vision_support": false,
       "audio_support": false,
       "thinking_support": true,
+      "thinking_control": "/no_think",
       "notes": "Qwen3 14B model with thinking capabilities",
       "source": "Alibaba Qwen3 technical report",
       "canonical_name": "qwen3-14b",
       "aliases": [],
-      "max_tokens": 131072
+      "max_tokens": 131072,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "qwen3-next-80b-a3b": {
       "max_output_tokens": 16384,
@@ -1249,9 +1719,13 @@
       "source": "Alibaba Qwen3-Next technical report",
       "canonical_name": "qwen3-next-80b-a3b",
       "aliases": [
-        "qwen/qwen3-next-80b"
+        "qwen/qwen3-next-80b",
+        "qwen3-next-80b"
       ],
-      "max_tokens": 262144
+      "max_tokens": 262144,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "gpt-5": {
       "max_output_tokens": 8192,
@@ -1265,7 +1739,9 @@
       "source": "OpenAI official docs",
       "canonical_name": "gpt-5",
       "aliases": [],
-      "max_tokens": 200000
+      "max_tokens": 200000,
+      "video_support": false,
+      "video_input_mode": "frames"
     },
     "gpt-5-turbo": {
       "max_output_tokens": 4096,
@@ -1279,7 +1755,9 @@
       "source": "OpenAI official docs",
       "canonical_name": "gpt-5-turbo",
       "aliases": [],
-      "max_tokens": 200000
+      "max_tokens": 200000,
+      "video_support": false,
+      "video_input_mode": "frames"
     },
     "gpt-5-pro": {
       "max_output_tokens": 16384,
@@ -1293,7 +1771,9 @@
       "source": "OpenAI official docs",
       "canonical_name": "gpt-5-pro",
       "aliases": [],
-      "max_tokens": 200000
+      "max_tokens": 200000,
+      "video_support": false,
+      "video_input_mode": "frames"
     },
     "gpt-5-mini": {
       "max_output_tokens": 8192,
@@ -1307,7 +1787,9 @@
       "source": "OpenAI official docs",
       "canonical_name": "gpt-5-mini",
       "aliases": [],
-      "max_tokens": 200000
+      "max_tokens": 200000,
+      "video_support": false,
+      "video_input_mode": "frames"
     },
     "gpt-5-vision": {
       "max_output_tokens": 8192,
@@ -1324,7 +1806,9 @@
       "source": "OpenAI official docs",
       "canonical_name": "gpt-5-vision",
       "aliases": [],
-      "max_tokens": 200000
+      "max_tokens": 200000,
+      "video_support": false,
+      "video_input_mode": "frames"
     },
     "qwen3-8b": {
       "max_output_tokens": 8192,
@@ -1334,11 +1818,15 @@
       "vision_support": false,
       "audio_support": false,
       "thinking_support": true,
+      "thinking_control": "/no_think",
       "notes": "Qwen3 8B model with thinking capabilities",
       "source": "Alibaba Qwen3 technical report",
       "canonical_name": "qwen3-8b",
       "aliases": [],
-      "max_tokens": 131072
+      "max_tokens": 131072,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "qwen3-235b-a22b": {
       "max_output_tokens": 8192,
@@ -1348,11 +1836,15 @@
       "vision_support": false,
       "audio_support": false,
       "thinking_support": true,
+      "thinking_control": "/no_think",
       "notes": "Qwen3 MoE model with 4-bit precision, 235B total/22B active parameters",
       "source": "Alibaba Qwen3 technical report",
       "canonical_name": "qwen3-235b-a22b",
       "aliases": [],
-      "max_tokens": 40960
+      "max_tokens": 40960,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "qwen3-vl": {
       "max_output_tokens": 8192,
@@ -1372,7 +1864,9 @@
       "source": "Alibaba Qwen3-VL technical report",
       "canonical_name": "qwen3-vl",
       "aliases": [],
-      "max_tokens": 131072
+      "max_tokens": 131072,
+      "max_tools": -1,
+      "video_input_mode": "native"
     },
     "qwen2.5-vl-7b": {
       "max_output_tokens": 8192,
@@ -1400,8 +1894,11 @@
         "qwen/qwen2.5-vl-7b",
         "unsloth/Qwen2.5-VL-7B-Instruct-GGUF"
       ],
-      "max_tokens": 128000
-    },
+      "max_tokens": 128000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "frames"
+    },
     "gemma3-4b": {
       "max_output_tokens": 8192,
       "tool_support": "native",
@@ -1429,7 +1926,9 @@
       "aliases": [
         "gemma3:4b"
       ],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "max_tools": -1,
+      "video_input_mode": "frames"
     },
     "qwen2.5vl:7b": {
       "max_output_tokens": 8192,
@@ -1451,7 +1950,10 @@
       "aliases": [
         "qwen2.5vl"
       ],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "frames"
     },
     "gemma3:4b-it-qat": {
       "max_output_tokens": 8192,
@@ -1472,7 +1974,9 @@
       "source": "Ollama model library",
       "canonical_name": "gemma3:4b-it-qat",
       "aliases": [],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "max_tools": -1,
+      "video_input_mode": "frames"
     },
     "gemma3n:e4b": {
       "max_output_tokens": 8192,
@@ -1501,7 +2005,9 @@
         "gemma3n:e2b:latest",
         "gemma3n:e2b"
       ],
-      "max_tokens": 32768
+      "max_tokens": 32768,
+      "max_tools": -1,
+      "video_input_mode": "native"
     },
     "seed-oss": {
       "max_output_tokens": 8192,
@@ -1516,7 +2022,10 @@
       "source": "ByteDance SEED-OSS documentation",
       "canonical_name": "seed-oss",
       "aliases": [],
-      "max_tokens": 524288
+      "max_tokens": 524288,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "glm-4.5": {
       "max_output_tokens": 4096,
@@ -1530,7 +2039,10 @@
       "source": "Zhipu AI GLM-4.5 announcement",
       "canonical_name": "glm-4.5",
       "aliases": [],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "glm-4.6": {
       "max_output_tokens": 4096,
@@ -1548,7 +2060,10 @@
         "zai-org/GLM-4.6-FP8",
         "glm-4.6-fp8"
       ],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "glm-4.5-air": {
       "max_output_tokens": 4096,
@@ -1562,7 +2077,55 @@
       "source": "Zhipu AI GLM-4.5-Air announcement",
       "canonical_name": "glm-4.5-air",
       "aliases": [],
-      "max_tokens": 128000
+      "max_tokens": 128000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
+    },
+    "glm-4.7-flash": {
+      "max_output_tokens": 131072,
+      "tool_support": "native",
+      "structured_output": "native",
+      "parallel_tools": true,
+      "vision_support": false,
+      "audio_support": false,
+      "thinking_support": true,
+      "thinking_modes": [
+        "interleaved_thinking",
+        "preserved_thinking",
+        "turn_level_thinking"
+      ],
+      "architecture": "mixture_of_experts",
+      "total_parameters": "30B",
+      "active_parameters": "3B",
+      "experts": 64,
+      "shared_experts": 1,
+      "experts_activated": 4,
+      "tensor_type": "BF16",
+      "attention_mechanism": "grouped_query_attention",
+      "positional_encoding": "rope",
+      "transformer_layers": 47,
+      "agentic_coding": true,
+      "ui_generation": true,
+      "notes": "GLM-4.7-Flash lightweight MoE model (30B total/3B active, 64 routed experts + 1 shared/4 activated) optimized for high-speed agentic coding and complex reasoning. Features Interleaved Thinking (reasoning before actions), Preserved Thinking (cross-turn consistency), and Turn-level Thinking (per-turn toggle). BF16 precision (~62.5GB). Compatible with vLLM, SGLang, and Hugging Face Transformers. Strong performance on SWE-bench Verified (59.2%), AIME 25 (91.6%), \u03c4\u00b2-Bench (79.5%), GPQA (75.2%), HLE (14.4%). MIT license. Recommended: temp 0.7 for coding, 0 for tool tasks.",
+      "source": "HuggingFace zai-org/GLM-4.7-Flash official model card",
+      "canonical_name": "glm-4.7-flash",
+      "aliases": [
+        "zai-org/glm-4.7-flash",
+        "z-ai/glm-4.7-flash"
+      ],
+      "max_tokens": 128000,
+      "license": "MIT",
+      "inference_parameters": {
+        "temperature": 1.0,
+        "top_p": 0.95,
+        "max_new_tokens": 131072,
+        "coding_temp": 0.7,
+        "agentic_temp": 0.0
+      },
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "llama-4-109b": {
       "max_output_tokens": 8192,
@@ -1575,7 +2138,10 @@
       "source": "Meta LLaMA 4 announcement",
       "canonical_name": "llama-4-109b",
       "aliases": [],
-      "max_tokens": 10000000
+      "max_tokens": 10000000,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "frames"
     },
     "granite3.2:2b": {
       "max_output_tokens": 8192,
@@ -1590,7 +2156,10 @@
       "aliases": [
         "granite3.2-2b"
       ],
-      "max_tokens": 32768
+      "max_tokens": 32768,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "granite3.2:8b": {
       "max_output_tokens": 8192,
@@ -1605,7 +2174,10 @@
       "aliases": [
         "granite3.2-8b"
       ],
-      "max_tokens": 32768
+      "max_tokens": 32768,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "granite3.2-vision:2b": {
       "max_output_tokens": 8192,
@@ -1631,7 +2203,9 @@
         "granite-vision",
         "ibm-granite-vision"
       ],
-      "max_tokens": 32768
+      "max_tokens": 32768,
+      "max_tools": -1,
+      "video_input_mode": "frames"
     },
     "gemini-2.5-flash": {
       "max_output_tokens": 8192,
@@ -1657,7 +2231,8 @@
       "aliases": [
         "gemini-2.5-flash-001"
       ],
-      "max_tokens": 1000000
+      "max_tokens": 1000000,
+      "video_input_mode": "native"
     },
     "gemini-2.5-pro": {
       "max_output_tokens": 65536,
@@ -1673,7 +2248,7 @@
         "448x448",
         "1024x1024"
       ],
-      "max_image_resolution":  "768x768",
+      "max_image_resolution": "768x768",
       "image_tokenization_method": "gemini_vision_encoder",
       "thinking_support": true,
       "thinking_budget": true,
@@ -1683,7 +2258,8 @@
       "aliases": [
         "gemini-2.5-pro-001"
       ],
-      "max_tokens": 1048576
+      "max_tokens": 1048576,
+      "video_input_mode": "native"
     },
     "granite3.3:2b": {
       "max_output_tokens": 8192,
@@ -1698,7 +2274,10 @@
       "aliases": [
         "granite3.3-2b"
       ],
-      "max_tokens": 32768
+      "max_tokens": 32768,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "granite3.3:8b": {
       "max_output_tokens": 8192,
@@ -1713,7 +2292,10 @@
       "aliases": [
         "granite3.3-8b"
       ],
-      "max_tokens": 32768
+      "max_tokens": 32768,
+      "video_support": false,
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "embeddinggemma:300m": {
       "max_output_tokens": 0,
@@ -1729,7 +2311,42 @@
         "google/embeddinggemma-300m"
       ],
       "max_tokens": 8192,
-      "model_type": "embedding"
+      "model_type": "embedding",
+      "video_support": false,
+      "max_tools": 0,
+      "video_input_mode": "none"
+    },
+    "nomic-embed-text-v1.5": {
+      "max_output_tokens": 0,
+      "tool_support": "none",
+      "structured_output": "none",
+      "parallel_tools": false,
+      "vision_support": false,
+      "audio_support": false,
+      "notes": "Nomic Embed v1.5 text embedding model (Matryoshka). Embedding dims commonly used: 768 (and truncation-friendly sizes like 512/256/128/64). Not a text-generative model.",
+      "source": "Nomic AI documentation + HuggingFace model card",
+      "canonical_name": "nomic-embed-text-v1.5",
+      "aliases": [
+        "nomic-ai/nomic-embed-text-v1.5",
+        "nomic-embed-text-v1.5",
+        "text-embedding-nomic-embed-text-v1.5",
+        "text-embedding-nomic-embed-text-v1.5@q6_k",
+        "nomic-embed-text-v1.5@q6_k"
+      ],
+      "max_tokens": 8192,
+      "model_type": "embedding",
+      "embedding_dimension": 768,
+      "matryoshka_dims": [
+        768,
+        512,
+        256,
+        128,
+        64
+      ],
+      "embedding_support": true,
+      "video_support": false,
+      "max_tools": 0,
+      "video_input_mode": "none"
     },
     "blip-image-captioning-base": {
       "max_output_tokens": 512,
@@ -1754,7 +2371,9 @@
       "aliases": [
         "Salesforce/blip-image-captioning-base"
       ],
-      "max_tokens": 2048
+      "max_tokens": 2048,
+      "max_tools": 0,
+      "video_input_mode": "frames"
     },
     "glyph": {
       "max_output_tokens": 8192,
@@ -1780,7 +2399,7 @@
       "conversation_template": {
         "system_prefix": "<|system|>\n",
         "system_suffix": "\n",
-        "user_prefix": "<|user|>\n",
+        "user_prefix": "<|user|>\n",
         "user_suffix": "\n",
         "assistant_prefix": "<|assistant|>\n",
         "assistant_suffix": "\n"
@@ -1798,7 +2417,9 @@
       "max_tokens": 131072,
       "license": "MIT",
       "arxiv": "2510.17800",
-      "repository": "https://github.com/thu-coai/Glyph"
+      "repository": "https://github.com/thu-coai/Glyph",
+      "max_tools": -1,
+      "video_input_mode": "frames"
     },
     "glm-4.1v-9b-base": {
       "max_output_tokens": 8192,
@@ -1828,7 +2449,9 @@
       "aliases": [
         "zai-org/GLM-4.1V-9B-Base"
       ],
-      "max_tokens": 131072
+      "max_tokens": 131072,
+      "max_tools": -1,
+      "video_input_mode": "frames"
     },
     "glm-4.1v-9b-thinking": {
       "max_output_tokens": 8192,
@@ -1865,7 +2488,9 @@
         "glm4.1v-9b-thinking"
       ],
       "max_tokens": 65536,
-      "arxiv": "2507.01006"
+      "arxiv": "2507.01006",
+      "max_tools": -1,
+      "video_input_mode": "frames"
     },
     "mistral-small-3.1-24b-instruct": {
       "max_output_tokens": 8192,
@@ -1888,7 +2513,9 @@
       ],
       "max_tokens": 131072,
       "total_parameters": "24B",
-      "release_date": "2025-03-17"
+      "release_date": "2025-03-17",
+      "max_tools": -1,
+      "video_input_mode": "frames"
     },
     "mistral-small-3.2-24b-instruct": {
       "max_output_tokens": 8192,
@@ -1913,7 +2540,9 @@
       ],
       "max_tokens": 131072,
       "total_parameters": "24B",
-      "release_date": "2025-06-01"
+      "release_date": "2025-06-01",
+      "max_tools": -1,
+      "video_input_mode": "frames"
     },
     "llama-4-scout": {
       "max_output_tokens": 8192,
@@ -1942,7 +2571,9 @@
       "max_tokens": 10000000,
       "release_date": "2025-04-05",
       "image_patch_size": 14,
-      "max_image_tokens": 6400
+      "max_image_tokens": 6400,
+      "max_tools": -1,
+      "video_input_mode": "frames"
     },
     "llama-4-maverick": {
       "max_output_tokens": 8192,
@@ -1970,7 +2601,9 @@
       "max_tokens": 1000000,
       "release_date": "2025-04-05",
       "image_patch_size": 14,
-      "max_image_tokens": 6400
+      "max_image_tokens": 6400,
+      "max_tools": -1,
+      "video_input_mode": "frames"
     },
     "llama-4-behemoth": {
       "max_output_tokens": 8192,
@@ -1999,7 +2632,9 @@
       "release_date": "2025-04-05",
       "status": "announced",
       "image_patch_size": 14,
-      "max_image_tokens": 6400
+      "max_image_tokens": 6400,
+      "max_tools": -1,
+      "video_input_mode": "frames"
     },
     "minimax-m2": {
       "max_output_tokens": 131072,
@@ -2021,7 +2656,6 @@
       "aliases": [
         "MiniMaxAI/MiniMax-M2",
         "mlx-community/minimax-m2",
-        "mlx-community/MiniMax-M2",
         "unsloth/MiniMax-M2-GGUF",
         "minimax-m2-230b",
         "minimax-m2-10b-active",
@@ -2035,7 +2669,9 @@
         "top_p": 0.95,
         "top_k": 40
       },
-      "default_system_prompt": "You are a helpful assistant. Your name is MiniMax-M2 and is built by MiniMax."
+      "default_system_prompt": "You are a helpful assistant. Your name is MiniMax-M2 and is built by MiniMax.",
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "minimax-m2.1": {
       "max_output_tokens": 131072,
@@ -2058,7 +2694,6 @@
       "canonical_name": "minimax-m2.1",
       "aliases": [
         "MiniMaxAI/MiniMax-M2.1",
-        "minimaxai/minimax-m2.1",
         "minimax-m2.1-229b",
         "minimax-m2.1-10b-active",
         "minimax/minimax-m2.1"
@@ -2095,14 +2730,16 @@
         "SciCode": 41.0,
         "IFBench": 70.0,
         "AA-LCR": 62.0,
-        "τ²-Bench Telecom": 87.0
+        "\u03c4\u00b2-Bench Telecom": 87.0
       },
       "inference_parameters": {
         "temperature": 1.0,
         "top_p": 0.95,
         "top_k": 40
       },
-      "default_system_prompt": "You are a helpful assistant. Your name is MiniMax-M2.1 and is built by MiniMax."
+      "default_system_prompt": "You are a helpful assistant. Your name is MiniMax-M2.1 and is built by MiniMax.",
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "glm-4.6v": {
       "max_output_tokens": 16384,
@@ -2134,7 +2771,10 @@
         "end": "<|end_of_box|>"
       },
       "thinking_control": "/nothink",
-      "thinking_tags": ["<think>", "</think>"],
+      "thinking_tags": [
+        "<think>",
+        "</think>"
+      ],
       "notes": "GLM-4.6V foundation model (106B params) for cloud deployment. Native multimodal function calling with vision-driven tool use using XML format: <tool_call>function_name\\n<arg_key>key</arg_key>\\n<arg_value>value</arg_value>\\n</tool_call>. Supports interleaved image-text generation, 128K context, multimodal document understanding, and frontend replication from screenshots. Generates reasoning in 'reasoning_content' field or <think></think> tags. Achieves SoTA performance in visual understanding among similar parameter scales. Thinking can be disabled with '/nothink' suffix in user message. See: https://github.com/zai-org/GLM-V",
       "source": "HuggingFace zai-org/GLM-4.6V and GLM-V GitHub",
       "canonical_name": "glm-4.6v",
@@ -2147,7 +2787,9 @@
       "max_tokens": 128000,
       "release_date": "2025-05-07",
       "arxiv": "2507.01006",
-      "license": "MIT"
+      "license": "MIT",
+      "max_tools": -1,
+      "video_input_mode": "frames"
     },
     "glm-4.6v-flash": {
       "max_output_tokens": 8192,
@@ -2178,7 +2820,10 @@
         "end": "<|end_of_box|>"
       },
       "thinking_control": "/nothink",
-      "thinking_tags": ["<think>", "</think>"],
+      "thinking_tags": [
+        "<think>",
+        "</think>"
+      ],
       "notes": "GLM-4.6V-Flash lightweight model (9B params) optimized for local deployment and low-latency applications. Maintains native multimodal function calling using XML format: <tool_call>function_name\\n<arg_key>key</arg_key>\\n<arg_value>value</arg_value>\\n</tool_call>. Generates reasoning in 'reasoning_content' field or <think></think> tags. Ideal for edge and resource-constrained environments while preserving core GLM-4.6V capabilities. Thinking can be disabled with '/nothink' suffix. See: https://github.com/zai-org/GLM-V",
       "source": "HuggingFace zai-org/GLM-4.6V-Flash and GLM-V GitHub",
       "canonical_name": "glm-4.6v-flash",
@@ -2191,7 +2836,9 @@
       "max_tokens": 128000,
       "release_date": "2025-05-07",
       "arxiv": "2507.01006",
-      "license": "MIT"
+      "license": "MIT",
+      "max_tools": -1,
+      "video_input_mode": "frames"
     },
     "glm-4.7": {
       "max_output_tokens": 32768,
@@ -2205,14 +2852,18 @@
       "architecture": "mixture_of_experts",
       "total_parameters": "358B",
       "thinking_paradigm": "multi_mode",
-      "thinking_modes": ["interleaved_thinking", "preserved_thinking", "turn_level_thinking"],
+      "thinking_modes": [
+        "interleaved_thinking",
+        "preserved_thinking",
+        "turn_level_thinking"
+      ],
       "native_function_calling": true,
       "agentic_coding": true,
       "terminal_tasks": true,
       "web_browsing": true,
       "tool_calling_parser": "glm47",
       "reasoning_parser": "glm45",
-      "notes": "GLM-4.7 latest MoE model (358B params) with enhanced coding, reasoning, and agentic capabilities. Achieves 73.8% on SWE-bench Verified, 66.7% on SWE-bench Multilingual, and 41% on Terminal Bench 2.0. Supports advanced thinking modes: Interleaved (think before actions), Preserved (cross-turn consistency), and Turn-level. Excels at tool using (τ²-Bench: 87.4%), web browsing (BrowseComp: 52%), and complex reasoning (HLE w/ Tools: 42.8%, AIME 2025: 95.7%). 128K context window with 32K output capacity. Optimized for modern coding environments including Claude Code, Kilo Code, Cline, Roo Code.",
+      "notes": "GLM-4.7 latest MoE model (358B params) with enhanced coding, reasoning, and agentic capabilities. Achieves 73.8% on SWE-bench Verified, 66.7% on SWE-bench Multilingual, and 41% on Terminal Bench 2.0. Supports advanced thinking modes: Interleaved (think before actions), Preserved (cross-turn consistency), and Turn-level. Excels at tool using (\u03c4\u00b2-Bench: 87.4%), web browsing (BrowseComp: 52%), and complex reasoning (HLE w/ Tools: 42.8%, AIME 2025: 95.7%). 128K context window with 32K output capacity. Optimized for modern coding environments including Claude Code, Kilo Code, Cline, Roo Code.",
       "source": "HuggingFace zai-org/GLM-4.7 and GLM technical blog",
       "canonical_name": "glm-4.7",
       "aliases": [
@@ -2240,7 +2891,7 @@
         "BrowseComp": 52.0,
         "BrowseComp (w/ Context Manage)": 67.5,
         "BrowseComp-Zh": 66.6,
-        "τ²-Bench": 87.4,
+        "\u03c4\u00b2-Bench": 87.4,
         "MMLU-Pro": 84.3,
         "GPQA-Diamond": 85.7
       },
@@ -2249,7 +2900,9 @@
         "top_p": 0.95,
         "enable_thinking": true,
         "clear_thinking": false
-      }
+      },
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "devstral-small-2-24b": {
       "max_output_tokens": 16384,
@@ -2269,12 +2922,17 @@
       "canonical_name": "devstral-small-2-24b",
       "aliases": [
         "mistralai/Devstral-Small-2-24B-Instruct-2512",
+        "mistralai/devstral-small-2-2512",
+        "mistralai/devstral-small-2",
         "devstral-small-2",
+        "devstral-small-2-2512",
         "devstral-small-2-24b-instruct"
       ],
       "max_tokens": 262144,
       "release_date": "2025-12",
-      "license": "Apache-2.0"
+      "license": "Apache-2.0",
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "devstral-2-123b": {
       "max_output_tokens": 16384,
@@ -2294,12 +2952,15 @@
       "canonical_name": "devstral-2-123b",
       "aliases": [
         "mistralai/Devstral-2-123B-Instruct-2512",
+        "mistralai/devstral-2",
         "devstral-2",
         "devstral-2-123b-instruct"
       ],
       "max_tokens": 262144,
       "release_date": "2025-12",
-      "license": "Modified-MIT"
+      "license": "Modified-MIT",
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "qwen3-235b-a22b-2507": {
       "max_output_tokens": 16384,
@@ -2327,7 +2988,9 @@
       "max_tokens": 262144,
       "release_date": "2025-07",
       "arxiv": "2505.09388",
-      "license": "Apache-2.0"
+      "license": "Apache-2.0",
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "qwen3-235b-a22b-2507-fp8": {
       "max_output_tokens": 16384,
@@ -2356,7 +3019,9 @@
       "max_tokens": 262144,
       "release_date": "2025-07",
       "arxiv": "2505.09388",
-      "license": "Apache-2.0"
+      "license": "Apache-2.0",
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "granite-4.0-h-tiny": {
       "max_output_tokens": 16384,
@@ -2388,7 +3053,9 @@
       ],
       "max_tokens": 131072,
       "release_date": "2025-10-02",
-      "license": "Apache-2.0"
+      "license": "Apache-2.0",
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "gpt-oss-20b": {
       "max_output_tokens": 8192,
@@ -2406,7 +3073,11 @@
       "tensor_type": "BF16+U8",
       "quantization_method": "MXFP4",
       "response_format": "harmony",
-      "reasoning_levels": ["low", "medium", "high"],
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
       "agentic_capabilities": true,
       "function_calling": true,
       "web_browsing": true,
@@ -2422,7 +3093,9 @@
       "max_tokens": 128000,
       "release_date": "2025-08",
       "arxiv": "2508.10925",
-      "license": "Apache-2.0"
+      "license": "Apache-2.0",
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "gpt-oss-120b": {
       "max_output_tokens": 8192,
@@ -2440,7 +3113,11 @@
       "tensor_type": "BF16+U8",
       "quantization_method": "MXFP4",
       "response_format": "harmony",
-      "reasoning_levels": ["low", "medium", "high"],
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
       "agentic_capabilities": true,
       "function_calling": true,
       "web_browsing": true,
@@ -2457,7 +3134,9 @@
       "max_tokens": 128000,
       "release_date": "2025-08",
       "arxiv": "2508.10925",
-      "license": "Apache-2.0"
+      "license": "Apache-2.0",
+      "max_tools": -1,
+      "video_input_mode": "none"
     },
     "qwen3-vl-2b": {
       "max_output_tokens": 8192,
@@ -2484,7 +3163,11 @@
       "spatial_perception": true,
       "document_understanding": true,
       "ocr_languages": 32,
-      "architecture_updates": ["Interleaved-MRoPE", "DeepStack", "Text-Timestamp-Alignment"],
+      "architecture_updates": [
+        "Interleaved-MRoPE",
+        "DeepStack",
+        "Text-Timestamp-Alignment"
+      ],
       "notes": "Qwen3-VL 2B dense model with 256K context. Visual agent for GUI operation, visual coding (Draw.io/HTML/CSS/JS), advanced spatial perception with 2D/3D grounding. Enhanced OCR (32 languages), long video understanding with second-level indexing. Text understanding on par with pure LLMs.",
       "source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
       "canonical_name": "qwen3-vl-2b",
@@ -2495,7 +3178,9 @@
       "max_tokens": 262144,
       "release_date": "2025-05",
       "arxiv": "2505.09388",
-      "license": "Apache-2.0"
+      "license": "Apache-2.0",
+      "max_tools": -1,
+      "video_input_mode": "native"
     },
     "qwen3-vl-4b": {
       "max_output_tokens": 8192,
@@ -2523,7 +3208,11 @@
       "document_understanding": true,
       "ocr_languages": 32,
       "total_parameters": "4.83B",
-      "architecture_updates": ["Interleaved-MRoPE", "DeepStack", "Text-Timestamp-Alignment"],
+      "architecture_updates": [
+        "Interleaved-MRoPE",
+        "DeepStack",
+        "Text-Timestamp-Alignment"
+      ],
       "notes": "Qwen3-VL 4B dense model (4.83B params) with 256K context, optimized for LMStudio. Visual agent for GUI operation, visual coding (Draw.io/HTML/CSS/JS), advanced spatial perception with 2D/3D grounding. Enhanced OCR (32 languages), long video understanding. FP8 checkpoints available.",
       "source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
       "canonical_name": "qwen3-vl-4b",
@@ -2535,7 +3224,9 @@
       "max_tokens": 262144,
       "release_date": "2025-05",
       "arxiv": "2505.09388",
-      "license": "Apache-2.0"
+      "license": "Apache-2.0",
+      "max_tools": -1,
+      "video_input_mode": "native"
     },
     "qwen3-vl-8b": {
       "max_output_tokens": 8192,
@@ -2563,7 +3254,11 @@
       "document_understanding": true,
       "ocr_languages": 32,
       "total_parameters": "8.77B",
-      "architecture_updates": ["Interleaved-MRoPE", "DeepStack", "Text-Timestamp-Alignment"],
+      "architecture_updates": [
+        "Interleaved-MRoPE",
+        "DeepStack",
+        "Text-Timestamp-Alignment"
+      ],
       "notes": "Qwen3-VL 8B dense model (8.77B params) with 256K context, optimized for LMStudio. Most powerful vision-language model in Qwen series. Visual agent for GUI operation, visual coding, advanced spatial perception with 2D/3D grounding. Enhanced OCR (32 languages), long video understanding with second-level indexing. FP8 checkpoints available.",
       "source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
       "canonical_name": "qwen3-vl-8b",
@@ -2575,7 +3270,9 @@
       "max_tokens": 262144,
       "release_date": "2025-05",
       "arxiv": "2505.09388",
-      "license": "Apache-2.0"
+      "license": "Apache-2.0",
+      "max_tools": -1,
+      "video_input_mode": "native"
     },
     "qwen3-vl-30b-a3b": {
       "max_output_tokens": 8192,
@@ -2605,7 +3302,11 @@
       "architecture": "mixture_of_experts",
       "total_parameters": "30.5B",
       "active_parameters": "3.3B",
-      "architecture_updates": ["Interleaved-MRoPE", "DeepStack", "Text-Timestamp-Alignment"],
+      "architecture_updates": [
+        "Interleaved-MRoPE",
+        "DeepStack",
+        "Text-Timestamp-Alignment"
+      ],
       "notes": "Qwen3-VL 30B MoE model (30.5B total/3.3B active), best performing vision model in the series. 128K context. Visual agent for GUI operation, visual coding (Draw.io/HTML/CSS/JS), advanced spatial perception with 2D/3D grounding. Enhanced OCR (32 languages), long video understanding with second-level indexing. Text understanding on par with pure LLMs.",
       "source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
       "canonical_name": "qwen3-vl-30b-a3b",
@@ -2617,7 +3318,9 @@
       "max_tokens": 131072,
       "release_date": "2025-05",
       "arxiv": "2505.09388",
-      "license": "Apache-2.0"
+      "license": "Apache-2.0",
+      "max_tools": -1,
+      "video_input_mode": "native"
     },
     "qwen3-vl-235b-a22b": {
       "max_output_tokens": 8192,
@@ -2649,7 +3352,11 @@
       "active_parameters": "22B",
       "experts": 128,
       "experts_activated": 8,
-      "architecture_updates": ["Interleaved-MRoPE", "DeepStack", "Text-Timestamp-Alignment"],
+      "architecture_updates": [
+        "Interleaved-MRoPE",
+        "DeepStack",
+        "Text-Timestamp-Alignment"
+      ],
       "notes": "Qwen3-VL 235B MoE model (235B total/22B active, 128 experts/8 activated), flagship vision model. 256K context expandable to 1M. Visual agent for GUI operation, visual coding (Draw.io/HTML/CSS/JS), advanced spatial perception with 2D/3D grounding. Enhanced OCR (32 languages), long video understanding with second-level indexing. Text understanding on par with pure LLMs. Superior visual perception and reasoning.",
       "source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
       "canonical_name": "qwen3-vl-235b-a22b",
@@ -2660,7 +3367,9 @@
       "max_tokens": 262144,
       "release_date": "2025-05",
       "arxiv": "2505.09388",
-      "license": "Apache-2.0"
+      "license": "Apache-2.0",
+      "max_tools": -1,
+      "video_input_mode": "native"
     },
     "nemotron-3-nano-30b-a3b": {
       "max_output_tokens": 8192,
@@ -2686,7 +3395,14 @@
       "agentic_capabilities": true,
       "function_calling": true,
       "tool_calling_format": "json",
-      "languages": ["English", "German", "Spanish", "French", "Italian", "Japanese"],
+      "languages": [
+        "English",
+        "German",
+        "Spanish",
+        "French",
+        "Italian",
+        "Japanese"
+      ],
       "notes": "NVIDIA Nemotron-3-Nano hybrid MoE model (30B total/3.5B active, 128 experts/6 activated + 1 shared). Combines 23 Mamba-2 layers with 6 Attention layers. Unified model for reasoning and non-reasoning tasks with configurable reasoning mode. Generates reasoning trace before final response. 256K context extendable to 1M with YaRN. Strong performance on AIME25 (99.2% with tools), SWE-Bench (38.8%), MiniF2F (50.0% pass@1). Native tool calling via chatml-function-calling format. Commercial use ready.",
       "source": "NVIDIA Nemotron HuggingFace and technical report",
       "canonical_name": "nemotron-3-nano-30b-a3b",
@@ -2720,7 +3436,9 @@
         "Scale AI Multi Challenge": 38.5,
         "Arena-Hard-V2 (Hard Prompt)": 72.1,
         "Arena-Hard-V2 (Average)": 67.7
-      }
+      },
+      "max_tools": -1,
+      "video_input_mode": "none"
     }
   },
   "tool_support_levels": {
@@ -2804,7 +3522,9 @@
     "source": "AbstractCore generic fallback",
     "canonical_name": "generic_vision_model",
     "aliases": [],
-    "max_tokens": 32768
+    "max_tokens": 32768,
+    "max_tools": -1,
+    "video_input_mode": "frames"
   },
   "default_capabilities": {
     "max_output_tokens": 4096,
@@ -2817,6 +3537,8 @@
     "thinking_budget": false,
     "video_support": false,
     "fim_support": false,
-    "max_tokens": 16384
+    "max_tokens": 16384,
+    "max_tools": 0,
+    "video_input_mode": "none"
   }
 }

abstractcore 2.9.1__py3-none-any.whl → 2.11.4__py3-none-any.whl

abstractcore 2.9.1py3-none-any.whl → 2.11.4py3-none-any.whl