abstractcore 2.6.9__py3-none-any.whl → 2.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. abstractcore/apps/summarizer.py +69 -27
  2. abstractcore/architectures/detection.py +190 -25
  3. abstractcore/assets/architecture_formats.json +129 -6
  4. abstractcore/assets/model_capabilities.json +803 -141
  5. abstractcore/config/main.py +2 -2
  6. abstractcore/config/manager.py +3 -1
  7. abstractcore/events/__init__.py +7 -1
  8. abstractcore/mcp/__init__.py +30 -0
  9. abstractcore/mcp/client.py +213 -0
  10. abstractcore/mcp/factory.py +64 -0
  11. abstractcore/mcp/naming.py +28 -0
  12. abstractcore/mcp/stdio_client.py +336 -0
  13. abstractcore/mcp/tool_source.py +164 -0
  14. abstractcore/processing/__init__.py +2 -2
  15. abstractcore/processing/basic_deepsearch.py +1 -1
  16. abstractcore/processing/basic_summarizer.py +379 -93
  17. abstractcore/providers/anthropic_provider.py +91 -10
  18. abstractcore/providers/base.py +540 -16
  19. abstractcore/providers/huggingface_provider.py +17 -8
  20. abstractcore/providers/lmstudio_provider.py +170 -25
  21. abstractcore/providers/mlx_provider.py +13 -10
  22. abstractcore/providers/ollama_provider.py +42 -26
  23. abstractcore/providers/openai_compatible_provider.py +87 -22
  24. abstractcore/providers/openai_provider.py +12 -9
  25. abstractcore/providers/streaming.py +201 -39
  26. abstractcore/providers/vllm_provider.py +78 -21
  27. abstractcore/server/app.py +116 -30
  28. abstractcore/structured/retry.py +20 -7
  29. abstractcore/tools/__init__.py +46 -24
  30. abstractcore/tools/abstractignore.py +166 -0
  31. abstractcore/tools/arg_canonicalizer.py +61 -0
  32. abstractcore/tools/common_tools.py +2443 -742
  33. abstractcore/tools/core.py +109 -13
  34. abstractcore/tools/handler.py +17 -3
  35. abstractcore/tools/parser.py +894 -159
  36. abstractcore/tools/registry.py +122 -18
  37. abstractcore/tools/syntax_rewriter.py +68 -6
  38. abstractcore/tools/tag_rewriter.py +186 -1
  39. abstractcore/utils/jsonish.py +111 -0
  40. abstractcore/utils/version.py +1 -1
  41. {abstractcore-2.6.9.dist-info → abstractcore-2.9.0.dist-info}/METADATA +55 -2
  42. {abstractcore-2.6.9.dist-info → abstractcore-2.9.0.dist-info}/RECORD +46 -37
  43. {abstractcore-2.6.9.dist-info → abstractcore-2.9.0.dist-info}/WHEEL +0 -0
  44. {abstractcore-2.6.9.dist-info → abstractcore-2.9.0.dist-info}/entry_points.txt +0 -0
  45. {abstractcore-2.6.9.dist-info → abstractcore-2.9.0.dist-info}/licenses/LICENSE +0 -0
  46. {abstractcore-2.6.9.dist-info → abstractcore-2.9.0.dist-info}/top_level.txt +0 -0
abstractcore/apps/summarizer.py
@@ -6,23 +6,39 @@ Usage:
     python -m abstractcore.apps.summarizer <file_path> [options]
 
 Options:
-    --style <style> Summary style (structured, narrative, objective, analytical, executive, conversational)
-    --length <length> Summary length (brief, standard, detailed, comprehensive)
-    --focus <focus> Specific focus area for summarization
-    --output <output> Output file path (optional, prints to console if not provided)
-    --chunk-size <size> Chunk size in characters (default: 8000, max: 32000)
-    --provider <provider> LLM provider (requires --model)
-    --model <model> LLM model (requires --provider)
-    --max-tokens <tokens> Maximum total tokens for LLM context (default: 32000)
-    --max-output-tokens <tokens> Maximum tokens for LLM output generation (default: 8000)
-    --verbose Show detailed progress information
-    --help Show this help message
+    --style <style> Summary style (structured, narrative, objective, analytical, executive, conversational)
+    --length <length> Summary length (brief, standard, detailed, comprehensive)
+    --focus <focus> Specific focus area for summarization
+    --output <output> Output file path (optional, prints to console if not provided)
+    --chunk-size <size> Chunk size in characters (default: 8000, max: 32000)
+    --provider <provider> LLM provider (requires --model)
+    --model <model> LLM model (requires --provider)
+    --max-tokens <tokens|auto> Maximum total tokens for LLM context (default: auto)
+        - 'auto' or -1: Uses model's full context window
+        - Specific number: Hard limit for deployment constraint (GPU/RAM)
+    --max-output-tokens <tokens|auto> Maximum tokens for LLM output (default: auto)
+    --verbose Show detailed progress information
+    --help Show this help message
+
+Memory Management:
+    --max-tokens controls token budget:
+    - Use 'auto' (default): Automatically uses model's full capability
+    - Use specific value: Hard limit for memory-constrained environments (e.g., --max-tokens 16000)
+
+    Example: 8GB GPU → --max-tokens 16000, 16GB GPU → --max-tokens 32000
 
 Examples:
+    # Auto mode (uses model's full capability)
     python -m abstractcore.apps.summarizer document.pdf
-    python -m abstractcore.apps.summarizer report.txt --style executive --length brief --verbose
-    python -m abstractcore.apps.summarizer data.md --focus "technical details" --output summary.txt
-    python -m abstractcore.apps.summarizer large.txt --chunk-size 15000 --provider openai --model gpt-4o-mini
+
+    # Memory-constrained (8GB GPU)
+    python -m abstractcore.apps.summarizer report.txt --max-tokens 16000
+
+    # Large document with specific style
+    python -m abstractcore.apps.summarizer data.md --style executive --length brief
+
+    # Custom model with hard limit
+    python -m abstractcore.apps.summarizer large.txt --provider openai --model gpt-4o-mini --max-tokens 24000
 """
 
 import argparse
@@ -239,16 +255,14 @@ Default model setup:
 
     parser.add_argument(
         '--max-tokens',
-        type=int,
-        default=32000,
-        help='Maximum total tokens for LLM context (default: 32000)'
+        default='auto',
+        help='Maximum total tokens for LLM context (default: auto). Use "auto" or -1 for model\'s full capability, or specific number for hard limit (e.g., 16000 for 8GB GPU)'
     )
 
     parser.add_argument(
         '--max-output-tokens',
-        type=int,
-        default=8000,
-        help='Maximum tokens for LLM output generation (default: 8000)'
+        default='auto',
+        help='Maximum tokens for LLM output generation (default: auto). Use "auto" or -1 for model\'s capability, or specific number'
     )
 
     parser.add_argument(
@@ -329,19 +343,40 @@ Default model setup:
         provider, model = get_app_defaults('summarizer')
         config_source = "configured defaults"
 
-    # Adjust max_tokens based on chunk size
-    max_tokens = max(args.max_tokens, args.chunk_size)
+    # Parse max_tokens (support 'auto', -1, or specific number)
+    if args.max_tokens in ('auto', 'Auto', 'AUTO'):
+        max_tokens = -1
+    else:
+        try:
+            max_tokens = int(args.max_tokens)
+        except ValueError:
+            print(f"Error: --max-tokens must be 'auto' or a number, got: {args.max_tokens}")
+            sys.exit(1)
+
+    # Parse max_output_tokens (support 'auto', -1, or specific number)
+    if args.max_output_tokens in ('auto', 'Auto', 'AUTO'):
+        max_output_tokens = -1
+    else:
+        try:
+            max_output_tokens = int(args.max_output_tokens)
+        except ValueError:
+            print(f"Error: --max-output-tokens must be 'auto' or a number, got: {args.max_output_tokens}")
+            sys.exit(1)
 
     if args.verbose:
-        print(f"Initializing summarizer ({provider}, {model}, {max_tokens} token context, {args.max_output_tokens} output tokens) - using {config_source}...")
+        max_tokens_display = "AUTO" if max_tokens == -1 else str(max_tokens)
+        max_output_display = "AUTO" if max_output_tokens == -1 else str(max_output_tokens)
+        print(f"Initializing summarizer ({provider}, {model}, {max_tokens_display} token context, {max_output_display} output tokens) - using {config_source}...")
 
     if args.debug:
+        max_tokens_display = "AUTO" if max_tokens == -1 else str(max_tokens)
+        max_output_display = "AUTO" if max_output_tokens == -1 else str(max_output_tokens)
         print(f"🐛 Debug - Configuration details:")
         print(f" Provider: {provider}")
         print(f" Model: {model}")
         print(f" Config source: {config_source}")
-        print(f" Max tokens: {max_tokens}")
-        print(f" Max output tokens: {args.max_output_tokens}")
+        print(f" Max tokens: {max_tokens_display}")
+        print(f" Max output tokens: {max_output_display}")
         print(f" Chunk size: {args.chunk_size}")
         print(f" Timeout: {args.timeout}")
         print(f" Style: {args.style}")
@@ -349,12 +384,19 @@ Default model setup:
         print(f" Focus: {args.focus}")
 
     try:
-        llm = create_llm(provider, model=model, max_tokens=max_tokens, max_output_tokens=args.max_output_tokens, timeout=args.timeout)
+        # When using auto mode (-1), don't pass to create_llm (let provider use defaults)
+        llm_kwargs = {'timeout': args.timeout}
+        if max_tokens != -1:
+            llm_kwargs['max_tokens'] = max_tokens
+        if max_output_tokens != -1:
+            llm_kwargs['max_output_tokens'] = max_output_tokens
+
+        llm = create_llm(provider, model=model, **llm_kwargs)
         summarizer = BasicSummarizer(
             llm,
             max_chunk_size=args.chunk_size,
             max_tokens=max_tokens,
-            max_output_tokens=args.max_output_tokens,
+            max_output_tokens=max_output_tokens,
             timeout=args.timeout
         )
     except Exception as e:
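
As a reference for the token-budget convention introduced above, here is a minimal standalone sketch: 'auto' maps to -1, and -1 means "do not pass an explicit limit to create_llm". The helper names parse_token_budget and build_llm_kwargs are illustrative, not part of abstractcore.

# Illustrative sketch, not package code: the 'auto'/-1 token budget convention.
def parse_token_budget(value):
    """Return -1 for 'auto' (use the model's full window), else an int hard limit."""
    if isinstance(value, str) and value.strip().lower() == "auto":
        return -1
    return int(value)  # ValueError for anything else, mirroring the CLI error path

def build_llm_kwargs(max_tokens, max_output_tokens, timeout=120):
    """Only pass explicit limits through; -1 (auto) leaves provider defaults in place."""
    kwargs = {"timeout": timeout}
    if max_tokens != -1:
        kwargs["max_tokens"] = max_tokens
    if max_output_tokens != -1:
        kwargs["max_output_tokens"] = max_output_tokens
    return kwargs

assert parse_token_budget("auto") == -1
assert parse_token_budget("16000") == 16000
assert build_llm_kwargs(-1, -1) == {"timeout": 120}
assert build_llm_kwargs(16000, -1) == {"timeout": 120, "max_tokens": 16000}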
abstractcore/architectures/detection.py
@@ -20,6 +20,41 @@ _model_capabilities: Optional[Dict[str, Any]] = None
 # Cache for resolved model names and architectures to reduce redundant logging
 _resolved_aliases_cache: Dict[str, str] = {}
 _detected_architectures_cache: Dict[str, str] = {}
+# Cache to avoid repeating default-capabilities warnings for the same unknown model.
+_default_capabilities_warning_cache: set[str] = set()
+
+
+# Some callers pass provider/model as a single string (e.g. "lmstudio/qwen/qwen3-next-80b").
+# For capability lookup we want the underlying model id, not the provider prefix.
+_KNOWN_PROVIDER_PREFIXES = {
+    "anthropic",
+    "azure",
+    "bedrock",
+    "fireworks",
+    "gemini",
+    "google",
+    "groq",
+    "huggingface",
+    "lmstudio",
+    "local",
+    "mlx",
+    "nvidia",
+    "ollama",
+    "openai",
+    "openai-compatible",
+    "together",
+    "vllm",
+}
+
+
+def _strip_provider_prefix(model_name: str) -> str:
+    s = str(model_name or "").strip()
+    if not s or "/" not in s:
+        return s
+    head, rest = s.split("/", 1)
+    if head.strip().lower() in _KNOWN_PROVIDER_PREFIXES and rest.strip():
+        return rest.strip()
+    return s
 
 
 def _load_json_assets():
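
A standalone rendition of the provider-prefix stripping shown in this hunk, with a reduced prefix set for illustration. Only the first path segment is checked, so organization prefixes such as qwen/ survive intact.

# Sketch only; mirrors the _strip_provider_prefix logic above with fewer prefixes.
KNOWN_PROVIDER_PREFIXES = {"anthropic", "lmstudio", "ollama", "openai"}

def strip_provider_prefix(model_name: str) -> str:
    s = str(model_name or "").strip()
    if not s or "/" not in s:
        return s
    head, rest = s.split("/", 1)  # split only on the first slash
    if head.strip().lower() in KNOWN_PROVIDER_PREFIXES and rest.strip():
        return rest.strip()
    return s

assert strip_provider_prefix("lmstudio/qwen/qwen3-next-80b") == "qwen/qwen3-next-80b"
assert strip_provider_prefix("qwen/qwen3-next-80b") == "qwen/qwen3-next-80b"  # org kept
assert strip_provider_prefix("gpt-4o-mini") == "gpt-4o-mini"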
@@ -72,16 +107,36 @@ def detect_architecture(model_name: str) -> str:
         _detected_architectures_cache[model_name] = "generic"
         return "generic"
 
-    model_lower = model_name.lower()
+    # Normalize model names for better pattern matching:
+    # - HuggingFace cache names use `--` as `/` separators (models--org--name).
+    # - Claude versions sometimes appear as `claude-3-5-sonnet` (normalize to `claude-3.5-sonnet`).
+    model_lower = model_name.lower().replace("--", "/")
+    import re
+    model_lower = re.sub(r'(claude-\d+)-(\d+)(?=-|$)', r'\1.\2', model_lower)
+
+    # Choose the most specific matching architecture.
+    # Many architectures include broad patterns (e.g. "gpt") that can accidentally
+    # match more specific models (e.g. "gpt-oss"). We resolve this by selecting the
+    # longest matching pattern across all architectures.
+    best_arch = "generic"
+    best_pattern = ""
 
-    # Check each architecture's patterns
     for arch_name, arch_config in _architecture_formats["architectures"].items():
         patterns = arch_config.get("patterns", [])
         for pattern in patterns:
-            if pattern.lower() in model_lower:
-                logger.debug(f"Detected architecture '{arch_name}' for model '{model_name}' (pattern: '{pattern}')")
-                _detected_architectures_cache[model_name] = arch_name
-                return arch_name
+            pat = str(pattern).lower()
+            if not pat:
+                continue
+            if pat in model_lower and len(pat) > len(best_pattern):
+                best_arch = arch_name
+                best_pattern = pat
+
+    if best_arch != "generic":
+        logger.debug(
+            f"Detected architecture '{best_arch}' for model '{model_name}' (pattern: '{best_pattern}')"
+        )
+        _detected_architectures_cache[model_name] = best_arch
+        return best_arch
 
     # Fallback to generic
     logger.debug(f"No specific architecture detected for '{model_name}', using generic")
@@ -147,22 +202,69 @@ def resolve_model_alias(model_name: str, models: Dict[str, Any]) -> str:
     if normalized_model_name != model_name:
         logger.debug(f"Normalized model name '{model_name}' to '{normalized_model_name}'")
 
-    # Check if normalized name is a canonical name
-    if normalized_model_name in models:
-        _resolved_aliases_cache[model_name] = normalized_model_name
-        return normalized_model_name
-
-    # Check if it's an alias of any model (try both original and normalized)
+    # Also support "provider/model" strings by stripping known provider prefixes.
+    stripped_model_name = _strip_provider_prefix(model_name)
+    stripped_normalized_name = _strip_provider_prefix(normalized_model_name)
+
+    def _tail(name: str) -> str:
+        s = str(name or "").strip()
+        if not s or "/" not in s:
+            return s
+        return s.split("/")[-1].strip()
+
+    def _candidates(*names: str) -> List[str]:
+        out: List[str] = []
+        for n in names:
+            s = str(n or "").strip()
+            if not s:
+                continue
+            out.append(s)
+            t = _tail(s)
+            if t and t != s:
+                out.append(t)
+        # Deduplicate while preserving order
+        uniq: List[str] = []
+        seen: set[str] = set()
+        for s in out:
+            if s in seen:
+                continue
+            seen.add(s)
+            uniq.append(s)
+        return uniq
+
+    # Check if any normalized/stripped name is a canonical name.
+    for candidate in _candidates(normalized_model_name, stripped_normalized_name, stripped_model_name):
+        if candidate in models:
+            _resolved_aliases_cache[model_name] = candidate
+            return candidate
+
+    # Check if it's an alias of any model (try both original and normalized).
+    # Some JSON entries intentionally share aliases (e.g. base + variant). Prefer the
+    # most specific canonical model name deterministically.
+    alias_matches: List[str] = []
     for canonical_name, model_info in models.items():
         aliases = model_info.get("aliases", [])
-        if model_name in aliases or normalized_model_name in aliases:
-            logger.debug(f"Resolved alias '{model_name}' to canonical name '{canonical_name}'")
-            _resolved_aliases_cache[model_name] = canonical_name
-            return canonical_name
+        if not isinstance(aliases, list) or not aliases:
+            continue
+        candidates = _candidates(model_name, normalized_model_name, stripped_model_name, stripped_normalized_name)
+        alias_set = {str(a).strip().lower() for a in aliases if isinstance(a, str) and str(a).strip()}
+        cand_set = {str(c).strip().lower() for c in candidates if isinstance(c, str) and str(c).strip()}
+        if alias_set and cand_set and alias_set.intersection(cand_set):
+            alias_matches.append(canonical_name)
+
+    if alias_matches:
+        best = max(alias_matches, key=lambda n: (len(str(n)), str(n)))
+        logger.debug(f"Resolved alias '{model_name}' to canonical name '{best}'")
+        _resolved_aliases_cache[model_name] = best
+        return best
 
     # Return normalized name if no alias found
-    _resolved_aliases_cache[model_name] = normalized_model_name
-    return normalized_model_name
+    fallback = stripped_normalized_name or normalized_model_name
+    fallback_tail = _tail(fallback)
+    if fallback_tail:
+        fallback = fallback_tail
+    _resolved_aliases_cache[model_name] = fallback
+    return fallback
 
 
 def get_model_capabilities(model_name: str) -> Dict[str, Any]:
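
A compact sketch of the candidate expansion used for alias lookups in this hunk: every name is tried as given and as its final path segment, de-duplicated in order. The function name candidates is illustrative, not a package API.

# Illustrative helper mirroring the _candidates/_tail behaviour above.
def candidates(*names: str) -> list[str]:
    out: list[str] = []
    for n in names:
        s = str(n or "").strip()
        if not s:
            continue
        out.append(s)
        if "/" in s:
            tail = s.split("/")[-1].strip()
            if tail and tail != s:
                out.append(tail)
    uniq: list[str] = []
    seen: set[str] = set()
    for s in out:
        if s not in seen:
            seen.add(s)
            uniq.append(s)
    return uniq

# A "provider/org/model" string is tried verbatim, without its provider prefix,
# and as the bare model id:
assert candidates("lmstudio/qwen/qwen3-next-80b", "qwen/qwen3-next-80b") == [
    "lmstudio/qwen/qwen3-next-80b",
    "qwen3-next-80b",
    "qwen/qwen3-next-80b",
]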
@@ -199,15 +301,44 @@ def get_model_capabilities(model_name: str) -> Dict[str, Any]:
     # Step 3: Try partial matches for common model naming patterns
     # Use canonical_name (which has been normalized) for better matching
     canonical_lower = canonical_name.lower()
-    for model_key, capabilities in models.items():
-        if model_key.lower() in canonical_lower or canonical_lower in model_key.lower():
+    candidates_name_in_key: List[tuple[int, int, str]] = []
+    candidates_key_in_name: List[tuple[int, str]] = []
+    for model_key in models.keys():
+        if not isinstance(model_key, str) or not model_key.strip():
+            continue
+        key_lower = model_key.lower()
+
+        # Prefer a close "superstring" match where the canonical name is missing a suffix.
+        # Example: "qwen3-next-80b" -> "qwen3-next-80b-a3b"
+        if canonical_lower and canonical_lower in key_lower:
+            extra = max(0, len(key_lower) - len(canonical_lower))
+            candidates_name_in_key.append((extra, len(key_lower), model_key))
+            continue
+
+        # Otherwise, prefer the most specific substring match (e.g. provider/model prefixes).
+        if key_lower in canonical_lower:
+            candidates_key_in_name.append((len(key_lower), model_key))
+
+    best_key: Optional[str] = None
+    best_mode: Optional[str] = None
+    if candidates_name_in_key:
+        candidates_name_in_key.sort(key=lambda x: (x[0], -x[1]))
+        best_key = candidates_name_in_key[0][2]
+        best_mode = "name_in_key"
+    elif candidates_key_in_name:
+        best_key = max(candidates_key_in_name, key=lambda x: x[0])[1]
+        best_mode = "key_in_name"
+
+    if best_key is not None:
+        capabilities = models.get(best_key)
+        if isinstance(capabilities, dict):
             result = capabilities.copy()
             # Remove alias-specific fields
             result.pop("canonical_name", None)
             result.pop("aliases", None)
             if "architecture" not in result:
                 result["architecture"] = detect_architecture(model_name)
-            logger.debug(f"Using capabilities from '{model_key}' for '{model_name}'")
+            logger.debug(f"Using capabilities from '{best_key}' for '{model_name}' (partial match: {best_mode})")
             return result
 
     # Step 4: Fallback to default capabilities based on architecture
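
The partial-match preference introduced here can be summarized in isolation as follows, a sketch under the assumption that ties on extra characters prefer the longer key, as in the sort key above:

# Two-tier ranking: "superstring" keys that merely extend the requested name win
# (fewest extra characters first); otherwise the longest key contained in the name.
def best_partial_match(canonical_lower, keys):
    name_in_key, key_in_name = [], []
    for key in keys:
        key_lower = key.lower()
        if canonical_lower and canonical_lower in key_lower:
            name_in_key.append((len(key_lower) - len(canonical_lower), -len(key_lower), key))
        elif key_lower in canonical_lower:
            key_in_name.append((len(key_lower), key))
    if name_in_key:
        return min(name_in_key)[2]
    if key_in_name:
        return max(key_in_name)[1]
    return None

keys = ["qwen3-next-80b-a3b", "qwen3-next-80b-a3b-instruct", "qwen3"]
assert best_partial_match("qwen3-next-80b", keys) == "qwen3-next-80b-a3b"
assert best_partial_match("someorg/qwen3-custom-build", keys) == "qwen3"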
@@ -215,16 +346,50 @@ def get_model_capabilities(model_name: str) -> Dict[str, Any]:
     default_caps = _model_capabilities.get("default_capabilities", {}).copy()
     default_caps["architecture"] = architecture
 
-    # Enhance defaults based on architecture
+    # Enhance defaults based on architecture.
+    #
+    # NOTE: `architecture_formats.json.tool_format` describes the *prompted transcript syntax*
+    # for tool calls (e.g. XML-wrapped, <|tool_call|> blocks, etc). Some architectures/models
+    # also support *native tool APIs* (provider-level `tools` payloads) even when their prompted
+    # transcript format is non-native. For those cases, architectures can set an explicit
+    # `default_tool_support` to avoid relying on tool_format heuristics.
     arch_format = get_architecture_format(architecture)
-    if arch_format.get("tool_format") == "native":
+
+    explicit_support = str(arch_format.get("default_tool_support") or "").strip().lower()
+    if explicit_support in {"native", "prompted", "none"}:
+        default_caps["tool_support"] = explicit_support
+    elif arch_format.get("tool_format") == "native":
         default_caps["tool_support"] = "native"
-    elif arch_format.get("tool_format") in ["special_token", "json", "xml", "pythonic"]:
+    elif arch_format.get("tool_format") in ["special_token", "json", "xml", "pythonic", "glm_xml"]:
         default_caps["tool_support"] = "prompted"
     else:
         default_caps["tool_support"] = "none"
 
+    # Propagate architecture-level output wrappers into default capabilities.
+    wrappers = arch_format.get("output_wrappers")
+    if isinstance(wrappers, dict) and wrappers:
+        default_caps["output_wrappers"] = dict(wrappers)
+
     logger.debug(f"Using default capabilities for '{model_name}' (architecture: {architecture})")
+
+    # Emit a one-time warning for unknown models to keep model_capabilities.json up to date.
+    try:
+        raw_name = str(model_name).strip()
+    except Exception:
+        raw_name = ""
+
+    if raw_name and raw_name not in _default_capabilities_warning_cache:
+        _default_capabilities_warning_cache.add(raw_name)
+        logger.warning(
+            "Model not found in model_capabilities.json; falling back to architecture defaults",
+            model_name=raw_name,
+            detected_architecture=architecture,
+            default_tool_support=default_caps.get("tool_support"),
+            next_steps=(
+                "Add this model (or an alias) to abstractcore/abstractcore/assets/model_capabilities.json "
+                "or email contact@abstractcore.ai with the exact model id and provider."
+            ),
+        )
     return default_caps
 
 
@@ -539,4 +704,4 @@ def check_vision_model_compatibility(model_name: str, provider: str = None) -> D
         result['warnings'].append("No max_image_tokens specified")
         result['recommendations'].append("Add max_image_tokens to model capabilities")
 
-    return result
+    return result
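
Before the JSON format changes below, a standalone sketch of the tool_support precedence introduced in get_model_capabilities: an explicit default_tool_support wins over the tool_format heuristics. The name resolve_tool_support is illustrative, not a function in the package.

# Illustrative precedence only; mirrors the branch order in the hunk above.
PROMPTED_FORMATS = {"special_token", "json", "xml", "pythonic", "glm_xml"}

def resolve_tool_support(arch_format: dict) -> str:
    explicit = str(arch_format.get("default_tool_support") or "").strip().lower()
    if explicit in {"native", "prompted", "none"}:
        return explicit
    if arch_format.get("tool_format") == "native":
        return "native"
    if arch_format.get("tool_format") in PROMPTED_FORMATS:
        return "prompted"
    return "none"

# Claude-style entry: prompted XML transcript syntax, but native provider tool API.
assert resolve_tool_support({"tool_format": "xml", "default_tool_support": "native"}) == "native"
assert resolve_tool_support({"tool_format": "glm_xml"}) == "prompted"
assert resolve_tool_support({}) == "none"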
abstractcore/assets/architecture_formats.json
@@ -88,7 +88,7 @@
       "patterns": ["qwen3-next"]
     },
     "qwen3_vl": {
-      "description": "Alibaba's Qwen3-VL multimodal architecture (September 2025)",
+      "description": "Alibaba's Qwen3-VL multimodal architecture (May 2025)",
       "message_format": "im_start_end",
       "system_prefix": "<|im_start|>system\n",
       "system_suffix": "<|im_end|>\n",
@@ -96,8 +96,20 @@
       "user_suffix": "<|im_end|>\n",
       "assistant_prefix": "<|im_start|>assistant\n",
       "assistant_suffix": "<|im_end|>\n",
-      "tool_format": "prompted",
-      "patterns": ["qwen3-vl"]
+      "tool_format": "native",
+      "patterns": ["qwen3-vl-2b", "qwen3-vl-4b", "qwen3-vl-8b"]
+    },
+    "qwen3_vl_moe": {
+      "description": "Alibaba's Qwen3-VL MoE multimodal architecture (May 2025)",
+      "message_format": "im_start_end",
+      "system_prefix": "<|im_start|>system\n",
+      "system_suffix": "<|im_end|>\n",
+      "user_prefix": "<|im_start|>user\n",
+      "user_suffix": "<|im_end|>\n",
+      "assistant_prefix": "<|im_start|>assistant\n",
+      "assistant_suffix": "<|im_end|>\n",
+      "tool_format": "native",
+      "patterns": ["qwen3-vl-30b-a3b", "qwen3-vl-235b-a22b"]
     },
     "qwen3_moe": {
       "description": "Alibaba's Qwen3 MoE architecture (April 2025)",
@@ -211,6 +223,30 @@
       "tool_format": "native",
       "patterns": ["codestral"]
     },
+    "mistral3": {
+      "description": "Mistral AI Mistral 3 architecture with rope-scaling (December 2024)",
+      "message_format": "inst",
+      "system_prefix": "",
+      "system_suffix": "\n\n",
+      "user_prefix": "[INST] ",
+      "user_suffix": " [/INST]",
+      "assistant_prefix": "",
+      "assistant_suffix": "",
+      "tool_format": "native",
+      "patterns": ["mistral-small-3", "devstral-small-2"]
+    },
+    "ministral3": {
+      "description": "Mistral AI Ministral 3 architecture (December 2024)",
+      "message_format": "inst",
+      "system_prefix": "",
+      "system_suffix": "\n\n",
+      "user_prefix": "[INST] ",
+      "user_suffix": " [/INST]",
+      "assistant_prefix": "",
+      "assistant_suffix": "",
+      "tool_format": "native",
+      "patterns": ["devstral-2-123b"]
+    },
     "phi": {
       "description": "Microsoft's Phi architecture family",
       "message_format": "basic",
@@ -295,6 +331,26 @@
       "tool_format": "none",
       "patterns": ["gemma-2b", "gemma-7b"]
     },
+    "glm4v_moe": {
+      "description": "Zhipu AI's GLM-4.6V multimodal MoE architecture (May 2025)",
+      "message_format": "glm_special_tokens",
+      "system_prefix": "<|system|>\n",
+      "system_suffix": "\n",
+      "user_prefix": "<|user|>\n",
+      "user_suffix": "\n",
+      "assistant_prefix": "<|assistant|>\n",
+      "assistant_suffix": "\n",
+      "tool_format": "glm_xml",
+      "tool_calling_format": "<tool_call>function_name\n<arg_key>key</arg_key>\n<arg_value>value</arg_value>\n</tool_call>",
+      "output_wrappers": {
+        "start": "<|begin_of_box|>",
+        "end": "<|end_of_box|>"
+      },
+      "thinking_output_field": "reasoning_content",
+      "thinking_tags": ["<think>", "</think>"],
+      "thinking_control": "/nothink",
+      "patterns": ["glm-4.6v", "glm4.6v", "zai-org/glm-4.6v", "glm-4.6v-flash", "glm-4.6v-fp8", "glm-4.6v-flash-fp8"]
+    },
     "glm4_moe": {
       "description": "Zhipu AI's GLM-4.5+ MoE architecture (July 2025)",
       "message_format": "im_start_end",
@@ -306,7 +362,7 @@
       "assistant_suffix": "\n",
       "tool_format": "special_token",
       "tool_prefix": "<|tool_call|>",
-      "patterns": ["glm-4.5", "glm-4.6", "glm-4.5-air"]
+      "patterns": ["glm-4.7", "glm-4.6", "glm-4.5", "glm-4.5-air"]
     },
     "glm4v": {
       "description": "Zhipu AI's GLM-4V multimodal architecture (June 2024)",
@@ -345,6 +401,19 @@
       "tool_prefix": "<|tool_call|>",
       "patterns": ["granite", "granite3.3:2b"]
     },
+    "granitemoehybrid": {
+      "description": "IBM's Granite 4.0 hybrid MoE architecture with Mamba2 (October 2025)",
+      "message_format": "special_tokens",
+      "system_prefix": "<|system|>\n",
+      "system_suffix": "\n",
+      "user_prefix": "<|user|>\n",
+      "user_suffix": "\n",
+      "assistant_prefix": "<|assistant|>\n",
+      "assistant_suffix": "\n",
+      "tool_format": "native",
+      "tool_prefix": "<|tool_call|>",
+      "patterns": ["granite-4.0-h", "granite-h-tiny", "granite-h-micro"]
+    },
     "deepseek": {
       "description": "DeepSeek architecture family",
       "message_format": "im_start_end",
@@ -357,6 +426,32 @@
       "tool_format": "json",
       "patterns": ["deepseek"]
     },
+    "minimax_m2_1": {
+      "description": "MiniMax M2.1 enhanced MoE architecture with improved coding and agentic capabilities (December 2024)",
+      "message_format": "im_start_end",
+      "system_prefix": "<|im_start|>system\n",
+      "system_suffix": "<|im_end|>\n",
+      "user_prefix": "<|im_start|>user\n",
+      "user_suffix": "<|im_end|>\n",
+      "assistant_prefix": "<|im_start|>assistant\n",
+      "assistant_suffix": "<|im_end|>\n",
+      "tool_format": "native",
+      "thinking_format": "<think>...</think>",
+      "patterns": ["minimax-m2.1", "minimaxai/minimax-m2.1", "minimax/minimax-m2.1"]
+    },
+    "minimax_m2": {
+      "description": "MiniMax M2 MoE architecture with interleaved thinking (January 2025)",
+      "message_format": "im_start_end",
+      "system_prefix": "<|im_start|>system\n",
+      "system_suffix": "<|im_end|>\n",
+      "user_prefix": "<|im_start|>user\n",
+      "user_suffix": "<|im_end|>\n",
+      "assistant_prefix": "<|im_start|>assistant\n",
+      "assistant_suffix": "<|im_end|>\n",
+      "tool_format": "native",
+      "thinking_format": "<think>...</think>",
+      "patterns": ["minimax-m2", "minimaxai/minimax-m2", "minimax/minimax-m2"]
+    },
     "seed_oss": {
       "description": "ByteDance's Seed-OSS long-context architecture (August 2025)",
       "message_format": "im_start_end",
@@ -390,6 +485,7 @@
       "user_suffix": "\n",
       "assistant_prefix": "Assistant: ",
       "assistant_suffix": "\n",
+      "default_tool_support": "native",
       "tool_format": "xml",
       "patterns": ["claude", "claude-3", "claude-4", "claude-4.1", "claude-4.5"]
     },
@@ -399,6 +495,32 @@
       "tool_format": "openai_functions",
       "patterns": ["gpt", "chatgpt", "gpt-5"]
     },
+    "gpt_oss": {
+      "description": "OpenAI GPT-OSS open-weight MoE architecture with harmony format (August 2025)",
+      "message_format": "harmony",
+      "system_prefix": "<|system|>\n",
+      "system_suffix": "\n",
+      "user_prefix": "<|user|>\n",
+      "user_suffix": "\n",
+      "assistant_prefix": "<|assistant|>\n",
+      "assistant_suffix": "\n",
+      "tool_format": "native",
+      "reasoning_levels": ["low", "medium", "high"],
+      "patterns": ["gpt-oss", "openai/gpt-oss"]
+    },
+    "nemotron_hybrid_moe": {
+      "description": "NVIDIA Nemotron-3-Nano hybrid MoE architecture with Mamba-2 and Attention layers (December 2025)",
+      "message_format": "im_start_end",
+      "system_prefix": "<|im_start|>system\n",
+      "system_suffix": "<|im_end|>\n",
+      "user_prefix": "<|im_start|>user\n",
+      "user_suffix": "<|im_end|>\n",
+      "assistant_prefix": "<|im_start|>assistant\n",
+      "assistant_suffix": "<|im_end|>\n",
+      "tool_format": "json",
+      "reasoning_support": true,
+      "patterns": ["nemotron-3-nano", "nemotron-nano", "nvidia/nemotron"]
+    },
     "generic": {
       "description": "Generic/unknown architecture fallback",
       "message_format": "basic",
@@ -420,7 +542,8 @@
     "human_assistant": "Human/Assistant format",
     "openai_chat": "OpenAI chat completion format",
     "llama3_header": "LLaMA 3+ format with <|start_header_id|> and <|eot_id|>",
-    "glm_special_tokens": "GLM format with <|system|>, <|user|>, <|assistant|> tokens"
+    "glm_special_tokens": "GLM format with <|system|>, <|user|>, <|assistant|> tokens",
+    "harmony": "OpenAI harmony response format for GPT-OSS models"
   },
   "tool_formats": {
     "pythonic": "Python function call syntax: [func(arg=val)]",
@@ -432,4 +555,4 @@
     "prompted": "Tool use through careful prompting",
     "none": "No tool support"
   }
-}
+}