abstractcore 2.6.9__py3-none-any.whl → 2.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/apps/summarizer.py +69 -27
- abstractcore/architectures/detection.py +190 -25
- abstractcore/assets/architecture_formats.json +129 -6
- abstractcore/assets/model_capabilities.json +803 -141
- abstractcore/config/main.py +2 -2
- abstractcore/config/manager.py +3 -1
- abstractcore/events/__init__.py +7 -1
- abstractcore/mcp/__init__.py +30 -0
- abstractcore/mcp/client.py +213 -0
- abstractcore/mcp/factory.py +64 -0
- abstractcore/mcp/naming.py +28 -0
- abstractcore/mcp/stdio_client.py +336 -0
- abstractcore/mcp/tool_source.py +164 -0
- abstractcore/processing/__init__.py +2 -2
- abstractcore/processing/basic_deepsearch.py +1 -1
- abstractcore/processing/basic_summarizer.py +379 -93
- abstractcore/providers/anthropic_provider.py +91 -10
- abstractcore/providers/base.py +540 -16
- abstractcore/providers/huggingface_provider.py +17 -8
- abstractcore/providers/lmstudio_provider.py +170 -25
- abstractcore/providers/mlx_provider.py +13 -10
- abstractcore/providers/ollama_provider.py +42 -26
- abstractcore/providers/openai_compatible_provider.py +87 -22
- abstractcore/providers/openai_provider.py +12 -9
- abstractcore/providers/streaming.py +201 -39
- abstractcore/providers/vllm_provider.py +78 -21
- abstractcore/server/app.py +116 -30
- abstractcore/structured/retry.py +20 -7
- abstractcore/tools/__init__.py +46 -24
- abstractcore/tools/abstractignore.py +166 -0
- abstractcore/tools/arg_canonicalizer.py +61 -0
- abstractcore/tools/common_tools.py +2443 -742
- abstractcore/tools/core.py +109 -13
- abstractcore/tools/handler.py +17 -3
- abstractcore/tools/parser.py +894 -159
- abstractcore/tools/registry.py +122 -18
- abstractcore/tools/syntax_rewriter.py +68 -6
- abstractcore/tools/tag_rewriter.py +186 -1
- abstractcore/utils/jsonish.py +111 -0
- abstractcore/utils/version.py +1 -1
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/METADATA +56 -2
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/RECORD +46 -37
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/WHEEL +0 -0
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/entry_points.txt +0 -0
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/top_level.txt +0 -0
abstractcore/apps/summarizer.py
CHANGED

@@ -6,23 +6,39 @@ Usage:
     python -m abstractcore.apps.summarizer <file_path> [options]

 Options:
-    --style <style>
-    --length <length>
-    --focus <focus>
-    --output <output>
-    --chunk-size <size>
-    --provider <provider>
-    --model <model>
-    --max-tokens <tokens> Maximum total tokens for LLM context (default:
-
-
-    --
+    --style <style>                    Summary style (structured, narrative, objective, analytical, executive, conversational)
+    --length <length>                  Summary length (brief, standard, detailed, comprehensive)
+    --focus <focus>                    Specific focus area for summarization
+    --output <output>                  Output file path (optional, prints to console if not provided)
+    --chunk-size <size>                Chunk size in characters (default: 8000, max: 32000)
+    --provider <provider>              LLM provider (requires --model)
+    --model <model>                    LLM model (requires --provider)
+    --max-tokens <tokens|auto>         Maximum total tokens for LLM context (default: auto)
+                                       - 'auto' or -1: Uses model's full context window
+                                       - Specific number: Hard limit for deployment constraint (GPU/RAM)
+    --max-output-tokens <tokens|auto>  Maximum tokens for LLM output (default: auto)
+    --verbose                          Show detailed progress information
+    --help                             Show this help message
+
+Memory Management:
+    --max-tokens controls token budget:
+    - Use 'auto' (default): Automatically uses model's full capability
+    - Use specific value: Hard limit for memory-constrained environments (e.g., --max-tokens 16000)
+
+    Example: 8GB GPU → --max-tokens 16000, 16GB GPU → --max-tokens 32000

 Examples:
+    # Auto mode (uses model's full capability)
     python -m abstractcore.apps.summarizer document.pdf
-
-
-    python -m abstractcore.apps.summarizer
+
+    # Memory-constrained (8GB GPU)
+    python -m abstractcore.apps.summarizer report.txt --max-tokens 16000
+
+    # Large document with specific style
+    python -m abstractcore.apps.summarizer data.md --style executive --length brief
+
+    # Custom model with hard limit
+    python -m abstractcore.apps.summarizer large.txt --provider openai --model gpt-4o-mini --max-tokens 24000
 """

 import argparse

@@ -239,16 +255,14 @@ Default model setup:

     parser.add_argument(
         '--max-tokens',
-
-        default
-        help='Maximum total tokens for LLM context (default: 32000)'
+        default='auto',
+        help='Maximum total tokens for LLM context (default: auto). Use "auto" or -1 for model\'s full capability, or specific number for hard limit (e.g., 16000 for 8GB GPU)'
     )

     parser.add_argument(
         '--max-output-tokens',
-
-        default
-        help='Maximum tokens for LLM output generation (default: 8000)'
+        default='auto',
+        help='Maximum tokens for LLM output generation (default: auto). Use "auto" or -1 for model\'s capability, or specific number'
     )

     parser.add_argument(

@@ -329,19 +343,40 @@ Default model setup:
         provider, model = get_app_defaults('summarizer')
         config_source = "configured defaults"

-    #
-    max_tokens
+    # Parse max_tokens (support 'auto', -1, or specific number)
+    if args.max_tokens in ('auto', 'Auto', 'AUTO'):
+        max_tokens = -1
+    else:
+        try:
+            max_tokens = int(args.max_tokens)
+        except ValueError:
+            print(f"Error: --max-tokens must be 'auto' or a number, got: {args.max_tokens}")
+            sys.exit(1)
+
+    # Parse max_output_tokens (support 'auto', -1, or specific number)
+    if args.max_output_tokens in ('auto', 'Auto', 'AUTO'):
+        max_output_tokens = -1
+    else:
+        try:
+            max_output_tokens = int(args.max_output_tokens)
+        except ValueError:
+            print(f"Error: --max-output-tokens must be 'auto' or a number, got: {args.max_output_tokens}")
+            sys.exit(1)

     if args.verbose:
-
+        max_tokens_display = "AUTO" if max_tokens == -1 else str(max_tokens)
+        max_output_display = "AUTO" if max_output_tokens == -1 else str(max_output_tokens)
+        print(f"Initializing summarizer ({provider}, {model}, {max_tokens_display} token context, {max_output_display} output tokens) - using {config_source}...")

     if args.debug:
+        max_tokens_display = "AUTO" if max_tokens == -1 else str(max_tokens)
+        max_output_display = "AUTO" if max_output_tokens == -1 else str(max_output_tokens)
         print(f"🐛 Debug - Configuration details:")
         print(f"  Provider: {provider}")
         print(f"  Model: {model}")
         print(f"  Config source: {config_source}")
-        print(f"  Max tokens: {
-        print(f"  Max output tokens: {
+        print(f"  Max tokens: {max_tokens_display}")
+        print(f"  Max output tokens: {max_output_display}")
         print(f"  Chunk size: {args.chunk_size}")
         print(f"  Timeout: {args.timeout}")
         print(f"  Style: {args.style}")

@@ -349,12 +384,19 @@ Default model setup:
         print(f"  Focus: {args.focus}")

     try:
-
+        # When using auto mode (-1), don't pass to create_llm (let provider use defaults)
+        llm_kwargs = {'timeout': args.timeout}
+        if max_tokens != -1:
+            llm_kwargs['max_tokens'] = max_tokens
+        if max_output_tokens != -1:
+            llm_kwargs['max_output_tokens'] = max_output_tokens
+
+        llm = create_llm(provider, model=model, **llm_kwargs)
         summarizer = BasicSummarizer(
             llm,
             max_chunk_size=args.chunk_size,
             max_tokens=max_tokens,
-            max_output_tokens=
+            max_output_tokens=max_output_tokens,
             timeout=args.timeout
         )
     except Exception as e:
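The behavioural contract behind the new --max-tokens/--max-output-tokens flags is easiest to see outside the argparse plumbing. The sketch below restates the rule the diff implements; parse_token_budget and build_llm_kwargs are hypothetical names used only for illustration, not part of the package:

    # 'auto' (any casing) or -1 means "defer to the model's own limits";
    # anything else must parse as an integer hard limit, mirroring the CLI error path.
    def parse_token_budget(raw: str) -> int:
        if raw.strip().lower() == "auto":
            return -1
        return int(raw)  # ValueError here corresponds to the CLI's error-and-exit branch

    def build_llm_kwargs(timeout: float, max_tokens: int, max_output_tokens: int) -> dict:
        # -1 (auto) is never forwarded, so the provider keeps its own defaults.
        kwargs = {"timeout": timeout}
        if max_tokens != -1:
            kwargs["max_tokens"] = max_tokens
        if max_output_tokens != -1:
            kwargs["max_output_tokens"] = max_output_tokens
        return kwargs

    assert parse_token_budget("auto") == -1
    assert parse_token_budget("16000") == 16000
    assert build_llm_kwargs(120.0, -1, 8000) == {"timeout": 120.0, "max_output_tokens": 8000}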
abstractcore/architectures/detection.py
CHANGED

@@ -20,6 +20,41 @@ _model_capabilities: Optional[Dict[str, Any]] = None
 # Cache for resolved model names and architectures to reduce redundant logging
 _resolved_aliases_cache: Dict[str, str] = {}
 _detected_architectures_cache: Dict[str, str] = {}
+# Cache to avoid repeating default-capabilities warnings for the same unknown model.
+_default_capabilities_warning_cache: set[str] = set()
+
+
+# Some callers pass provider/model as a single string (e.g. "lmstudio/qwen/qwen3-next-80b").
+# For capability lookup we want the underlying model id, not the provider prefix.
+_KNOWN_PROVIDER_PREFIXES = {
+    "anthropic",
+    "azure",
+    "bedrock",
+    "fireworks",
+    "gemini",
+    "google",
+    "groq",
+    "huggingface",
+    "lmstudio",
+    "local",
+    "mlx",
+    "nvidia",
+    "ollama",
+    "openai",
+    "openai-compatible",
+    "together",
+    "vllm",
+}
+
+
+def _strip_provider_prefix(model_name: str) -> str:
+    s = str(model_name or "").strip()
+    if not s or "/" not in s:
+        return s
+    head, rest = s.split("/", 1)
+    if head.strip().lower() in _KNOWN_PROVIDER_PREFIXES and rest.strip():
+        return rest.strip()
+    return s


 def _load_json_assets():

@@ -72,16 +107,36 @@ def detect_architecture(model_name: str) -> str:
         _detected_architectures_cache[model_name] = "generic"
         return "generic"

-
+    # Normalize model names for better pattern matching:
+    # - HuggingFace cache names use `--` as `/` separators (models--org--name).
+    # - Claude versions sometimes appear as `claude-3-5-sonnet` (normalize to `claude-3.5-sonnet`).
+    model_lower = model_name.lower().replace("--", "/")
+    import re
+    model_lower = re.sub(r'(claude-\d+)-(\d+)(?=-|$)', r'\1.\2', model_lower)
+
+    # Choose the most specific matching architecture.
+    # Many architectures include broad patterns (e.g. "gpt") that can accidentally
+    # match more specific models (e.g. "gpt-oss"). We resolve this by selecting the
+    # longest matching pattern across all architectures.
+    best_arch = "generic"
+    best_pattern = ""

-    # Check each architecture's patterns
     for arch_name, arch_config in _architecture_formats["architectures"].items():
         patterns = arch_config.get("patterns", [])
         for pattern in patterns:
-
-
-
-
+            pat = str(pattern).lower()
+            if not pat:
+                continue
+            if pat in model_lower and len(pat) > len(best_pattern):
+                best_arch = arch_name
+                best_pattern = pat
+
+    if best_arch != "generic":
+        logger.debug(
+            f"Detected architecture '{best_arch}' for model '{model_name}' (pattern: '{best_pattern}')"
+        )
+        _detected_architectures_cache[model_name] = best_arch
+        return best_arch

     # Fallback to generic
     logger.debug(f"No specific architecture detected for '{model_name}', using generic")
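The selection rule introduced in detect_architecture is worth restating in isolation: every pattern of every architecture is tested as a substring, and the longest match wins, so a broad pattern such as "gpt" can no longer shadow a more specific one such as "gpt-oss". A toy sketch of that rule (the pattern table is an illustrative subset, not the real architecture_formats.json):

    # Illustrative subset: one pattern per architecture instead of the real pattern lists.
    PATTERNS = {
        "gpt": "gpt",
        "gpt_oss": "gpt-oss",
        "glm4_moe": "glm-4.5",
    }

    def pick_architecture(model_name: str) -> str:
        # Longest matching pattern wins; ties keep the first one seen.
        name = model_name.lower().replace("--", "/")
        best_arch, best_pattern = "generic", ""
        for arch, pattern in PATTERNS.items():
            if pattern in name and len(pattern) > len(best_pattern):
                best_arch, best_pattern = arch, pattern
        return best_arch

    assert pick_architecture("openai/gpt-oss-20b") == "gpt_oss"  # not shadowed by the broader "gpt"
    assert pick_architecture("gpt-4o-mini") == "gpt"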
@@ -147,22 +202,69 @@ def resolve_model_alias(model_name: str, models: Dict[str, Any]) -> str:
     if normalized_model_name != model_name:
         logger.debug(f"Normalized model name '{model_name}' to '{normalized_model_name}'")

-    #
-
-
-
-
-
+    # Also support "provider/model" strings by stripping known provider prefixes.
+    stripped_model_name = _strip_provider_prefix(model_name)
+    stripped_normalized_name = _strip_provider_prefix(normalized_model_name)
+
+    def _tail(name: str) -> str:
+        s = str(name or "").strip()
+        if not s or "/" not in s:
+            return s
+        return s.split("/")[-1].strip()
+
+    def _candidates(*names: str) -> List[str]:
+        out: List[str] = []
+        for n in names:
+            s = str(n or "").strip()
+            if not s:
+                continue
+            out.append(s)
+            t = _tail(s)
+            if t and t != s:
+                out.append(t)
+        # Deduplicate while preserving order
+        uniq: List[str] = []
+        seen: set[str] = set()
+        for s in out:
+            if s in seen:
+                continue
+            seen.add(s)
+            uniq.append(s)
+        return uniq
+
+    # Check if any normalized/stripped name is a canonical name.
+    for candidate in _candidates(normalized_model_name, stripped_normalized_name, stripped_model_name):
+        if candidate in models:
+            _resolved_aliases_cache[model_name] = candidate
+            return candidate
+
+    # Check if it's an alias of any model (try both original and normalized).
+    # Some JSON entries intentionally share aliases (e.g. base + variant). Prefer the
+    # most specific canonical model name deterministically.
+    alias_matches: List[str] = []
     for canonical_name, model_info in models.items():
         aliases = model_info.get("aliases", [])
-        if
-
-
-
+        if not isinstance(aliases, list) or not aliases:
+            continue
+        candidates = _candidates(model_name, normalized_model_name, stripped_model_name, stripped_normalized_name)
+        alias_set = {str(a).strip().lower() for a in aliases if isinstance(a, str) and str(a).strip()}
+        cand_set = {str(c).strip().lower() for c in candidates if isinstance(c, str) and str(c).strip()}
+        if alias_set and cand_set and alias_set.intersection(cand_set):
+            alias_matches.append(canonical_name)
+
+    if alias_matches:
+        best = max(alias_matches, key=lambda n: (len(str(n)), str(n)))
+        logger.debug(f"Resolved alias '{model_name}' to canonical name '{best}'")
+        _resolved_aliases_cache[model_name] = best
+        return best

     # Return normalized name if no alias found
-
-
+    fallback = stripped_normalized_name or normalized_model_name
+    fallback_tail = _tail(fallback)
+    if fallback_tail:
+        fallback = fallback_tail
+    _resolved_aliases_cache[model_name] = fallback
+    return fallback


 def get_model_capabilities(model_name: str) -> Dict[str, Any]:
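resolve_model_alias now tries several spellings of the incoming name against canonical names and aliases: the raw string, the string with a known provider prefix stripped, and the bare path tail. A simplified sketch of that candidate generation (KNOWN_PREFIXES and candidates are illustrative stand-ins, not the module's actual helpers):

    KNOWN_PREFIXES = {"lmstudio", "ollama", "openai", "anthropic"}  # subset for illustration

    def candidates(model_name: str) -> list[str]:
        out = [model_name]
        head, _, rest = model_name.partition("/")
        if head.lower() in KNOWN_PREFIXES and rest:
            out.append(rest)                  # provider prefix stripped
        tail = model_name.rsplit("/", 1)[-1]
        if tail not in out:
            out.append(tail)                  # bare tail, e.g. for org/model ids
        return out

    assert candidates("lmstudio/qwen/qwen3-next-80b") == [
        "lmstudio/qwen/qwen3-next-80b",
        "qwen/qwen3-next-80b",
        "qwen3-next-80b",
    ]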
@@ -199,15 +301,44 @@ def get_model_capabilities(model_name: str) -> Dict[str, Any]:
     # Step 3: Try partial matches for common model naming patterns
     # Use canonical_name (which has been normalized) for better matching
     canonical_lower = canonical_name.lower()
-
-
+    candidates_name_in_key: List[tuple[int, int, str]] = []
+    candidates_key_in_name: List[tuple[int, str]] = []
+    for model_key in models.keys():
+        if not isinstance(model_key, str) or not model_key.strip():
+            continue
+        key_lower = model_key.lower()
+
+        # Prefer a close "superstring" match where the canonical name is missing a suffix.
+        # Example: "qwen3-next-80b" -> "qwen3-next-80b-a3b"
+        if canonical_lower and canonical_lower in key_lower:
+            extra = max(0, len(key_lower) - len(canonical_lower))
+            candidates_name_in_key.append((extra, len(key_lower), model_key))
+            continue
+
+        # Otherwise, prefer the most specific substring match (e.g. provider/model prefixes).
+        if key_lower in canonical_lower:
+            candidates_key_in_name.append((len(key_lower), model_key))
+
+    best_key: Optional[str] = None
+    best_mode: Optional[str] = None
+    if candidates_name_in_key:
+        candidates_name_in_key.sort(key=lambda x: (x[0], -x[1]))
+        best_key = candidates_name_in_key[0][2]
+        best_mode = "name_in_key"
+    elif candidates_key_in_name:
+        best_key = max(candidates_key_in_name, key=lambda x: x[0])[1]
+        best_mode = "key_in_name"
+
+    if best_key is not None:
+        capabilities = models.get(best_key)
+        if isinstance(capabilities, dict):
             result = capabilities.copy()
             # Remove alias-specific fields
             result.pop("canonical_name", None)
             result.pop("aliases", None)
             if "architecture" not in result:
                 result["architecture"] = detect_architecture(model_name)
-            logger.debug(f"Using capabilities from '{
+            logger.debug(f"Using capabilities from '{best_key}' for '{model_name}' (partial match: {best_mode})")
             return result

     # Step 4: Fallback to default capabilities based on architecture

@@ -215,16 +346,50 @@ def get_model_capabilities(model_name: str) -> Dict[str, Any]:
     default_caps = _model_capabilities.get("default_capabilities", {}).copy()
     default_caps["architecture"] = architecture

-    # Enhance defaults based on architecture
+    # Enhance defaults based on architecture.
+    #
+    # NOTE: `architecture_formats.json.tool_format` describes the *prompted transcript syntax*
+    # for tool calls (e.g. XML-wrapped, <|tool_call|> blocks, etc). Some architectures/models
+    # also support *native tool APIs* (provider-level `tools` payloads) even when their prompted
+    # transcript format is non-native. For those cases, architectures can set an explicit
+    # `default_tool_support` to avoid relying on tool_format heuristics.
     arch_format = get_architecture_format(architecture)
-
+
+    explicit_support = str(arch_format.get("default_tool_support") or "").strip().lower()
+    if explicit_support in {"native", "prompted", "none"}:
+        default_caps["tool_support"] = explicit_support
+    elif arch_format.get("tool_format") == "native":
         default_caps["tool_support"] = "native"
-    elif arch_format.get("tool_format") in ["special_token", "json", "xml", "pythonic"]:
+    elif arch_format.get("tool_format") in ["special_token", "json", "xml", "pythonic", "glm_xml"]:
         default_caps["tool_support"] = "prompted"
     else:
         default_caps["tool_support"] = "none"

+    # Propagate architecture-level output wrappers into default capabilities.
+    wrappers = arch_format.get("output_wrappers")
+    if isinstance(wrappers, dict) and wrappers:
+        default_caps["output_wrappers"] = dict(wrappers)
+
     logger.debug(f"Using default capabilities for '{model_name}' (architecture: {architecture})")
+
+    # Emit a one-time warning for unknown models to keep model_capabilities.json up to date.
+    try:
+        raw_name = str(model_name).strip()
+    except Exception:
+        raw_name = ""
+
+    if raw_name and raw_name not in _default_capabilities_warning_cache:
+        _default_capabilities_warning_cache.add(raw_name)
+        logger.warning(
+            "Model not found in model_capabilities.json; falling back to architecture defaults",
+            model_name=raw_name,
+            detected_architecture=architecture,
+            default_tool_support=default_caps.get("tool_support"),
+            next_steps=(
+                "Add this model (or an alias) to abstractcore/abstractcore/assets/model_capabilities.json "
+                "or email contact@abstractcore.ai with the exact model id and provider."
+            ),
+        )
     return default_caps
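The fallback path above leaves an architecture with one of three tool-support values: an explicit default_tool_support always wins, otherwise a native tool_format implies "native", and the prompted transcript formats (now including glm_xml) imply "prompted". A compact restatement under those assumptions (infer_tool_support is an illustrative helper, not library code):

    def infer_tool_support(arch_format: dict) -> str:
        # Explicit architecture-level override takes precedence over tool_format heuristics.
        explicit = str(arch_format.get("default_tool_support") or "").strip().lower()
        if explicit in {"native", "prompted", "none"}:
            return explicit
        if arch_format.get("tool_format") == "native":
            return "native"
        if arch_format.get("tool_format") in ["special_token", "json", "xml", "pythonic", "glm_xml"]:
            return "prompted"
        return "none"

    # The claude architecture keeps its prompted XML transcript format but now reports
    # native tool support via the explicit override added in this release:
    assert infer_tool_support({"tool_format": "xml", "default_tool_support": "native"}) == "native"
    assert infer_tool_support({"tool_format": "glm_xml"}) == "prompted"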
@@ -539,4 +704,4 @@ def check_vision_model_compatibility(model_name: str, provider: str = None) -> D
         result['warnings'].append("No max_image_tokens specified")
         result['recommendations'].append("Add max_image_tokens to model capabilities")

-    return result
+    return result
abstractcore/assets/architecture_formats.json
CHANGED

@@ -88,7 +88,7 @@
       "patterns": ["qwen3-next"]
     },
     "qwen3_vl": {
-      "description": "Alibaba's Qwen3-VL multimodal architecture (
+      "description": "Alibaba's Qwen3-VL multimodal architecture (May 2025)",
       "message_format": "im_start_end",
       "system_prefix": "<|im_start|>system\n",
       "system_suffix": "<|im_end|>\n",

@@ -96,8 +96,20 @@
       "user_suffix": "<|im_end|>\n",
       "assistant_prefix": "<|im_start|>assistant\n",
       "assistant_suffix": "<|im_end|>\n",
-      "tool_format": "
-      "patterns": ["qwen3-vl"]
+      "tool_format": "native",
+      "patterns": ["qwen3-vl-2b", "qwen3-vl-4b", "qwen3-vl-8b"]
+    },
+    "qwen3_vl_moe": {
+      "description": "Alibaba's Qwen3-VL MoE multimodal architecture (May 2025)",
+      "message_format": "im_start_end",
+      "system_prefix": "<|im_start|>system\n",
+      "system_suffix": "<|im_end|>\n",
+      "user_prefix": "<|im_start|>user\n",
+      "user_suffix": "<|im_end|>\n",
+      "assistant_prefix": "<|im_start|>assistant\n",
+      "assistant_suffix": "<|im_end|>\n",
+      "tool_format": "native",
+      "patterns": ["qwen3-vl-30b-a3b", "qwen3-vl-235b-a22b"]
     },
     "qwen3_moe": {
       "description": "Alibaba's Qwen3 MoE architecture (April 2025)",

@@ -211,6 +223,30 @@
       "tool_format": "native",
       "patterns": ["codestral"]
     },
+    "mistral3": {
+      "description": "Mistral AI Mistral 3 architecture with rope-scaling (December 2024)",
+      "message_format": "inst",
+      "system_prefix": "",
+      "system_suffix": "\n\n",
+      "user_prefix": "[INST] ",
+      "user_suffix": " [/INST]",
+      "assistant_prefix": "",
+      "assistant_suffix": "",
+      "tool_format": "native",
+      "patterns": ["mistral-small-3", "devstral-small-2"]
+    },
+    "ministral3": {
+      "description": "Mistral AI Ministral 3 architecture (December 2024)",
+      "message_format": "inst",
+      "system_prefix": "",
+      "system_suffix": "\n\n",
+      "user_prefix": "[INST] ",
+      "user_suffix": " [/INST]",
+      "assistant_prefix": "",
+      "assistant_suffix": "",
+      "tool_format": "native",
+      "patterns": ["devstral-2-123b"]
+    },
     "phi": {
       "description": "Microsoft's Phi architecture family",
       "message_format": "basic",

@@ -295,6 +331,26 @@
       "tool_format": "none",
       "patterns": ["gemma-2b", "gemma-7b"]
     },
+    "glm4v_moe": {
+      "description": "Zhipu AI's GLM-4.6V multimodal MoE architecture (May 2025)",
+      "message_format": "glm_special_tokens",
+      "system_prefix": "<|system|>\n",
+      "system_suffix": "\n",
+      "user_prefix": "<|user|>\n",
+      "user_suffix": "\n",
+      "assistant_prefix": "<|assistant|>\n",
+      "assistant_suffix": "\n",
+      "tool_format": "glm_xml",
+      "tool_calling_format": "<tool_call>function_name\n<arg_key>key</arg_key>\n<arg_value>value</arg_value>\n</tool_call>",
+      "output_wrappers": {
+        "start": "<|begin_of_box|>",
+        "end": "<|end_of_box|>"
+      },
+      "thinking_output_field": "reasoning_content",
+      "thinking_tags": ["<think>", "</think>"],
+      "thinking_control": "/nothink",
+      "patterns": ["glm-4.6v", "glm4.6v", "zai-org/glm-4.6v", "glm-4.6v-flash", "glm-4.6v-fp8", "glm-4.6v-flash-fp8"]
+    },
     "glm4_moe": {
       "description": "Zhipu AI's GLM-4.5+ MoE architecture (July 2025)",
       "message_format": "im_start_end",
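For reference, the glm_xml tool_calling_format string added for glm4v_moe expands to one <tool_call> block per call with alternating <arg_key>/<arg_value> pairs. A hedged sketch of a renderer that produces that shape (render_glm_tool_call is illustrative only, not the formatter or parser the package ships):

    def render_glm_tool_call(name: str, args: dict) -> str:
        # Follows the template: <tool_call>function_name\n<arg_key>key</arg_key>\n<arg_value>value</arg_value>\n</tool_call>
        parts = [f"<tool_call>{name}"]
        for key, value in args.items():
            parts.append(f"<arg_key>{key}</arg_key>")
            parts.append(f"<arg_value>{value}</arg_value>")
        parts.append("</tool_call>")
        return "\n".join(parts)

    print(render_glm_tool_call("get_weather", {"city": "Paris"}))
    # <tool_call>get_weather
    # <arg_key>city</arg_key>
    # <arg_value>Paris</arg_value>
    # </tool_call>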
@@ -306,7 +362,7 @@
       "assistant_suffix": "\n",
       "tool_format": "special_token",
       "tool_prefix": "<|tool_call|>",
-      "patterns": ["glm-4.
+      "patterns": ["glm-4.7", "glm-4.6", "glm-4.5", "glm-4.5-air"]
     },
     "glm4v": {
       "description": "Zhipu AI's GLM-4V multimodal architecture (June 2024)",

@@ -345,6 +401,19 @@
       "tool_prefix": "<|tool_call|>",
       "patterns": ["granite", "granite3.3:2b"]
     },
+    "granitemoehybrid": {
+      "description": "IBM's Granite 4.0 hybrid MoE architecture with Mamba2 (October 2025)",
+      "message_format": "special_tokens",
+      "system_prefix": "<|system|>\n",
+      "system_suffix": "\n",
+      "user_prefix": "<|user|>\n",
+      "user_suffix": "\n",
+      "assistant_prefix": "<|assistant|>\n",
+      "assistant_suffix": "\n",
+      "tool_format": "native",
+      "tool_prefix": "<|tool_call|>",
+      "patterns": ["granite-4.0-h", "granite-h-tiny", "granite-h-micro"]
+    },
     "deepseek": {
       "description": "DeepSeek architecture family",
       "message_format": "im_start_end",

@@ -357,6 +426,32 @@
       "tool_format": "json",
       "patterns": ["deepseek"]
     },
+    "minimax_m2_1": {
+      "description": "MiniMax M2.1 enhanced MoE architecture with improved coding and agentic capabilities (December 2024)",
+      "message_format": "im_start_end",
+      "system_prefix": "<|im_start|>system\n",
+      "system_suffix": "<|im_end|>\n",
+      "user_prefix": "<|im_start|>user\n",
+      "user_suffix": "<|im_end|>\n",
+      "assistant_prefix": "<|im_start|>assistant\n",
+      "assistant_suffix": "<|im_end|>\n",
+      "tool_format": "native",
+      "thinking_format": "<think>...</think>",
+      "patterns": ["minimax-m2.1", "minimaxai/minimax-m2.1", "minimax/minimax-m2.1"]
+    },
+    "minimax_m2": {
+      "description": "MiniMax M2 MoE architecture with interleaved thinking (January 2025)",
+      "message_format": "im_start_end",
+      "system_prefix": "<|im_start|>system\n",
+      "system_suffix": "<|im_end|>\n",
+      "user_prefix": "<|im_start|>user\n",
+      "user_suffix": "<|im_end|>\n",
+      "assistant_prefix": "<|im_start|>assistant\n",
+      "assistant_suffix": "<|im_end|>\n",
+      "tool_format": "native",
+      "thinking_format": "<think>...</think>",
+      "patterns": ["minimax-m2", "minimaxai/minimax-m2", "minimax/minimax-m2"]
+    },
     "seed_oss": {
       "description": "ByteDance's Seed-OSS long-context architecture (August 2025)",
       "message_format": "im_start_end",

@@ -390,6 +485,7 @@
       "user_suffix": "\n",
       "assistant_prefix": "Assistant: ",
       "assistant_suffix": "\n",
+      "default_tool_support": "native",
       "tool_format": "xml",
       "patterns": ["claude", "claude-3", "claude-4", "claude-4.1", "claude-4.5"]
     },

@@ -399,6 +495,32 @@
       "tool_format": "openai_functions",
       "patterns": ["gpt", "chatgpt", "gpt-5"]
     },
+    "gpt_oss": {
+      "description": "OpenAI GPT-OSS open-weight MoE architecture with harmony format (August 2025)",
+      "message_format": "harmony",
+      "system_prefix": "<|system|>\n",
+      "system_suffix": "\n",
+      "user_prefix": "<|user|>\n",
+      "user_suffix": "\n",
+      "assistant_prefix": "<|assistant|>\n",
+      "assistant_suffix": "\n",
+      "tool_format": "native",
+      "reasoning_levels": ["low", "medium", "high"],
+      "patterns": ["gpt-oss", "openai/gpt-oss"]
+    },
+    "nemotron_hybrid_moe": {
+      "description": "NVIDIA Nemotron-3-Nano hybrid MoE architecture with Mamba-2 and Attention layers (December 2025)",
+      "message_format": "im_start_end",
+      "system_prefix": "<|im_start|>system\n",
+      "system_suffix": "<|im_end|>\n",
+      "user_prefix": "<|im_start|>user\n",
+      "user_suffix": "<|im_end|>\n",
+      "assistant_prefix": "<|im_start|>assistant\n",
+      "assistant_suffix": "<|im_end|>\n",
+      "tool_format": "json",
+      "reasoning_support": true,
+      "patterns": ["nemotron-3-nano", "nemotron-nano", "nvidia/nemotron"]
+    },
     "generic": {
       "description": "Generic/unknown architecture fallback",
       "message_format": "basic",

@@ -420,7 +542,8 @@
     "human_assistant": "Human/Assistant format",
     "openai_chat": "OpenAI chat completion format",
     "llama3_header": "LLaMA 3+ format with <|start_header_id|> and <|eot_id|>",
-    "glm_special_tokens": "GLM format with <|system|>, <|user|>, <|assistant|> tokens"
+    "glm_special_tokens": "GLM format with <|system|>, <|user|>, <|assistant|> tokens",
+    "harmony": "OpenAI harmony response format for GPT-OSS models"
   },
   "tool_formats": {
     "pythonic": "Python function call syntax: [func(arg=val)]",

@@ -432,4 +555,4 @@
     "prompted": "Tool use through careful prompting",
     "none": "No tool support"
   }
-}
+}