abstractcore 2.9.1__py3-none-any.whl → 2.11.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/__init__.py +7 -27
- abstractcore/apps/extractor.py +33 -100
- abstractcore/apps/intent.py +19 -0
- abstractcore/apps/judge.py +20 -1
- abstractcore/apps/summarizer.py +20 -1
- abstractcore/architectures/detection.py +34 -1
- abstractcore/architectures/response_postprocessing.py +313 -0
- abstractcore/assets/architecture_formats.json +38 -8
- abstractcore/assets/model_capabilities.json +781 -160
- abstractcore/compression/__init__.py +1 -2
- abstractcore/compression/glyph_processor.py +6 -4
- abstractcore/config/main.py +31 -19
- abstractcore/config/manager.py +389 -11
- abstractcore/config/vision_config.py +5 -5
- abstractcore/core/interface.py +151 -3
- abstractcore/core/session.py +16 -10
- abstractcore/download.py +1 -1
- abstractcore/embeddings/manager.py +20 -6
- abstractcore/endpoint/__init__.py +2 -0
- abstractcore/endpoint/app.py +458 -0
- abstractcore/mcp/client.py +3 -1
- abstractcore/media/__init__.py +52 -17
- abstractcore/media/auto_handler.py +42 -22
- abstractcore/media/base.py +44 -1
- abstractcore/media/capabilities.py +12 -33
- abstractcore/media/enrichment.py +105 -0
- abstractcore/media/handlers/anthropic_handler.py +19 -28
- abstractcore/media/handlers/local_handler.py +124 -70
- abstractcore/media/handlers/openai_handler.py +19 -31
- abstractcore/media/processors/__init__.py +4 -2
- abstractcore/media/processors/audio_processor.py +57 -0
- abstractcore/media/processors/office_processor.py +8 -3
- abstractcore/media/processors/pdf_processor.py +46 -3
- abstractcore/media/processors/text_processor.py +22 -24
- abstractcore/media/processors/video_processor.py +58 -0
- abstractcore/media/types.py +97 -4
- abstractcore/media/utils/image_scaler.py +20 -2
- abstractcore/media/utils/video_frames.py +219 -0
- abstractcore/media/vision_fallback.py +136 -22
- abstractcore/processing/__init__.py +32 -3
- abstractcore/processing/basic_deepsearch.py +15 -10
- abstractcore/processing/basic_intent.py +3 -2
- abstractcore/processing/basic_judge.py +3 -2
- abstractcore/processing/basic_summarizer.py +1 -1
- abstractcore/providers/__init__.py +3 -1
- abstractcore/providers/anthropic_provider.py +95 -8
- abstractcore/providers/base.py +1516 -81
- abstractcore/providers/huggingface_provider.py +546 -69
- abstractcore/providers/lmstudio_provider.py +35 -923
- abstractcore/providers/mlx_provider.py +382 -35
- abstractcore/providers/model_capabilities.py +5 -1
- abstractcore/providers/ollama_provider.py +99 -15
- abstractcore/providers/openai_compatible_provider.py +406 -180
- abstractcore/providers/openai_provider.py +188 -44
- abstractcore/providers/openrouter_provider.py +76 -0
- abstractcore/providers/registry.py +61 -5
- abstractcore/providers/streaming.py +138 -33
- abstractcore/providers/vllm_provider.py +92 -817
- abstractcore/server/app.py +461 -13
- abstractcore/server/audio_endpoints.py +139 -0
- abstractcore/server/vision_endpoints.py +1319 -0
- abstractcore/structured/handler.py +316 -41
- abstractcore/tools/common_tools.py +5501 -2012
- abstractcore/tools/comms_tools.py +1641 -0
- abstractcore/tools/core.py +37 -7
- abstractcore/tools/handler.py +4 -9
- abstractcore/tools/parser.py +49 -2
- abstractcore/tools/tag_rewriter.py +2 -1
- abstractcore/tools/telegram_tdlib.py +407 -0
- abstractcore/tools/telegram_tools.py +261 -0
- abstractcore/utils/cli.py +1085 -72
- abstractcore/utils/token_utils.py +2 -0
- abstractcore/utils/truncation.py +29 -0
- abstractcore/utils/version.py +3 -4
- abstractcore/utils/vlm_token_calculator.py +12 -2
- abstractcore-2.11.2.dist-info/METADATA +562 -0
- abstractcore-2.11.2.dist-info/RECORD +133 -0
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/WHEEL +1 -1
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/entry_points.txt +1 -0
- abstractcore-2.9.1.dist-info/METADATA +0 -1190
- abstractcore-2.9.1.dist-info/RECORD +0 -119
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/top_level.txt +0 -0
|
@@ -5,7 +5,7 @@ Structured output handler for managing schema-based LLM responses.
|
|
|
5
5
|
import json
|
|
6
6
|
import re
|
|
7
7
|
import time
|
|
8
|
-
from typing import Type, Dict, Any, Optional
|
|
8
|
+
from typing import Type, Dict, Any, Optional, get_args, get_origin
|
|
9
9
|
from enum import Enum
|
|
10
10
|
from pydantic import BaseModel, ValidationError
|
|
11
11
|
|
|
@@ -15,6 +15,64 @@ from ..utils.self_fixes import fix_json
|
|
|
15
15
|
from ..events import EventType, emit_global, create_structured_output_event
|
|
16
16
|
|
|
17
17
|
|
|
18
|
+
def _coerce_single_list_wrapper(data: Any, *, response_model: Type[BaseModel]) -> Any:
|
|
19
|
+
"""Repair common wrapper-shape drift for list-centric schemas.
|
|
20
|
+
|
|
21
|
+
Some servers/models will emit either:
|
|
22
|
+
- the list itself (instead of the object wrapper)
|
|
23
|
+
- a single list item (instead of the wrapper with a 1-element list)
|
|
24
|
+
|
|
25
|
+
When the response model is an object with exactly one list field, we can safely
|
|
26
|
+
coerce these shapes back into the expected wrapper.
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
try:
|
|
30
|
+
fields = getattr(response_model, "model_fields", None)
|
|
31
|
+
if not isinstance(fields, dict) or len(fields) != 1:
|
|
32
|
+
return data
|
|
33
|
+
|
|
34
|
+
(field_name, field_info), = list(fields.items())
|
|
35
|
+
if not isinstance(field_name, str) or not field_name:
|
|
36
|
+
return data
|
|
37
|
+
|
|
38
|
+
# If the model already returned the expected wrapper field, keep as-is.
|
|
39
|
+
if isinstance(data, dict) and field_name in data:
|
|
40
|
+
return data
|
|
41
|
+
|
|
42
|
+
annotation = getattr(field_info, "annotation", None)
|
|
43
|
+
origin = get_origin(annotation)
|
|
44
|
+
args = get_args(annotation)
|
|
45
|
+
if origin is not list or not args:
|
|
46
|
+
return data
|
|
47
|
+
|
|
48
|
+
item_type = args[0]
|
|
49
|
+
item_model_fields = getattr(item_type, "model_fields", None) if isinstance(item_type, type) else None
|
|
50
|
+
required: set[str] = set()
|
|
51
|
+
if isinstance(item_model_fields, dict) and item_model_fields:
|
|
52
|
+
for k, v in item_model_fields.items():
|
|
53
|
+
if isinstance(k, str) and k and hasattr(v, "is_required") and v.is_required():
|
|
54
|
+
required.add(k)
|
|
55
|
+
|
|
56
|
+
# If we got a list of items, wrap it.
|
|
57
|
+
if isinstance(data, list):
|
|
58
|
+
return {field_name: data}
|
|
59
|
+
|
|
60
|
+
# If we got a single item-like object, wrap it.
|
|
61
|
+
if isinstance(data, dict):
|
|
62
|
+
# When the item type is a BaseModel, only coerce if required fields match.
|
|
63
|
+
if required:
|
|
64
|
+
if required.issubset(set(data.keys())):
|
|
65
|
+
return {field_name: [data]}
|
|
66
|
+
return data
|
|
67
|
+
# Otherwise (e.g. list[dict]), we can't validate shape here; still, this is a
|
|
68
|
+
# safe coercion when the response model is a 1-field list wrapper.
|
|
69
|
+
return {field_name: [data]}
|
|
70
|
+
except Exception:
|
|
71
|
+
return data
|
|
72
|
+
|
|
73
|
+
return data
|
|
74
|
+
|
|
75
|
+
|
|
18
76
|
class StructuredOutputHandler:
|
|
19
77
|
"""
|
|
20
78
|
Handles structured output generation using two strategies:
|
|
@@ -189,41 +247,151 @@ class StructuredOutputHandler:
|
|
|
189
247
|
Returns:
|
|
190
248
|
Validated instance of response_model
|
|
191
249
|
"""
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
250
|
+
def _is_truncated(resp: Any) -> bool:
|
|
251
|
+
fr = getattr(resp, "finish_reason", None)
|
|
252
|
+
fr_str = str(fr or "").strip().lower()
|
|
253
|
+
return fr_str in {"length", "max_tokens", "max_output_tokens"}
|
|
254
|
+
|
|
255
|
+
def _bump_max_output_tokens(current_kwargs: dict) -> dict:
|
|
256
|
+
updated = dict(current_kwargs)
|
|
257
|
+
raw = updated.get("max_output_tokens")
|
|
258
|
+
if raw is None:
|
|
259
|
+
raw = updated.get("max_tokens")
|
|
260
|
+
cur = 0
|
|
261
|
+
if raw is not None and not isinstance(raw, bool):
|
|
262
|
+
try:
|
|
263
|
+
cur = int(raw)
|
|
264
|
+
except Exception:
|
|
265
|
+
cur = 0
|
|
266
|
+
if cur <= 0:
|
|
267
|
+
try:
|
|
268
|
+
cur = int(getattr(provider, "max_output_tokens", 0) or 0)
|
|
269
|
+
except Exception:
|
|
270
|
+
cur = 0
|
|
271
|
+
if cur <= 0:
|
|
272
|
+
cur = 512
|
|
273
|
+
|
|
274
|
+
# Prefer geometric growth; also add a fixed floor so small values ramp quickly.
|
|
275
|
+
bumped = max(cur * 2, cur + 500)
|
|
276
|
+
cap = 0
|
|
277
|
+
try:
|
|
278
|
+
# Use model capabilities (not provider defaults) to avoid accidental hard caps.
|
|
279
|
+
from ..architectures.detection import get_context_limits
|
|
280
|
+
|
|
281
|
+
model_name = getattr(provider, "model", None)
|
|
282
|
+
limits = get_context_limits(str(model_name or ""))
|
|
283
|
+
cap = int(limits.get("max_output_tokens") or 0)
|
|
284
|
+
except Exception:
|
|
285
|
+
cap = 0
|
|
286
|
+
if cap <= 0:
|
|
287
|
+
cap = 1_000_000
|
|
288
|
+
updated["max_output_tokens"] = min(bumped, cap)
|
|
289
|
+
# Avoid ambiguity when both keys are present.
|
|
290
|
+
updated.pop("max_tokens", None)
|
|
291
|
+
return updated
|
|
292
|
+
|
|
293
|
+
last_error: Exception | None = None
|
|
294
|
+
attempt_kwargs = dict(kwargs)
|
|
295
|
+
def _coerce_boolish(value: Any) -> bool:
|
|
296
|
+
if isinstance(value, bool):
|
|
297
|
+
return bool(value)
|
|
298
|
+
if isinstance(value, (int, float)) and not isinstance(value, bool):
|
|
299
|
+
return float(value) != 0.0
|
|
300
|
+
if isinstance(value, str):
|
|
301
|
+
return value.strip().lower() in {"1", "true", "yes", "y", "on"}
|
|
302
|
+
return False
|
|
303
|
+
|
|
304
|
+
allow_truncation_raw = attempt_kwargs.pop("allow_truncation", None)
|
|
305
|
+
if allow_truncation_raw is None:
|
|
306
|
+
allow_truncation_raw = attempt_kwargs.pop("allow_truncated", None)
|
|
307
|
+
allow_truncation = _coerce_boolish(allow_truncation_raw) if allow_truncation_raw is not None else False
|
|
308
|
+
|
|
309
|
+
max_attempts = int(getattr(self.retry_strategy, "max_attempts", 3) or 3)
|
|
310
|
+
for attempt in range(1, max_attempts + 1):
|
|
311
|
+
response = provider._generate_internal(
|
|
312
|
+
prompt=prompt,
|
|
313
|
+
response_model=response_model,
|
|
314
|
+
**attempt_kwargs,
|
|
315
|
+
)
|
|
199
316
|
|
|
200
|
-
# For native support, the response content should already be structured
|
|
201
|
-
if isinstance(response.content, dict):
|
|
202
|
-
return response_model.model_validate(response.content)
|
|
203
|
-
else:
|
|
204
|
-
# Parse JSON string
|
|
205
317
|
try:
|
|
318
|
+
if isinstance(response.content, dict):
|
|
319
|
+
validated = response_model.model_validate(
|
|
320
|
+
_coerce_single_list_wrapper(response.content, response_model=response_model)
|
|
321
|
+
)
|
|
322
|
+
if _is_truncated(response) and not allow_truncation:
|
|
323
|
+
if attempt < max_attempts:
|
|
324
|
+
bumped = _bump_max_output_tokens(attempt_kwargs)
|
|
325
|
+
self.logger.warning(
|
|
326
|
+
"Structured output truncated; retrying with higher max_output_tokens",
|
|
327
|
+
finish_reason=str(getattr(response, "finish_reason", None)),
|
|
328
|
+
attempt=attempt,
|
|
329
|
+
max_output_tokens=attempt_kwargs.get("max_output_tokens") or attempt_kwargs.get("max_tokens"),
|
|
330
|
+
next_max_output_tokens=bumped.get("max_output_tokens"),
|
|
331
|
+
)
|
|
332
|
+
attempt_kwargs = bumped
|
|
333
|
+
continue
|
|
334
|
+
raise RuntimeError("Structured output was truncated (finish_reason=length). Increase max_output_tokens.")
|
|
335
|
+
return validated
|
|
336
|
+
|
|
206
337
|
data = json.loads(response.content)
|
|
207
|
-
|
|
338
|
+
validated = response_model.model_validate(_coerce_single_list_wrapper(data, response_model=response_model))
|
|
339
|
+
if _is_truncated(response) and not allow_truncation:
|
|
340
|
+
if attempt < max_attempts:
|
|
341
|
+
bumped = _bump_max_output_tokens(attempt_kwargs)
|
|
342
|
+
self.logger.warning(
|
|
343
|
+
"Structured output truncated; retrying with higher max_output_tokens",
|
|
344
|
+
finish_reason=str(getattr(response, "finish_reason", None)),
|
|
345
|
+
attempt=attempt,
|
|
346
|
+
max_output_tokens=attempt_kwargs.get("max_output_tokens") or attempt_kwargs.get("max_tokens"),
|
|
347
|
+
next_max_output_tokens=bumped.get("max_output_tokens"),
|
|
348
|
+
)
|
|
349
|
+
attempt_kwargs = bumped
|
|
350
|
+
continue
|
|
351
|
+
raise RuntimeError("Structured output was truncated (finish_reason=length). Increase max_output_tokens.")
|
|
352
|
+
return validated
|
|
208
353
|
except (json.JSONDecodeError, ValidationError) as e:
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
354
|
+
last_error = e
|
|
355
|
+
|
|
356
|
+
if _is_truncated(response) and attempt < max_attempts:
|
|
357
|
+
bumped = _bump_max_output_tokens(attempt_kwargs)
|
|
358
|
+
self.logger.warning(
|
|
359
|
+
"Structured output truncated; retrying with higher max_output_tokens",
|
|
360
|
+
finish_reason=str(getattr(response, "finish_reason", None)),
|
|
361
|
+
attempt=attempt,
|
|
362
|
+
max_output_tokens=attempt_kwargs.get("max_output_tokens") or attempt_kwargs.get("max_tokens"),
|
|
363
|
+
next_max_output_tokens=bumped.get("max_output_tokens"),
|
|
364
|
+
)
|
|
365
|
+
attempt_kwargs = bumped
|
|
366
|
+
continue
|
|
367
|
+
|
|
368
|
+
fixed_json = None
|
|
369
|
+
try:
|
|
370
|
+
fixed_json = fix_json(response.content)
|
|
371
|
+
except Exception:
|
|
372
|
+
fixed_json = None
|
|
373
|
+
|
|
215
374
|
if fixed_json:
|
|
216
375
|
try:
|
|
217
376
|
data = json.loads(fixed_json)
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
377
|
+
validated = response_model.model_validate(
|
|
378
|
+
_coerce_single_list_wrapper(data, response_model=response_model)
|
|
379
|
+
)
|
|
380
|
+
if _is_truncated(response) and not allow_truncation:
|
|
381
|
+
raise RuntimeError(
|
|
382
|
+
"Structured output was truncated (finish_reason=length) and only repaired via JSON self-fix. "
|
|
383
|
+
"Increase max_output_tokens."
|
|
384
|
+
)
|
|
385
|
+
return validated
|
|
386
|
+
except (json.JSONDecodeError, ValidationError):
|
|
387
|
+
pass
|
|
388
|
+
|
|
389
|
+
# Non-truncation failures can still happen even in native mode; fall back to prompted.
|
|
225
390
|
return self._generate_prompted(provider, prompt, response_model, **kwargs)
|
|
226
391
|
|
|
392
|
+
assert last_error is not None
|
|
393
|
+
raise last_error
|
|
394
|
+
|
|
227
395
|
def _generate_prompted(
|
|
228
396
|
self,
|
|
229
397
|
provider,
|
|
@@ -253,6 +421,20 @@ class StructuredOutputHandler:
|
|
|
253
421
|
|
|
254
422
|
last_error = None
|
|
255
423
|
current_prompt = enhanced_prompt
|
|
424
|
+
current_kwargs = dict(kwargs)
|
|
425
|
+
def _coerce_boolish(value: Any) -> bool:
|
|
426
|
+
if isinstance(value, bool):
|
|
427
|
+
return bool(value)
|
|
428
|
+
if isinstance(value, (int, float)) and not isinstance(value, bool):
|
|
429
|
+
return float(value) != 0.0
|
|
430
|
+
if isinstance(value, str):
|
|
431
|
+
return value.strip().lower() in {"1", "true", "yes", "y", "on"}
|
|
432
|
+
return False
|
|
433
|
+
|
|
434
|
+
allow_truncation_raw = current_kwargs.pop("allow_truncation", None)
|
|
435
|
+
if allow_truncation_raw is None:
|
|
436
|
+
allow_truncation_raw = current_kwargs.pop("allow_truncated", None)
|
|
437
|
+
allow_truncation = _coerce_boolish(allow_truncation_raw) if allow_truncation_raw is not None else False
|
|
256
438
|
|
|
257
439
|
for attempt in range(1, self.retry_strategy.max_attempts + 1):
|
|
258
440
|
attempt_start_time = time.time()
|
|
@@ -269,7 +451,7 @@ class StructuredOutputHandler:
|
|
|
269
451
|
# Generate response
|
|
270
452
|
response = provider._generate_internal(
|
|
271
453
|
prompt=current_prompt,
|
|
272
|
-
**
|
|
454
|
+
**current_kwargs
|
|
273
455
|
)
|
|
274
456
|
|
|
275
457
|
# Extract and validate JSON
|
|
@@ -281,7 +463,7 @@ class StructuredOutputHandler:
|
|
|
281
463
|
# Preprocess enum responses if we have mappings
|
|
282
464
|
if hasattr(self, '_enum_mappings') and self._enum_mappings:
|
|
283
465
|
data = self._preprocess_enum_response(data, self._enum_mappings)
|
|
284
|
-
result = response_model.model_validate(data)
|
|
466
|
+
result = response_model.model_validate(_coerce_single_list_wrapper(data, response_model=response_model))
|
|
285
467
|
except (json.JSONDecodeError, ValidationError) as parse_error:
|
|
286
468
|
# Try to fix the JSON
|
|
287
469
|
self.logger.debug("JSON parsing failed, attempting self-fix",
|
|
@@ -296,7 +478,9 @@ class StructuredOutputHandler:
|
|
|
296
478
|
# Preprocess enum responses if we have mappings
|
|
297
479
|
if hasattr(self, '_enum_mappings') and self._enum_mappings:
|
|
298
480
|
data = self._preprocess_enum_response(data, self._enum_mappings)
|
|
299
|
-
result = response_model.model_validate(
|
|
481
|
+
result = response_model.model_validate(
|
|
482
|
+
_coerce_single_list_wrapper(data, response_model=response_model)
|
|
483
|
+
)
|
|
300
484
|
self.logger.info("JSON self-fix successful", attempt=attempt + 1)
|
|
301
485
|
except (json.JSONDecodeError, ValidationError) as fix_error:
|
|
302
486
|
self.logger.debug("Self-fix failed", error=str(fix_error), attempt=attempt + 1)
|
|
@@ -309,6 +493,52 @@ class StructuredOutputHandler:
|
|
|
309
493
|
# Note: VALIDATION_SUCCEEDED event removed in simplification
|
|
310
494
|
# Success is indicated by successfully parsing the response
|
|
311
495
|
|
|
496
|
+
finish_reason = str(getattr(response, "finish_reason", "") or "").strip().lower()
|
|
497
|
+
is_truncated = finish_reason in {"length", "max_tokens", "max_output_tokens"}
|
|
498
|
+
if is_truncated and not allow_truncation:
|
|
499
|
+
if attempt < self.retry_strategy.max_attempts:
|
|
500
|
+
raw = current_kwargs.get("max_output_tokens")
|
|
501
|
+
if raw is None:
|
|
502
|
+
raw = current_kwargs.get("max_tokens")
|
|
503
|
+
cur = 0
|
|
504
|
+
if raw is not None and not isinstance(raw, bool):
|
|
505
|
+
try:
|
|
506
|
+
cur = int(raw)
|
|
507
|
+
except Exception:
|
|
508
|
+
cur = 0
|
|
509
|
+
if cur <= 0:
|
|
510
|
+
try:
|
|
511
|
+
cur = int(getattr(provider, "max_output_tokens", 0) or 0)
|
|
512
|
+
except Exception:
|
|
513
|
+
cur = 0
|
|
514
|
+
if cur <= 0:
|
|
515
|
+
cur = 512
|
|
516
|
+
bumped = max(cur * 2, cur + 500)
|
|
517
|
+
cap = 0
|
|
518
|
+
try:
|
|
519
|
+
from ..architectures.detection import get_context_limits
|
|
520
|
+
|
|
521
|
+
model_name = getattr(provider, "model", None)
|
|
522
|
+
limits = get_context_limits(str(model_name or ""))
|
|
523
|
+
cap = int(limits.get("max_output_tokens") or 0)
|
|
524
|
+
except Exception:
|
|
525
|
+
cap = 0
|
|
526
|
+
if cap <= 0:
|
|
527
|
+
cap = 1_000_000
|
|
528
|
+
next_budget = min(bumped, cap)
|
|
529
|
+
self.logger.warning(
|
|
530
|
+
"Structured output truncated; retrying with higher max_output_tokens",
|
|
531
|
+
finish_reason=finish_reason,
|
|
532
|
+
attempt=attempt,
|
|
533
|
+
max_output_tokens=current_kwargs.get("max_output_tokens") or current_kwargs.get("max_tokens"),
|
|
534
|
+
next_max_output_tokens=next_budget,
|
|
535
|
+
)
|
|
536
|
+
current_kwargs["max_output_tokens"] = next_budget
|
|
537
|
+
current_kwargs.pop("max_tokens", None)
|
|
538
|
+
current_prompt = enhanced_prompt
|
|
539
|
+
continue
|
|
540
|
+
raise RuntimeError("Structured output was truncated (finish_reason=length). Increase max_output_tokens.")
|
|
541
|
+
|
|
312
542
|
# Log successful validation
|
|
313
543
|
self.logger.info("Validation attempt succeeded",
|
|
314
544
|
provider=provider_name,
|
|
@@ -352,6 +582,51 @@ class StructuredOutputHandler:
|
|
|
352
582
|
validation_success=False)
|
|
353
583
|
|
|
354
584
|
# Check if we should retry
|
|
585
|
+
finish_reason = str(getattr(response, "finish_reason", "") or "").strip().lower()
|
|
586
|
+
is_truncated = finish_reason in {"length", "max_tokens", "max_output_tokens"}
|
|
587
|
+
if is_truncated and attempt < self.retry_strategy.max_attempts:
|
|
588
|
+
raw = current_kwargs.get("max_output_tokens")
|
|
589
|
+
if raw is None:
|
|
590
|
+
raw = current_kwargs.get("max_tokens")
|
|
591
|
+
cur = 0
|
|
592
|
+
if raw is not None and not isinstance(raw, bool):
|
|
593
|
+
try:
|
|
594
|
+
cur = int(raw)
|
|
595
|
+
except Exception:
|
|
596
|
+
cur = 0
|
|
597
|
+
if cur <= 0:
|
|
598
|
+
try:
|
|
599
|
+
cur = int(getattr(provider, "max_output_tokens", 0) or 0)
|
|
600
|
+
except Exception:
|
|
601
|
+
cur = 0
|
|
602
|
+
if cur <= 0:
|
|
603
|
+
cur = 512
|
|
604
|
+
bumped = max(cur * 2, cur + 500)
|
|
605
|
+
cap = 0
|
|
606
|
+
try:
|
|
607
|
+
from ..architectures.detection import get_context_limits
|
|
608
|
+
|
|
609
|
+
model_name = getattr(provider, "model", None)
|
|
610
|
+
limits = get_context_limits(str(model_name or ""))
|
|
611
|
+
cap = int(limits.get("max_output_tokens") or 0)
|
|
612
|
+
except Exception:
|
|
613
|
+
cap = 0
|
|
614
|
+
if cap <= 0:
|
|
615
|
+
cap = 1_000_000
|
|
616
|
+
next_budget = min(bumped, cap)
|
|
617
|
+
self.logger.warning(
|
|
618
|
+
"Structured output truncated; retrying with higher max_output_tokens",
|
|
619
|
+
finish_reason=finish_reason,
|
|
620
|
+
attempt=attempt,
|
|
621
|
+
max_output_tokens=current_kwargs.get("max_output_tokens") or current_kwargs.get("max_tokens"),
|
|
622
|
+
next_max_output_tokens=next_budget,
|
|
623
|
+
)
|
|
624
|
+
current_kwargs["max_output_tokens"] = next_budget
|
|
625
|
+
current_kwargs.pop("max_tokens", None)
|
|
626
|
+
# Keep the base prompt stable: appending more text makes truncation more likely.
|
|
627
|
+
current_prompt = enhanced_prompt
|
|
628
|
+
continue
|
|
629
|
+
|
|
355
630
|
if self.retry_strategy.should_retry(attempt, e):
|
|
356
631
|
# Note: RETRY_ATTEMPTED event removed in simplification
|
|
357
632
|
# Retry logic tracked through VALIDATION_FAILED event with attempt number
|
|
@@ -405,18 +680,18 @@ class StructuredOutputHandler:
|
|
|
405
680
|
# Create example from schema
|
|
406
681
|
example = self._create_example_from_schema(schema)
|
|
407
682
|
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
Example format:
|
|
415
|
-
{json.dumps(example, indent=2)}
|
|
683
|
+
schema_block = (
|
|
684
|
+
f"Please respond with valid JSON that matches this exact schema for {model_name}:\n\n"
|
|
685
|
+
f"{json.dumps(schema, indent=2)}\n\n"
|
|
686
|
+
f"Example format:\n{json.dumps(example, indent=2)}\n\n"
|
|
687
|
+
"Important: Return ONLY the JSON object, no additional text or formatting."
|
|
688
|
+
)
|
|
416
689
|
|
|
417
|
-
|
|
690
|
+
marker = "<<STRUCTURED_OUTPUT_SCHEMA>>"
|
|
691
|
+
if marker in prompt:
|
|
692
|
+
return prompt.replace(marker, schema_block, 1)
|
|
418
693
|
|
|
419
|
-
return
|
|
694
|
+
return f"{prompt}\n\n{schema_block}"
|
|
420
695
|
|
|
421
696
|
def _create_example_from_schema(self, schema: Dict[str, Any]) -> Dict[str, Any]:
|
|
422
697
|
"""
|
|
@@ -592,4 +867,4 @@ Important: Return ONLY the JSON object, no additional text or formatting."""
|
|
|
592
867
|
return [convert_enum_values(item, path) for item in obj]
|
|
593
868
|
return obj
|
|
594
869
|
|
|
595
|
-
return convert_enum_values(data)
|
|
870
|
+
return convert_enum_values(data)
|