prompture 0.0.29.dev8__py3-none-any.whl → 0.0.38.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prompture/__init__.py +264 -23
- prompture/_version.py +34 -0
- prompture/agent.py +924 -0
- prompture/agent_types.py +156 -0
- prompture/aio/__init__.py +74 -0
- prompture/async_agent.py +880 -0
- prompture/async_conversation.py +789 -0
- prompture/async_core.py +803 -0
- prompture/async_driver.py +193 -0
- prompture/async_groups.py +551 -0
- prompture/cache.py +469 -0
- prompture/callbacks.py +55 -0
- prompture/cli.py +63 -4
- prompture/conversation.py +826 -0
- prompture/core.py +894 -263
- prompture/cost_mixin.py +51 -0
- prompture/discovery.py +187 -0
- prompture/driver.py +206 -5
- prompture/drivers/__init__.py +175 -67
- prompture/drivers/airllm_driver.py +109 -0
- prompture/drivers/async_airllm_driver.py +26 -0
- prompture/drivers/async_azure_driver.py +123 -0
- prompture/drivers/async_claude_driver.py +113 -0
- prompture/drivers/async_google_driver.py +316 -0
- prompture/drivers/async_grok_driver.py +97 -0
- prompture/drivers/async_groq_driver.py +90 -0
- prompture/drivers/async_hugging_driver.py +61 -0
- prompture/drivers/async_lmstudio_driver.py +148 -0
- prompture/drivers/async_local_http_driver.py +44 -0
- prompture/drivers/async_ollama_driver.py +135 -0
- prompture/drivers/async_openai_driver.py +102 -0
- prompture/drivers/async_openrouter_driver.py +102 -0
- prompture/drivers/async_registry.py +133 -0
- prompture/drivers/azure_driver.py +42 -9
- prompture/drivers/claude_driver.py +257 -34
- prompture/drivers/google_driver.py +295 -42
- prompture/drivers/grok_driver.py +35 -32
- prompture/drivers/groq_driver.py +33 -26
- prompture/drivers/hugging_driver.py +6 -6
- prompture/drivers/lmstudio_driver.py +97 -19
- prompture/drivers/local_http_driver.py +6 -6
- prompture/drivers/ollama_driver.py +168 -23
- prompture/drivers/openai_driver.py +184 -9
- prompture/drivers/openrouter_driver.py +37 -25
- prompture/drivers/registry.py +306 -0
- prompture/drivers/vision_helpers.py +153 -0
- prompture/field_definitions.py +106 -96
- prompture/group_types.py +147 -0
- prompture/groups.py +530 -0
- prompture/image.py +180 -0
- prompture/logging.py +80 -0
- prompture/model_rates.py +217 -0
- prompture/persistence.py +254 -0
- prompture/persona.py +482 -0
- prompture/runner.py +49 -47
- prompture/scaffold/__init__.py +1 -0
- prompture/scaffold/generator.py +84 -0
- prompture/scaffold/templates/Dockerfile.j2 +12 -0
- prompture/scaffold/templates/README.md.j2 +41 -0
- prompture/scaffold/templates/config.py.j2 +21 -0
- prompture/scaffold/templates/env.example.j2 +8 -0
- prompture/scaffold/templates/main.py.j2 +86 -0
- prompture/scaffold/templates/models.py.j2 +40 -0
- prompture/scaffold/templates/requirements.txt.j2 +5 -0
- prompture/serialization.py +218 -0
- prompture/server.py +183 -0
- prompture/session.py +117 -0
- prompture/settings.py +19 -1
- prompture/tools.py +219 -267
- prompture/tools_schema.py +254 -0
- prompture/validator.py +3 -3
- prompture-0.0.38.dev2.dist-info/METADATA +369 -0
- prompture-0.0.38.dev2.dist-info/RECORD +77 -0
- {prompture-0.0.29.dev8.dist-info → prompture-0.0.38.dev2.dist-info}/WHEEL +1 -1
- prompture-0.0.29.dev8.dist-info/METADATA +0 -368
- prompture-0.0.29.dev8.dist-info/RECORD +0 -27
- {prompture-0.0.29.dev8.dist-info → prompture-0.0.38.dev2.dist-info}/entry_points.txt +0 -0
- {prompture-0.0.29.dev8.dist-info → prompture-0.0.38.dev2.dist-info}/licenses/LICENSE +0 -0
- {prompture-0.0.29.dev8.dist-info → prompture-0.0.38.dev2.dist-info}/top_level.txt +0 -0
prompture/tools.py
CHANGED
|
@@ -8,135 +8,54 @@ This module provides utilities for:
|
|
|
8
8
|
3. Exclusive field extraction against Pydantic models
|
|
9
9
|
4. Safe JSON text extraction from messy LLM output
|
|
10
10
|
5. Small parsing helpers (booleans, lists, datetimes)
|
|
11
|
-
6. Lightweight, flexible debug logging with levels
|
|
12
11
|
|
|
13
12
|
Notes:
|
|
14
13
|
- Only standard lib + pydantic + python-dateutil are required.
|
|
15
14
|
- Functions are defensive and avoid raising unless necessary for correctness.
|
|
16
15
|
"""
|
|
16
|
+
|
|
17
17
|
from __future__ import annotations
|
|
18
18
|
|
|
19
|
-
import re
|
|
20
|
-
import sys
|
|
21
19
|
import json
|
|
22
|
-
import
|
|
20
|
+
import logging
|
|
21
|
+
import re
|
|
22
|
+
import uuid
|
|
23
|
+
from collections.abc import Mapping
|
|
24
|
+
from datetime import date, datetime, time, timezone
|
|
23
25
|
from decimal import Decimal, InvalidOperation
|
|
24
|
-
from datetime import date, time, datetime, timezone
|
|
25
|
-
from enum import Enum
|
|
26
26
|
from typing import (
|
|
27
27
|
Any,
|
|
28
|
-
Dict,
|
|
29
|
-
List,
|
|
30
|
-
Optional,
|
|
31
|
-
Type,
|
|
32
28
|
Union,
|
|
33
|
-
get_origin,
|
|
34
29
|
get_args,
|
|
35
|
-
|
|
36
|
-
Mapping,
|
|
37
|
-
Tuple,
|
|
30
|
+
get_origin,
|
|
38
31
|
)
|
|
39
|
-
import uuid
|
|
40
32
|
|
|
41
33
|
import dateutil.parser
|
|
42
34
|
from pydantic import BaseModel
|
|
43
35
|
from tukuy import TukuyTransformer
|
|
44
36
|
|
|
37
|
+
logger = logging.getLogger("prompture.tools")
|
|
38
|
+
|
|
45
39
|
# Initialize Tukuy transformer
|
|
46
40
|
TUKUY = TukuyTransformer()
|
|
47
41
|
|
|
48
42
|
__all__ = [
|
|
49
|
-
"
|
|
43
|
+
"as_list",
|
|
44
|
+
"clean_json_text",
|
|
45
|
+
"clean_toon_text",
|
|
50
46
|
"convert_value",
|
|
47
|
+
"create_field_schema",
|
|
51
48
|
"extract_fields",
|
|
52
|
-
"
|
|
49
|
+
"get_field_default",
|
|
50
|
+
"get_type_default",
|
|
51
|
+
"load_field_definitions",
|
|
53
52
|
"parse_boolean",
|
|
54
53
|
"parse_datetime",
|
|
55
|
-
"
|
|
56
|
-
"clean_json_text",
|
|
57
|
-
"log_debug",
|
|
58
|
-
"LogLevel",
|
|
59
|
-
"load_field_definitions",
|
|
54
|
+
"parse_shorthand_number",
|
|
60
55
|
"validate_field_definition",
|
|
61
|
-
"get_type_default",
|
|
62
|
-
"get_field_default",
|
|
63
56
|
]
|
|
64
57
|
|
|
65
58
|
|
|
66
|
-
# ---------------------------------------------------------------------------
|
|
67
|
-
# Logging
|
|
68
|
-
# ---------------------------------------------------------------------------
|
|
69
|
-
|
|
70
|
-
class LogLevel(int, Enum):
|
|
71
|
-
OFF = 1000
|
|
72
|
-
ERROR = 40
|
|
73
|
-
WARN = 30
|
|
74
|
-
INFO = 20
|
|
75
|
-
DEBUG = 10
|
|
76
|
-
TRACE = 5 # very verbose
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
def log_debug(
|
|
80
|
-
level: int | LogLevel,
|
|
81
|
-
current_level: int | LogLevel,
|
|
82
|
-
msg: str | Mapping[str, Any] | Iterable[Tuple[str, Any]],
|
|
83
|
-
*,
|
|
84
|
-
prefix: str = "",
|
|
85
|
-
stream = None,
|
|
86
|
-
ts: bool = False,
|
|
87
|
-
json_mode: bool = False,
|
|
88
|
-
) -> None:
|
|
89
|
-
"""
|
|
90
|
-
Simple leveled logger.
|
|
91
|
-
|
|
92
|
-
Args:
|
|
93
|
-
level: Level of this message.
|
|
94
|
-
current_level: Minimum level that should be emitted.
|
|
95
|
-
msg: Message string OR a mapping/iterable of (key, value) to print.
|
|
96
|
-
prefix: Optional prefix (e.g., "[extractor] ").
|
|
97
|
-
stream: File-like stream; defaults to sys.stderr.
|
|
98
|
-
ts: If True, prepend ISO timestamp.
|
|
99
|
-
json_mode: If True, print as a single JSON object line.
|
|
100
|
-
|
|
101
|
-
Examples:
|
|
102
|
-
log_debug(LogLevel.DEBUG, current, "Parsed field X")
|
|
103
|
-
log_debug(LogLevel.INFO, current, {"field": "age", "value": 42})
|
|
104
|
-
"""
|
|
105
|
-
if int(current_level) > int(level):
|
|
106
|
-
return
|
|
107
|
-
|
|
108
|
-
stream = stream or sys.stderr
|
|
109
|
-
parts: List[str] = []
|
|
110
|
-
|
|
111
|
-
if ts:
|
|
112
|
-
parts.append(datetime.now(timezone.utc).isoformat())
|
|
113
|
-
|
|
114
|
-
if prefix:
|
|
115
|
-
parts.append(prefix.rstrip())
|
|
116
|
-
|
|
117
|
-
if json_mode:
|
|
118
|
-
if isinstance(msg, str):
|
|
119
|
-
payload = {"message": msg}
|
|
120
|
-
elif isinstance(msg, Mapping):
|
|
121
|
-
payload = dict(msg)
|
|
122
|
-
else:
|
|
123
|
-
payload = dict(msg) # try to coerce iterable of pairs
|
|
124
|
-
out = " ".join(parts + [json.dumps(payload, default=str, ensure_ascii=False)])
|
|
125
|
-
stream.write(out + "\n")
|
|
126
|
-
return
|
|
127
|
-
|
|
128
|
-
if isinstance(msg, str):
|
|
129
|
-
parts.append(msg)
|
|
130
|
-
elif isinstance(msg, Mapping):
|
|
131
|
-
kv = " ".join(f"{k}={json.dumps(v, default=str, ensure_ascii=False)}" for k, v in msg.items())
|
|
132
|
-
parts.append(kv)
|
|
133
|
-
else:
|
|
134
|
-
kv = " ".join(f"{k}={json.dumps(v, default=str, ensure_ascii=False)}" for k, v in msg)
|
|
135
|
-
parts.append(kv)
|
|
136
|
-
|
|
137
|
-
stream.write(" ".join(parts) + "\n")
|
|
138
|
-
|
|
139
|
-
|
|
140
59
|
# ---------------------------------------------------------------------------
|
|
141
60
|
# Helpers
|
|
142
61
|
# ---------------------------------------------------------------------------
|
|
@@ -146,35 +65,35 @@ _CURRENCY_PREFIX = tuple("$€£¥₿₽₹₩₫₪₴₦₲₵₡₱₺₸")
|
|
|
146
65
|
def parse_boolean(value: Any) -> bool:
|
|
147
66
|
"""
|
|
148
67
|
Enhanced boolean parser with multilingual variants and edge case handling.
|
|
149
|
-
|
|
68
|
+
|
|
150
69
|
Supports:
|
|
151
70
|
- Standard: true/false, yes/no, on/off, 1/0
|
|
152
71
|
- Multilingual: si/no (Spanish), oui/non (French), ja/nein (German)
|
|
153
72
|
- Edge cases: empty strings, whitespace, case variations
|
|
154
|
-
|
|
73
|
+
|
|
155
74
|
Uses Tukuy transformer for robust multilingual support.
|
|
156
75
|
"""
|
|
157
76
|
if isinstance(value, bool):
|
|
158
77
|
return value
|
|
159
78
|
if value is None:
|
|
160
79
|
raise ValueError("Cannot parse None as boolean")
|
|
161
|
-
|
|
80
|
+
|
|
162
81
|
# Handle numeric values
|
|
163
82
|
if isinstance(value, (int, float, Decimal)):
|
|
164
83
|
return bool(value)
|
|
165
|
-
|
|
84
|
+
|
|
166
85
|
s = str(value).strip().lower()
|
|
167
|
-
|
|
86
|
+
|
|
168
87
|
# Handle empty strings and common "falsy" representations
|
|
169
88
|
if not s or s in ("", "null", "none", "n/a", "na", "nil", "undefined"):
|
|
170
89
|
return False
|
|
171
|
-
|
|
90
|
+
|
|
172
91
|
# Quick check for obvious true/false values before using Tukuy
|
|
173
92
|
if s in ("1", "true", "yes", "on", "si", "sí", "oui", "ja", "t", "y"):
|
|
174
93
|
return True
|
|
175
94
|
if s in ("0", "false", "no", "off", "non", "nein", "f", "n"):
|
|
176
95
|
return False
|
|
177
|
-
|
|
96
|
+
|
|
178
97
|
# Use Tukuy for more complex cases
|
|
179
98
|
try:
|
|
180
99
|
return TUKUY.transform(s, ["bool"])
|
|
@@ -186,21 +105,22 @@ def parse_boolean(value: Any) -> bool:
|
|
|
186
105
|
return bool(num_val)
|
|
187
106
|
except (ValueError, TypeError):
|
|
188
107
|
pass
|
|
189
|
-
|
|
108
|
+
|
|
190
109
|
# If it contains "true", "yes", "on", etc., lean towards True
|
|
191
110
|
true_indicators = ["true", "yes", "on", "enable", "active", "si", "oui", "ja"]
|
|
192
111
|
false_indicators = ["false", "no", "off", "disable", "inactive", "non", "nein"]
|
|
193
|
-
|
|
112
|
+
|
|
194
113
|
s_lower = s.lower()
|
|
195
114
|
if any(indicator in s_lower for indicator in true_indicators):
|
|
196
115
|
return True
|
|
197
116
|
if any(indicator in s_lower for indicator in false_indicators):
|
|
198
117
|
return False
|
|
199
|
-
|
|
118
|
+
|
|
200
119
|
# Final fallback - raise error for truly ambiguous cases
|
|
201
|
-
raise ValueError(f"Cannot parse '{value}' as boolean")
|
|
120
|
+
raise ValueError(f"Cannot parse '{value}' as boolean") from None
|
|
121
|
+
|
|
202
122
|
|
|
203
|
-
def as_list(value: Any, *, sep: str | None = None) ->
|
|
123
|
+
def as_list(value: Any, *, sep: str | None = None) -> list[Any]:
|
|
204
124
|
"""
|
|
205
125
|
Convert a value into a list.
|
|
206
126
|
- Lists/tuples: returned as list.
|
|
@@ -221,6 +141,7 @@ def as_list(value: Any, *, sep: str | None = None) -> List[Any]:
|
|
|
221
141
|
return [p.strip() for p in parts if p.strip() != ""]
|
|
222
142
|
return [value]
|
|
223
143
|
|
|
144
|
+
|
|
224
145
|
def parse_datetime(
|
|
225
146
|
value: Any,
|
|
226
147
|
*,
|
|
@@ -245,6 +166,7 @@ def parse_datetime(
|
|
|
245
166
|
def _strip_currency_prefix(s: str) -> str:
|
|
246
167
|
return s[1:].lstrip() if s and s[0] in _CURRENCY_PREFIX else s
|
|
247
168
|
|
|
169
|
+
|
|
248
170
|
def parse_shorthand_number(
|
|
249
171
|
value: Any,
|
|
250
172
|
*,
|
|
@@ -296,14 +218,14 @@ def parse_shorthand_number(
|
|
|
296
218
|
# Use appropriate Tukuy transformer based on as_decimal
|
|
297
219
|
transformer = ["shorthand_decimal"] if as_decimal else ["shorthand_number"]
|
|
298
220
|
num = TUKUY.transform(s, transformer)
|
|
299
|
-
|
|
221
|
+
|
|
300
222
|
# Handle percent if needed
|
|
301
223
|
if is_percent:
|
|
302
224
|
if isinstance(num, (int, float)):
|
|
303
225
|
num = num * percent_base / 100
|
|
304
226
|
else: # Decimal
|
|
305
|
-
num = num * Decimal(str(percent_base)) / Decimal(
|
|
306
|
-
|
|
227
|
+
num = num * Decimal(str(percent_base)) / Decimal("100")
|
|
228
|
+
|
|
307
229
|
return num
|
|
308
230
|
|
|
309
231
|
|
|
@@ -311,7 +233,8 @@ def parse_shorthand_number(
|
|
|
311
233
|
# Schema creation
|
|
312
234
|
# ---------------------------------------------------------------------------
|
|
313
235
|
|
|
314
|
-
|
|
236
|
+
|
|
237
|
+
def _base_schema_for_type(field_name: str, field_type: type[Any]) -> dict[str, Any]:
|
|
315
238
|
origin = get_origin(field_type)
|
|
316
239
|
args = get_args(field_type)
|
|
317
240
|
|
|
@@ -331,14 +254,14 @@ def _base_schema_for_type(field_name: str, field_type: Type[Any]) -> Dict[str, A
|
|
|
331
254
|
}
|
|
332
255
|
|
|
333
256
|
# Containers
|
|
334
|
-
if origin in (list,
|
|
257
|
+
if origin in (list, list):
|
|
335
258
|
item_t = args[0] if args else Any
|
|
336
259
|
return {
|
|
337
260
|
"type": "array",
|
|
338
261
|
"items": _strip_desc(_base_schema_for_type(f"{field_name}_item", item_t)),
|
|
339
262
|
}
|
|
340
263
|
|
|
341
|
-
if origin in (tuple,
|
|
264
|
+
if origin in (tuple, tuple):
|
|
342
265
|
# Treat as array with items; if variable length, use first type as items
|
|
343
266
|
if args and args[-1] is Ellipsis:
|
|
344
267
|
item_t = args[0]
|
|
@@ -349,15 +272,12 @@ def _base_schema_for_type(field_name: str, field_type: Type[Any]) -> Dict[str, A
|
|
|
349
272
|
elif args:
|
|
350
273
|
return {
|
|
351
274
|
"type": "array",
|
|
352
|
-
"prefixItems": [
|
|
353
|
-
_strip_desc(_base_schema_for_type(f"{field_name}_{i}", t))
|
|
354
|
-
for i, t in enumerate(args)
|
|
355
|
-
],
|
|
275
|
+
"prefixItems": [_strip_desc(_base_schema_for_type(f"{field_name}_{i}", t)) for i, t in enumerate(args)],
|
|
356
276
|
"items": False,
|
|
357
277
|
}
|
|
358
278
|
return {"type": "array"}
|
|
359
279
|
|
|
360
|
-
if origin in (dict,
|
|
280
|
+
if origin in (dict, dict):
|
|
361
281
|
key_t = args[0] if args else str
|
|
362
282
|
val_t = args[1] if len(args) > 1 else Any
|
|
363
283
|
# JSON Schema keys must be strings; if not, we'll still describe "object"
|
|
@@ -393,7 +313,7 @@ def _base_schema_for_type(field_name: str, field_type: Type[Any]) -> Dict[str, A
|
|
|
393
313
|
|
|
394
314
|
# Custom types with __schema__
|
|
395
315
|
if hasattr(field_type, "__schema__"):
|
|
396
|
-
sch =
|
|
316
|
+
sch = field_type.__schema__
|
|
397
317
|
if isinstance(sch, Mapping):
|
|
398
318
|
return dict(sch)
|
|
399
319
|
|
|
@@ -401,7 +321,7 @@ def _base_schema_for_type(field_name: str, field_type: Type[Any]) -> Dict[str, A
|
|
|
401
321
|
return {"type": "string"}
|
|
402
322
|
|
|
403
323
|
|
|
404
|
-
def _strip_desc(schema:
|
|
324
|
+
def _strip_desc(schema: dict[str, Any]) -> dict[str, Any]:
|
|
405
325
|
"""Remove 'description' if present (useful when embedding item schemas)."""
|
|
406
326
|
schema = dict(schema)
|
|
407
327
|
schema.pop("description", None)
|
|
@@ -410,9 +330,9 @@ def _strip_desc(schema: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
410
330
|
|
|
411
331
|
def create_field_schema(
|
|
412
332
|
field_name: str,
|
|
413
|
-
field_type:
|
|
414
|
-
description:
|
|
415
|
-
) ->
|
|
333
|
+
field_type: type[Any],
|
|
334
|
+
description: str | None = None,
|
|
335
|
+
) -> dict[str, Any]:
|
|
416
336
|
"""
|
|
417
337
|
Creates a JSON(-like) schema for a field based on its type.
|
|
418
338
|
|
|
@@ -440,6 +360,7 @@ def create_field_schema(
|
|
|
440
360
|
# Conversion
|
|
441
361
|
# ---------------------------------------------------------------------------
|
|
442
362
|
|
|
363
|
+
|
|
443
364
|
def _to_decimal(value: Any) -> Decimal:
|
|
444
365
|
if isinstance(value, Decimal):
|
|
445
366
|
return value
|
|
@@ -451,15 +372,15 @@ def _to_decimal(value: Any) -> Decimal:
|
|
|
451
372
|
|
|
452
373
|
def convert_value(
|
|
453
374
|
value: Any,
|
|
454
|
-
target_type:
|
|
375
|
+
target_type: type[Any],
|
|
455
376
|
allow_shorthand: bool = True,
|
|
456
|
-
field_name:
|
|
457
|
-
field_definitions:
|
|
377
|
+
field_name: str | None = None,
|
|
378
|
+
field_definitions: dict[str, Any] | None = None,
|
|
458
379
|
use_defaults_on_failure: bool = True,
|
|
459
380
|
) -> Any:
|
|
460
381
|
"""
|
|
461
382
|
Enhanced value converter with robust error handling and default value support.
|
|
462
|
-
|
|
383
|
+
|
|
463
384
|
Convert 'value' to 'target_type' with support for:
|
|
464
385
|
- Optional/Union with intelligent type ordering
|
|
465
386
|
- Numeric shorthand (1.2k, $3,400, 12%) with better error handling
|
|
@@ -475,61 +396,59 @@ def convert_value(
|
|
|
475
396
|
field_name: Name of field being converted (for field-specific defaults)
|
|
476
397
|
field_definitions: Field definitions dict for custom defaults/instructions
|
|
477
398
|
use_defaults_on_failure: Whether to use defaults when conversion fails
|
|
478
|
-
|
|
399
|
+
|
|
479
400
|
Returns:
|
|
480
401
|
Converted value or appropriate default
|
|
481
|
-
|
|
402
|
+
|
|
482
403
|
Notes:
|
|
483
404
|
- For List[T], a scalar becomes [T(scalar)]
|
|
484
405
|
- For Decimal and floats, shorthand and currency are supported
|
|
485
406
|
- Conversion failures log warnings but continue with defaults if enabled
|
|
486
407
|
- Union types try conversions in order and use first successful result
|
|
487
408
|
"""
|
|
488
|
-
|
|
409
|
+
|
|
489
410
|
def _get_fallback_value(error_msg: str = "") -> Any:
|
|
490
411
|
"""Get appropriate fallback value when conversion fails."""
|
|
491
412
|
if not use_defaults_on_failure:
|
|
492
413
|
raise ValueError(error_msg)
|
|
493
|
-
|
|
414
|
+
|
|
494
415
|
try:
|
|
495
416
|
# Try field-specific default first
|
|
496
417
|
if field_name and field_definitions:
|
|
497
418
|
field_def = field_definitions.get(field_name, {})
|
|
498
|
-
if isinstance(field_def, dict) and
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
419
|
+
if isinstance(field_def, dict) and "default" in field_def:
|
|
420
|
+
logger.debug("Using field default for '%s': %s", field_name, field_def["default"])
|
|
421
|
+
return field_def["default"]
|
|
422
|
+
|
|
503
423
|
# Fall back to type default
|
|
504
424
|
type_default = get_type_default(target_type)
|
|
505
|
-
|
|
506
|
-
f"Using type default for {target_type}: {type_default}")
|
|
425
|
+
logger.debug("Using type default for %s: %s", target_type, type_default)
|
|
507
426
|
return type_default
|
|
508
|
-
|
|
427
|
+
|
|
509
428
|
except Exception as fallback_error:
|
|
510
|
-
|
|
511
|
-
f"Failed to get fallback for {target_type}: {fallback_error}")
|
|
429
|
+
logger.warning("Failed to get fallback for %s: %s", target_type, fallback_error)
|
|
512
430
|
return None
|
|
513
431
|
|
|
514
|
-
def _safe_convert_recursive(val: Any, typ:
|
|
432
|
+
def _safe_convert_recursive(val: Any, typ: type[Any]) -> Any:
|
|
515
433
|
"""Recursively convert with same parameters but no fallback to avoid infinite recursion."""
|
|
516
434
|
return convert_value(
|
|
517
|
-
val,
|
|
435
|
+
val,
|
|
436
|
+
typ,
|
|
518
437
|
allow_shorthand=allow_shorthand,
|
|
519
438
|
field_name=field_name,
|
|
520
439
|
field_definitions=field_definitions,
|
|
521
|
-
use_defaults_on_failure=False # Avoid recursion in fallbacks
|
|
440
|
+
use_defaults_on_failure=False, # Avoid recursion in fallbacks
|
|
522
441
|
)
|
|
523
442
|
|
|
524
443
|
# Handle None values early
|
|
525
444
|
if value is None:
|
|
526
445
|
origin = get_origin(target_type)
|
|
527
446
|
args = get_args(target_type)
|
|
528
|
-
|
|
447
|
+
|
|
529
448
|
# Check if target type is Optional (Union with None)
|
|
530
449
|
if origin is Union and type(None) in args:
|
|
531
450
|
return None
|
|
532
|
-
|
|
451
|
+
|
|
533
452
|
# For non-optional types, use fallback
|
|
534
453
|
return _get_fallback_value("Cannot convert None to non-optional type")
|
|
535
454
|
|
|
@@ -540,43 +459,40 @@ def convert_value(
|
|
|
540
459
|
if origin is Union:
|
|
541
460
|
non_none = [a for a in args if a is not type(None)]
|
|
542
461
|
is_optional = type(None) in args
|
|
543
|
-
|
|
462
|
+
|
|
544
463
|
if value is None and is_optional:
|
|
545
464
|
return None
|
|
546
|
-
|
|
465
|
+
|
|
547
466
|
# Try each non-none type until one works
|
|
548
467
|
conversion_errors = []
|
|
549
|
-
|
|
550
|
-
for
|
|
468
|
+
|
|
469
|
+
for _i, t in enumerate(non_none):
|
|
551
470
|
try:
|
|
552
471
|
result = _safe_convert_recursive(value, t)
|
|
553
|
-
|
|
554
|
-
f"Union conversion succeeded with type {t} for value '{value}'")
|
|
472
|
+
logger.debug("Union conversion succeeded with type %s for value '%s'", t, value)
|
|
555
473
|
return result
|
|
556
474
|
except Exception as e:
|
|
557
475
|
conversion_errors.append((t, str(e)))
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
476
|
+
logger.debug("Union conversion failed for type %s: %s", t, e)
|
|
477
|
+
|
|
561
478
|
# All conversions failed
|
|
562
479
|
error_msg = f"Cannot convert '{value}' to any Union type {non_none}. Errors: {conversion_errors}"
|
|
563
|
-
|
|
480
|
+
logger.warning("%s", error_msg)
|
|
564
481
|
return _get_fallback_value(error_msg)
|
|
565
482
|
|
|
566
483
|
# Lists / Tuples - Enhanced error handling for individual items
|
|
567
|
-
if origin in (list,
|
|
484
|
+
if origin in (list, list):
|
|
568
485
|
item_t = args[0] if args else Any
|
|
569
486
|
try:
|
|
570
487
|
items = as_list(value)
|
|
571
488
|
result_items = []
|
|
572
|
-
|
|
489
|
+
|
|
573
490
|
for i, item in enumerate(items):
|
|
574
491
|
try:
|
|
575
492
|
converted_item = _safe_convert_recursive(item, item_t)
|
|
576
493
|
result_items.append(converted_item)
|
|
577
494
|
except Exception as e:
|
|
578
|
-
|
|
579
|
-
f"Failed to convert list item {i} '{item}' to {item_t}: {e}")
|
|
495
|
+
logger.warning("Failed to convert list item %d '%s' to %s: %s", i, item, item_t, e)
|
|
580
496
|
# Try to get default for item type
|
|
581
497
|
try:
|
|
582
498
|
default_item = get_type_default(item_t)
|
|
@@ -584,15 +500,15 @@ def convert_value(
|
|
|
584
500
|
except Exception:
|
|
585
501
|
# Skip item if we can't get a default
|
|
586
502
|
continue
|
|
587
|
-
|
|
503
|
+
|
|
588
504
|
return result_items
|
|
589
|
-
|
|
505
|
+
|
|
590
506
|
except Exception as e:
|
|
591
507
|
error_msg = f"Cannot convert '{value}' to list: {e}"
|
|
592
|
-
|
|
508
|
+
logger.warning("%s", error_msg)
|
|
593
509
|
return _get_fallback_value(error_msg)
|
|
594
510
|
|
|
595
|
-
if origin in (tuple,
|
|
511
|
+
if origin in (tuple, tuple):
|
|
596
512
|
try:
|
|
597
513
|
if not isinstance(value, (list, tuple)):
|
|
598
514
|
value = [value]
|
|
@@ -603,8 +519,7 @@ def convert_value(
|
|
|
603
519
|
try:
|
|
604
520
|
converted_items.append(_safe_convert_recursive(item, item_t))
|
|
605
521
|
except Exception as e:
|
|
606
|
-
|
|
607
|
-
f"Failed to convert tuple item '{item}': {e}")
|
|
522
|
+
logger.warning("Failed to convert tuple item '%s': %s", item, e)
|
|
608
523
|
converted_items.append(get_type_default(item_t))
|
|
609
524
|
return tuple(converted_items)
|
|
610
525
|
elif args:
|
|
@@ -615,25 +530,24 @@ def convert_value(
|
|
|
615
530
|
try:
|
|
616
531
|
converted_items.append(_safe_convert_recursive(v, t))
|
|
617
532
|
except Exception as e:
|
|
618
|
-
|
|
619
|
-
f"Failed to convert tuple item '{v}' to {t}: {e}")
|
|
533
|
+
logger.warning("Failed to convert tuple item '%s' to %s: %s", v, t, e)
|
|
620
534
|
converted_items.append(get_type_default(t))
|
|
621
535
|
return tuple(converted_items)
|
|
622
536
|
return tuple(value)
|
|
623
537
|
except Exception as e:
|
|
624
538
|
error_msg = f"Cannot convert '{value}' to tuple: {e}"
|
|
625
|
-
|
|
539
|
+
logger.warning("%s", error_msg)
|
|
626
540
|
return _get_fallback_value(error_msg)
|
|
627
541
|
|
|
628
542
|
# Dict - Enhanced error handling
|
|
629
|
-
if origin in (dict,
|
|
543
|
+
if origin in (dict, dict):
|
|
630
544
|
key_t = args[0] if args else str
|
|
631
545
|
val_t = args[1] if len(args) > 1 else Any
|
|
632
|
-
|
|
546
|
+
|
|
633
547
|
try:
|
|
634
548
|
if not isinstance(value, Mapping):
|
|
635
549
|
raise ValueError(f"Cannot convert non-mapping '{value}' to dict")
|
|
636
|
-
|
|
550
|
+
|
|
637
551
|
result_dict = {}
|
|
638
552
|
for k, v in value.items():
|
|
639
553
|
try:
|
|
@@ -641,20 +555,19 @@ def convert_value(
|
|
|
641
555
|
converted_val = _safe_convert_recursive(v, val_t)
|
|
642
556
|
result_dict[converted_key] = converted_val
|
|
643
557
|
except Exception as e:
|
|
644
|
-
|
|
645
|
-
f"Failed to convert dict item {k}:{v}: {e}")
|
|
558
|
+
logger.warning("Failed to convert dict item %s:%s: %s", k, v, e)
|
|
646
559
|
# Skip problematic items
|
|
647
560
|
continue
|
|
648
|
-
|
|
561
|
+
|
|
649
562
|
return result_dict
|
|
650
|
-
|
|
563
|
+
|
|
651
564
|
except Exception as e:
|
|
652
565
|
error_msg = f"Cannot convert '{value}' to dict: {e}"
|
|
653
|
-
|
|
566
|
+
logger.warning("%s", error_msg)
|
|
654
567
|
return _get_fallback_value(error_msg)
|
|
655
568
|
|
|
656
569
|
# Scalars with enhanced error handling
|
|
657
|
-
|
|
570
|
+
|
|
658
571
|
# Numbers - More robust handling
|
|
659
572
|
if target_type is int:
|
|
660
573
|
try:
|
|
@@ -662,21 +575,21 @@ def convert_value(
|
|
|
662
575
|
if isinstance(value, str):
|
|
663
576
|
s = value.strip()
|
|
664
577
|
if not s or s.lower() in ("", "null", "none", "n/a", "na"):
|
|
665
|
-
return _get_fallback_value(
|
|
666
|
-
|
|
578
|
+
return _get_fallback_value("Empty/null string cannot be converted to int")
|
|
579
|
+
|
|
667
580
|
if allow_shorthand:
|
|
668
581
|
parsed = parse_shorthand_number(value, as_decimal=False)
|
|
669
582
|
return int(parsed)
|
|
670
583
|
else:
|
|
671
584
|
return int(value)
|
|
672
|
-
|
|
585
|
+
|
|
673
586
|
except Exception as e:
|
|
674
587
|
# Try alternative parsing
|
|
675
588
|
try:
|
|
676
589
|
return int(parse_shorthand_number(value, as_decimal=False, allow_percent=False))
|
|
677
590
|
except Exception:
|
|
678
591
|
error_msg = f"Cannot convert '{value}' to int: {e}"
|
|
679
|
-
|
|
592
|
+
logger.warning("%s", error_msg)
|
|
680
593
|
return _get_fallback_value(error_msg)
|
|
681
594
|
|
|
682
595
|
if target_type is float:
|
|
@@ -685,17 +598,17 @@ def convert_value(
|
|
|
685
598
|
if isinstance(value, str):
|
|
686
599
|
s = value.strip()
|
|
687
600
|
if not s or s.lower() in ("", "null", "none", "n/a", "na"):
|
|
688
|
-
return _get_fallback_value(
|
|
689
|
-
|
|
601
|
+
return _get_fallback_value("Empty/null string cannot be converted to float")
|
|
602
|
+
|
|
690
603
|
if allow_shorthand:
|
|
691
604
|
parsed = parse_shorthand_number(value, as_decimal=False)
|
|
692
605
|
return float(parsed)
|
|
693
606
|
else:
|
|
694
607
|
return float(value)
|
|
695
|
-
|
|
608
|
+
|
|
696
609
|
except Exception as e:
|
|
697
610
|
error_msg = f"Cannot convert '{value}' to float: {e}"
|
|
698
|
-
|
|
611
|
+
logger.warning("%s", error_msg)
|
|
699
612
|
return _get_fallback_value(error_msg)
|
|
700
613
|
|
|
701
614
|
if target_type is Decimal:
|
|
@@ -704,17 +617,17 @@ def convert_value(
|
|
|
704
617
|
if isinstance(value, str):
|
|
705
618
|
s = value.strip()
|
|
706
619
|
if not s or s.lower() in ("", "null", "none", "n/a", "na"):
|
|
707
|
-
return _get_fallback_value(
|
|
708
|
-
|
|
620
|
+
return _get_fallback_value("Empty/null string cannot be converted to Decimal")
|
|
621
|
+
|
|
709
622
|
if allow_shorthand:
|
|
710
623
|
parsed = parse_shorthand_number(value, as_decimal=True)
|
|
711
624
|
return _to_decimal(parsed)
|
|
712
625
|
else:
|
|
713
626
|
return _to_decimal(value)
|
|
714
|
-
|
|
627
|
+
|
|
715
628
|
except Exception as e:
|
|
716
629
|
error_msg = f"Cannot convert '{value}' to Decimal: {e}"
|
|
717
|
-
|
|
630
|
+
logger.warning("%s", error_msg)
|
|
718
631
|
return _get_fallback_value(error_msg)
|
|
719
632
|
|
|
720
633
|
# Bool - Enhanced error handling
|
|
@@ -723,7 +636,7 @@ def convert_value(
|
|
|
723
636
|
return parse_boolean(value)
|
|
724
637
|
except Exception as e:
|
|
725
638
|
error_msg = f"Cannot convert '{value}' to bool: {e}"
|
|
726
|
-
|
|
639
|
+
logger.warning("%s", error_msg)
|
|
727
640
|
return _get_fallback_value(error_msg)
|
|
728
641
|
|
|
729
642
|
# Strings - More robust handling
|
|
@@ -734,7 +647,7 @@ def convert_value(
|
|
|
734
647
|
return str(value)
|
|
735
648
|
except Exception as e:
|
|
736
649
|
error_msg = f"Cannot convert '{value}' to str: {e}"
|
|
737
|
-
|
|
650
|
+
logger.warning("%s", error_msg)
|
|
738
651
|
return _get_fallback_value(error_msg)
|
|
739
652
|
|
|
740
653
|
# Datetime / Date / Time - Enhanced error handling
|
|
@@ -743,25 +656,25 @@ def convert_value(
|
|
|
743
656
|
return parse_datetime(value)
|
|
744
657
|
except Exception as e:
|
|
745
658
|
error_msg = f"Cannot convert '{value}' to datetime: {e}"
|
|
746
|
-
|
|
659
|
+
logger.warning("%s", error_msg)
|
|
747
660
|
return _get_fallback_value(error_msg)
|
|
748
|
-
|
|
661
|
+
|
|
749
662
|
if target_type is date:
|
|
750
663
|
try:
|
|
751
664
|
dt = parse_datetime(value)
|
|
752
665
|
return dt.date()
|
|
753
666
|
except Exception as e:
|
|
754
667
|
error_msg = f"Cannot convert '{value}' to date: {e}"
|
|
755
|
-
|
|
668
|
+
logger.warning("%s", error_msg)
|
|
756
669
|
return _get_fallback_value(error_msg)
|
|
757
|
-
|
|
670
|
+
|
|
758
671
|
if target_type is time:
|
|
759
672
|
try:
|
|
760
673
|
dt = parse_datetime(value)
|
|
761
674
|
return dt.time()
|
|
762
675
|
except Exception as e:
|
|
763
676
|
error_msg = f"Cannot convert '{value}' to time: {e}"
|
|
764
|
-
|
|
677
|
+
logger.warning("%s", error_msg)
|
|
765
678
|
return _get_fallback_value(error_msg)
|
|
766
679
|
|
|
767
680
|
# UUID - Enhanced error handling
|
|
@@ -772,7 +685,7 @@ def convert_value(
|
|
|
772
685
|
return uuid.UUID(str(value))
|
|
773
686
|
except Exception as e:
|
|
774
687
|
error_msg = f"Cannot convert '{value}' to UUID: {e}"
|
|
775
|
-
|
|
688
|
+
logger.warning("%s", error_msg)
|
|
776
689
|
return _get_fallback_value(error_msg)
|
|
777
690
|
|
|
778
691
|
# Pydantic models - Enhanced error handling
|
|
@@ -786,7 +699,7 @@ def convert_value(
|
|
|
786
699
|
raise ValueError(f"Cannot convert non-mapping '{value}' to {target_type.__name__}")
|
|
787
700
|
except Exception as e:
|
|
788
701
|
error_msg = f"Cannot convert '{value}' to {target_type.__name__}: {e}"
|
|
789
|
-
|
|
702
|
+
logger.warning("%s", error_msg)
|
|
790
703
|
return _get_fallback_value(error_msg)
|
|
791
704
|
|
|
792
705
|
# Fallback: direct cast if possible
|
|
@@ -794,7 +707,7 @@ def convert_value(
|
|
|
794
707
|
return target_type(value) # type: ignore[call-arg]
|
|
795
708
|
except Exception as e:
|
|
796
709
|
error_msg = f"Cannot convert '{value}' to {getattr(target_type, '__name__', target_type)}: {e}"
|
|
797
|
-
|
|
710
|
+
logger.warning("%s", error_msg)
|
|
798
711
|
return _get_fallback_value(error_msg)
|
|
799
712
|
|
|
800
713
|
|
|
@@ -802,17 +715,17 @@ def convert_value(
|
|
|
802
715
|
# Extraction
|
|
803
716
|
# ---------------------------------------------------------------------------
|
|
804
717
|
|
|
718
|
+
|
|
805
719
|
def extract_fields(
|
|
806
|
-
model_cls:
|
|
807
|
-
data:
|
|
808
|
-
fields:
|
|
720
|
+
model_cls: type[BaseModel],
|
|
721
|
+
data: dict[str, Any],
|
|
722
|
+
fields: list[str] | None = None,
|
|
809
723
|
*,
|
|
810
724
|
strict: bool = True,
|
|
811
725
|
missing: str = "skip", # "skip" | "none" | "error"
|
|
812
|
-
|
|
813
|
-
field_definitions: Optional[Dict[str, Any]] = None,
|
|
726
|
+
field_definitions: dict[str, Any] | None = None,
|
|
814
727
|
use_defaults_on_failure: bool = True,
|
|
815
|
-
) ->
|
|
728
|
+
) -> dict[str, Any]:
|
|
816
729
|
"""
|
|
817
730
|
Extract and convert only specified fields based on a Pydantic model.
|
|
818
731
|
|
|
@@ -825,7 +738,6 @@ def extract_fields(
|
|
|
825
738
|
- "skip": drop it
|
|
826
739
|
- "none": include with None
|
|
827
740
|
- "error": raise KeyError
|
|
828
|
-
level: LogLevel for internal debug logs (uses log_debug).
|
|
829
741
|
field_definitions: Optional field definitions for default values and conversion hints.
|
|
830
742
|
use_defaults_on_failure: Whether to use default values when conversion fails.
|
|
831
743
|
|
|
@@ -844,7 +756,7 @@ def extract_fields(
|
|
|
844
756
|
if strict and invalid:
|
|
845
757
|
raise KeyError(f"Fields not found in model: {', '.join(sorted(invalid))}")
|
|
846
758
|
|
|
847
|
-
result:
|
|
759
|
+
result: dict[str, Any] = {}
|
|
848
760
|
|
|
849
761
|
for fname in fields:
|
|
850
762
|
if fname not in valid_fields:
|
|
@@ -856,11 +768,11 @@ def extract_fields(
|
|
|
856
768
|
|
|
857
769
|
if source_key not in data:
|
|
858
770
|
if missing == "skip":
|
|
859
|
-
|
|
771
|
+
logger.debug("Skipping missing field: %s", fname)
|
|
860
772
|
continue
|
|
861
773
|
if missing == "none":
|
|
862
774
|
result[fname] = None
|
|
863
|
-
|
|
775
|
+
logger.debug("Missing field set to None: %s", fname)
|
|
864
776
|
continue
|
|
865
777
|
raise KeyError(f"Missing required field in data: {source_key}")
|
|
866
778
|
|
|
@@ -872,31 +784,23 @@ def extract_fields(
|
|
|
872
784
|
allow_shorthand=True,
|
|
873
785
|
field_name=fname,
|
|
874
786
|
field_definitions=field_definitions,
|
|
875
|
-
use_defaults_on_failure=use_defaults_on_failure
|
|
787
|
+
use_defaults_on_failure=use_defaults_on_failure,
|
|
876
788
|
)
|
|
877
789
|
result[fname] = converted
|
|
878
|
-
|
|
790
|
+
logger.debug("Converted field %s: %s -> %s", fname, raw, converted)
|
|
879
791
|
except Exception as e:
|
|
880
792
|
# If we're not using defaults, re-raise the original error
|
|
881
793
|
if not use_defaults_on_failure:
|
|
882
794
|
raise ValueError(f"Validation failed for field '{fname}': {e}") from e
|
|
883
|
-
|
|
795
|
+
|
|
884
796
|
# Try to get a fallback value using the field info
|
|
885
797
|
try:
|
|
886
798
|
fallback_value = get_field_default(fname, finfo, field_definitions)
|
|
887
799
|
result[fname] = fallback_value
|
|
888
|
-
|
|
889
|
-
"field": fname,
|
|
890
|
-
"error": str(e),
|
|
891
|
-
"fallback": fallback_value
|
|
892
|
-
})
|
|
800
|
+
logger.warning("Field %s conversion error: %s, using fallback: %s", fname, e, fallback_value)
|
|
893
801
|
except Exception as fallback_error:
|
|
894
802
|
# If even fallback fails, re-raise original error
|
|
895
|
-
|
|
896
|
-
"field": fname,
|
|
897
|
-
"conversion_error": str(e),
|
|
898
|
-
"fallback_error": str(fallback_error)
|
|
899
|
-
})
|
|
803
|
+
logger.error("Field %s conversion error: %s, fallback error: %s", fname, e, fallback_error)
|
|
900
804
|
raise ValueError(f"Validation failed for field '{fname}': {e}") from e
|
|
901
805
|
|
|
902
806
|
return result
|
|
@@ -906,7 +810,8 @@ def extract_fields(
|
|
|
906
810
|
# Field Definitions
|
|
907
811
|
# ---------------------------------------------------------------------------
|
|
908
812
|
|
|
909
|
-
|
|
813
|
+
|
|
814
|
+
def load_field_definitions(path: str) -> dict[str, Any]:
|
|
910
815
|
"""
|
|
911
816
|
Load field definitions from a JSON or YAML file.
|
|
912
817
|
|
|
@@ -920,27 +825,29 @@ def load_field_definitions(path: str) -> Dict[str, Any]:
|
|
|
920
825
|
ValueError: If file format is not supported or content is invalid
|
|
921
826
|
FileNotFoundError: If the file doesn't exist
|
|
922
827
|
"""
|
|
923
|
-
import yaml
|
|
924
828
|
from pathlib import Path
|
|
925
829
|
|
|
830
|
+
import yaml
|
|
831
|
+
|
|
926
832
|
path_obj = Path(path)
|
|
927
833
|
if not path_obj.exists():
|
|
928
834
|
raise FileNotFoundError(f"Field definitions file not found: {path}")
|
|
929
835
|
|
|
930
836
|
suffix = path_obj.suffix.lower()
|
|
931
|
-
content = path_obj.read_text(encoding=
|
|
837
|
+
content = path_obj.read_text(encoding="utf-8")
|
|
932
838
|
|
|
933
839
|
try:
|
|
934
|
-
if suffix ==
|
|
840
|
+
if suffix == ".json":
|
|
935
841
|
return json.loads(content)
|
|
936
|
-
elif suffix in (
|
|
842
|
+
elif suffix in (".yaml", ".yml"):
|
|
937
843
|
return yaml.safe_load(content)
|
|
938
844
|
else:
|
|
939
845
|
raise ValueError(f"Unsupported file format: {suffix}")
|
|
940
846
|
except Exception as e:
|
|
941
847
|
raise ValueError(f"Failed to parse field definitions: {e}") from e
|
|
942
848
|
|
|
943
|
-
|
|
849
|
+
|
|
850
|
+
def validate_field_definition(definition: dict[str, Any]) -> bool:
|
|
944
851
|
"""
|
|
945
852
|
Validate a field definition structure.
|
|
946
853
|
|
|
@@ -957,43 +864,42 @@ def validate_field_definition(definition: Dict[str, Any]) -> bool:
|
|
|
957
864
|
- default: Default value
|
|
958
865
|
- nullable: Whether field can be None
|
|
959
866
|
"""
|
|
960
|
-
required_keys = {
|
|
961
|
-
|
|
867
|
+
required_keys = {"type", "description", "instructions", "default", "nullable"}
|
|
868
|
+
|
|
962
869
|
# Check for required keys
|
|
963
870
|
if not all(key in definition for key in required_keys):
|
|
964
871
|
return False
|
|
965
|
-
|
|
872
|
+
|
|
966
873
|
# Validate type
|
|
967
|
-
if not isinstance(definition[
|
|
874
|
+
if not isinstance(definition["type"], (type, str)):
|
|
968
875
|
return False
|
|
969
|
-
|
|
876
|
+
|
|
970
877
|
# Validate description and instructions are strings
|
|
971
|
-
if not all(isinstance(definition[k], str) for k in [
|
|
878
|
+
if not all(isinstance(definition[k], str) for k in ["description", "instructions"]):
|
|
972
879
|
return False
|
|
973
|
-
|
|
880
|
+
|
|
974
881
|
# Validate nullable is boolean
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
return True
|
|
882
|
+
return isinstance(definition["nullable"], bool)
|
|
883
|
+
|
|
979
884
|
|
|
980
885
|
# ---------------------------------------------------------------------------
|
|
981
886
|
# Default Value Handling
|
|
982
887
|
# ---------------------------------------------------------------------------
|
|
983
888
|
|
|
984
|
-
|
|
889
|
+
|
|
890
|
+
def get_type_default(field_type: type[Any]) -> Any:
|
|
985
891
|
"""
|
|
986
892
|
Get a sensible default value for a given type.
|
|
987
|
-
|
|
893
|
+
|
|
988
894
|
Args:
|
|
989
895
|
field_type: The type to get a default for
|
|
990
|
-
|
|
896
|
+
|
|
991
897
|
Returns:
|
|
992
898
|
A default value appropriate for the type
|
|
993
899
|
"""
|
|
994
900
|
origin = get_origin(field_type)
|
|
995
901
|
args = get_args(field_type)
|
|
996
|
-
|
|
902
|
+
|
|
997
903
|
# Handle Optional/Union types
|
|
998
904
|
if origin is Union:
|
|
999
905
|
non_none = [a for a in args if a is not type(None)]
|
|
@@ -1002,15 +908,15 @@ def get_type_default(field_type: Type[Any]) -> Any:
|
|
|
1002
908
|
return get_type_default(non_none[0])
|
|
1003
909
|
# Multiple non-None types -> return None
|
|
1004
910
|
return None
|
|
1005
|
-
|
|
911
|
+
|
|
1006
912
|
# Container types
|
|
1007
|
-
if origin in (list,
|
|
913
|
+
if origin in (list, list) or field_type is list:
|
|
1008
914
|
return []
|
|
1009
|
-
if origin in (tuple,
|
|
915
|
+
if origin in (tuple, tuple) or field_type is tuple:
|
|
1010
916
|
return ()
|
|
1011
|
-
if origin in (dict,
|
|
917
|
+
if origin in (dict, dict) or field_type is dict:
|
|
1012
918
|
return {}
|
|
1013
|
-
|
|
919
|
+
|
|
1014
920
|
# Scalar types
|
|
1015
921
|
if field_type is int:
|
|
1016
922
|
return 0
|
|
@@ -1028,53 +934,62 @@ def get_type_default(field_type: Type[Any]) -> Any:
|
|
|
1028
934
|
return time(0, 0)
|
|
1029
935
|
if field_type is uuid.UUID:
|
|
1030
936
|
return uuid.uuid4()
|
|
1031
|
-
|
|
937
|
+
|
|
1032
938
|
# Pydantic models - try to create empty instance
|
|
1033
939
|
if isinstance(field_type, type) and issubclass(field_type, BaseModel):
|
|
1034
940
|
try:
|
|
1035
941
|
return field_type()
|
|
1036
942
|
except Exception:
|
|
1037
943
|
return None
|
|
1038
|
-
|
|
944
|
+
|
|
1039
945
|
# Fallback
|
|
1040
946
|
return None
|
|
1041
947
|
|
|
1042
948
|
|
|
1043
|
-
def get_field_default(field_name: str, field_info: Any, field_definitions:
|
|
949
|
+
def get_field_default(field_name: str, field_info: Any, field_definitions: dict[str, Any] | None = None) -> Any:
|
|
1044
950
|
"""
|
|
1045
951
|
Get the default value for a field using the priority order:
|
|
1046
952
|
1. field_definitions default
|
|
1047
953
|
2. Pydantic field default
|
|
1048
954
|
3. Type-appropriate default
|
|
1049
|
-
|
|
955
|
+
|
|
1050
956
|
Args:
|
|
1051
957
|
field_name: Name of the field
|
|
1052
958
|
field_info: Pydantic field info
|
|
1053
959
|
field_definitions: Optional field definitions dict
|
|
1054
|
-
|
|
960
|
+
|
|
1055
961
|
Returns:
|
|
1056
962
|
The appropriate default value
|
|
1057
963
|
"""
|
|
1058
964
|
# Priority 1: field_definitions
|
|
1059
965
|
if field_definitions and field_name in field_definitions:
|
|
1060
966
|
field_def = field_definitions[field_name]
|
|
1061
|
-
if isinstance(field_def, dict) and
|
|
1062
|
-
return field_def[
|
|
1063
|
-
|
|
967
|
+
if isinstance(field_def, dict) and "default" in field_def:
|
|
968
|
+
return field_def["default"]
|
|
969
|
+
|
|
1064
970
|
# Priority 2: Pydantic default - check for PydanticUndefined
|
|
1065
|
-
if hasattr(field_info,
|
|
971
|
+
if hasattr(field_info, "default"):
|
|
1066
972
|
default_val = field_info.default
|
|
1067
973
|
# Handle PydanticUndefined (newer Pydantic) and Ellipsis (older Pydantic)
|
|
1068
|
-
if default_val is not ... and str(default_val) !=
|
|
974
|
+
if default_val is not ... and str(default_val) != "PydanticUndefined":
|
|
1069
975
|
return default_val
|
|
1070
|
-
|
|
976
|
+
|
|
1071
977
|
# Priority 3: Type default
|
|
1072
978
|
return get_type_default(field_info.annotation)
|
|
1073
979
|
|
|
980
|
+
|
|
1074
981
|
# ---------------------------------------------------------------------------
|
|
1075
982
|
# JSON text cleaning
|
|
1076
983
|
# ---------------------------------------------------------------------------
|
|
1077
984
|
|
|
985
|
+
|
|
986
|
+
def strip_think_tags(text: str) -> str:
    """Remove ``<think>...</think>`` sections that some models emit.

    Matching is case-insensitive and tolerates whitespace before the closing
    ``>`` of either tag, so variants such as ``<THINK>`` or ``</think >`` are
    removed as well. Text outside the tags is returned untouched (no
    stripping). An unclosed ``<think>`` is left in place.

    Args:
        text: Raw model output; ``None`` or an empty string is accepted.

    Returns:
        The text with every complete think section removed, or ``""`` when
        ``text`` is falsy.
    """
    if not text:
        return ""
    # DOTALL lets a reasoning section span several lines; the lazy quantifier
    # keeps separate think sections from being merged into one match.
    return re.sub(r"<think\s*>.*?</think\s*>", "", text, flags=re.DOTALL | re.IGNORECASE)
|
|
991
|
+
|
|
992
|
+
|
|
1078
993
|
def clean_json_text(text: str) -> str:
|
|
1079
994
|
"""Attempts to extract a valid JSON object string from text.
|
|
1080
995
|
|
|
@@ -1089,8 +1004,7 @@ def clean_json_text(text: str) -> str:
|
|
|
1089
1004
|
Returns:
|
|
1090
1005
|
A string that best resembles valid JSON content.
|
|
1091
1006
|
"""
|
|
1092
|
-
text =
|
|
1093
|
-
text = text.strip()
|
|
1007
|
+
text = strip_think_tags(text).strip()
|
|
1094
1008
|
|
|
1095
1009
|
if text.startswith("```"):
|
|
1096
1010
|
start_fence = text.find("```")
|
|
@@ -1099,7 +1013,7 @@ def clean_json_text(text: str) -> str:
|
|
|
1099
1013
|
if start_content != -1:
|
|
1100
1014
|
end_fence = text.find("```", start_content)
|
|
1101
1015
|
if end_fence != -1:
|
|
1102
|
-
return text[start_content + 1:end_fence].strip()
|
|
1016
|
+
return text[start_content + 1 : end_fence].strip()
|
|
1103
1017
|
else:
|
|
1104
1018
|
return text[start_content + 1 :].strip()
|
|
1105
1019
|
|
|
@@ -1108,4 +1022,42 @@ def clean_json_text(text: str) -> str:
|
|
|
1108
1022
|
if start != -1 and end != -1 and end > start:
|
|
1109
1023
|
return text[start : end + 1]
|
|
1110
1024
|
|
|
1111
|
-
return text
|
|
1025
|
+
return text
|
|
1026
|
+
|
|
1027
|
+
|
|
1028
|
+
# Conversational lead-ins that models tend to put in front of TOON output.
_TOON_PREFIXES = (
    "Here is the TOON data:",
    "Here's the TOON format:",
    "TOON output:",
    "Result:",
)

# An inline primitive-array line: "<key>[<count>]:" followed by its values.
_TOON_SIMPLE_ARRAY_RE = re.compile(r"^([^\n:]+?\[\d+\]:)([^\n]+)$", flags=re.MULTILINE)


def _strip_toon_prefixes(text: str) -> str:
    """Drop any known lead-in phrase (case-insensitively) from the start of *text*."""
    for prefix in _TOON_PREFIXES:
        if text[: len(prefix)].lower() == prefix.lower():
            text = text[len(prefix) :].strip()
    return text


def _normalize_simple_array(match: re.Match[str]) -> str:
    """Rewrite a ``;``-separated inline array line to use ``,`` separators."""
    header = match.group(1)
    values = match.group(2)
    if ";" not in values:
        return match.group(0)
    items = [part.strip() for part in values.split(";") if part.strip()]
    if not items:
        return header
    # Emit "key[N]: a,b,c" with a single space after the header, matching the
    # form of array lines that needed no rewriting.
    return f"{header} {','.join(items)}"


def clean_toon_text(text: str) -> str:
    """Extract TOON content from an LLM response, removing markdown and prefixes.

    Steps, in order:
    1. remove ``<think>`` sections via ``strip_think_tags`` and trim whitespace;
    2. remove a known lead-in phrase (e.g. ``"Here is the TOON data:"``);
    3. remove a leading code fence, with or without a ``toon`` tag in any
       letter case, and a trailing code fence;
    4. remove a lead-in phrase again, in case it sat inside the fence;
    5. rewrite inline array lines that use ``;`` separators to use ``,``.

    Lead-ins are stripped both before and after fence removal because a model
    commonly writes the phrase first and the fenced block second; checking
    for the fence only at the very start would leave the fence in the output.

    Args:
        text: Raw model output.

    Returns:
        The cleaned TOON text.
    """
    cleaned = strip_think_tags(text).strip()
    cleaned = _strip_toon_prefixes(cleaned)

    opening_fence = re.match(r"```(?:toon)?", cleaned, flags=re.IGNORECASE)
    if opening_fence:
        cleaned = cleaned[opening_fence.end() :]
    if cleaned.endswith("```"):
        cleaned = cleaned[: -len("```")]

    cleaned = _strip_toon_prefixes(cleaned.strip())

    return _TOON_SIMPLE_ARRAY_RE.sub(_normalize_simple_array, cleaned)
|