prompture 0.0.33.dev1__py3-none-any.whl → 0.0.34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prompture/__init__.py +133 -49
- prompture/_version.py +34 -0
- prompture/aio/__init__.py +74 -0
- prompture/async_conversation.py +484 -0
- prompture/async_core.py +803 -0
- prompture/async_driver.py +131 -0
- prompture/cache.py +469 -0
- prompture/callbacks.py +50 -0
- prompture/cli.py +7 -3
- prompture/conversation.py +504 -0
- prompture/core.py +475 -352
- prompture/cost_mixin.py +51 -0
- prompture/discovery.py +50 -35
- prompture/driver.py +125 -5
- prompture/drivers/__init__.py +171 -73
- prompture/drivers/airllm_driver.py +13 -20
- prompture/drivers/async_airllm_driver.py +26 -0
- prompture/drivers/async_azure_driver.py +117 -0
- prompture/drivers/async_claude_driver.py +107 -0
- prompture/drivers/async_google_driver.py +132 -0
- prompture/drivers/async_grok_driver.py +91 -0
- prompture/drivers/async_groq_driver.py +84 -0
- prompture/drivers/async_hugging_driver.py +61 -0
- prompture/drivers/async_lmstudio_driver.py +79 -0
- prompture/drivers/async_local_http_driver.py +44 -0
- prompture/drivers/async_ollama_driver.py +125 -0
- prompture/drivers/async_openai_driver.py +96 -0
- prompture/drivers/async_openrouter_driver.py +96 -0
- prompture/drivers/async_registry.py +129 -0
- prompture/drivers/azure_driver.py +36 -9
- prompture/drivers/claude_driver.py +86 -34
- prompture/drivers/google_driver.py +87 -51
- prompture/drivers/grok_driver.py +29 -32
- prompture/drivers/groq_driver.py +27 -26
- prompture/drivers/hugging_driver.py +6 -6
- prompture/drivers/lmstudio_driver.py +26 -13
- prompture/drivers/local_http_driver.py +6 -6
- prompture/drivers/ollama_driver.py +90 -23
- prompture/drivers/openai_driver.py +36 -9
- prompture/drivers/openrouter_driver.py +31 -25
- prompture/drivers/registry.py +306 -0
- prompture/field_definitions.py +106 -96
- prompture/logging.py +80 -0
- prompture/model_rates.py +217 -0
- prompture/runner.py +49 -47
- prompture/session.py +117 -0
- prompture/settings.py +14 -1
- prompture/tools.py +172 -265
- prompture/validator.py +3 -3
- {prompture-0.0.33.dev1.dist-info → prompture-0.0.34.dist-info}/METADATA +18 -20
- prompture-0.0.34.dist-info/RECORD +55 -0
- prompture-0.0.33.dev1.dist-info/RECORD +0 -29
- {prompture-0.0.33.dev1.dist-info → prompture-0.0.34.dist-info}/WHEEL +0 -0
- {prompture-0.0.33.dev1.dist-info → prompture-0.0.34.dist-info}/entry_points.txt +0 -0
- {prompture-0.0.33.dev1.dist-info → prompture-0.0.34.dist-info}/licenses/LICENSE +0 -0
- {prompture-0.0.33.dev1.dist-info → prompture-0.0.34.dist-info}/top_level.txt +0 -0
prompture/tools.py
CHANGED
|
@@ -8,136 +8,54 @@ This module provides utilities for:
|
|
|
8
8
|
3. Exclusive field extraction against Pydantic models
|
|
9
9
|
4. Safe JSON text extraction from messy LLM output
|
|
10
10
|
5. Small parsing helpers (booleans, lists, datetimes)
|
|
11
|
-
6. Lightweight, flexible debug logging with levels
|
|
12
11
|
|
|
13
12
|
Notes:
|
|
14
13
|
- Only standard lib + pydantic + python-dateutil are required.
|
|
15
14
|
- Functions are defensive and avoid raising unless necessary for correctness.
|
|
16
15
|
"""
|
|
16
|
+
|
|
17
17
|
from __future__ import annotations
|
|
18
18
|
|
|
19
|
-
import re
|
|
20
|
-
import sys
|
|
21
19
|
import json
|
|
22
|
-
import
|
|
20
|
+
import logging
|
|
21
|
+
import re
|
|
22
|
+
import uuid
|
|
23
|
+
from collections.abc import Mapping
|
|
24
|
+
from datetime import date, datetime, time, timezone
|
|
23
25
|
from decimal import Decimal, InvalidOperation
|
|
24
|
-
from datetime import date, time, datetime, timezone
|
|
25
|
-
from enum import Enum
|
|
26
26
|
from typing import (
|
|
27
27
|
Any,
|
|
28
|
-
Dict,
|
|
29
|
-
List,
|
|
30
|
-
Optional,
|
|
31
|
-
Type,
|
|
32
28
|
Union,
|
|
33
|
-
get_origin,
|
|
34
29
|
get_args,
|
|
35
|
-
|
|
36
|
-
Mapping,
|
|
37
|
-
Tuple,
|
|
30
|
+
get_origin,
|
|
38
31
|
)
|
|
39
|
-
import uuid
|
|
40
32
|
|
|
41
33
|
import dateutil.parser
|
|
42
34
|
from pydantic import BaseModel
|
|
43
35
|
from tukuy import TukuyTransformer
|
|
44
36
|
|
|
37
|
+
logger = logging.getLogger("prompture.tools")
|
|
38
|
+
|
|
45
39
|
# Initialize Tukuy transformer
|
|
46
40
|
TUKUY = TukuyTransformer()
|
|
47
41
|
|
|
48
42
|
__all__ = [
|
|
49
|
-
"create_field_schema",
|
|
50
|
-
"convert_value",
|
|
51
|
-
"extract_fields",
|
|
52
|
-
"parse_shorthand_number",
|
|
53
|
-
"parse_boolean",
|
|
54
|
-
"parse_datetime",
|
|
55
43
|
"as_list",
|
|
56
44
|
"clean_json_text",
|
|
57
45
|
"clean_toon_text",
|
|
58
|
-
"
|
|
59
|
-
"
|
|
46
|
+
"convert_value",
|
|
47
|
+
"create_field_schema",
|
|
48
|
+
"extract_fields",
|
|
49
|
+
"get_field_default",
|
|
50
|
+
"get_type_default",
|
|
60
51
|
"load_field_definitions",
|
|
52
|
+
"parse_boolean",
|
|
53
|
+
"parse_datetime",
|
|
54
|
+
"parse_shorthand_number",
|
|
61
55
|
"validate_field_definition",
|
|
62
|
-
"get_type_default",
|
|
63
|
-
"get_field_default",
|
|
64
56
|
]
|
|
65
57
|
|
|
66
58
|
|
|
67
|
-
# ---------------------------------------------------------------------------
|
|
68
|
-
# Logging
|
|
69
|
-
# ---------------------------------------------------------------------------
|
|
70
|
-
|
|
71
|
-
class LogLevel(int, Enum):
|
|
72
|
-
OFF = 1000
|
|
73
|
-
ERROR = 40
|
|
74
|
-
WARN = 30
|
|
75
|
-
INFO = 20
|
|
76
|
-
DEBUG = 10
|
|
77
|
-
TRACE = 5 # very verbose
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
def log_debug(
|
|
81
|
-
level: int | LogLevel,
|
|
82
|
-
current_level: int | LogLevel,
|
|
83
|
-
msg: str | Mapping[str, Any] | Iterable[Tuple[str, Any]],
|
|
84
|
-
*,
|
|
85
|
-
prefix: str = "",
|
|
86
|
-
stream = None,
|
|
87
|
-
ts: bool = False,
|
|
88
|
-
json_mode: bool = False,
|
|
89
|
-
) -> None:
|
|
90
|
-
"""
|
|
91
|
-
Simple leveled logger.
|
|
92
|
-
|
|
93
|
-
Args:
|
|
94
|
-
level: Level of this message.
|
|
95
|
-
current_level: Minimum level that should be emitted.
|
|
96
|
-
msg: Message string OR a mapping/iterable of (key, value) to print.
|
|
97
|
-
prefix: Optional prefix (e.g., "[extractor] ").
|
|
98
|
-
stream: File-like stream; defaults to sys.stderr.
|
|
99
|
-
ts: If True, prepend ISO timestamp.
|
|
100
|
-
json_mode: If True, print as a single JSON object line.
|
|
101
|
-
|
|
102
|
-
Examples:
|
|
103
|
-
log_debug(LogLevel.DEBUG, current, "Parsed field X")
|
|
104
|
-
log_debug(LogLevel.INFO, current, {"field": "age", "value": 42})
|
|
105
|
-
"""
|
|
106
|
-
if int(current_level) > int(level):
|
|
107
|
-
return
|
|
108
|
-
|
|
109
|
-
stream = stream or sys.stderr
|
|
110
|
-
parts: List[str] = []
|
|
111
|
-
|
|
112
|
-
if ts:
|
|
113
|
-
parts.append(datetime.now(timezone.utc).isoformat())
|
|
114
|
-
|
|
115
|
-
if prefix:
|
|
116
|
-
parts.append(prefix.rstrip())
|
|
117
|
-
|
|
118
|
-
if json_mode:
|
|
119
|
-
if isinstance(msg, str):
|
|
120
|
-
payload = {"message": msg}
|
|
121
|
-
elif isinstance(msg, Mapping):
|
|
122
|
-
payload = dict(msg)
|
|
123
|
-
else:
|
|
124
|
-
payload = dict(msg) # try to coerce iterable of pairs
|
|
125
|
-
out = " ".join(parts + [json.dumps(payload, default=str, ensure_ascii=False)])
|
|
126
|
-
stream.write(out + "\n")
|
|
127
|
-
return
|
|
128
|
-
|
|
129
|
-
if isinstance(msg, str):
|
|
130
|
-
parts.append(msg)
|
|
131
|
-
elif isinstance(msg, Mapping):
|
|
132
|
-
kv = " ".join(f"{k}={json.dumps(v, default=str, ensure_ascii=False)}" for k, v in msg.items())
|
|
133
|
-
parts.append(kv)
|
|
134
|
-
else:
|
|
135
|
-
kv = " ".join(f"{k}={json.dumps(v, default=str, ensure_ascii=False)}" for k, v in msg)
|
|
136
|
-
parts.append(kv)
|
|
137
|
-
|
|
138
|
-
stream.write(" ".join(parts) + "\n")
|
|
139
|
-
|
|
140
|
-
|
|
141
59
|
# ---------------------------------------------------------------------------
|
|
142
60
|
# Helpers
|
|
143
61
|
# ---------------------------------------------------------------------------
|
|
@@ -147,35 +65,35 @@ _CURRENCY_PREFIX = tuple("$€£¥₿₽₹₩₫₪₴₦₲₵₡₱₺₸")
|
|
|
147
65
|
def parse_boolean(value: Any) -> bool:
|
|
148
66
|
"""
|
|
149
67
|
Enhanced boolean parser with multilingual variants and edge case handling.
|
|
150
|
-
|
|
68
|
+
|
|
151
69
|
Supports:
|
|
152
70
|
- Standard: true/false, yes/no, on/off, 1/0
|
|
153
71
|
- Multilingual: si/no (Spanish), oui/non (French), ja/nein (German)
|
|
154
72
|
- Edge cases: empty strings, whitespace, case variations
|
|
155
|
-
|
|
73
|
+
|
|
156
74
|
Uses Tukuy transformer for robust multilingual support.
|
|
157
75
|
"""
|
|
158
76
|
if isinstance(value, bool):
|
|
159
77
|
return value
|
|
160
78
|
if value is None:
|
|
161
79
|
raise ValueError("Cannot parse None as boolean")
|
|
162
|
-
|
|
80
|
+
|
|
163
81
|
# Handle numeric values
|
|
164
82
|
if isinstance(value, (int, float, Decimal)):
|
|
165
83
|
return bool(value)
|
|
166
|
-
|
|
84
|
+
|
|
167
85
|
s = str(value).strip().lower()
|
|
168
|
-
|
|
86
|
+
|
|
169
87
|
# Handle empty strings and common "falsy" representations
|
|
170
88
|
if not s or s in ("", "null", "none", "n/a", "na", "nil", "undefined"):
|
|
171
89
|
return False
|
|
172
|
-
|
|
90
|
+
|
|
173
91
|
# Quick check for obvious true/false values before using Tukuy
|
|
174
92
|
if s in ("1", "true", "yes", "on", "si", "sí", "oui", "ja", "t", "y"):
|
|
175
93
|
return True
|
|
176
94
|
if s in ("0", "false", "no", "off", "non", "nein", "f", "n"):
|
|
177
95
|
return False
|
|
178
|
-
|
|
96
|
+
|
|
179
97
|
# Use Tukuy for more complex cases
|
|
180
98
|
try:
|
|
181
99
|
return TUKUY.transform(s, ["bool"])
|
|
@@ -187,21 +105,22 @@ def parse_boolean(value: Any) -> bool:
|
|
|
187
105
|
return bool(num_val)
|
|
188
106
|
except (ValueError, TypeError):
|
|
189
107
|
pass
|
|
190
|
-
|
|
108
|
+
|
|
191
109
|
# If it contains "true", "yes", "on", etc., lean towards True
|
|
192
110
|
true_indicators = ["true", "yes", "on", "enable", "active", "si", "oui", "ja"]
|
|
193
111
|
false_indicators = ["false", "no", "off", "disable", "inactive", "non", "nein"]
|
|
194
|
-
|
|
112
|
+
|
|
195
113
|
s_lower = s.lower()
|
|
196
114
|
if any(indicator in s_lower for indicator in true_indicators):
|
|
197
115
|
return True
|
|
198
116
|
if any(indicator in s_lower for indicator in false_indicators):
|
|
199
117
|
return False
|
|
200
|
-
|
|
118
|
+
|
|
201
119
|
# Final fallback - raise error for truly ambiguous cases
|
|
202
|
-
raise ValueError(f"Cannot parse '{value}' as boolean")
|
|
120
|
+
raise ValueError(f"Cannot parse '{value}' as boolean") from None
|
|
203
121
|
|
|
204
|
-
|
|
122
|
+
|
|
123
|
+
def as_list(value: Any, *, sep: str | None = None) -> list[Any]:
|
|
205
124
|
"""
|
|
206
125
|
Convert a value into a list.
|
|
207
126
|
- Lists/tuples: returned as list.
|
|
@@ -222,6 +141,7 @@ def as_list(value: Any, *, sep: str | None = None) -> List[Any]:
|
|
|
222
141
|
return [p.strip() for p in parts if p.strip() != ""]
|
|
223
142
|
return [value]
|
|
224
143
|
|
|
144
|
+
|
|
225
145
|
def parse_datetime(
|
|
226
146
|
value: Any,
|
|
227
147
|
*,
|
|
@@ -246,6 +166,7 @@ def parse_datetime(
|
|
|
246
166
|
def _strip_currency_prefix(s: str) -> str:
|
|
247
167
|
return s[1:].lstrip() if s and s[0] in _CURRENCY_PREFIX else s
|
|
248
168
|
|
|
169
|
+
|
|
249
170
|
def parse_shorthand_number(
|
|
250
171
|
value: Any,
|
|
251
172
|
*,
|
|
@@ -297,14 +218,14 @@ def parse_shorthand_number(
|
|
|
297
218
|
# Use appropriate Tukuy transformer based on as_decimal
|
|
298
219
|
transformer = ["shorthand_decimal"] if as_decimal else ["shorthand_number"]
|
|
299
220
|
num = TUKUY.transform(s, transformer)
|
|
300
|
-
|
|
221
|
+
|
|
301
222
|
# Handle percent if needed
|
|
302
223
|
if is_percent:
|
|
303
224
|
if isinstance(num, (int, float)):
|
|
304
225
|
num = num * percent_base / 100
|
|
305
226
|
else: # Decimal
|
|
306
|
-
num = num * Decimal(str(percent_base)) / Decimal(
|
|
307
|
-
|
|
227
|
+
num = num * Decimal(str(percent_base)) / Decimal("100")
|
|
228
|
+
|
|
308
229
|
return num
|
|
309
230
|
|
|
310
231
|
|
|
@@ -312,7 +233,8 @@ def parse_shorthand_number(
|
|
|
312
233
|
# Schema creation
|
|
313
234
|
# ---------------------------------------------------------------------------
|
|
314
235
|
|
|
315
|
-
|
|
236
|
+
|
|
237
|
+
def _base_schema_for_type(field_name: str, field_type: type[Any]) -> dict[str, Any]:
|
|
316
238
|
origin = get_origin(field_type)
|
|
317
239
|
args = get_args(field_type)
|
|
318
240
|
|
|
@@ -332,14 +254,14 @@ def _base_schema_for_type(field_name: str, field_type: Type[Any]) -> Dict[str, A
|
|
|
332
254
|
}
|
|
333
255
|
|
|
334
256
|
# Containers
|
|
335
|
-
if origin in (list,
|
|
257
|
+
if origin in (list, list):
|
|
336
258
|
item_t = args[0] if args else Any
|
|
337
259
|
return {
|
|
338
260
|
"type": "array",
|
|
339
261
|
"items": _strip_desc(_base_schema_for_type(f"{field_name}_item", item_t)),
|
|
340
262
|
}
|
|
341
263
|
|
|
342
|
-
if origin in (tuple,
|
|
264
|
+
if origin in (tuple, tuple):
|
|
343
265
|
# Treat as array with items; if variable length, use first type as items
|
|
344
266
|
if args and args[-1] is Ellipsis:
|
|
345
267
|
item_t = args[0]
|
|
@@ -350,15 +272,12 @@ def _base_schema_for_type(field_name: str, field_type: Type[Any]) -> Dict[str, A
|
|
|
350
272
|
elif args:
|
|
351
273
|
return {
|
|
352
274
|
"type": "array",
|
|
353
|
-
"prefixItems": [
|
|
354
|
-
_strip_desc(_base_schema_for_type(f"{field_name}_{i}", t))
|
|
355
|
-
for i, t in enumerate(args)
|
|
356
|
-
],
|
|
275
|
+
"prefixItems": [_strip_desc(_base_schema_for_type(f"{field_name}_{i}", t)) for i, t in enumerate(args)],
|
|
357
276
|
"items": False,
|
|
358
277
|
}
|
|
359
278
|
return {"type": "array"}
|
|
360
279
|
|
|
361
|
-
if origin in (dict,
|
|
280
|
+
if origin in (dict, dict):
|
|
362
281
|
key_t = args[0] if args else str
|
|
363
282
|
val_t = args[1] if len(args) > 1 else Any
|
|
364
283
|
# JSON Schema keys must be strings; if not, we'll still describe "object"
|
|
@@ -394,7 +313,7 @@ def _base_schema_for_type(field_name: str, field_type: Type[Any]) -> Dict[str, A
|
|
|
394
313
|
|
|
395
314
|
# Custom types with __schema__
|
|
396
315
|
if hasattr(field_type, "__schema__"):
|
|
397
|
-
sch =
|
|
316
|
+
sch = field_type.__schema__
|
|
398
317
|
if isinstance(sch, Mapping):
|
|
399
318
|
return dict(sch)
|
|
400
319
|
|
|
@@ -402,7 +321,7 @@ def _base_schema_for_type(field_name: str, field_type: Type[Any]) -> Dict[str, A
|
|
|
402
321
|
return {"type": "string"}
|
|
403
322
|
|
|
404
323
|
|
|
405
|
-
def _strip_desc(schema:
|
|
324
|
+
def _strip_desc(schema: dict[str, Any]) -> dict[str, Any]:
|
|
406
325
|
"""Remove 'description' if present (useful when embedding item schemas)."""
|
|
407
326
|
schema = dict(schema)
|
|
408
327
|
schema.pop("description", None)
|
|
@@ -411,9 +330,9 @@ def _strip_desc(schema: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
411
330
|
|
|
412
331
|
def create_field_schema(
|
|
413
332
|
field_name: str,
|
|
414
|
-
field_type:
|
|
415
|
-
description:
|
|
416
|
-
) ->
|
|
333
|
+
field_type: type[Any],
|
|
334
|
+
description: str | None = None,
|
|
335
|
+
) -> dict[str, Any]:
|
|
417
336
|
"""
|
|
418
337
|
Creates a JSON(-like) schema for a field based on its type.
|
|
419
338
|
|
|
@@ -441,6 +360,7 @@ def create_field_schema(
|
|
|
441
360
|
# Conversion
|
|
442
361
|
# ---------------------------------------------------------------------------
|
|
443
362
|
|
|
363
|
+
|
|
444
364
|
def _to_decimal(value: Any) -> Decimal:
|
|
445
365
|
if isinstance(value, Decimal):
|
|
446
366
|
return value
|
|
@@ -452,15 +372,15 @@ def _to_decimal(value: Any) -> Decimal:
|
|
|
452
372
|
|
|
453
373
|
def convert_value(
|
|
454
374
|
value: Any,
|
|
455
|
-
target_type:
|
|
375
|
+
target_type: type[Any],
|
|
456
376
|
allow_shorthand: bool = True,
|
|
457
|
-
field_name:
|
|
458
|
-
field_definitions:
|
|
377
|
+
field_name: str | None = None,
|
|
378
|
+
field_definitions: dict[str, Any] | None = None,
|
|
459
379
|
use_defaults_on_failure: bool = True,
|
|
460
380
|
) -> Any:
|
|
461
381
|
"""
|
|
462
382
|
Enhanced value converter with robust error handling and default value support.
|
|
463
|
-
|
|
383
|
+
|
|
464
384
|
Convert 'value' to 'target_type' with support for:
|
|
465
385
|
- Optional/Union with intelligent type ordering
|
|
466
386
|
- Numeric shorthand (1.2k, $3,400, 12%) with better error handling
|
|
@@ -476,61 +396,59 @@ def convert_value(
|
|
|
476
396
|
field_name: Name of field being converted (for field-specific defaults)
|
|
477
397
|
field_definitions: Field definitions dict for custom defaults/instructions
|
|
478
398
|
use_defaults_on_failure: Whether to use defaults when conversion fails
|
|
479
|
-
|
|
399
|
+
|
|
480
400
|
Returns:
|
|
481
401
|
Converted value or appropriate default
|
|
482
|
-
|
|
402
|
+
|
|
483
403
|
Notes:
|
|
484
404
|
- For List[T], a scalar becomes [T(scalar)]
|
|
485
405
|
- For Decimal and floats, shorthand and currency are supported
|
|
486
406
|
- Conversion failures log warnings but continue with defaults if enabled
|
|
487
407
|
- Union types try conversions in order and use first successful result
|
|
488
408
|
"""
|
|
489
|
-
|
|
409
|
+
|
|
490
410
|
def _get_fallback_value(error_msg: str = "") -> Any:
|
|
491
411
|
"""Get appropriate fallback value when conversion fails."""
|
|
492
412
|
if not use_defaults_on_failure:
|
|
493
413
|
raise ValueError(error_msg)
|
|
494
|
-
|
|
414
|
+
|
|
495
415
|
try:
|
|
496
416
|
# Try field-specific default first
|
|
497
417
|
if field_name and field_definitions:
|
|
498
418
|
field_def = field_definitions.get(field_name, {})
|
|
499
|
-
if isinstance(field_def, dict) and
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
419
|
+
if isinstance(field_def, dict) and "default" in field_def:
|
|
420
|
+
logger.debug("Using field default for '%s': %s", field_name, field_def["default"])
|
|
421
|
+
return field_def["default"]
|
|
422
|
+
|
|
504
423
|
# Fall back to type default
|
|
505
424
|
type_default = get_type_default(target_type)
|
|
506
|
-
|
|
507
|
-
f"Using type default for {target_type}: {type_default}")
|
|
425
|
+
logger.debug("Using type default for %s: %s", target_type, type_default)
|
|
508
426
|
return type_default
|
|
509
|
-
|
|
427
|
+
|
|
510
428
|
except Exception as fallback_error:
|
|
511
|
-
|
|
512
|
-
f"Failed to get fallback for {target_type}: {fallback_error}")
|
|
429
|
+
logger.warning("Failed to get fallback for %s: %s", target_type, fallback_error)
|
|
513
430
|
return None
|
|
514
431
|
|
|
515
|
-
def _safe_convert_recursive(val: Any, typ:
|
|
432
|
+
def _safe_convert_recursive(val: Any, typ: type[Any]) -> Any:
|
|
516
433
|
"""Recursively convert with same parameters but no fallback to avoid infinite recursion."""
|
|
517
434
|
return convert_value(
|
|
518
|
-
val,
|
|
435
|
+
val,
|
|
436
|
+
typ,
|
|
519
437
|
allow_shorthand=allow_shorthand,
|
|
520
438
|
field_name=field_name,
|
|
521
439
|
field_definitions=field_definitions,
|
|
522
|
-
use_defaults_on_failure=False # Avoid recursion in fallbacks
|
|
440
|
+
use_defaults_on_failure=False, # Avoid recursion in fallbacks
|
|
523
441
|
)
|
|
524
442
|
|
|
525
443
|
# Handle None values early
|
|
526
444
|
if value is None:
|
|
527
445
|
origin = get_origin(target_type)
|
|
528
446
|
args = get_args(target_type)
|
|
529
|
-
|
|
447
|
+
|
|
530
448
|
# Check if target type is Optional (Union with None)
|
|
531
449
|
if origin is Union and type(None) in args:
|
|
532
450
|
return None
|
|
533
|
-
|
|
451
|
+
|
|
534
452
|
# For non-optional types, use fallback
|
|
535
453
|
return _get_fallback_value("Cannot convert None to non-optional type")
|
|
536
454
|
|
|
@@ -541,43 +459,40 @@ def convert_value(
|
|
|
541
459
|
if origin is Union:
|
|
542
460
|
non_none = [a for a in args if a is not type(None)]
|
|
543
461
|
is_optional = type(None) in args
|
|
544
|
-
|
|
462
|
+
|
|
545
463
|
if value is None and is_optional:
|
|
546
464
|
return None
|
|
547
|
-
|
|
465
|
+
|
|
548
466
|
# Try each non-none type until one works
|
|
549
467
|
conversion_errors = []
|
|
550
|
-
|
|
551
|
-
for
|
|
468
|
+
|
|
469
|
+
for _i, t in enumerate(non_none):
|
|
552
470
|
try:
|
|
553
471
|
result = _safe_convert_recursive(value, t)
|
|
554
|
-
|
|
555
|
-
f"Union conversion succeeded with type {t} for value '{value}'")
|
|
472
|
+
logger.debug("Union conversion succeeded with type %s for value '%s'", t, value)
|
|
556
473
|
return result
|
|
557
474
|
except Exception as e:
|
|
558
475
|
conversion_errors.append((t, str(e)))
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
476
|
+
logger.debug("Union conversion failed for type %s: %s", t, e)
|
|
477
|
+
|
|
562
478
|
# All conversions failed
|
|
563
479
|
error_msg = f"Cannot convert '{value}' to any Union type {non_none}. Errors: {conversion_errors}"
|
|
564
|
-
|
|
480
|
+
logger.warning("%s", error_msg)
|
|
565
481
|
return _get_fallback_value(error_msg)
|
|
566
482
|
|
|
567
483
|
# Lists / Tuples - Enhanced error handling for individual items
|
|
568
|
-
if origin in (list,
|
|
484
|
+
if origin in (list, list):
|
|
569
485
|
item_t = args[0] if args else Any
|
|
570
486
|
try:
|
|
571
487
|
items = as_list(value)
|
|
572
488
|
result_items = []
|
|
573
|
-
|
|
489
|
+
|
|
574
490
|
for i, item in enumerate(items):
|
|
575
491
|
try:
|
|
576
492
|
converted_item = _safe_convert_recursive(item, item_t)
|
|
577
493
|
result_items.append(converted_item)
|
|
578
494
|
except Exception as e:
|
|
579
|
-
|
|
580
|
-
f"Failed to convert list item {i} '{item}' to {item_t}: {e}")
|
|
495
|
+
logger.warning("Failed to convert list item %d '%s' to %s: %s", i, item, item_t, e)
|
|
581
496
|
# Try to get default for item type
|
|
582
497
|
try:
|
|
583
498
|
default_item = get_type_default(item_t)
|
|
@@ -585,15 +500,15 @@ def convert_value(
|
|
|
585
500
|
except Exception:
|
|
586
501
|
# Skip item if we can't get a default
|
|
587
502
|
continue
|
|
588
|
-
|
|
503
|
+
|
|
589
504
|
return result_items
|
|
590
|
-
|
|
505
|
+
|
|
591
506
|
except Exception as e:
|
|
592
507
|
error_msg = f"Cannot convert '{value}' to list: {e}"
|
|
593
|
-
|
|
508
|
+
logger.warning("%s", error_msg)
|
|
594
509
|
return _get_fallback_value(error_msg)
|
|
595
510
|
|
|
596
|
-
if origin in (tuple,
|
|
511
|
+
if origin in (tuple, tuple):
|
|
597
512
|
try:
|
|
598
513
|
if not isinstance(value, (list, tuple)):
|
|
599
514
|
value = [value]
|
|
@@ -604,8 +519,7 @@ def convert_value(
|
|
|
604
519
|
try:
|
|
605
520
|
converted_items.append(_safe_convert_recursive(item, item_t))
|
|
606
521
|
except Exception as e:
|
|
607
|
-
|
|
608
|
-
f"Failed to convert tuple item '{item}': {e}")
|
|
522
|
+
logger.warning("Failed to convert tuple item '%s': %s", item, e)
|
|
609
523
|
converted_items.append(get_type_default(item_t))
|
|
610
524
|
return tuple(converted_items)
|
|
611
525
|
elif args:
|
|
@@ -616,25 +530,24 @@ def convert_value(
|
|
|
616
530
|
try:
|
|
617
531
|
converted_items.append(_safe_convert_recursive(v, t))
|
|
618
532
|
except Exception as e:
|
|
619
|
-
|
|
620
|
-
f"Failed to convert tuple item '{v}' to {t}: {e}")
|
|
533
|
+
logger.warning("Failed to convert tuple item '%s' to %s: %s", v, t, e)
|
|
621
534
|
converted_items.append(get_type_default(t))
|
|
622
535
|
return tuple(converted_items)
|
|
623
536
|
return tuple(value)
|
|
624
537
|
except Exception as e:
|
|
625
538
|
error_msg = f"Cannot convert '{value}' to tuple: {e}"
|
|
626
|
-
|
|
539
|
+
logger.warning("%s", error_msg)
|
|
627
540
|
return _get_fallback_value(error_msg)
|
|
628
541
|
|
|
629
542
|
# Dict - Enhanced error handling
|
|
630
|
-
if origin in (dict,
|
|
543
|
+
if origin in (dict, dict):
|
|
631
544
|
key_t = args[0] if args else str
|
|
632
545
|
val_t = args[1] if len(args) > 1 else Any
|
|
633
|
-
|
|
546
|
+
|
|
634
547
|
try:
|
|
635
548
|
if not isinstance(value, Mapping):
|
|
636
549
|
raise ValueError(f"Cannot convert non-mapping '{value}' to dict")
|
|
637
|
-
|
|
550
|
+
|
|
638
551
|
result_dict = {}
|
|
639
552
|
for k, v in value.items():
|
|
640
553
|
try:
|
|
@@ -642,20 +555,19 @@ def convert_value(
|
|
|
642
555
|
converted_val = _safe_convert_recursive(v, val_t)
|
|
643
556
|
result_dict[converted_key] = converted_val
|
|
644
557
|
except Exception as e:
|
|
645
|
-
|
|
646
|
-
f"Failed to convert dict item {k}:{v}: {e}")
|
|
558
|
+
logger.warning("Failed to convert dict item %s:%s: %s", k, v, e)
|
|
647
559
|
# Skip problematic items
|
|
648
560
|
continue
|
|
649
|
-
|
|
561
|
+
|
|
650
562
|
return result_dict
|
|
651
|
-
|
|
563
|
+
|
|
652
564
|
except Exception as e:
|
|
653
565
|
error_msg = f"Cannot convert '{value}' to dict: {e}"
|
|
654
|
-
|
|
566
|
+
logger.warning("%s", error_msg)
|
|
655
567
|
return _get_fallback_value(error_msg)
|
|
656
568
|
|
|
657
569
|
# Scalars with enhanced error handling
|
|
658
|
-
|
|
570
|
+
|
|
659
571
|
# Numbers - More robust handling
|
|
660
572
|
if target_type is int:
|
|
661
573
|
try:
|
|
@@ -663,21 +575,21 @@ def convert_value(
|
|
|
663
575
|
if isinstance(value, str):
|
|
664
576
|
s = value.strip()
|
|
665
577
|
if not s or s.lower() in ("", "null", "none", "n/a", "na"):
|
|
666
|
-
return _get_fallback_value(
|
|
667
|
-
|
|
578
|
+
return _get_fallback_value("Empty/null string cannot be converted to int")
|
|
579
|
+
|
|
668
580
|
if allow_shorthand:
|
|
669
581
|
parsed = parse_shorthand_number(value, as_decimal=False)
|
|
670
582
|
return int(parsed)
|
|
671
583
|
else:
|
|
672
584
|
return int(value)
|
|
673
|
-
|
|
585
|
+
|
|
674
586
|
except Exception as e:
|
|
675
587
|
# Try alternative parsing
|
|
676
588
|
try:
|
|
677
589
|
return int(parse_shorthand_number(value, as_decimal=False, allow_percent=False))
|
|
678
590
|
except Exception:
|
|
679
591
|
error_msg = f"Cannot convert '{value}' to int: {e}"
|
|
680
|
-
|
|
592
|
+
logger.warning("%s", error_msg)
|
|
681
593
|
return _get_fallback_value(error_msg)
|
|
682
594
|
|
|
683
595
|
if target_type is float:
|
|
@@ -686,17 +598,17 @@ def convert_value(
|
|
|
686
598
|
if isinstance(value, str):
|
|
687
599
|
s = value.strip()
|
|
688
600
|
if not s or s.lower() in ("", "null", "none", "n/a", "na"):
|
|
689
|
-
return _get_fallback_value(
|
|
690
|
-
|
|
601
|
+
return _get_fallback_value("Empty/null string cannot be converted to float")
|
|
602
|
+
|
|
691
603
|
if allow_shorthand:
|
|
692
604
|
parsed = parse_shorthand_number(value, as_decimal=False)
|
|
693
605
|
return float(parsed)
|
|
694
606
|
else:
|
|
695
607
|
return float(value)
|
|
696
|
-
|
|
608
|
+
|
|
697
609
|
except Exception as e:
|
|
698
610
|
error_msg = f"Cannot convert '{value}' to float: {e}"
|
|
699
|
-
|
|
611
|
+
logger.warning("%s", error_msg)
|
|
700
612
|
return _get_fallback_value(error_msg)
|
|
701
613
|
|
|
702
614
|
if target_type is Decimal:
|
|
@@ -705,17 +617,17 @@ def convert_value(
|
|
|
705
617
|
if isinstance(value, str):
|
|
706
618
|
s = value.strip()
|
|
707
619
|
if not s or s.lower() in ("", "null", "none", "n/a", "na"):
|
|
708
|
-
return _get_fallback_value(
|
|
709
|
-
|
|
620
|
+
return _get_fallback_value("Empty/null string cannot be converted to Decimal")
|
|
621
|
+
|
|
710
622
|
if allow_shorthand:
|
|
711
623
|
parsed = parse_shorthand_number(value, as_decimal=True)
|
|
712
624
|
return _to_decimal(parsed)
|
|
713
625
|
else:
|
|
714
626
|
return _to_decimal(value)
|
|
715
|
-
|
|
627
|
+
|
|
716
628
|
except Exception as e:
|
|
717
629
|
error_msg = f"Cannot convert '{value}' to Decimal: {e}"
|
|
718
|
-
|
|
630
|
+
logger.warning("%s", error_msg)
|
|
719
631
|
return _get_fallback_value(error_msg)
|
|
720
632
|
|
|
721
633
|
# Bool - Enhanced error handling
|
|
@@ -724,7 +636,7 @@ def convert_value(
|
|
|
724
636
|
return parse_boolean(value)
|
|
725
637
|
except Exception as e:
|
|
726
638
|
error_msg = f"Cannot convert '{value}' to bool: {e}"
|
|
727
|
-
|
|
639
|
+
logger.warning("%s", error_msg)
|
|
728
640
|
return _get_fallback_value(error_msg)
|
|
729
641
|
|
|
730
642
|
# Strings - More robust handling
|
|
@@ -735,7 +647,7 @@ def convert_value(
|
|
|
735
647
|
return str(value)
|
|
736
648
|
except Exception as e:
|
|
737
649
|
error_msg = f"Cannot convert '{value}' to str: {e}"
|
|
738
|
-
|
|
650
|
+
logger.warning("%s", error_msg)
|
|
739
651
|
return _get_fallback_value(error_msg)
|
|
740
652
|
|
|
741
653
|
# Datetime / Date / Time - Enhanced error handling
|
|
@@ -744,25 +656,25 @@ def convert_value(
|
|
|
744
656
|
return parse_datetime(value)
|
|
745
657
|
except Exception as e:
|
|
746
658
|
error_msg = f"Cannot convert '{value}' to datetime: {e}"
|
|
747
|
-
|
|
659
|
+
logger.warning("%s", error_msg)
|
|
748
660
|
return _get_fallback_value(error_msg)
|
|
749
|
-
|
|
661
|
+
|
|
750
662
|
if target_type is date:
|
|
751
663
|
try:
|
|
752
664
|
dt = parse_datetime(value)
|
|
753
665
|
return dt.date()
|
|
754
666
|
except Exception as e:
|
|
755
667
|
error_msg = f"Cannot convert '{value}' to date: {e}"
|
|
756
|
-
|
|
668
|
+
logger.warning("%s", error_msg)
|
|
757
669
|
return _get_fallback_value(error_msg)
|
|
758
|
-
|
|
670
|
+
|
|
759
671
|
if target_type is time:
|
|
760
672
|
try:
|
|
761
673
|
dt = parse_datetime(value)
|
|
762
674
|
return dt.time()
|
|
763
675
|
except Exception as e:
|
|
764
676
|
error_msg = f"Cannot convert '{value}' to time: {e}"
|
|
765
|
-
|
|
677
|
+
logger.warning("%s", error_msg)
|
|
766
678
|
return _get_fallback_value(error_msg)
|
|
767
679
|
|
|
768
680
|
# UUID - Enhanced error handling
|
|
@@ -773,7 +685,7 @@ def convert_value(
|
|
|
773
685
|
return uuid.UUID(str(value))
|
|
774
686
|
except Exception as e:
|
|
775
687
|
error_msg = f"Cannot convert '{value}' to UUID: {e}"
|
|
776
|
-
|
|
688
|
+
logger.warning("%s", error_msg)
|
|
777
689
|
return _get_fallback_value(error_msg)
|
|
778
690
|
|
|
779
691
|
# Pydantic models - Enhanced error handling
|
|
@@ -787,7 +699,7 @@ def convert_value(
|
|
|
787
699
|
raise ValueError(f"Cannot convert non-mapping '{value}' to {target_type.__name__}")
|
|
788
700
|
except Exception as e:
|
|
789
701
|
error_msg = f"Cannot convert '{value}' to {target_type.__name__}: {e}"
|
|
790
|
-
|
|
702
|
+
logger.warning("%s", error_msg)
|
|
791
703
|
return _get_fallback_value(error_msg)
|
|
792
704
|
|
|
793
705
|
# Fallback: direct cast if possible
|
|
@@ -795,7 +707,7 @@ def convert_value(
|
|
|
795
707
|
return target_type(value) # type: ignore[call-arg]
|
|
796
708
|
except Exception as e:
|
|
797
709
|
error_msg = f"Cannot convert '{value}' to {getattr(target_type, '__name__', target_type)}: {e}"
|
|
798
|
-
|
|
710
|
+
logger.warning("%s", error_msg)
|
|
799
711
|
return _get_fallback_value(error_msg)
|
|
800
712
|
|
|
801
713
|
|
|
@@ -803,17 +715,17 @@ def convert_value(
|
|
|
803
715
|
# Extraction
|
|
804
716
|
# ---------------------------------------------------------------------------
|
|
805
717
|
|
|
718
|
+
|
|
806
719
|
def extract_fields(
|
|
807
|
-
model_cls:
|
|
808
|
-
data:
|
|
809
|
-
fields:
|
|
720
|
+
model_cls: type[BaseModel],
|
|
721
|
+
data: dict[str, Any],
|
|
722
|
+
fields: list[str] | None = None,
|
|
810
723
|
*,
|
|
811
724
|
strict: bool = True,
|
|
812
725
|
missing: str = "skip", # "skip" | "none" | "error"
|
|
813
|
-
|
|
814
|
-
field_definitions: Optional[Dict[str, Any]] = None,
|
|
726
|
+
field_definitions: dict[str, Any] | None = None,
|
|
815
727
|
use_defaults_on_failure: bool = True,
|
|
816
|
-
) ->
|
|
728
|
+
) -> dict[str, Any]:
|
|
817
729
|
"""
|
|
818
730
|
Extract and convert only specified fields based on a Pydantic model.
|
|
819
731
|
|
|
@@ -826,7 +738,6 @@ def extract_fields(
|
|
|
826
738
|
- "skip": drop it
|
|
827
739
|
- "none": include with None
|
|
828
740
|
- "error": raise KeyError
|
|
829
|
-
level: LogLevel for internal debug logs (uses log_debug).
|
|
830
741
|
field_definitions: Optional field definitions for default values and conversion hints.
|
|
831
742
|
use_defaults_on_failure: Whether to use default values when conversion fails.
|
|
832
743
|
|
|
@@ -845,7 +756,7 @@ def extract_fields(
|
|
|
845
756
|
if strict and invalid:
|
|
846
757
|
raise KeyError(f"Fields not found in model: {', '.join(sorted(invalid))}")
|
|
847
758
|
|
|
848
|
-
result:
|
|
759
|
+
result: dict[str, Any] = {}
|
|
849
760
|
|
|
850
761
|
for fname in fields:
|
|
851
762
|
if fname not in valid_fields:
|
|
@@ -857,11 +768,11 @@ def extract_fields(
|
|
|
857
768
|
|
|
858
769
|
if source_key not in data:
|
|
859
770
|
if missing == "skip":
|
|
860
|
-
|
|
771
|
+
logger.debug("Skipping missing field: %s", fname)
|
|
861
772
|
continue
|
|
862
773
|
if missing == "none":
|
|
863
774
|
result[fname] = None
|
|
864
|
-
|
|
775
|
+
logger.debug("Missing field set to None: %s", fname)
|
|
865
776
|
continue
|
|
866
777
|
raise KeyError(f"Missing required field in data: {source_key}")
|
|
867
778
|
|
|
@@ -873,31 +784,23 @@ def extract_fields(
|
|
|
873
784
|
allow_shorthand=True,
|
|
874
785
|
field_name=fname,
|
|
875
786
|
field_definitions=field_definitions,
|
|
876
|
-
use_defaults_on_failure=use_defaults_on_failure
|
|
787
|
+
use_defaults_on_failure=use_defaults_on_failure,
|
|
877
788
|
)
|
|
878
789
|
result[fname] = converted
|
|
879
|
-
|
|
790
|
+
logger.debug("Converted field %s: %s -> %s", fname, raw, converted)
|
|
880
791
|
except Exception as e:
|
|
881
792
|
# If we're not using defaults, re-raise the original error
|
|
882
793
|
if not use_defaults_on_failure:
|
|
883
794
|
raise ValueError(f"Validation failed for field '{fname}': {e}") from e
|
|
884
|
-
|
|
795
|
+
|
|
885
796
|
# Try to get a fallback value using the field info
|
|
886
797
|
try:
|
|
887
798
|
fallback_value = get_field_default(fname, finfo, field_definitions)
|
|
888
799
|
result[fname] = fallback_value
|
|
889
|
-
|
|
890
|
-
"field": fname,
|
|
891
|
-
"error": str(e),
|
|
892
|
-
"fallback": fallback_value
|
|
893
|
-
})
|
|
800
|
+
logger.warning("Field %s conversion error: %s, using fallback: %s", fname, e, fallback_value)
|
|
894
801
|
except Exception as fallback_error:
|
|
895
802
|
# If even fallback fails, re-raise original error
|
|
896
|
-
|
|
897
|
-
"field": fname,
|
|
898
|
-
"conversion_error": str(e),
|
|
899
|
-
"fallback_error": str(fallback_error)
|
|
900
|
-
})
|
|
803
|
+
logger.error("Field %s conversion error: %s, fallback error: %s", fname, e, fallback_error)
|
|
901
804
|
raise ValueError(f"Validation failed for field '{fname}': {e}") from e
|
|
902
805
|
|
|
903
806
|
return result
|
|
@@ -907,7 +810,8 @@ def extract_fields(
|
|
|
907
810
|
# Field Definitions
|
|
908
811
|
# ---------------------------------------------------------------------------
|
|
909
812
|
|
|
910
|
-
|
|
813
|
+
|
|
814
|
+
def load_field_definitions(path: str) -> dict[str, Any]:
|
|
911
815
|
"""
|
|
912
816
|
Load field definitions from a JSON or YAML file.
|
|
913
817
|
|
|
@@ -921,27 +825,29 @@ def load_field_definitions(path: str) -> Dict[str, Any]:
|
|
|
921
825
|
ValueError: If file format is not supported or content is invalid
|
|
922
826
|
FileNotFoundError: If the file doesn't exist
|
|
923
827
|
"""
|
|
924
|
-
import yaml
|
|
925
828
|
from pathlib import Path
|
|
926
829
|
|
|
830
|
+
import yaml
|
|
831
|
+
|
|
927
832
|
path_obj = Path(path)
|
|
928
833
|
if not path_obj.exists():
|
|
929
834
|
raise FileNotFoundError(f"Field definitions file not found: {path}")
|
|
930
835
|
|
|
931
836
|
suffix = path_obj.suffix.lower()
|
|
932
|
-
content = path_obj.read_text(encoding=
|
|
837
|
+
content = path_obj.read_text(encoding="utf-8")
|
|
933
838
|
|
|
934
839
|
try:
|
|
935
|
-
if suffix ==
|
|
840
|
+
if suffix == ".json":
|
|
936
841
|
return json.loads(content)
|
|
937
|
-
elif suffix in (
|
|
842
|
+
elif suffix in (".yaml", ".yml"):
|
|
938
843
|
return yaml.safe_load(content)
|
|
939
844
|
else:
|
|
940
845
|
raise ValueError(f"Unsupported file format: {suffix}")
|
|
941
846
|
except Exception as e:
|
|
942
847
|
raise ValueError(f"Failed to parse field definitions: {e}") from e
|
|
943
848
|
|
|
944
|
-
|
|
849
|
+
|
|
850
|
+
def validate_field_definition(definition: dict[str, Any]) -> bool:
|
|
945
851
|
"""
|
|
946
852
|
Validate a field definition structure.
|
|
947
853
|
|
|
@@ -958,43 +864,42 @@ def validate_field_definition(definition: Dict[str, Any]) -> bool:
|
|
|
958
864
|
- default: Default value
|
|
959
865
|
- nullable: Whether field can be None
|
|
960
866
|
"""
|
|
961
|
-
required_keys = {
|
|
962
|
-
|
|
867
|
+
required_keys = {"type", "description", "instructions", "default", "nullable"}
|
|
868
|
+
|
|
963
869
|
# Check for required keys
|
|
964
870
|
if not all(key in definition for key in required_keys):
|
|
965
871
|
return False
|
|
966
|
-
|
|
872
|
+
|
|
967
873
|
# Validate type
|
|
968
|
-
if not isinstance(definition[
|
|
874
|
+
if not isinstance(definition["type"], (type, str)):
|
|
969
875
|
return False
|
|
970
|
-
|
|
876
|
+
|
|
971
877
|
# Validate description and instructions are strings
|
|
972
|
-
if not all(isinstance(definition[k], str) for k in [
|
|
878
|
+
if not all(isinstance(definition[k], str) for k in ["description", "instructions"]):
|
|
973
879
|
return False
|
|
974
|
-
|
|
880
|
+
|
|
975
881
|
# Validate nullable is boolean
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
return True
|
|
882
|
+
return isinstance(definition["nullable"], bool)
|
|
883
|
+
|
|
980
884
|
|
|
981
885
|
# ---------------------------------------------------------------------------
|
|
982
886
|
# Default Value Handling
|
|
983
887
|
# ---------------------------------------------------------------------------
|
|
984
888
|
|
|
985
|
-
|
|
889
|
+
|
|
890
|
+
def get_type_default(field_type: type[Any]) -> Any:
|
|
986
891
|
"""
|
|
987
892
|
Get a sensible default value for a given type.
|
|
988
|
-
|
|
893
|
+
|
|
989
894
|
Args:
|
|
990
895
|
field_type: The type to get a default for
|
|
991
|
-
|
|
896
|
+
|
|
992
897
|
Returns:
|
|
993
898
|
A default value appropriate for the type
|
|
994
899
|
"""
|
|
995
900
|
origin = get_origin(field_type)
|
|
996
901
|
args = get_args(field_type)
|
|
997
|
-
|
|
902
|
+
|
|
998
903
|
# Handle Optional/Union types
|
|
999
904
|
if origin is Union:
|
|
1000
905
|
non_none = [a for a in args if a is not type(None)]
|
|
@@ -1003,15 +908,15 @@ def get_type_default(field_type: Type[Any]) -> Any:
|
|
|
1003
908
|
return get_type_default(non_none[0])
|
|
1004
909
|
# Multiple non-None types -> return None
|
|
1005
910
|
return None
|
|
1006
|
-
|
|
911
|
+
|
|
1007
912
|
# Container types
|
|
1008
|
-
if origin in (list,
|
|
913
|
+
if origin in (list, list) or field_type is list:
|
|
1009
914
|
return []
|
|
1010
|
-
if origin in (tuple,
|
|
915
|
+
if origin in (tuple, tuple) or field_type is tuple:
|
|
1011
916
|
return ()
|
|
1012
|
-
if origin in (dict,
|
|
917
|
+
if origin in (dict, dict) or field_type is dict:
|
|
1013
918
|
return {}
|
|
1014
|
-
|
|
919
|
+
|
|
1015
920
|
# Scalar types
|
|
1016
921
|
if field_type is int:
|
|
1017
922
|
return 0
|
|
@@ -1029,53 +934,55 @@ def get_type_default(field_type: Type[Any]) -> Any:
|
|
|
1029
934
|
return time(0, 0)
|
|
1030
935
|
if field_type is uuid.UUID:
|
|
1031
936
|
return uuid.uuid4()
|
|
1032
|
-
|
|
937
|
+
|
|
1033
938
|
# Pydantic models - try to create empty instance
|
|
1034
939
|
if isinstance(field_type, type) and issubclass(field_type, BaseModel):
|
|
1035
940
|
try:
|
|
1036
941
|
return field_type()
|
|
1037
942
|
except Exception:
|
|
1038
943
|
return None
|
|
1039
|
-
|
|
944
|
+
|
|
1040
945
|
# Fallback
|
|
1041
946
|
return None
|
|
1042
947
|
|
|
1043
948
|
|
|
1044
|
-
def get_field_default(field_name: str, field_info: Any, field_definitions:
|
|
949
|
+
def get_field_default(field_name: str, field_info: Any, field_definitions: dict[str, Any] | None = None) -> Any:
|
|
1045
950
|
"""
|
|
1046
951
|
Get the default value for a field using the priority order:
|
|
1047
952
|
1. field_definitions default
|
|
1048
953
|
2. Pydantic field default
|
|
1049
954
|
3. Type-appropriate default
|
|
1050
|
-
|
|
955
|
+
|
|
1051
956
|
Args:
|
|
1052
957
|
field_name: Name of the field
|
|
1053
958
|
field_info: Pydantic field info
|
|
1054
959
|
field_definitions: Optional field definitions dict
|
|
1055
|
-
|
|
960
|
+
|
|
1056
961
|
Returns:
|
|
1057
962
|
The appropriate default value
|
|
1058
963
|
"""
|
|
1059
964
|
# Priority 1: field_definitions
|
|
1060
965
|
if field_definitions and field_name in field_definitions:
|
|
1061
966
|
field_def = field_definitions[field_name]
|
|
1062
|
-
if isinstance(field_def, dict) and
|
|
1063
|
-
return field_def[
|
|
1064
|
-
|
|
967
|
+
if isinstance(field_def, dict) and "default" in field_def:
|
|
968
|
+
return field_def["default"]
|
|
969
|
+
|
|
1065
970
|
# Priority 2: Pydantic default - check for PydanticUndefined
|
|
1066
|
-
if hasattr(field_info,
|
|
971
|
+
if hasattr(field_info, "default"):
|
|
1067
972
|
default_val = field_info.default
|
|
1068
973
|
# Handle PydanticUndefined (newer Pydantic) and Ellipsis (older Pydantic)
|
|
1069
|
-
if default_val is not ... and str(default_val) !=
|
|
974
|
+
if default_val is not ... and str(default_val) != "PydanticUndefined":
|
|
1070
975
|
return default_val
|
|
1071
|
-
|
|
976
|
+
|
|
1072
977
|
# Priority 3: Type default
|
|
1073
978
|
return get_type_default(field_info.annotation)
|
|
1074
979
|
|
|
980
|
+
|
|
1075
981
|
# ---------------------------------------------------------------------------
|
|
1076
982
|
# JSON text cleaning
|
|
1077
983
|
# ---------------------------------------------------------------------------
|
|
1078
984
|
|
|
985
|
+
|
|
1079
986
|
def strip_think_tags(text: str) -> str:
|
|
1080
987
|
"""Remove <think>...</think> sections that some models emit."""
|
|
1081
988
|
if not text:
|
|
@@ -1106,7 +1013,7 @@ def clean_json_text(text: str) -> str:
|
|
|
1106
1013
|
if start_content != -1:
|
|
1107
1014
|
end_fence = text.find("```", start_content)
|
|
1108
1015
|
if end_fence != -1:
|
|
1109
|
-
return text[start_content + 1:end_fence].strip()
|
|
1016
|
+
return text[start_content + 1 : end_fence].strip()
|
|
1110
1017
|
else:
|
|
1111
1018
|
return text[start_content + 1 :].strip()
|
|
1112
1019
|
|