prompture 0.0.29.dev8__py3-none-any.whl → 0.0.35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. prompture/__init__.py +146 -23
  2. prompture/_version.py +34 -0
  3. prompture/aio/__init__.py +74 -0
  4. prompture/async_conversation.py +607 -0
  5. prompture/async_core.py +803 -0
  6. prompture/async_driver.py +169 -0
  7. prompture/cache.py +469 -0
  8. prompture/callbacks.py +55 -0
  9. prompture/cli.py +63 -4
  10. prompture/conversation.py +631 -0
  11. prompture/core.py +876 -263
  12. prompture/cost_mixin.py +51 -0
  13. prompture/discovery.py +164 -0
  14. prompture/driver.py +168 -5
  15. prompture/drivers/__init__.py +173 -69
  16. prompture/drivers/airllm_driver.py +109 -0
  17. prompture/drivers/async_airllm_driver.py +26 -0
  18. prompture/drivers/async_azure_driver.py +117 -0
  19. prompture/drivers/async_claude_driver.py +107 -0
  20. prompture/drivers/async_google_driver.py +132 -0
  21. prompture/drivers/async_grok_driver.py +91 -0
  22. prompture/drivers/async_groq_driver.py +84 -0
  23. prompture/drivers/async_hugging_driver.py +61 -0
  24. prompture/drivers/async_lmstudio_driver.py +79 -0
  25. prompture/drivers/async_local_http_driver.py +44 -0
  26. prompture/drivers/async_ollama_driver.py +125 -0
  27. prompture/drivers/async_openai_driver.py +96 -0
  28. prompture/drivers/async_openrouter_driver.py +96 -0
  29. prompture/drivers/async_registry.py +129 -0
  30. prompture/drivers/azure_driver.py +36 -9
  31. prompture/drivers/claude_driver.py +251 -34
  32. prompture/drivers/google_driver.py +107 -38
  33. prompture/drivers/grok_driver.py +29 -32
  34. prompture/drivers/groq_driver.py +27 -26
  35. prompture/drivers/hugging_driver.py +6 -6
  36. prompture/drivers/lmstudio_driver.py +26 -13
  37. prompture/drivers/local_http_driver.py +6 -6
  38. prompture/drivers/ollama_driver.py +157 -23
  39. prompture/drivers/openai_driver.py +178 -9
  40. prompture/drivers/openrouter_driver.py +31 -25
  41. prompture/drivers/registry.py +306 -0
  42. prompture/field_definitions.py +106 -96
  43. prompture/logging.py +80 -0
  44. prompture/model_rates.py +217 -0
  45. prompture/runner.py +49 -47
  46. prompture/scaffold/__init__.py +1 -0
  47. prompture/scaffold/generator.py +84 -0
  48. prompture/scaffold/templates/Dockerfile.j2 +12 -0
  49. prompture/scaffold/templates/README.md.j2 +41 -0
  50. prompture/scaffold/templates/config.py.j2 +21 -0
  51. prompture/scaffold/templates/env.example.j2 +8 -0
  52. prompture/scaffold/templates/main.py.j2 +86 -0
  53. prompture/scaffold/templates/models.py.j2 +40 -0
  54. prompture/scaffold/templates/requirements.txt.j2 +5 -0
  55. prompture/server.py +183 -0
  56. prompture/session.py +117 -0
  57. prompture/settings.py +18 -1
  58. prompture/tools.py +219 -267
  59. prompture/tools_schema.py +254 -0
  60. prompture/validator.py +3 -3
  61. {prompture-0.0.29.dev8.dist-info → prompture-0.0.35.dist-info}/METADATA +117 -21
  62. prompture-0.0.35.dist-info/RECORD +66 -0
  63. {prompture-0.0.29.dev8.dist-info → prompture-0.0.35.dist-info}/WHEEL +1 -1
  64. prompture-0.0.29.dev8.dist-info/RECORD +0 -27
  65. {prompture-0.0.29.dev8.dist-info → prompture-0.0.35.dist-info}/entry_points.txt +0 -0
  66. {prompture-0.0.29.dev8.dist-info → prompture-0.0.35.dist-info}/licenses/LICENSE +0 -0
  67. {prompture-0.0.29.dev8.dist-info → prompture-0.0.35.dist-info}/top_level.txt +0 -0
--- /dev/null
+++ b/prompture/conversation.py
@@ -0,0 +1,631 @@
+ """Stateful multi-turn conversation support for Prompture."""
+
+ from __future__ import annotations
+
+ import json
+ import logging
+ from collections.abc import Iterator
+ from datetime import date, datetime
+ from decimal import Decimal
+ from typing import Any, Callable, Literal, Union
+
+ from pydantic import BaseModel
+
+ from .callbacks import DriverCallbacks
+ from .driver import Driver
+ from .drivers import get_driver_for_model
+ from .field_definitions import get_registry_snapshot
+ from .tools import (
+     clean_json_text,
+     convert_value,
+     get_field_default,
+ )
+ from .tools_schema import ToolRegistry
+
+ logger = logging.getLogger("prompture.conversation")
+
+
+ class Conversation:
+     """Stateful multi-turn conversation with an LLM.
+
+     Maintains a message history across calls so the model can reference
+     previous turns. Works with any Prompture driver.
+
+     Example::
+
+         conv = Conversation("openai/gpt-4", system_prompt="You are a data extractor")
+         r1 = conv.ask_for_json("Extract names from: John, age 30", name_schema)
+         r2 = conv.ask_for_json("Now extract ages", age_schema)  # sees turn 1
+     """
+
+     def __init__(
+         self,
+         model_name: str | None = None,
+         *,
+         driver: Driver | None = None,
+         system_prompt: str | None = None,
+         options: dict[str, Any] | None = None,
+         callbacks: DriverCallbacks | None = None,
+         tools: ToolRegistry | None = None,
+         max_tool_rounds: int = 10,
+     ) -> None:
+         if model_name is None and driver is None:
+             raise ValueError("Either model_name or driver must be provided")
+
+         if driver is not None:
+             self._driver = driver
+         else:
+             self._driver = get_driver_for_model(model_name)
+
+         if callbacks is not None:
+             self._driver.callbacks = callbacks
+
+         self._model_name = model_name or ""
+         self._system_prompt = system_prompt
+         self._options = dict(options) if options else {}
+         self._messages: list[dict[str, Any]] = []
+         self._usage = {
+             "prompt_tokens": 0,
+             "completion_tokens": 0,
+             "total_tokens": 0,
+             "cost": 0.0,
+             "turns": 0,
+         }
+         self._tools = tools or ToolRegistry()
+         self._max_tool_rounds = max_tool_rounds
+
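A minimal construction sketch for the initializer above; the model id matches the class docstring example, and the options shown are illustrative, not package defaults::

    from prompture.conversation import Conversation

    # The driver is resolved from the "provider/model" string via get_driver_for_model.
    conv = Conversation(
        "openai/gpt-4",
        system_prompt="You are a data extractor",
        options={"temperature": 0},  # merged into every turn's request options
        max_tool_rounds=5,
    )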
+     # ------------------------------------------------------------------
+     # Public helpers
+     # ------------------------------------------------------------------
+
+     @property
+     def messages(self) -> list[dict[str, Any]]:
+         """Read-only view of the conversation history."""
+         return list(self._messages)
+
+     @property
+     def usage(self) -> dict[str, Any]:
+         """Accumulated token/cost totals across all turns."""
+         return dict(self._usage)
+
+     def clear(self) -> None:
+         """Reset message history (keeps system_prompt and driver)."""
+         self._messages.clear()
+
+     def add_context(self, role: str, content: str) -> None:
+         """Seed the history with a user or assistant message."""
+         if role not in ("user", "assistant"):
+             raise ValueError("role must be 'user' or 'assistant'")
+         self._messages.append({"role": role, "content": content})
+
+     def register_tool(
+         self,
+         fn: Callable[..., Any],
+         *,
+         name: str | None = None,
+         description: str | None = None,
+     ) -> None:
+         """Register a Python function as a tool the LLM can call."""
+         self._tools.register(fn, name=name, description=description)
+
+     def usage_summary(self) -> str:
+         """Human-readable summary of accumulated usage."""
+         u = self._usage
+         return f"Conversation: {u['total_tokens']:,} tokens across {u['turns']} turn(s) costing ${u['cost']:.4f}"
+
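A short sketch of the helpers above; get_weather is a hypothetical tool used only for illustration::

    def get_weather(city: str) -> str:
        """Hypothetical tool: any plain function can be registered."""
        return f"Sunny in {city}"

    conv.register_tool(get_weather, description="Look up current weather")

    # History can be seeded before the first model call.
    conv.add_context("user", "We are discussing European capitals.")
    print(conv.usage_summary())  # Conversation: 0 tokens across 0 turn(s) costing $0.0000
    conv.clear()  # drops history, keeps system_prompt and driver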
+     # ------------------------------------------------------------------
+     # Core methods
+     # ------------------------------------------------------------------
+
+     def _build_messages(self, user_content: str) -> list[dict[str, Any]]:
+         """Build the full messages array for an API call."""
+         msgs: list[dict[str, Any]] = []
+         if self._system_prompt:
+             msgs.append({"role": "system", "content": self._system_prompt})
+         msgs.extend(self._messages)
+         msgs.append({"role": "user", "content": user_content})
+         return msgs
+
+     def _accumulate_usage(self, meta: dict[str, Any]) -> None:
+         self._usage["prompt_tokens"] += meta.get("prompt_tokens", 0)
+         self._usage["completion_tokens"] += meta.get("completion_tokens", 0)
+         self._usage["total_tokens"] += meta.get("total_tokens", 0)
+         self._usage["cost"] += meta.get("cost", 0.0)
+         self._usage["turns"] += 1
+
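For reference, _accumulate_usage expects per-turn driver metadata of roughly this shape; every key is optional and the numbers below are invented::

    meta = {
        "prompt_tokens": 120,
        "completion_tokens": 45,
        "total_tokens": 165,
        "cost": 0.0003,
    }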
+     def ask(
+         self,
+         content: str,
+         options: dict[str, Any] | None = None,
+     ) -> str:
+         """Send a message and get a raw text response.
+
+         Appends the user message and assistant response to history.
+         If tools are registered and the driver supports tool use,
+         dispatches to the tool execution loop.
+         """
+         if self._tools and getattr(self._driver, "supports_tool_use", False):
+             return self._ask_with_tools(content, options)
+
+         merged = {**self._options, **(options or {})}
+         messages = self._build_messages(content)
+         resp = self._driver.generate_messages_with_hooks(messages, merged)
+
+         text = resp.get("text", "")
+         meta = resp.get("meta", {})
+
+         # Record in history
+         self._messages.append({"role": "user", "content": content})
+         self._messages.append({"role": "assistant", "content": text})
+         self._accumulate_usage(meta)
+
+         return text
+
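A two-turn sketch of ask, assuming no tools are registered (prompts are illustrative)::

    r1 = conv.ask("Name three European capitals.")
    r2 = conv.ask("Which of those is northernmost?")  # sees turn 1 via history

    # Each turn appends a user/assistant pair to the history.
    assert conv.messages[-1] == {"role": "assistant", "content": r2}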
+     def _ask_with_tools(
+         self,
+         content: str,
+         options: dict[str, Any] | None = None,
+     ) -> str:
+         """Execute the tool-use loop: send -> check tool_calls -> execute -> re-send."""
+         merged = {**self._options, **(options or {})}
+         tool_defs = self._tools.to_openai_format()
+
+         # Build messages including user content
+         self._messages.append({"role": "user", "content": content})
+         msgs = self._build_messages_raw()
+
+         for _round in range(self._max_tool_rounds):
+             resp = self._driver.generate_messages_with_tools(msgs, tool_defs, merged)
+
+             meta = resp.get("meta", {})
+             self._accumulate_usage(meta)
+
+             tool_calls = resp.get("tool_calls", [])
+             text = resp.get("text", "")
+
+             if not tool_calls:
+                 # No tool calls -> final response
+                 self._messages.append({"role": "assistant", "content": text})
+                 return text
+
+             # Record assistant message with tool_calls
+             assistant_msg: dict[str, Any] = {"role": "assistant", "content": text}
+             assistant_msg["tool_calls"] = [
+                 {
+                     "id": tc["id"],
+                     "type": "function",
+                     "function": {"name": tc["name"], "arguments": json.dumps(tc["arguments"])},
+                 }
+                 for tc in tool_calls
+             ]
+             self._messages.append(assistant_msg)
+             msgs.append(assistant_msg)
+
+             # Execute each tool call and append results
+             for tc in tool_calls:
+                 try:
+                     result = self._tools.execute(tc["name"], tc["arguments"])
+                     result_str = json.dumps(result) if not isinstance(result, str) else result
+                 except Exception as exc:
+                     result_str = f"Error: {exc}"
+
+                 tool_result_msg: dict[str, Any] = {
+                     "role": "tool",
+                     "tool_call_id": tc["id"],
+                     "content": result_str,
+                 }
+                 self._messages.append(tool_result_msg)
+                 msgs.append(tool_result_msg)
+
+         raise RuntimeError(f"Tool execution loop exceeded {self._max_tool_rounds} rounds")
+
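After one round of the loop above, the history holds an assistant message and a matching tool result shaped like this (id, name, and arguments invented)::

    {
        "role": "assistant",
        "content": "",
        "tool_calls": [
            {
                "id": "call_0",
                "type": "function",
                "function": {"name": "get_weather", "arguments": '{"city": "Oslo"}'},
            }
        ],
    }
    {"role": "tool", "tool_call_id": "call_0", "content": "Sunny in Oslo"}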
+     def _build_messages_raw(self) -> list[dict[str, Any]]:
+         """Build messages array from system prompt + full history (including tool messages)."""
+         msgs: list[dict[str, Any]] = []
+         if self._system_prompt:
+             msgs.append({"role": "system", "content": self._system_prompt})
+         msgs.extend(self._messages)
+         return msgs
+
+     # ------------------------------------------------------------------
+     # Streaming
+     # ------------------------------------------------------------------
+
+     def ask_stream(
+         self,
+         content: str,
+         options: dict[str, Any] | None = None,
+     ) -> Iterator[str]:
+         """Send a message and yield text chunks as they arrive.
+
+         Falls back to non-streaming :meth:`ask` if the driver doesn't
+         support streaming. After iteration completes, the full response
+         is recorded in history.
+         """
+         if not getattr(self._driver, "supports_streaming", False):
+             yield self.ask(content, options)
+             return
+
+         merged = {**self._options, **(options or {})}
+         messages = self._build_messages(content)
+
+         self._messages.append({"role": "user", "content": content})
+
+         full_text = ""
+         for chunk in self._driver.generate_messages_stream(messages, merged):
+             if chunk["type"] == "delta":
+                 full_text += chunk["text"]
+                 # Fire stream delta callback
+                 self._driver._fire_callback(
+                     "on_stream_delta",
+                     {"text": chunk["text"], "driver": getattr(self._driver, "model", self._driver.__class__.__name__)},
+                 )
+                 yield chunk["text"]
+             elif chunk["type"] == "done":
+                 meta = chunk.get("meta", {})
+                 self._accumulate_usage(meta)
+
+         self._messages.append({"role": "assistant", "content": full_text})
+
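A streaming sketch; when the driver lacks supports_streaming this silently degrades to a single chunk produced by ask::

    for chunk in conv.ask_stream("Summarize our discussion so far."):
        print(chunk, end="", flush=True)
    print()
    # The full text is now the last history entry; usage came from the "done" chunk.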
+     def ask_for_json(
+         self,
+         content: str,
+         json_schema: dict[str, Any],
+         *,
+         ai_cleanup: bool = True,
+         options: dict[str, Any] | None = None,
+         output_format: Literal["json", "toon"] = "json",
+         json_mode: Literal["auto", "on", "off"] = "auto",
+     ) -> dict[str, Any]:
+         """Send a message with schema enforcement and get structured JSON back.
+
+         The schema instructions are appended to the prompt but only the
+         original *content* is stored in conversation history to keep
+         context clean for subsequent turns.
+         """
+
+         merged = {**self._options, **(options or {})}
+
+         # Render the schema as an indented string so it can be embedded
+         # in the prompt instructions built below
+         schema_string = json.dumps(json_schema, indent=2)
+
+         # Determine JSON mode
+         use_json_mode = False
+         if json_mode == "on":
+             use_json_mode = True
+         elif json_mode == "auto":
+             use_json_mode = getattr(self._driver, "supports_json_mode", False)
+
+         if use_json_mode:
+             merged = {**merged, "json_mode": True}
+             if getattr(self._driver, "supports_json_schema", False):
+                 merged["json_schema"] = json_schema
+
+         # Build instruction based on JSON mode
+         if use_json_mode and getattr(self._driver, "supports_json_schema", False):
+             instruct = "Extract data matching the requested schema.\nIf a value is unknown use null."
+         elif use_json_mode:
+             instruct = (
+                 "Return a JSON object that validates against this schema:\n"
+                 f"{schema_string}\n\n"
+                 "If a value is unknown use null."
+             )
+         else:
+             instruct = (
+                 "Return only a single JSON object (no markdown, no extra text) that validates against this JSON schema:\n"
+                 f"{schema_string}\n\n"
+                 "If a value is unknown use null. Use double quotes for keys and strings."
+             )
+
+         full_user_content = f"{content}\n\n{instruct}"
+
+         messages = self._build_messages(full_user_content)
+         resp = self._driver.generate_messages_with_hooks(messages, merged)
+
+         text = resp.get("text", "")
+         meta = resp.get("meta", {})
+
+         # Store original content (without schema boilerplate) for cleaner context
+         self._messages.append({"role": "user", "content": content})
+
+         # Parse JSON
+         cleaned = clean_json_text(text)
+         try:
+             json_obj = json.loads(cleaned)
+         except json.JSONDecodeError:
+             if ai_cleanup:
+                 from .core import clean_json_text_with_ai
+
+                 cleaned = clean_json_text_with_ai(self._driver, cleaned, self._model_name, merged)
+                 json_obj = json.loads(cleaned)
+             else:
+                 raise
+
+         # Store assistant response in history
+         self._messages.append({"role": "assistant", "content": cleaned})
+         self._accumulate_usage(meta)
+
+         model_name = self._model_name
+         if "/" in model_name:
+             model_name = model_name.split("/", 1)[1]
+
+         usage = {
+             **meta,
+             "raw_response": resp,
+             "model_name": model_name or getattr(self._driver, "model", ""),
+         }
+
+         result: dict[str, Any] = {
+             "json_string": cleaned,
+             "json_object": json_obj,
+             "usage": usage,
+             "output_format": output_format,
+         }
+
+         if output_format == "toon":
+             try:
+                 import toon
+
+                 result["toon_string"] = toon.encode(json_obj)
+             except ImportError:
+                 raise RuntimeError("TOON requested but 'python-toon' is not installed.") from None
+
+         return result
+
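A sketch of ask_for_json, echoing the class docstring example; the schema and the expected output are illustrative::

    schema = {
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "age": {"type": ["integer", "null"]},
        },
    }
    r = conv.ask_for_json("Extract names from: John, age 30", schema)
    r["json_object"]    # e.g. {"name": "John", "age": 30}
    r["usage"]["cost"]  # per-turn cost taken from the driver meta

    # Only the bare content (no schema boilerplate) went into history,
    # so later turns keep a clean context.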
+     def extract_with_model(
+         self,
+         model_cls: type[BaseModel],
+         text: str,
+         *,
+         instruction_template: str = "Extract information from the following text:",
+         ai_cleanup: bool = True,
+         output_format: Literal["json", "toon"] = "json",
+         options: dict[str, Any] | None = None,
+         json_mode: Literal["auto", "on", "off"] = "auto",
+     ) -> dict[str, Any]:
+         """Extract structured information into a Pydantic model with conversation context."""
+         from .core import normalize_field_value
+
+         schema = model_cls.model_json_schema()
+         content_prompt = f"{instruction_template} {text}"
+
+         result = self.ask_for_json(
+             content=content_prompt,
+             json_schema=schema,
+             ai_cleanup=ai_cleanup,
+             options=options,
+             output_format=output_format,
+             json_mode=json_mode,
+         )
+
+         # Normalize field values
+         json_object = result["json_object"]
+         schema_properties = schema.get("properties", {})
+
+         for field_name, field_info in model_cls.model_fields.items():
+             if field_name in json_object and field_name in schema_properties:
+                 field_def = {
+                     "nullable": not schema_properties[field_name].get("type")
+                     or "null"
+                     in (
+                         schema_properties[field_name].get("anyOf", [])
+                         if isinstance(schema_properties[field_name].get("anyOf"), list)
+                         else []
+                     ),
+                     "default": field_info.default
+                     if hasattr(field_info, "default") and field_info.default is not ...
+                     else None,
+                 }
+                 json_object[field_name] = normalize_field_value(
+                     json_object[field_name], field_info.annotation, field_def
+                 )
+
+         model_instance = model_cls(**json_object)
+
+         result_dict = {
+             "json_string": result["json_string"],
+             "json_object": result["json_object"],
+             "usage": result["usage"],
+         }
+         result_dict["model"] = model_instance
+
+         return type(
+             "ExtractResult",
+             (dict,),
+             {
+                 "__getattr__": lambda self, key: self.get(key),
+                 "__call__": lambda self: self["model"],
+             },
+         )(result_dict)
+
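A sketch of extract_with_model with a minimal Pydantic model (model and text invented)::

    from pydantic import BaseModel

    class Person(BaseModel):
        name: str
        age: int | None = None

    res = conv.extract_with_model(Person, "John is 30 years old")
    res()            # the validated Person instance (__call__ returns res["model"])
    res.json_object  # attribute access proxies onto the underlying dict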
+     # ------------------------------------------------------------------
+     # Internal: stepwise with shared context
+     # ------------------------------------------------------------------
+
+     def _stepwise_extract(
+         self,
+         model_cls: type[BaseModel],
+         text: str,
+         instruction_template: str,
+         ai_cleanup: bool,
+         fields: list[str] | None,
+         field_definitions: dict[str, Any] | None,
+         json_mode: Literal["auto", "on", "off"],
+     ) -> dict[str, Union[str, dict[str, Any]]]:
+         """Stepwise extraction using conversation context between fields."""
+         if field_definitions is None:
+             field_definitions = get_registry_snapshot()
+
+         data: dict[str, Any] = {}
+         validation_errors: list[str] = []
+         field_results: dict[str, Any] = {}
+
+         accumulated_usage = {
+             "prompt_tokens": 0,
+             "completion_tokens": 0,
+             "total_tokens": 0,
+             "cost": 0.0,
+             "model_name": self._model_name,
+             "field_usages": {},
+         }
+
+         valid_fields = set(model_cls.model_fields.keys())
+         if fields is not None:
+             invalid_fields = set(fields) - valid_fields
+             if invalid_fields:
+                 raise KeyError(f"Fields not found in model: {', '.join(invalid_fields)}")
+             field_items = [(name, model_cls.model_fields[name]) for name in fields]
+         else:
+             field_items = list(model_cls.model_fields.items())
+
+         # Seed conversation with the source text
+         self.add_context("user", f"I need to extract information from this text:\n\n{text}")
+         self.add_context(
+             "assistant", "I'll help you extract the information from that text. What would you like to extract?"
+         )
+
+         for field_name, field_info in field_items:
+             logger.debug("[stepwise-conv] Extracting field: %s", field_name)
+
+             field_schema = {
+                 "value": {
+                     "type": "integer" if field_info.annotation is int else "string",
+                     "description": field_info.description or f"Value for {field_name}",
+                 }
+             }
+
+             try:
+                 prompt = instruction_template.format(field_name=field_name)
+                 result = self.ask_for_json(
+                     content=f"{prompt} {text}",
+                     json_schema=field_schema,
+                     ai_cleanup=ai_cleanup,
+                     json_mode=json_mode,
+                 )
+
+                 field_usage = result.get("usage", {})
+                 accumulated_usage["prompt_tokens"] += field_usage.get("prompt_tokens", 0)
+                 accumulated_usage["completion_tokens"] += field_usage.get("completion_tokens", 0)
+                 accumulated_usage["total_tokens"] += field_usage.get("total_tokens", 0)
+                 accumulated_usage["cost"] += field_usage.get("cost", 0.0)
+                 accumulated_usage["field_usages"][field_name] = field_usage
+
+                 extracted_value = result["json_object"]["value"]
+                 if isinstance(extracted_value, dict) and "value" in extracted_value:
+                     raw_value = extracted_value["value"]
+                 else:
+                     raw_value = extracted_value
+
+                 # Normalize
+                 from .core import normalize_field_value
+
+                 field_def = {}
+                 if field_definitions and field_name in field_definitions:
+                     field_def = field_definitions[field_name] if isinstance(field_definitions[field_name], dict) else {}
+
+                 nullable = field_def.get("nullable", True)
+                 default_value = field_def.get("default")
+                 if (
+                     default_value is None
+                     and hasattr(field_info, "default")
+                     and field_info.default is not ...
+                     and str(field_info.default) != "PydanticUndefined"
+                 ):
+                     default_value = field_info.default
+
+                 normalize_def = {"nullable": nullable, "default": default_value}
+                 raw_value = normalize_field_value(raw_value, field_info.annotation, normalize_def)
+
+                 try:
+                     converted_value = convert_value(raw_value, field_info.annotation, allow_shorthand=True)
+                     data[field_name] = converted_value
+                     field_results[field_name] = {"status": "success", "used_default": False}
+                 except ValueError as e:
+                     error_msg = f"Type conversion failed for {field_name}: {e!s}"
+                     has_default = _has_default(field_name, field_info, field_definitions)
+                     if not has_default:
+                         validation_errors.append(error_msg)
+                     default_value = get_field_default(field_name, field_info, field_definitions)
+                     data[field_name] = default_value
+                     field_results[field_name] = {
+                         "status": "conversion_failed",
+                         "error": error_msg,
+                         "used_default": True,
+                     }
+
+             except Exception as e:
+                 error_msg = f"Extraction failed for {field_name}: {e!s}"
+                 has_default = _has_default(field_name, field_info, field_definitions)
+                 if not has_default:
+                     validation_errors.append(error_msg)
+                 default_value = get_field_default(field_name, field_info, field_definitions)
+                 data[field_name] = default_value
+                 field_results[field_name] = {"status": "extraction_failed", "error": error_msg, "used_default": True}
+                 accumulated_usage["field_usages"][field_name] = {
+                     "error": str(e),
+                     "status": "failed",
+                     "used_default": True,
+                     "default_value": default_value,
+                 }
+
+         if validation_errors:
+             accumulated_usage["validation_errors"] = validation_errors
+
+         try:
+             model_instance = model_cls(**data)
+             model_dict = model_instance.model_dump()
+
+             class ExtendedJSONEncoder(json.JSONEncoder):
+                 def default(self, obj):
+                     if isinstance(obj, (datetime, date)):
+                         return obj.isoformat()
+                     if isinstance(obj, Decimal):
+                         return str(obj)
+                     return super().default(obj)
+
+             json_string = json.dumps(model_dict, cls=ExtendedJSONEncoder)
+
+             result = {
+                 "json_string": json_string,
+                 "json_object": json.loads(json_string),
+                 "usage": accumulated_usage,
+                 "field_results": field_results,
+             }
+             result["model"] = model_instance
+             return type(
+                 "ExtractResult",
+                 (dict,),
+                 {"__getattr__": lambda self, key: self.get(key), "__call__": lambda self: self["model"]},
+             )(result)
+         except Exception as e:
+             error_msg = f"Model validation error: {e!s}"
+             if "validation_errors" not in accumulated_usage:
+                 accumulated_usage["validation_errors"] = []
+             accumulated_usage["validation_errors"].append(error_msg)
+
+             error_result = {
+                 "json_string": "{}",
+                 "json_object": {},
+                 "usage": accumulated_usage,
+                 "field_results": field_results,
+                 "error": error_msg,
+             }
+             return type(
+                 "ExtractResult",
+                 (dict,),
+                 {"__getattr__": lambda self, key: self.get(key), "__call__": lambda self: None},
+             )(error_result)
+
+
+ def _has_default(field_name: str, field_info: Any, field_definitions: dict[str, Any] | None) -> bool:
+     """Check whether a Pydantic field has a usable default value."""
+     if field_definitions and field_name in field_definitions:
+         fd = field_definitions[field_name]
+         if isinstance(fd, dict) and "default" in fd:
+             return True
+     if hasattr(field_info, "default"):
+         val = field_info.default
+         if val is not ... and str(val) != "PydanticUndefined":
+             return True
+     return False
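A quick check of _has_default semantics against a minimal model; the Person class is invented for illustration::

    from pydantic import BaseModel

    from prompture.conversation import _has_default

    class Person(BaseModel):
        name: str              # required: default is PydanticUndefined
        age: int | None = None

    _has_default("name", Person.model_fields["name"], None)  # False
    _has_default("age", Person.model_fields["age"], None)    # True
    # An entry in field_definitions with a "default" key wins outright:
    _has_default("name", Person.model_fields["name"], {"name": {"default": "N/A"}})  # True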