retab 0.0.35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. retab-0.0.35.dist-info/METADATA +417 -0
  2. retab-0.0.35.dist-info/RECORD +111 -0
  3. retab-0.0.35.dist-info/WHEEL +5 -0
  4. retab-0.0.35.dist-info/top_level.txt +1 -0
  5. uiform/__init__.py +4 -0
  6. uiform/_resource.py +28 -0
  7. uiform/_utils/__init__.py +0 -0
  8. uiform/_utils/ai_models.py +100 -0
  9. uiform/_utils/benchmarking copy.py +588 -0
  10. uiform/_utils/benchmarking.py +485 -0
  11. uiform/_utils/chat.py +332 -0
  12. uiform/_utils/display.py +443 -0
  13. uiform/_utils/json_schema.py +2161 -0
  14. uiform/_utils/mime.py +168 -0
  15. uiform/_utils/responses.py +163 -0
  16. uiform/_utils/stream_context_managers.py +52 -0
  17. uiform/_utils/usage/__init__.py +0 -0
  18. uiform/_utils/usage/usage.py +300 -0
  19. uiform/client.py +701 -0
  20. uiform/py.typed +0 -0
  21. uiform/resources/__init__.py +0 -0
  22. uiform/resources/consensus/__init__.py +3 -0
  23. uiform/resources/consensus/client.py +114 -0
  24. uiform/resources/consensus/completions.py +252 -0
  25. uiform/resources/consensus/completions_stream.py +278 -0
  26. uiform/resources/consensus/responses.py +325 -0
  27. uiform/resources/consensus/responses_stream.py +373 -0
  28. uiform/resources/deployments/__init__.py +9 -0
  29. uiform/resources/deployments/client.py +78 -0
  30. uiform/resources/deployments/endpoints.py +322 -0
  31. uiform/resources/deployments/links.py +452 -0
  32. uiform/resources/deployments/logs.py +211 -0
  33. uiform/resources/deployments/mailboxes.py +496 -0
  34. uiform/resources/deployments/outlook.py +531 -0
  35. uiform/resources/deployments/tests.py +158 -0
  36. uiform/resources/documents/__init__.py +3 -0
  37. uiform/resources/documents/client.py +255 -0
  38. uiform/resources/documents/extractions.py +441 -0
  39. uiform/resources/evals.py +812 -0
  40. uiform/resources/files.py +24 -0
  41. uiform/resources/finetuning.py +62 -0
  42. uiform/resources/jsonlUtils.py +1046 -0
  43. uiform/resources/models.py +45 -0
  44. uiform/resources/openai_example.py +22 -0
  45. uiform/resources/processors/__init__.py +3 -0
  46. uiform/resources/processors/automations/__init__.py +9 -0
  47. uiform/resources/processors/automations/client.py +78 -0
  48. uiform/resources/processors/automations/endpoints.py +317 -0
  49. uiform/resources/processors/automations/links.py +356 -0
  50. uiform/resources/processors/automations/logs.py +211 -0
  51. uiform/resources/processors/automations/mailboxes.py +435 -0
  52. uiform/resources/processors/automations/outlook.py +444 -0
  53. uiform/resources/processors/automations/tests.py +158 -0
  54. uiform/resources/processors/client.py +474 -0
  55. uiform/resources/prompt_optimization.py +76 -0
  56. uiform/resources/schemas.py +369 -0
  57. uiform/resources/secrets/__init__.py +9 -0
  58. uiform/resources/secrets/client.py +20 -0
  59. uiform/resources/secrets/external_api_keys.py +109 -0
  60. uiform/resources/secrets/webhook.py +62 -0
  61. uiform/resources/usage.py +271 -0
  62. uiform/types/__init__.py +0 -0
  63. uiform/types/ai_models.py +645 -0
  64. uiform/types/automations/__init__.py +0 -0
  65. uiform/types/automations/cron.py +58 -0
  66. uiform/types/automations/endpoints.py +21 -0
  67. uiform/types/automations/links.py +28 -0
  68. uiform/types/automations/mailboxes.py +60 -0
  69. uiform/types/automations/outlook.py +68 -0
  70. uiform/types/automations/webhooks.py +21 -0
  71. uiform/types/chat.py +8 -0
  72. uiform/types/completions.py +93 -0
  73. uiform/types/consensus.py +10 -0
  74. uiform/types/db/__init__.py +0 -0
  75. uiform/types/db/annotations.py +24 -0
  76. uiform/types/db/files.py +36 -0
  77. uiform/types/deployments/__init__.py +0 -0
  78. uiform/types/deployments/cron.py +59 -0
  79. uiform/types/deployments/endpoints.py +28 -0
  80. uiform/types/deployments/links.py +36 -0
  81. uiform/types/deployments/mailboxes.py +67 -0
  82. uiform/types/deployments/outlook.py +76 -0
  83. uiform/types/deployments/webhooks.py +21 -0
  84. uiform/types/documents/__init__.py +0 -0
  85. uiform/types/documents/correct_orientation.py +13 -0
  86. uiform/types/documents/create_messages.py +226 -0
  87. uiform/types/documents/extractions.py +297 -0
  88. uiform/types/evals.py +207 -0
  89. uiform/types/events.py +76 -0
  90. uiform/types/extractions.py +85 -0
  91. uiform/types/jobs/__init__.py +0 -0
  92. uiform/types/jobs/base.py +150 -0
  93. uiform/types/jobs/batch_annotation.py +22 -0
  94. uiform/types/jobs/evaluation.py +133 -0
  95. uiform/types/jobs/finetune.py +6 -0
  96. uiform/types/jobs/prompt_optimization.py +41 -0
  97. uiform/types/jobs/webcrawl.py +6 -0
  98. uiform/types/logs.py +231 -0
  99. uiform/types/mime.py +257 -0
  100. uiform/types/modalities.py +68 -0
  101. uiform/types/pagination.py +6 -0
  102. uiform/types/schemas/__init__.py +0 -0
  103. uiform/types/schemas/enhance.py +53 -0
  104. uiform/types/schemas/evaluate.py +55 -0
  105. uiform/types/schemas/generate.py +32 -0
  106. uiform/types/schemas/layout.py +58 -0
  107. uiform/types/schemas/object.py +631 -0
  108. uiform/types/schemas/templates.py +107 -0
  109. uiform/types/secrets/__init__.py +0 -0
  110. uiform/types/secrets/external_api_keys.py +22 -0
  111. uiform/types/standards.py +39 -0
@@ -0,0 +1,631 @@
1
+ import copy
2
+ import datetime
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Any, Iterable, Literal, Self, cast
6
+
7
+ from anthropic.types.message_param import MessageParam
8
+
9
+ from google.genai.types import ContentUnionDict # type: ignore
10
+ from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam
11
+ from openai.types.responses.response_input_param import ResponseInputItemParam
12
+ from pydantic import BaseModel, Field, PrivateAttr, computed_field, model_validator
13
+
14
+ from ..._utils.chat import convert_to_anthropic_format, convert_to_google_genai_format
15
+ from ..._utils.chat import convert_to_openai_format as convert_to_openai_completions_api_format
16
+ from ..._utils.json_schema import (
17
+ convert_basemodel_to_partial_basemodel,
18
+ convert_json_schema_to_basemodel,
19
+ create_reasoning_schema,
20
+ expand_refs,
21
+ generate_schema_data_id,
22
+ generate_schema_id,
23
+ json_schema_to_nlp_data_structure,
24
+ json_schema_to_strict_openai_schema,
25
+ json_schema_to_typescript_interface,
26
+ load_json_schema,
27
+ schema_to_ts_type,
28
+ )
29
+ from ..._utils.responses import convert_to_openai_format as convert_to_openai_responses_api_format
30
+ from ...types.standards import StreamingBaseModel
31
+ from ..chat import ChatCompletionUiformMessage
32
+
33
+
34
class PartialSchema(BaseModel):
    """Response from the Generate Schema API -- a partial Schema object with no validation."""

    # Type tag of the payload; fixed to the literal "schema".
    object: Literal["schema"] = "schema"
    # Creation timestamp; defaults to the current timezone-aware UTC time.
    created_at: datetime.datetime = Field(default_factory=lambda: datetime.datetime.now(tz=datetime.timezone.utc))
    # Raw JSON schema document; empty unless provided.
    json_schema: dict[str, Any] = {}
    # Read by Schema.inference_json_schema to decide whether the strict OpenAI conversion is applied.
    strict: bool = True
41
+
42
+
43
class PartialSchemaChunk(StreamingBaseModel):
    """Streaming chunk of a schema under generation."""

    # Type tag of the payload; fixed to the literal "schema.chunk".
    object: Literal["schema.chunk"] = "schema.chunk"
    # Creation timestamp; defaults to the current timezone-aware UTC time.
    created_at: datetime.datetime = Field(default_factory=lambda: datetime.datetime.now(tz=datetime.timezone.utc))
    # NOTE(review): presumably a flattened (path -> value) delta of the JSON schema; confirm against the producer.
    delta_json_schema_flat: dict[str, Any] = {}
47
+
48
+
49
+ # class PartialSchemaStreaming(StreamingBaseModel, PartialSchema): pass
50
+
51
+
52
class Schema(PartialSchema):
    """A full Schema object with validation."""

    # Type tag of the object; fixed to the literal "schema".
    object: Literal["schema"] = "schema"

    # The timestamp of when the schema was created (timezone-aware UTC by default).
    created_at: datetime.datetime = Field(default_factory=lambda: datetime.datetime.now(tz=datetime.timezone.utc))

    # The JSON schema to use for loading.
    json_schema: dict[str, Any] = {}
63
+
64
# Computed field: it is part of the serialized representation of the object.
@computed_field  # type: ignore
@property
def data_id(self) -> str:
    """Identifier of the schema's data structure.

    Delegates to ``generate_schema_data_id`` on ``self.json_schema``. The helper is
    documented upstream as a SHA1 hash that ignores prompt/description/default fields.

    Returns:
        str: The hash string representing the schema data version.
    """
    schema_data_hash = generate_schema_data_id(self.json_schema)
    return schema_data_hash
74
+
75
# Computed field: it is part of the serialized representation of the object.
@computed_field  # type: ignore
@property
def id(self) -> str:
    """Identifier of the complete schema.

    Delegates to ``generate_schema_id`` on ``self.json_schema``. The helper is
    documented upstream as a SHA1 hash of the complete schema.

    Returns:
        str: The hash string representing the complete schema version.
    """
    full_schema_hash = generate_schema_id(self.json_schema)
    return full_schema_hash
85
+
86
# The Pydantic model to use for loading. Populated by the "before" validator
# (validate_schema_and_model); excluded from serialization and from repr.
pydantic_model: type[BaseModel] = Field(default=None, exclude=True, repr=False)  # type: ignore

# Private attribute assigned in model_after_validator from
# convert_basemodel_to_partial_basemodel(self.pydantic_model).
_partial_pydantic_model: type[BaseModel] = PrivateAttr()
90
+
91
@property
def inference_pydantic_model(self) -> type[BaseModel]:
    """Pydantic model class built from ``self.inference_json_schema``.

    The inference schema is the structured-output schema that includes the
    added reasoning fields.

    Returns:
        type[BaseModel]: A Pydantic model class generated from the inference schema.
    """
    inference_schema = self.inference_json_schema
    return convert_json_schema_to_basemodel(inference_schema)
99
+
100
@property
def inference_json_schema(self) -> dict[str, Any]:
    """Schema formatted for structured output, with the reasoning fields added.

    When ``self.strict`` is false, a deep copy of the reasoning schema is returned
    as is; otherwise that copy is first passed through
    ``json_schema_to_strict_openai_schema``.

    Returns:
        dict[str, Any]: The schema formatted for structured output processing.
    """
    if not self.strict:
        return copy.deepcopy(self._reasoning_object_schema)

    strict_schema = json_schema_to_strict_openai_schema(copy.deepcopy(self._reasoning_object_schema))
    assert isinstance(strict_schema, dict), "Validation Error: The inference_json_schema is not a dict"
    return strict_schema
113
+
114
@property
def openai_messages(self) -> list[ChatCompletionMessageParam]:
    """Messages of this schema converted for OpenAI's Chat Completions API.

    Returns:
        list[ChatCompletionMessageParam]: ``self.messages`` in OpenAI's format.
    """
    uiform_messages = self.messages
    return convert_to_openai_completions_api_format(uiform_messages)
122
+
123
@property
def openai_responses_input(self) -> list[ResponseInputItemParam]:
    """Messages of this schema converted for OpenAI's Responses API.

    Returns:
        list[ResponseInputItemParam]: ``self.messages`` in the Responses API input format.
    """
    uiform_messages = self.messages
    return convert_to_openai_responses_api_format(uiform_messages)
131
+
132
@property
def anthropic_system_prompt(self) -> str:
    """System prompt for Anthropic's Claude.

    A fixed JSON-output instruction is prepended to ``self.system_prompt``.
    No separator is inserted between the two parts.

    Returns:
        str: The system prompt formatted for Claude.
    """
    json_instruction = "Return your response as a JSON object following the provided schema."
    return "".join((json_instruction, self.system_prompt))
140
+
141
@property
def anthropic_messages(self) -> list[MessageParam]:
    """Messages of this schema converted for Anthropic's Claude.

    Only the second element of the pair returned by ``convert_to_anthropic_format``
    is used here.

    Returns:
        list[MessageParam]: List of messages formatted for Claude.
    """
    converted = convert_to_anthropic_format(self.messages)
    return converted[1]
149
+
150
@property
def gemini_system_prompt(self) -> str:
    """System prompt for Google's Gemini API.

    This is the first element of the pair returned by
    ``convert_to_google_genai_format`` for ``self.messages``.
    """
    converted = convert_to_google_genai_format(self.messages)
    return converted[0]
153
+
154
@property
def gemini_messages(self) -> list[ContentUnionDict]:
    """Messages formatted for Google's Gemini API.

    This is the second element of the pair returned by
    ``convert_to_google_genai_format`` for ``self.messages``.
    """
    converted = convert_to_google_genai_format(self.messages)
    return converted[1]
158
+
159
@property
def inference_gemini_json_schema(self) -> dict[str, Any]:
    """Reasoning schema rewritten for Gemini.

    The original comment states the intent: like the OpenAI schema, but Gemini
    does not accept ``anyOf`` typing. The rewrite works on a deep copy of
    ``self._reasoning_object_schema`` and, for every visited node:

    - collapses ``anyOf`` to its first non-null alternative, marking the node
      ``nullable`` when a null alternative existed and the resulting type is
      neither ``object`` nor ``array``;
    - recurses into ``$defs``, ``allOf``, object ``properties`` and array ``items``;
    - sets both ``required`` and ``propertyOrdering`` of objects to the list of
      their property names (the same list object is stored under both keys);
    - removes ``additionalProperties`` and ``format``.

    Returns:
        dict[str, Any]: The converted schema.

    Raises:
        ValueError: If an ``anyOf`` contains no non-null alternative.
    """
    gemini_schema = copy.deepcopy(self._reasoning_object_schema)

    def convert_node(node: dict[str, Any]) -> None:
        for definition in node.get("$defs", {}).values():
            convert_node(definition)

        if "anyOf" in node:
            alternatives = node.pop("anyOf")
            concrete = [alt for alt in alternatives if alt.get("type") != "null"]
            # At least one alternative was dropped above iff a null alternative existed.
            had_null = len(concrete) != len(alternatives)

            if not concrete:
                raise ValueError("No non-null subschemas found within anyOf")

            # Keep only the first non-null alternative and merge it into the parent node.
            chosen = concrete[0]
            convert_node(chosen)
            node.update(chosen)

            if had_null and node.get("type") not in ("object", "array"):
                node["nullable"] = True

        for member in node.get("allOf", ()):
            convert_node(member)

        node_type = node.get("type")
        if node_type == "object" and "properties" in node:
            for property_schema in node["properties"].values():
                convert_node(property_schema)
            property_names = list(node["properties"].keys())
            node["required"] = property_names
            node["propertyOrdering"] = property_names

        if node.get("type") == "array" and "items" in node:
            convert_node(node["items"])

        # Remove fields that are not allowed.
        node.pop("additionalProperties", None)
        node.pop("format", None)

    convert_node(gemini_schema)
    return gemini_schema
202
+
203
@property
def inference_typescript_interface(self) -> str:
    """TypeScript interface rendering of the reasoning schema.

    Intended as a more readable alternative to the raw JSON schema; field
    descriptions are not included (``add_field_description=False``).

    Returns:
        str: A string containing the TypeScript interface definition.
    """
    reasoning_schema = self._reasoning_object_schema
    return json_schema_to_typescript_interface(reasoning_schema, add_field_description=False)
211
+
212
@property
def inference_nlp_data_structure(self) -> str:
    """NLP data structure rendering of the reasoning schema.

    Intended as a more readable alternative to the raw JSON schema.

    Returns:
        str: A string containing the NLP data structure definition.
    """
    reasoning_schema = self._reasoning_object_schema
    return json_schema_to_nlp_data_structure(reasoning_schema)
220
+
221
@property
def developer_system_prompt(self) -> str:
    """Fixed, schema-independent part of the system prompt.

    The text is a Markdown document of general extraction instructions. It ends
    with the heading under which ``system_prompt`` appends the user-defined prompt.

    Returns:
        str: The constant developer prompt.
    """
    prompt = '''
# General Instructions

You are an expert in data extraction and structured data outputs.

When provided with a **JSON schema** and a **document**, you must:

1. Carefully extract all relevant data from the provided document according to the given schema.
2. Return extracted data strictly formatted according to the provided schema.
3. Make sure that the extracted values are **UTF-8** encodable strings.
4. Avoid generating bytes, binary data, base64 encoded data, or other non-UTF-8 encodable data.

---

## Handling Missing and Nullable Fields

### Nullable Leaf Attributes

- If valid data is missing or not explicitly present, set leaf attributes explicitly to `null`.
- **Do NOT** use empty strings (`""`), placeholder values, or fabricated data.

**Example:**

```json
// Correct:
{"email": null}

// Incorrect:
{"email": ""}
```

### Nullable Nested Objects

- If an entire nested object’s data is missing or incomplete, **do NOT** set the object itself to `null`.
- Keep the object structure fully intact, explicitly setting each leaf attribute within to `null`.
- This preserves overall structure and explicitly communicates exactly which fields lack data.

**Example:**

```json
// Correct (all information is missing):
{
"address": {
"street": null,
"zipCode": null,
"city": null
}
}

// Incorrect (all information is missing):
{
"address": null
}

// Correct (only some information is missing):
{
"address": {
"street": null,
"zipCode": null,
"city": "Paris"
}
}

// Incorrect (only some information is missing):
{
"address": {
"city": "Paris"
}
}
```

---

## Reasoning Fields

Your schema includes special reasoning fields (`reasoning___*`) used exclusively to document your extraction logic. These fields are for detailed explanations and will not appear in final outputs.

| Reasoning Field Type | Field Naming Pattern |
|----------------------|----------------------------|
| Root Object | `reasoning___root` |
| Nested Objects | `reasoning___[objectname]` |
| Array Fields | `reasoning___[arrayname]` |
| Array Elements | `reasoning___item` |
| Leaf Attributes | `reasoning___[attributename]` |

You MUST include these details explicitly in your reasoning fields:

- **Explicit Evidence**: Quote specific lines or phrases from the document confirming your extraction.
- **Decision Justification**: Clearly justify why specific data was chosen or rejected.
- **Calculations/Transformations**: Document explicitly any computations, unit conversions, or normalizations.
- **Alternative Interpretations**: Explicitly describe any alternative data interpretations considered and why you rejected them.
- **Confidence and Assumptions**: Clearly state your confidence level and explicitly articulate any assumptions.

**Example Reasoning:**

> Found company name 'ACME Corp' explicitly stated in the top-right corner of page 1, matching standard letterhead format. Confirmed by matching signature block ('ACME Corp') at bottom of page 3. Confidence high. Alternative interpretation (e.g., sender's name) explicitly rejected due to explicit labeling 'Client: ACME Corp' on page 1.

---

## Detailed Reasoning Examples

### Array Reasoning (`reasoning___[arrayname]`)

- Explicitly describe how the entire array was identified.
- List explicitly all extracted items with clear details and source references.

**Example:**

```markdown
Identified itemized invoice section clearly demarcated by header "Invoice Items" (page 2, lines 12–17). Extracted items explicitly listed:

1. Office Supplies, quantity 5, unit price $4.99, total $24.95 (line 12)
2. Printer Paper, quantity 1, unit price $5.99, total $5.99 (line 13)
3. Stapler, quantity 1, unit price $4.07, total $4.07 (line 14)

No ambiguity detected.
```

### Array Item Reasoning (`reasoning___item`)

Explicitly document evidence for each individual item:

```markdown
Extracted explicitly from line 12: 'Office Supplies x5 $4.99ea $24.95'. Quantity (5 units) multiplied explicitly by unit price ($4.99) matches listed total ($24.95). Format consistent across invoice, high confidence.
```

---

## Principles for Accurate Extraction

When performing extraction, explicitly follow these core principles:

- **Transparency**: Explicitly document and justify every extraction decision.
- **Precision**: Always verify explicitly using direct quotes from the source document.
- **Conservatism**: Set explicitly fields as `null` when data is explicitly missing or ambiguous—never fabricate or guess.
- **Structure Preservation**: Always maintain explicitly the full schema structure, even when entire nested objects lack data (leaf attributes as null).


## Source Fields

Some leaf fields require you to explicitly provide the source of the data (verbatim from the document).
The idea is to simply provide a verbatim quote from the document, without any additional formatting or commentary, keeping it as close as possible to the original text.
Make sure to reasonably include some surrounding text to provide context about the quote.

You can easily identify the fields that require a source by the `quote___[attributename]` naming pattern.

**Example:**

```json
{
"quote___name": "NAME:\nJohn Doe",
"name": "John Doe"
}
```

---

# User Defined System Prompt

'''
    return prompt
383
+
384
@property
def user_system_prompt(self) -> str:
    """User-defined system prompt stored under the root ``X-SystemPrompt`` key.

    Returns:
        str: The stored prompt, or an empty string when the key is absent.
    """
    try:
        return self.json_schema["X-SystemPrompt"]
    except KeyError:
        return ""
387
+
388
@property
def schema_system_prompt(self) -> str:
    """Schema-specific part of the system prompt.

    Concatenates the NLP data structure rendering, a ``---`` separator line, a
    fixed heading, and the TypeScript interface rendering, in that order.
    """
    sections = (
        self.inference_nlp_data_structure,
        "\n---\n",
        "## Expected output schema as a TypeScript interface for better readability:\n\n",
        self.inference_typescript_interface,
    )
    return "".join(sections)
393
+
394
@property
def system_prompt(self) -> str:
    """Full system prompt.

    Joins, with a blank line between each part: the developer prompt, the
    user-defined prompt, and the schema prompt.

    Returns:
        str: The combined system prompt string.
    """
    parts = (self.developer_system_prompt, self.user_system_prompt, self.schema_system_prompt)
    return "\n\n".join(parts)
402
+
403
@property
def title(self) -> str:
    """Title of the schema, read from the root ``title`` key.

    Returns:
        str: The schema title or 'NoTitle' if not specified.
    """
    schema = self.json_schema
    if "title" in schema:
        return schema["title"]
    return "NoTitle"
411
+
412
@property
def _expanded_object_schema(self) -> dict[str, Any]:
    """Schema with all references expanded inline.

    ``expand_refs`` is given a deep copy, so ``self.json_schema`` itself is not
    passed to the helper.

    Returns:
        dict[str, Any]: The expanded schema with resolved references. Per the original
        documentation, a schema that is not expandable is returned as is.
    """
    schema_copy = copy.deepcopy(self.json_schema)
    return expand_refs(schema_copy)
420
+
421
@property
def _reasoning_object_schema(self) -> dict[str, Any]:
    """Expanded schema with inference-specific modifications.

    ``create_reasoning_schema`` populates the reasoning fields into a deep copy
    of the expanded schema.

    Returns:
        dict[str, Any]: The modified schema with reasoning fields added to the structure.
    """
    expanded_copy = copy.deepcopy(self._expanded_object_schema)
    reasoning_schema = create_reasoning_schema(expanded_copy)
    assert isinstance(reasoning_schema, dict), "Validation Error: The inference_json_schema is not a dict"
    return reasoning_schema
431
+
432
@property
def _validation_object_schema(self) -> dict[str, Any]:
    """Loose validation schema in which every field is optional.

    Works on a deep copy of ``self._reasoning_object_schema`` and deletes every
    ``required`` keyword reachable through ``properties``, ``items``, ``$defs``,
    ``anyOf`` and ``allOf``, so that partial data can be validated.

    Returns:
        dict[str, Any]: The modified schema for validation purposes.
    """
    loose_schema = copy.deepcopy(self._reasoning_object_schema)

    def drop_required(node: dict[str, Any]) -> None:
        if "required" in node:
            del node["required"]

        # Gather the direct subschemas, then descend into each of them.
        children: list[Any] = []
        if "properties" in node:
            children.extend(node["properties"].values())
        if "items" in node:
            children.append(node["items"])
        if "$defs" in node:
            children.extend(node["$defs"].values())
        for combinator in ("anyOf", "allOf"):
            if combinator in node:
                children.extend(node[combinator])

        for child in children:
            drop_required(child)

    drop_required(loose_schema)
    return loose_schema
465
+
466
+ def _get_pattern_attribute(self, pattern: str, attribute: Literal['X-FieldPrompt', 'X-ReasoningPrompt', 'type']) -> str | None:
467
+ """
468
+ Given a JSON Schema and a pattern (like "my_object.my_array.*.my_property"),
469
+ navigate the schema and return the specified attribute of the identified node.
470
+ """
471
+
472
+ # Special case: "*" means the root schema itself
473
+ current_schema = self._expanded_object_schema
474
+ if pattern.strip() == "*":
475
+ if attribute == "X-FieldPrompt":
476
+ return current_schema.get(attribute) or current_schema.get("description")
477
+ return current_schema.get(attribute)
478
+
479
+ parts = pattern.split(".")
480
+ index = 0 # Start at the first part
481
+
482
+ while index < len(parts):
483
+ part = parts[index]
484
+
485
+ if part == "*" or part.isdigit():
486
+ # Handle wildcard case for arrays
487
+ if "items" in current_schema:
488
+ current_schema = current_schema["items"]
489
+ index += 1 # Move to the next part
490
+ else:
491
+ # Invalid use of "*" for the current schema
492
+ return None
493
+ elif "properties" in current_schema and part in current_schema["properties"]:
494
+ # Handle normal property navigation
495
+ current_schema = current_schema["properties"][part]
496
+ index += 1 # Move to the next part
497
+ else:
498
+ # If we encounter a structure without "properties" or invalid part
499
+ return None
500
+
501
+ # At this point, we've navigated to the target node
502
+ if attribute == "X-FieldPrompt":
503
+ return current_schema.get(attribute) or current_schema.get("description")
504
+ elif attribute == "type":
505
+ # Convert schema type to TypeScript type
506
+ return schema_to_ts_type(current_schema, {}, {}, 0, 0, add_field_description=False)
507
+ return current_schema.get(attribute)
508
+
509
def _set_pattern_attribute(self, pattern: str, attribute: Literal['X-FieldPrompt', 'X-ReasoningPrompt', 'X-SystemPrompt', 'description'], value: str) -> None:
    """Set an attribute value at a specific path of ``self.json_schema`` (in place).

    The pattern is resolved segment by segment: ``*`` or an all-digit segment steps
    into an array's ``items``; any other segment is looked up in ``properties``.
    When the current node is a ``$ref`` to ``#/$defs/<name>``, navigation continues
    inside that definition. If the definition is referenced more than once in the
    schema, it is first duplicated under a fresh ``<name>Copy<N>`` key and the
    current ``$ref`` is re-pointed to the copy, so that the other users of the
    definition are not affected by the change.

    A pattern that cannot be resolved leaves the target attribute unset and returns
    silently (definition copies created while navigating are kept).

    Args:
        pattern (str): The path pattern to navigate the schema
            (e.g., "my_object.my_array.*.my_property"); "*" alone designates the root.
        attribute (Literal): The attribute to set ('description', 'X-FieldPrompt', etc.).
            'X-SystemPrompt' may only be set at the root.
        value (str): The value to set for the attribute.

    Raises:
        AssertionError: If 'X-SystemPrompt' is set on a non-root node, or if a
            ``$ref`` is not a string of the form ``#/$defs/<existing name>``.
    """

    def count_ref_uses(node: Any, target: str) -> int:
        """Count the ``$ref`` entries equal to ``target`` anywhere under ``node``."""
        if isinstance(node, dict):
            own = 1 if node.get("$ref") == target else 0
            return own + sum(count_ref_uses(child, target) for child in node.values())
        if isinstance(node, list):
            return sum(count_ref_uses(child, target) for child in node)
        return 0

    current_schema = self.json_schema
    definitions = self.json_schema.get("$defs", {})
    parts = pattern.split(".")

    if pattern.strip() == "*":
        # Special case: "*" means the root schema itself
        current_schema[attribute] = value
        return
    assert attribute != "X-SystemPrompt", "Cannot set the X-SystemPrompt attribute other than at the root schema."

    index = 0  # Index for the parts list
    while index < len(parts):
        part = parts[index]
        if part == "*" or part.isdigit():
            # Handle the array case
            if "items" not in current_schema:
                return  # Invalid pattern for the current schema
            current_schema = current_schema["items"]
            index += 1  # Move to the next part

        elif "properties" in current_schema and part in current_schema["properties"]:
            # Handle the properties case
            current_schema = current_schema["properties"][part]
            index += 1  # Move to the next part

        elif "$ref" in current_schema:
            # Handle the $ref case. The index is deliberately NOT advanced: the same
            # part is re-examined against the referenced definition.
            ref = current_schema["$ref"]
            assert isinstance(ref, str), "Validation Error: The $ref is not a string"
            assert ref.startswith("#/$defs/"), "Validation Error: The $ref is not a definition reference"
            ref_name = ref.split("/")[-1]
            assert ref_name in definitions, "Validation Error: The $ref is not a definition reference"

            # Count how many times this ref is used in the entire schema. This walks
            # the structure instead of searching the serialized JSON text, which
            # missed non-ASCII definition names (escaped by json.dumps) and counted
            # quoted mentions of the ref inside unrelated string values.
            ref_count = count_ref_uses(self.json_schema, ref)

            if ref_count > 1:
                # Create a unique copy name by appending a number
                copy_num = 1
                next_copy_name = f"{ref_name}Copy{copy_num}"
                while next_copy_name in definitions:
                    copy_num += 1
                    next_copy_name = f"{ref_name}Copy{copy_num}"

                # Create a copy of the definition
                def_copy = copy.deepcopy(definitions[ref_name])

                # Change the title and name of the definition
                if "title" in def_copy:
                    def_copy["title"] = f"{def_copy['title']} Copy {copy_num}"
                if "name" in def_copy:
                    def_copy["name"] = next_copy_name

                # Add the new copy to definitions
                definitions[next_copy_name] = def_copy

                # Update the reference
                current_schema["$ref"] = f"#/$defs/{next_copy_name}"
                ref_name = next_copy_name
            # Reference is used only once or a copy is created; directly navigate to the definition
            current_schema = definitions[ref_name]
        else:
            # Cannot navigate further; invalid pattern
            return

    # Once we have navigated to the correct node, set the attribute
    current_schema[attribute] = value
587
+
588
@model_validator(mode="before")
@classmethod
def validate_schema_and_model(cls, data: Any) -> Any:
    """Derive ``json_schema`` and ``pydantic_model`` from whichever one was provided.

    Exactly one of the two inputs must be given (truthy):

    - ``json_schema``: loaded through ``load_json_schema``; the Pydantic model is
      built from it with ``convert_json_schema_to_basemodel``.
    - ``pydantic_model``: kept as is; the JSON schema is taken from its
      ``model_json_schema()``.

    Args:
        data: The raw input of the model. Anything that is not a dict is returned
            untouched and left to pydantic's regular validation.

    Returns:
        A new dict holding both ``json_schema`` and ``pydantic_model`` (the caller's
        dict is not modified), or ``data`` itself when it is not a dict.

    Raises:
        ValueError: If both or neither of ``json_schema`` and ``pydantic_model`` are provided.
    """
    if not isinstance(data, dict):
        # A "before" validator can receive arbitrary input (e.g. an existing instance).
        return data

    # Work on a shallow copy so the mapping passed by the caller is not mutated.
    data = dict(data)

    json_schema: dict[str, Any] | None = data.get('json_schema', None)
    pydantic_model: type[BaseModel] | None = data.get('pydantic_model', None)

    # Exactly one of json_schema / pydantic_model must be provided
    if json_schema and pydantic_model:
        raise ValueError("Cannot provide both json_schema and pydantic_model")

    if not json_schema and not pydantic_model:
        raise ValueError("Must provide either json_schema or pydantic_model")

    if json_schema:
        json_schema = load_json_schema(json_schema)
        data['pydantic_model'] = convert_json_schema_to_basemodel(json_schema)
        data['json_schema'] = json_schema
    else:
        data['pydantic_model'] = pydantic_model
        data['json_schema'] = pydantic_model.model_json_schema()

    return data
611
+
612
@property
def messages(self) -> list[ChatCompletionUiformMessage]:
    """Conversation carrying the schema's instructions.

    Returns:
        A single-element list: one "developer" message whose content is ``self.system_prompt``.
    """
    developer_message = ChatCompletionUiformMessage(role="developer", content=self.system_prompt)
    return [developer_message]
615
+
616
@model_validator(mode="after")
def model_after_validator(self) -> Self:
    """Finish initialization once the fields are validated.

    Stores in ``_partial_pydantic_model`` the result of
    ``convert_basemodel_to_partial_basemodel`` applied to ``self.pydantic_model``.

    Returns:
        Self: The validated instance.
    """
    partial_model = convert_basemodel_to_partial_basemodel(self.pydantic_model)
    self._partial_pydantic_model = partial_model
    return self
622
+
623
+ def save(self, path: Path | str) -> None:
624
+ """Save a JSON schema to a file.
625
+
626
+ Args:
627
+ json_schema: The JSON schema to save, can be a dict, Path, or string
628
+ schema_path: Output path for the schema file
629
+ """
630
+ with open(path, 'w', encoding='utf-8') as f:
631
+ json.dump(self.json_schema, f, ensure_ascii=False, indent=2)