openai-sdk-helpers 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. openai_sdk_helpers/__init__.py +41 -7
  2. openai_sdk_helpers/agent/__init__.py +1 -2
  3. openai_sdk_helpers/agent/base.py +169 -190
  4. openai_sdk_helpers/agent/configuration.py +12 -20
  5. openai_sdk_helpers/agent/coordinator.py +14 -17
  6. openai_sdk_helpers/agent/runner.py +3 -45
  7. openai_sdk_helpers/agent/search/base.py +49 -71
  8. openai_sdk_helpers/agent/search/vector.py +82 -110
  9. openai_sdk_helpers/agent/search/web.py +103 -81
  10. openai_sdk_helpers/agent/summarizer.py +20 -28
  11. openai_sdk_helpers/agent/translator.py +17 -23
  12. openai_sdk_helpers/agent/validator.py +17 -23
  13. openai_sdk_helpers/errors.py +9 -0
  14. openai_sdk_helpers/extract/__init__.py +23 -0
  15. openai_sdk_helpers/extract/extractor.py +157 -0
  16. openai_sdk_helpers/extract/generator.py +476 -0
  17. openai_sdk_helpers/files_api.py +1 -0
  18. openai_sdk_helpers/logging.py +12 -1
  19. openai_sdk_helpers/prompt/extractor_config_agent_instructions.jinja +6 -0
  20. openai_sdk_helpers/prompt/extractor_config_generator.jinja +37 -0
  21. openai_sdk_helpers/prompt/extractor_config_generator_instructions.jinja +9 -0
  22. openai_sdk_helpers/prompt/extractor_prompt_optimizer_agent_instructions.jinja +4 -0
  23. openai_sdk_helpers/prompt/extractor_prompt_optimizer_request.jinja +11 -0
  24. openai_sdk_helpers/response/__init__.py +2 -6
  25. openai_sdk_helpers/response/base.py +233 -164
  26. openai_sdk_helpers/response/configuration.py +39 -14
  27. openai_sdk_helpers/response/files.py +41 -2
  28. openai_sdk_helpers/response/runner.py +1 -48
  29. openai_sdk_helpers/response/tool_call.py +0 -141
  30. openai_sdk_helpers/response/vector_store.py +8 -5
  31. openai_sdk_helpers/streamlit_app/app.py +1 -9
  32. openai_sdk_helpers/structure/__init__.py +16 -0
  33. openai_sdk_helpers/structure/base.py +239 -278
  34. openai_sdk_helpers/structure/extraction.py +1228 -0
  35. openai_sdk_helpers/structure/plan/plan.py +0 -20
  36. openai_sdk_helpers/structure/plan/task.py +0 -33
  37. openai_sdk_helpers/structure/prompt.py +16 -0
  38. openai_sdk_helpers/structure/responses.py +2 -2
  39. openai_sdk_helpers/structure/web_search.py +0 -10
  40. openai_sdk_helpers/tools.py +346 -99
  41. openai_sdk_helpers/utils/__init__.py +7 -0
  42. openai_sdk_helpers/utils/json/base_model.py +315 -32
  43. openai_sdk_helpers/utils/langextract.py +194 -0
  44. openai_sdk_helpers/vector_storage/cleanup.py +7 -2
  45. openai_sdk_helpers/vector_storage/storage.py +37 -7
  46. {openai_sdk_helpers-0.4.3.dist-info → openai_sdk_helpers-0.5.1.dist-info}/METADATA +21 -6
  47. openai_sdk_helpers-0.5.1.dist-info/RECORD +95 -0
  48. openai_sdk_helpers/streamlit_app/streamlit_web_search.py +0 -75
  49. openai_sdk_helpers-0.4.3.dist-info/RECORD +0 -86
  50. {openai_sdk_helpers-0.4.3.dist-info → openai_sdk_helpers-0.5.1.dist-info}/WHEEL +0 -0
  51. {openai_sdk_helpers-0.4.3.dist-info → openai_sdk_helpers-0.5.1.dist-info}/entry_points.txt +0 -0
  52. {openai_sdk_helpers-0.4.3.dist-info → openai_sdk_helpers-0.5.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,157 @@
1
+ """Document extraction helpers powered by LangExtract."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ import typing
8
+
9
+ import langextract as lx
10
+ from langextract.core import format_handler as lx_format_handler
11
+ from langextract.core.data import AnnotatedDocument as LXAnnotatedDocument
12
+
13
+ from ..errors import ExtractionError
14
+ from ..structure.extraction import (
15
+ AnnotatedDocumentStructure,
16
+ DocumentStructure,
17
+ ExampleDataStructure,
18
+ )
19
+
20
+
21
class DocumentExtractor:
    """Run LangExtract over documents and return structured results.

    Parameters
    ----------
    prompt_description : str
        Prompt description used by LangExtract.
    examples : Sequence[ExampleDataStructure]
        Example payloads supplied to LangExtract.
    model_id : str
        Model identifier to pass to LangExtract.
    max_workers : int, optional
        Maximum number of workers for concurrent extraction. Default is 1.

    Methods
    -------
    extract(input_text)
        Extract structured data from one or more documents.
    """

    def __init__(
        self,
        prompt_description: str,
        examples: typing.Sequence[ExampleDataStructure],
        model_id: str,
        max_workers: int = 1,
    ) -> None:
        """Store the extraction settings after validating the examples.

        Parameters
        ----------
        prompt_description : str
            Prompt description used by LangExtract.
        examples : Sequence[ExampleDataStructure]
            Example payloads supplied to LangExtract.
        model_id : str
            Model identifier to pass to LangExtract.
        max_workers : int, optional
            Maximum number of workers for concurrent extraction. Default is 1.

        Raises
        ------
        ValueError
            If ``examples`` is empty.
        """
        # Guard clause: LangExtract results are unreliable without examples.
        if not examples:
            raise ValueError(
                "Examples are required for reliable extraction. "
                "Provide at least one ExampleDataStructure instance."
            )
        self.prompt = prompt_description
        self.examples = examples
        self.model_id = model_id
        self.max_workers = max_workers

    def extract(
        self, input_text: DocumentStructure | list[DocumentStructure]
    ) -> list[AnnotatedDocumentStructure]:
        """Extract structured data from the given document(s).

        Parameters
        ----------
        input_text : DocumentStructure | list[DocumentStructure]
            Document or list of documents to extract data from.

        Returns
        -------
        list[AnnotatedDocumentStructure]
            Extracted items for the provided documents.
        """
        # Accept a single document by wrapping it into a one-element batch.
        batch = (
            [input_text]
            if isinstance(input_text, DocumentStructure)
            else input_text
        )
        raw_result = lx.extract(
            text_or_documents=DocumentStructure.to_dataclass_list(batch),
            prompt_description=self.prompt,
            examples=ExampleDataStructure.to_dataclass_list(self.examples),
            model_id=self.model_id,  # Automatically selects OpenAI provider
            api_key=os.environ.get("OPENAI_API_KEY"),
            fence_output=True,
            use_schema_constraints=False,
            resolver_params={"format_handler": _SanitizingFormatHandler()},
        )

        def _to_structure(item: typing.Any) -> AnnotatedDocumentStructure:
            # LangExtract may hand back its own dataclass or a plain mapping.
            if isinstance(item, LXAnnotatedDocument):
                return AnnotatedDocumentStructure.from_dataclass(item)
            return AnnotatedDocumentStructure.model_validate(item)

        outputs = raw_result if isinstance(raw_result, list) else [raw_result]
        return [_to_structure(item) for item in outputs]
113
+
114
+
115
def _sanitize_extraction_items(
    items: typing.Sequence[typing.Mapping[str, lx_format_handler.ExtractionValueType]],
    attribute_suffix: str,
) -> list[dict[str, lx_format_handler.ExtractionValueType]]:
    """Sanitize each raw extraction mapping before resolver validation.

    Parameters
    ----------
    items : Sequence[Mapping[str, ExtractionValueType]]
        Raw extraction mappings parsed from model output.
    attribute_suffix : str
        Key suffix that marks attribute dictionaries.

    Returns
    -------
    list of dict[str, ExtractionValueType]
        Mappings with dropped or cleaned values per key.
    """
    cleaned_items: list[dict[str, lx_format_handler.ExtractionValueType]] = []
    for raw_item in items:
        decisions = (
            (key, _sanitize_extraction_value(key, value, attribute_suffix))
            for key, value in raw_item.items()
        )
        # Keep only the entries the sanitizer approved, preserving order.
        cleaned_items.append(
            {key: cleaned for key, (keep, cleaned) in decisions if keep}
        )
    return cleaned_items
129
+
130
+
131
+ def _sanitize_extraction_value(
132
+ key: str,
133
+ value: lx_format_handler.ExtractionValueType,
134
+ attribute_suffix: str,
135
+ ) -> tuple[bool, lx_format_handler.ExtractionValueType]:
136
+ if value is None:
137
+ return False, None
138
+ if key.endswith(attribute_suffix):
139
+ if isinstance(value, dict):
140
+ return True, value
141
+ return False, None
142
+ if isinstance(value, (str, int, float)):
143
+ return True, value
144
+ return True, json.dumps(value, ensure_ascii=False)
145
+
146
+
147
class _SanitizingFormatHandler(lx_format_handler.FormatHandler):
    """Sanitize LangExtract output before the resolver validates types."""

    def parse_output(
        self, text: str, *, strict: bool | None = None
    ) -> typing.Sequence[typing.Mapping[str, lx_format_handler.ExtractionValueType]]:
        """Parse model output and sanitize the resulting mappings."""
        parsed = super().parse_output(text, strict=strict)
        # Clean values before the resolver's type validation runs.
        return _sanitize_extraction_items(parsed, self.attribute_suffix)
155
+
156
+
157
# Public API of this module (ExtractionError is re-exported for callers).
__all__ = ["DocumentExtractor", "ExtractionError"]
@@ -0,0 +1,476 @@
1
+ """Prompt optimization and configuration helpers for document extraction."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from pathlib import Path
7
+ from typing import Sequence
8
+
9
+ from ..agent.base import AgentBase
10
+ from ..agent.configuration import AgentConfiguration
11
+ from ..prompt import PromptRenderer
12
+ from ..response.configuration import ResponseConfiguration
13
+ from ..response.prompter import PROMPTER
14
+ from ..settings import OpenAISettings
15
+ from ..structure.extraction import DocumentExtractorConfig, ExampleDataStructure
16
+ from ..structure.prompt import PromptStructure
17
+
18
# Template file names for the extraction prompt-engineering workflows.
EXTRACTOR_CONFIG_TEMPLATE_NAME = "extractor_config_generator.jinja"
EXTRACTOR_CONFIG_AGENT_INSTRUCTIONS_TEMPLATE = (
    "extractor_config_agent_instructions.jinja"
)
EXTRACTOR_CONFIG_GENERATOR_INSTRUCTIONS_TEMPLATE = (
    "extractor_config_generator_instructions.jinja"
)
EXTRACTOR_PROMPT_OPTIMIZER_INSTRUCTIONS_TEMPLATE = (
    "extractor_prompt_optimizer_agent_instructions.jinja"
)
EXTRACTOR_PROMPT_OPTIMIZER_REQUEST_TEMPLATE = "extractor_prompt_optimizer_request.jinja"
# Shared renderer instance used by every helper in this module.
PROMPT_RENDERER = PromptRenderer()

# Number of examples requested when the caller does not supply any.
DEFAULT_EXAMPLE_COUNT = 3
32
+
33
+
34
def _render_prompt_template(
    template_name: str,
    context: dict[str, object] | None = None,
) -> str:
    """Render a named prompt template with the given context.

    Parameters
    ----------
    template_name : str
        Prompt template file name.
    context : dict[str, object] or None, default None
        Context variables for template rendering.

    Returns
    -------
    str
        Rendered prompt content.
    """
    # Fall back to an empty context when none was provided.
    render_context = context or {}
    return PROMPT_RENDERER.render(template_name, context=render_context)
53
+
54
+
55
# Response configuration that produces DocumentExtractorConfig instances.
EXTRACTOR_CONFIG_GENERATOR = ResponseConfiguration(
    name="document_extractor_config_generator",
    instructions=_render_prompt_template(
        EXTRACTOR_CONFIG_GENERATOR_INSTRUCTIONS_TEMPLATE
    ),
    tools=None,
    input_structure=None,
    output_structure=DocumentExtractorConfig,
    add_output_instructions=True,
)

# Agent instructions rendered at import time; each template receives the
# relevant structure's prompt schema so the agent knows the output shape.
PROMPT_OPTIMIZER_AGENT_INSTRUCTIONS = _render_prompt_template(
    EXTRACTOR_PROMPT_OPTIMIZER_INSTRUCTIONS_TEMPLATE,
    context={"prompt_schema": PromptStructure.get_prompt()},
)

EXTRACTOR_CONFIG_AGENT_INSTRUCTIONS = _render_prompt_template(
    EXTRACTOR_CONFIG_AGENT_INSTRUCTIONS_TEMPLATE,
    context={"config_schema": DocumentExtractorConfig.get_prompt()},
)
75
+
76
+
77
def _format_extractor_prompt_request(
    prompt: str,
    extraction_classes: Sequence[str],
    additional_context: str | None,
) -> str:
    """Build the request text for prompt optimization.

    Parameters
    ----------
    prompt : str
        User-provided prompt content.
    extraction_classes : Sequence[str]
        Extraction classes to include.
    additional_context : str or None
        Optional extra context to include.

    Returns
    -------
    str
        Formatted prompt optimization request.
    """
    template_context = {
        "prompt": prompt,
        "extraction_classes": list(extraction_classes),
        "additional_context": additional_context,
    }
    return _render_prompt_template(
        EXTRACTOR_PROMPT_OPTIMIZER_REQUEST_TEMPLATE,
        context=template_context,
    )
106
+
107
+
108
def _format_extractor_config_request(
    name: str,
    prompt_description: str,
    extraction_classes: Sequence[str],
    *,
    example_files: Sequence[str | Path] | None = None,
    example_count: int = DEFAULT_EXAMPLE_COUNT,
) -> str:
    """Build the request text for configuration generation without examples.

    Parameters
    ----------
    name : str
        Name for the extractor configuration.
    prompt_description : str
        Optimized prompt description to use.
    extraction_classes : Sequence[str]
        Extraction classes to include.
    example_files : Sequence[str or Path] or None, default None
        Optional file paths to ground the generated examples.
    example_count : int, default 3
        Number of examples to generate.

    Returns
    -------
    str
        Formatted configuration request.
    """
    # No caller-supplied examples: instruct the model to synthesize them.
    generation_requirements = [
        f"Generate {example_count} high-quality examples that align with the prompt.",
        "Ensure each example includes realistic source text and extractions.",
        "Cover every extraction class across the examples.",
    ]
    template_context = {
        "name": name,
        "prompt_description": prompt_description,
        "extraction_classes": list(extraction_classes),
        "example_count": example_count,
        "example_files": _load_example_files(example_files),
        "examples_json": "- None provided. You must generate examples.",
        "example_requirements": generation_requirements,
    }
    return PROMPT_RENDERER.render(
        EXTRACTOR_CONFIG_TEMPLATE_NAME,
        context=template_context,
    )
152
+
153
+
154
def _format_extractor_config_request_with_examples(
    name: str,
    prompt_description: str,
    extraction_classes: Sequence[str],
    examples: Sequence[ExampleDataStructure],
) -> str:
    """Build the request text for configuration generation with examples.

    Parameters
    ----------
    name : str
        Name for the extractor configuration.
    prompt_description : str
        Optimized prompt description to use.
    extraction_classes : Sequence[str]
        Extraction classes to include.
    examples : Sequence[ExampleDataStructure]
        Example payloads to include.

    Returns
    -------
    str
        Formatted configuration request.
    """
    # Examples are embedded verbatim as a JSON payload in the request.
    examples_payload = json.dumps(
        [example.to_json() for example in examples], indent=2
    )
    return PROMPT_RENDERER.render(
        EXTRACTOR_CONFIG_TEMPLATE_NAME,
        context={
            "name": name,
            "prompt_description": prompt_description,
            "extraction_classes": list(extraction_classes),
            "example_count": DEFAULT_EXAMPLE_COUNT,
            "example_files": [],
            "examples_json": examples_payload,
            "example_requirements": ["Use the provided examples exactly as written."],
        },
    )
192
+
193
+
194
+ def _load_example_files(
195
+ example_files: Sequence[str | Path] | None,
196
+ ) -> list[dict[str, str]]:
197
+ """Load optional example files for grounded extraction generation.
198
+
199
+ Parameters
200
+ ----------
201
+ example_files : Sequence[str or Path] or None
202
+ File paths to load for grounding examples.
203
+
204
+ Returns
205
+ -------
206
+ list of dict[str, str]
207
+ Loaded file metadata including path and content.
208
+
209
+ Raises
210
+ ------
211
+ FileNotFoundError
212
+ If any provided file does not exist.
213
+ """
214
+ if not example_files:
215
+ return []
216
+ loaded_files: list[dict[str, str]] = []
217
+ for file_path in example_files:
218
+ path = Path(file_path)
219
+ content = path.read_text()
220
+ loaded_files.append({"path": str(path), "content": content})
221
+ return loaded_files
222
+
223
+
224
def optimize_extractor_prompt(
    openai_settings: OpenAISettings,
    prompt: str,
    extraction_classes: Sequence[str],
    *,
    additional_context: str | None = None,
) -> str:
    """Produce an optimized extraction prompt via the prompter response.

    Parameters
    ----------
    openai_settings : OpenAISettings
        Settings used to configure the OpenAI client.
    prompt : str
        User-supplied prompt content.
    extraction_classes : Sequence[str]
        Extraction classes to include in the optimized prompt.
    additional_context : str or None, default None
        Optional context that should influence prompt generation.

    Returns
    -------
    str
        Optimized prompt description.

    Raises
    ------
    TypeError
        If the prompter response does not return a prompt string.
    """
    request_text = _format_extractor_prompt_request(
        prompt, extraction_classes, additional_context
    )
    response = PROMPTER.gen_response(openai_settings=openai_settings)
    try:
        raw_result = response.run_sync(request_text)
    finally:
        # Release response resources even when run_sync raises.
        response.close()

    if isinstance(raw_result, PromptStructure):
        optimized = raw_result.prompt
    elif isinstance(raw_result, str):
        optimized = raw_result
    else:
        raise TypeError("Prompter response must return a PromptStructure or string.")
    return optimized
270
+
271
+
272
def optimize_extractor_prompt_with_agent(
    openai_settings: OpenAISettings,
    prompt: str,
    extraction_classes: Sequence[str],
    *,
    additional_context: str | None = None,
) -> str:
    """Produce an optimized extraction prompt via an AgentBase run.

    Parameters
    ----------
    openai_settings : OpenAISettings
        Settings used to configure the agent model.
    prompt : str
        User-supplied prompt content.
    extraction_classes : Sequence[str]
        Extraction classes to include in the optimized prompt.
    additional_context : str or None, default None
        Optional context that should influence prompt generation.

    Returns
    -------
    str
        Optimized prompt description.

    Raises
    ------
    TypeError
        If the agent response does not return a prompt string.
    ValueError
        If no default model is configured.
    """
    # Agent runs need an explicit model; fail fast when it is absent.
    if not openai_settings.default_model:
        raise ValueError("OpenAISettings.default_model is required for agent runs.")
    request_text = _format_extractor_prompt_request(
        prompt, extraction_classes, additional_context
    )
    optimizer_agent = AgentBase(
        configuration=AgentConfiguration(
            name="extractor_prompt_optimizer",
            description="Optimize extraction prompt descriptions.",
            model=openai_settings.default_model,
            instructions=PROMPT_OPTIMIZER_AGENT_INSTRUCTIONS,
            output_structure=PromptStructure,
        )
    )
    outcome = optimizer_agent.run_sync(request_text)

    if isinstance(outcome, PromptStructure):
        return outcome.prompt
    if isinstance(outcome, str):
        return outcome
    raise TypeError("Agent response must return a PromptStructure or string.")
326
+
327
+
328
def generate_document_extractor_config(
    openai_settings: OpenAISettings,
    name: str,
    prompt: str,
    extraction_classes: Sequence[str],
    *,
    example_files: Sequence[str | Path] | None = None,
    example_count: int = DEFAULT_EXAMPLE_COUNT,
    additional_context: str | None = None,
) -> DocumentExtractorConfig:
    """Generate a DocumentExtractorConfig using response-based helpers.

    Parameters
    ----------
    openai_settings : OpenAISettings
        Settings used to configure the OpenAI client.
    name : str
        Name for the extractor configuration.
    prompt : str
        User-supplied prompt content.
    extraction_classes : Sequence[str]
        Extraction classes to include in the configuration.
    example_files : Sequence[str or Path] or None, default None
        Optional file paths used to ground the generated examples.
    example_count : int, default 3
        Number of examples to generate.
    additional_context : str or None, default None
        Optional context that should influence prompt generation.

    Returns
    -------
    DocumentExtractorConfig
        Generated extractor configuration.

    Raises
    ------
    TypeError
        If the generator response does not return a DocumentExtractorConfig.
    """
    # First pass: optimize the user's prompt, then request a full config.
    optimized_prompt = optimize_extractor_prompt(
        openai_settings,
        prompt,
        extraction_classes,
        additional_context=additional_context,
    )
    request_text = _format_extractor_config_request(
        name,
        optimized_prompt,
        extraction_classes,
        example_files=example_files,
        example_count=example_count,
    )
    response = EXTRACTOR_CONFIG_GENERATOR.gen_response(openai_settings=openai_settings)
    try:
        generated = response.run_sync(request_text)
    finally:
        # Release response resources even when run_sync raises.
        response.close()

    if isinstance(generated, DocumentExtractorConfig):
        return generated
    if isinstance(generated, dict):
        return DocumentExtractorConfig.model_validate(generated)
    raise TypeError(
        "Extractor config generator must return a DocumentExtractorConfig or dict."
    )
393
+
394
+
395
def generate_document_extractor_config_with_agent(
    openai_settings: OpenAISettings,
    name: str,
    prompt: str,
    extraction_classes: Sequence[str],
    examples: Sequence[ExampleDataStructure],
    *,
    additional_context: str | None = None,
) -> DocumentExtractorConfig:
    """Generate a DocumentExtractorConfig using AgentBase workflows.

    Parameters
    ----------
    openai_settings : OpenAISettings
        Settings used to configure the agent model.
    name : str
        Name for the extractor configuration.
    prompt : str
        User-supplied prompt content.
    extraction_classes : Sequence[str]
        Extraction classes to include in the configuration.
    examples : Sequence[ExampleDataStructure]
        Example payloads supplied to LangExtract.
    additional_context : str or None, default None
        Optional context that should influence prompt generation.

    Returns
    -------
    DocumentExtractorConfig
        Generated extractor configuration.

    Raises
    ------
    TypeError
        If the agent response does not return a DocumentExtractorConfig.
    ValueError
        If no examples are provided.
    """
    # Fail fast on missing prerequisites before any model calls happen.
    if not examples:
        raise ValueError("At least one ExampleDataStructure instance is required.")
    if not openai_settings.default_model:
        raise ValueError("OpenAISettings.default_model is required for agent runs.")

    optimized_prompt = optimize_extractor_prompt_with_agent(
        openai_settings,
        prompt,
        extraction_classes,
        additional_context=additional_context,
    )
    request_text = _format_extractor_config_request_with_examples(
        name,
        optimized_prompt,
        extraction_classes,
        examples,
    )
    config_agent = AgentBase(
        configuration=AgentConfiguration(
            name="extractor_config_generator",
            description="Generate DocumentExtractorConfig instances.",
            model=openai_settings.default_model,
            instructions=EXTRACTOR_CONFIG_AGENT_INSTRUCTIONS,
            output_structure=DocumentExtractorConfig,
        )
    )
    generated = config_agent.run_sync(request_text)

    if isinstance(generated, DocumentExtractorConfig):
        return generated
    if isinstance(generated, dict):
        return DocumentExtractorConfig.model_validate(generated)
    raise TypeError(
        "Agent config generator must return a DocumentExtractorConfig or dict."
    )
466
+
467
+
468
# Public API of this module: rendered configurations/instructions plus the
# response-based and agent-based generation entry points.
__all__ = [
    "EXTRACTOR_CONFIG_GENERATOR",
    "EXTRACTOR_CONFIG_AGENT_INSTRUCTIONS",
    "PROMPT_OPTIMIZER_AGENT_INSTRUCTIONS",
    "generate_document_extractor_config",
    "generate_document_extractor_config_with_agent",
    "optimize_extractor_prompt",
    "optimize_extractor_prompt_with_agent",
]
@@ -374,6 +374,7 @@ class FilesAPIManager:
374
374
  log(
375
375
  f"Error deleting tracked file {file_id}: {exc}",
376
376
  level=logging.WARNING,
377
+ exc=exc,
377
378
  )
378
379
  results[file_id] = False
379
380
 
@@ -8,6 +8,7 @@ def log(
8
8
  level: int = logging.INFO,
9
9
  *,
10
10
  logger_name: str = "openai_sdk_helpers",
11
+ exc: BaseException | None = None,
11
12
  ) -> None:
12
13
  """Log a message using Python's standard logging.
13
14
 
@@ -20,6 +21,13 @@ def log(
20
21
  Default is logging.INFO.
21
22
  logger_name : str
22
23
  Name of the logger. Default is "openai_sdk_helpers".
24
+ exc : BaseException or None, optional
25
+ Exception instance to include with the log record. Default is None.
26
+
27
+ Returns
28
+ -------
29
+ None
30
+ Return None after emitting the log entry.
23
31
 
24
32
  Examples
25
33
  --------
@@ -28,7 +36,10 @@ def log(
28
36
  >>> log("Debug info", level=logging.DEBUG)
29
37
  """
30
38
  logger = logging.getLogger(logger_name)
31
- logger.log(level, message)
39
+ exc_info = None
40
+ if exc is not None:
41
+ exc_info = (type(exc), exc, exc.__traceback__)
42
+ logger.log(level, message, exc_info=exc_info)
32
43
 
33
44
 
34
45
  __all__ = ["log"]
@@ -0,0 +1,6 @@
1
+ Generate a DocumentExtractorConfig using the provided details. Follow the example
2
+ approach: examples should be high-quality and match the prompt. Set the configuration
3
+ name exactly as provided. Preserve the prompt description, extraction classes, and
4
+ examples. Include meaningful attributes when applicable.
5
+
6
+ {{ config_schema }}