openai-sdk-helpers 0.6.0__py3-none-any.whl → 0.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openai_sdk_helpers/agent/__init__.py +2 -0
- openai_sdk_helpers/agent/base.py +88 -12
- openai_sdk_helpers/agent/classifier.py +905 -94
- openai_sdk_helpers/agent/configuration.py +42 -0
- openai_sdk_helpers/agent/files.py +120 -0
- openai_sdk_helpers/agent/runner.py +9 -9
- openai_sdk_helpers/agent/translator.py +2 -2
- openai_sdk_helpers/files_api.py +46 -1
- openai_sdk_helpers/prompt/classifier.jinja +28 -7
- openai_sdk_helpers/settings.py +65 -0
- openai_sdk_helpers/structure/__init__.py +4 -0
- openai_sdk_helpers/structure/base.py +79 -55
- openai_sdk_helpers/structure/classification.py +265 -43
- openai_sdk_helpers/structure/plan/enum.py +4 -0
- {openai_sdk_helpers-0.6.0.dist-info → openai_sdk_helpers-0.6.2.dist-info}/METADATA +12 -1
- {openai_sdk_helpers-0.6.0.dist-info → openai_sdk_helpers-0.6.2.dist-info}/RECORD +19 -18
- {openai_sdk_helpers-0.6.0.dist-info → openai_sdk_helpers-0.6.2.dist-info}/WHEEL +0 -0
- {openai_sdk_helpers-0.6.0.dist-info → openai_sdk_helpers-0.6.2.dist-info}/entry_points.txt +0 -0
- {openai_sdk_helpers-0.6.0.dist-info → openai_sdk_helpers-0.6.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -13,6 +13,7 @@ from ..utils.json.data_class import DataclassJSONSerializable
|
|
|
13
13
|
from ..utils.registry import RegistryBase
|
|
14
14
|
from ..utils.instructions import resolve_instructions_from_path
|
|
15
15
|
from ..structure.base import StructureBase
|
|
16
|
+
from ..settings import OpenAISettings
|
|
16
17
|
|
|
17
18
|
|
|
18
19
|
class AgentRegistry(RegistryBase["AgentConfiguration"]):
|
|
@@ -152,6 +153,8 @@ class AgentConfiguration(DataclassJSONSerializable):
|
|
|
152
153
|
Resolve the prompt template path for this configuration.
|
|
153
154
|
gen_agent(run_context_wrapper)
|
|
154
155
|
Create an AgentBase instance from this configuration.
|
|
156
|
+
to_openai_settings(dotenv_path=None, **overrides)
|
|
157
|
+
Build OpenAISettings using this configuration as defaults.
|
|
155
158
|
replace(**changes)
|
|
156
159
|
Create a new AgentConfiguration with specified fields replaced.
|
|
157
160
|
to_json()
|
|
@@ -272,6 +275,45 @@ class AgentConfiguration(DataclassJSONSerializable):
|
|
|
272
275
|
"""Resolve instructions from string or file path."""
|
|
273
276
|
return resolve_instructions_from_path(self.instructions)
|
|
274
277
|
|
|
278
|
+
def to_openai_settings(
    self,
    *,
    dotenv_path: Path | None = None,
    **overrides: Any,
) -> OpenAISettings:
    """Create ``OpenAISettings`` seeded with this configuration's model.

    The configuration's ``model`` is forwarded as ``default_model`` unless
    the caller already passed a ``default_model`` override. Everything else
    is delegated to ``OpenAISettings.from_env``.

    Parameters
    ----------
    dotenv_path : Path or None, optional
        Optional dotenv file path for loading environment variables.
    overrides : Any
        Keyword overrides applied on top of environment values. Use this
        to supply API credentials and override defaults.

    Returns
    -------
    OpenAISettings
        OpenAI settings instance with defaults derived from this
        configuration.

    Raises
    ------
    ValueError
        If no API key is supplied via overrides or environment variables.

    Examples
    --------
    >>> configuration = AgentConfiguration(
    ...     name="summarizer",
    ...     instructions="Summarize text",
    ...     model="gpt-4o-mini",
    ... )
    >>> settings = configuration.to_openai_settings(api_key="sk-...")
    >>> # Or rely on environment variables like OPENAI_API_KEY
    >>> settings = configuration.to_openai_settings()
    """
    settings_kwargs: dict[str, Any] = dict(overrides)
    if self.model:
        # An explicit ``default_model`` override always wins over the
        # configuration's own model.
        settings_kwargs.setdefault("default_model", self.model)
    return OpenAISettings.from_env(dotenv_path=dotenv_path, **settings_kwargs)
|
|
316
|
+
|
|
275
317
|
def resolve_prompt_path(self, prompt_dir: Path | None = None) -> Path | None:
|
|
276
318
|
"""Resolve the prompt template path for this configuration.
|
|
277
319
|
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""File attachment helpers for the Agents SDK."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, Literal
|
|
6
|
+
|
|
7
|
+
from ..files_api import FilePurpose, FilesAPIManager
|
|
8
|
+
from ..settings import OpenAISettings
|
|
9
|
+
from ..utils import create_image_data_url, ensure_list, is_image_file
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def build_agent_input_messages(
    content: str | list[str],
    files: str | list[str] | None = None,
    *,
    files_manager: FilesAPIManager | None = None,
    openai_settings: OpenAISettings | None = None,
    file_purpose: FilePurpose = "user_data",
    image_detail: Literal["low", "high", "auto"] = "auto",
) -> list[dict[str, Any]]:
    """Assemble Agents SDK user messages, optionally carrying attachments.

    One ``user`` message is produced per prompt text. All attachments
    (uploaded documents first, then inline images) are added to the first
    message only.

    Parameters
    ----------
    content : str or list[str]
        Prompt text or list of prompt texts to send. Each text is stripped
        of surrounding whitespace.
    files : str, list[str], or None, default None
        Optional file path or list of file paths. Image files are sent as
        ``input_image`` entries built by ``create_image_data_url``. All
        other files are uploaded and sent as ``input_file`` entries.
    files_manager : FilesAPIManager or None, default None
        File upload helper used to create file IDs for document uploads.
    openai_settings : OpenAISettings or None, default None
        Used to build a ``FilesAPIManager`` from
        ``openai_settings.create_client()`` when ``files_manager`` is not
        provided and documents need uploading.
    file_purpose : FilePurpose, default "user_data"
        Purpose passed to the Files API when uploading document files.
        Uploads with purpose ``"user_data"`` are given an ``expires_after``
        of 86400 seconds; other purposes pass ``None``.
    image_detail : {"low", "high", "auto"}, default "auto"
        Detail hint passed to ``create_image_data_url`` for image inputs.

    Returns
    -------
    list[dict[str, Any]]
        Agents SDK input messages that include text and optional file entries.

    Raises
    ------
    ValueError
        If document files are provided and neither ``files_manager`` nor
        ``openai_settings`` is available to upload them.

    Examples
    --------
    >>> from openai import OpenAI
    >>> from openai_sdk_helpers.files_api import FilesAPIManager
    >>> from openai_sdk_helpers.agent.files import build_agent_input_messages
    >>> client = OpenAI()
    >>> files_manager = FilesAPIManager(client)
    >>> messages = build_agent_input_messages(
    ...     "Summarize this document",
    ...     files="report.pdf",
    ...     files_manager=files_manager,
    ... )
    """
    prompts = ensure_list(content)

    # Partition the paths: images are inlined, everything else is uploaded.
    image_paths: list[str] = []
    document_paths: list[str] = []
    for path in ensure_list(files):
        bucket = image_paths if is_image_file(path) else document_paths
        bucket.append(path)

    attachments: list[dict[str, Any]] = []

    if document_paths:
        # Build a manager from settings only when the caller gave none.
        if files_manager is None and openai_settings is not None:
            files_manager = FilesAPIManager(openai_settings.create_client())
        if files_manager is None:
            raise ValueError(
                "files_manager is required to upload document files for agent input."
            )

        expires_after = 86400 if file_purpose == "user_data" else None
        if hasattr(files_manager, "batch_upload"):
            uploaded = files_manager.batch_upload(
                document_paths,
                purpose=file_purpose,
                expires_after=expires_after,
            )
        else:
            # Managers without batch support are driven one file at a time.
            uploaded = []
            for path in document_paths:
                uploaded.append(
                    files_manager.create(
                        path, purpose=file_purpose, expires_after=expires_after
                    )
                )
        attachments.extend(
            {"type": "input_file", "file_id": item.id} for item in uploaded
        )

    for path in image_paths:
        image_url, detail = create_image_data_url(path, detail=image_detail)
        attachments.append(
            {"type": "input_image", "image_url": image_url, "detail": detail}
        )

    # Attachments accompany the first prompt only.
    return [
        {
            "role": "user",
            "content": [
                {"type": "input_text", "text": prompt.strip()},
                *(attachments if position == 0 else []),
            ],
        }
        for position, prompt in enumerate(prompts)
    ]
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
__all__ = ["build_agent_input_messages"]
|
|
@@ -7,7 +7,7 @@ signatures whether they need asynchronous or synchronous results.
|
|
|
7
7
|
|
|
8
8
|
from __future__ import annotations
|
|
9
9
|
|
|
10
|
-
from typing import Any, Dict, Optional
|
|
10
|
+
from typing import Any, Dict, Optional, cast
|
|
11
11
|
|
|
12
12
|
from agents import Agent, RunResult, Runner, Session
|
|
13
13
|
|
|
@@ -17,7 +17,7 @@ from ..structure.base import StructureBase
|
|
|
17
17
|
|
|
18
18
|
async def run_async(
|
|
19
19
|
agent: Agent,
|
|
20
|
-
input: str,
|
|
20
|
+
input: str | list[dict[str, Any]],
|
|
21
21
|
*,
|
|
22
22
|
context: Optional[Dict[str, Any]] = None,
|
|
23
23
|
output_structure: Optional[type[StructureBase]] = None,
|
|
@@ -29,8 +29,8 @@ async def run_async(
|
|
|
29
29
|
----------
|
|
30
30
|
agent : Agent
|
|
31
31
|
Configured agent instance to execute.
|
|
32
|
-
input : str
|
|
33
|
-
Prompt or
|
|
32
|
+
input : str or list[dict[str, Any]]
|
|
33
|
+
Prompt text or structured input for the agent.
|
|
34
34
|
context : dict or None, default=None
|
|
35
35
|
Optional context dictionary passed to the agent.
|
|
36
36
|
output_structure : type[StructureBase] or None, default=None
|
|
@@ -53,7 +53,7 @@ async def run_async(
|
|
|
53
53
|
... return result
|
|
54
54
|
>>> asyncio.run(example()) # doctest: +SKIP
|
|
55
55
|
"""
|
|
56
|
-
result = await Runner.run(agent, input, context=context, session=session)
|
|
56
|
+
result = await Runner.run(agent, cast(Any, input), context=context, session=session)
|
|
57
57
|
if output_structure is not None:
|
|
58
58
|
return result.final_output_as(output_structure)
|
|
59
59
|
return result
|
|
@@ -61,7 +61,7 @@ async def run_async(
|
|
|
61
61
|
|
|
62
62
|
def run_sync(
|
|
63
63
|
agent: Agent,
|
|
64
|
-
input: str,
|
|
64
|
+
input: str | list[dict[str, Any]],
|
|
65
65
|
*,
|
|
66
66
|
context: Optional[Dict[str, Any]] = None,
|
|
67
67
|
output_structure: Optional[type[StructureBase]] = None,
|
|
@@ -77,8 +77,8 @@ def run_sync(
|
|
|
77
77
|
----------
|
|
78
78
|
agent : Agent
|
|
79
79
|
Configured agent instance to execute.
|
|
80
|
-
input : str
|
|
81
|
-
Prompt or
|
|
80
|
+
input : str or list[dict[str, Any]]
|
|
81
|
+
Prompt text or structured input for the agent.
|
|
82
82
|
context : dict or None, default=None
|
|
83
83
|
Optional context dictionary passed to the agent.
|
|
84
84
|
output_structure : type[StructureBase] or None, default=None
|
|
@@ -102,7 +102,7 @@ def run_sync(
|
|
|
102
102
|
>>> agent = Agent(name="test", instructions="test", model="gpt-4o-mini")
|
|
103
103
|
>>> result = run_sync(agent, "What is 2+2?") # doctest: +SKIP
|
|
104
104
|
"""
|
|
105
|
-
coro = Runner.run(agent, input, context=context, session=session)
|
|
105
|
+
coro = Runner.run(agent, cast(Any, input), context=context, session=session)
|
|
106
106
|
result: RunResult = run_coroutine_with_fallback(coro)
|
|
107
107
|
if output_structure is not None:
|
|
108
108
|
return result.final_output_as(output_structure)
|
|
@@ -138,7 +138,7 @@ class TranslatorAgent(AgentBase):
|
|
|
138
138
|
|
|
139
139
|
def run_sync(
|
|
140
140
|
self,
|
|
141
|
-
input: str,
|
|
141
|
+
input: str | list[dict[str, Any]],
|
|
142
142
|
*,
|
|
143
143
|
context: Optional[Dict[str, Any]] = None,
|
|
144
144
|
output_structure: Optional[type[StructureBase]] = None,
|
|
@@ -149,7 +149,7 @@ class TranslatorAgent(AgentBase):
|
|
|
149
149
|
|
|
150
150
|
Parameters
|
|
151
151
|
----------
|
|
152
|
-
input : str
|
|
152
|
+
input : str or list[dict[str, Any]]
|
|
153
153
|
Source content to translate.
|
|
154
154
|
context : dict or None, default=None
|
|
155
155
|
Additional context values to merge into the prompt.
|
openai_sdk_helpers/files_api.py
CHANGED
|
@@ -12,7 +12,7 @@ from __future__ import annotations
|
|
|
12
12
|
|
|
13
13
|
import logging
|
|
14
14
|
from pathlib import Path
|
|
15
|
-
from typing import Any, BinaryIO, Literal, cast
|
|
15
|
+
from typing import Any, BinaryIO, Literal, Sequence, cast
|
|
16
16
|
|
|
17
17
|
from openai import OpenAI, NOT_GIVEN
|
|
18
18
|
from openai.types import FileDeleted, FileObject
|
|
@@ -62,6 +62,8 @@ class FilesAPIManager:
|
|
|
62
62
|
Delete a specific file.
|
|
63
63
|
retrieve_content(file_id)
|
|
64
64
|
Download file content.
|
|
65
|
+
batch_upload(files, purpose, track, expires_after)
|
|
66
|
+
Upload multiple files to the Files API.
|
|
65
67
|
cleanup()
|
|
66
68
|
Delete all tracked files.
|
|
67
69
|
|
|
@@ -350,6 +352,49 @@ class FilesAPIManager:
|
|
|
350
352
|
"""
|
|
351
353
|
return self._client.files.content(file_id).read()
|
|
352
354
|
|
|
355
|
+
def batch_upload(
    self,
    files: Sequence[BinaryIO | Path | str],
    purpose: FilePurpose,
    track: bool | None = None,
    expires_after: int | None = None,
) -> list[FileObject]:
    """Upload multiple files to the OpenAI Files API.

    Each entry is uploaded with ``self.create`` using the same ``purpose``,
    ``track`` and ``expires_after`` values.

    Parameters
    ----------
    files : Sequence[BinaryIO | Path | str]
        File-like objects or file paths to upload. A single ``str`` or
        ``Path`` is accepted and treated as a one-item batch.
    purpose : FilePurpose
        The intended purpose of the uploaded files.
    track : bool or None, default None
        Override auto_track for these uploads. If None, uses instance setting.
    expires_after : int or None, default None
        Number of seconds after which files expire. See ``create`` for details.

    Returns
    -------
    list[FileObject]
        Uploaded file objects in the same order as ``files``. Empty when
        ``files`` is empty.

    Examples
    --------
    >>> files = ["doc1.pdf", "doc2.pdf"]
    >>> uploaded = manager.batch_upload(files, purpose="user_data")
    >>> [file.id for file in uploaded]
    """
    if not files:
        return []
    # A bare string is itself a sequence (of characters) and a bare Path is
    # not iterable at all; wrap either so one path means one upload.
    if isinstance(files, (str, Path)):
        files = [files]
    return [
        self.create(
            entry,
            purpose=purpose,
            track=track,
            expires_after=expires_after,
        )
        for entry in files
    ]
|
|
397
|
+
|
|
353
398
|
def cleanup(self) -> dict[str, bool]:
|
|
354
399
|
"""Delete all tracked files.
|
|
355
400
|
|
|
@@ -1,18 +1,39 @@
|
|
|
1
1
|
You are a taxonomy classification assistant.
|
|
2
2
|
|
|
3
3
|
Instructions:
|
|
4
|
-
- Review the text and select
|
|
5
|
-
-
|
|
6
|
-
-
|
|
7
|
-
-
|
|
8
|
-
-
|
|
4
|
+
- Review the text and select all matching taxonomy nodes from the list.
|
|
5
|
+
- Populate selected_nodes as a list of taxonomy node ids for multi-class matches.
|
|
6
|
+
- Use selected_node when a single best match is appropriate.
|
|
7
|
+
- Provide a confidence score between 0 and 1 for the selections; higher means more certain.
|
|
8
|
+
- Interpret confidence as:
|
|
9
|
+
- 0.90–1.00: explicit lexical match.
|
|
10
|
+
- 0.70–0.89: strong semantic alignment.
|
|
11
|
+
- 0.40–0.69: weak or ambiguous alignment.
|
|
12
|
+
- <0.40: low-confidence inference.
|
|
13
|
+
- Use only taxonomy identifiers from the candidate list for any selections.
|
|
14
|
+
- Use the stop_reason enum values only: "continue", "stop", "no_match", "max_depth", "no_children".
|
|
15
|
+
- Stop reason semantics:
|
|
16
|
+
- continue: valid match exists and deeper traversal is required.
|
|
17
|
+
- stop: low confidence, terminate to avoid false precision.
|
|
18
|
+
- no_match: no semantic fit in candidates.
|
|
19
|
+
- max_depth: taxonomy depth limit reached.
|
|
20
|
+
- no_children: matched node has no children.
|
|
21
|
+
- Decision mapping:
|
|
22
|
+
- High or medium confidence with children available: continue.
|
|
23
|
+
- High confidence with terminal node: no_children.
|
|
24
|
+
- Low confidence match: stop.
|
|
25
|
+
- No semantic alignment: no_match.
|
|
26
|
+
- Depth limit reached: max_depth.
|
|
27
|
+
- Provide a concise rationale in one sentence.
|
|
28
|
+
- Keep rationale evidence-based and avoid restating taxonomy labels.
|
|
29
|
+
- Avoid verbosity, speculation, stylistic language, narrative explanation, redundancy, or creativity.
|
|
9
30
|
|
|
10
31
|
Current depth: {{ depth }}
|
|
11
32
|
|
|
12
33
|
Previous path:
|
|
13
34
|
{% if path %}
|
|
14
35
|
{% for step in path %}
|
|
15
|
-
- {{ step.
|
|
36
|
+
- {{ step.selected_node }} (confidence={{ step.confidence }}, stop_reason={{ step.stop_reason }})
|
|
16
37
|
{% endfor %}
|
|
17
38
|
{% else %}
|
|
18
39
|
- None
|
|
@@ -20,7 +41,7 @@ Previous path:
|
|
|
20
41
|
|
|
21
42
|
Candidate taxonomy nodes:
|
|
22
43
|
{% for node in taxonomy_nodes %}
|
|
23
|
-
-
|
|
44
|
+
- identifier: {{ node.identifier }}
|
|
24
45
|
label: {{ node.label }}
|
|
25
46
|
description: {{ node.description or "None" }}
|
|
26
47
|
{% endfor %}
|
openai_sdk_helpers/settings.py
CHANGED
|
@@ -48,6 +48,8 @@ class OpenAISettings(BaseModel):
|
|
|
48
48
|
-------
|
|
49
49
|
from_env(dotenv_path, **overrides)
|
|
50
50
|
Build settings from environment variables and optional overrides.
|
|
51
|
+
from_secrets(secrets, **overrides)
|
|
52
|
+
Build settings from a secrets mapping and optional overrides.
|
|
51
53
|
client_kwargs()
|
|
52
54
|
Return keyword arguments for ``OpenAI`` initialization.
|
|
53
55
|
create_client()
|
|
@@ -190,6 +192,69 @@ class OpenAISettings(BaseModel):
|
|
|
190
192
|
|
|
191
193
|
return settings
|
|
192
194
|
|
|
195
|
+
@classmethod
def from_secrets(
    cls,
    secrets: Mapping[str, Any] | None = None,
    **overrides: Any,
) -> OpenAISettings:
    """Build settings from a secrets mapping plus keyword overrides.

    For every field, an override that is not ``None`` wins; otherwise the
    value is read from the secrets mapping under its environment-variable
    name. ``extra_client_kwargs`` is taken from overrides only.

    Parameters
    ----------
    secrets : Mapping[str, Any] or None, optional
        Mapping of secret values keyed by environment variable names.
        When ``None`` or empty, ``os.environ`` is used instead.
    overrides : Any
        Keyword overrides applied on top of secret values.

    Returns
    -------
    OpenAISettings
        Settings instance populated from secret values and overrides.

    Raises
    ------
    ValueError
        If the resulting settings have no API key.
    """
    # Any falsy ``secrets`` (None or an empty mapping) falls back to the
    # process environment.
    source: Mapping[str, Any] = secrets if secrets else os.environ

    def pick(override_key: str, secret_key: str) -> Any:
        """Return the override unless it is None, else the secret value."""
        explicit = overrides.get(override_key)
        stored = source.get(secret_key)
        return stored if explicit is None else explicit

    # Numeric settings are looked up first and coerced when stored below.
    timeout_raw = pick("timeout", "OPENAI_TIMEOUT")
    max_retries_raw = pick("max_retries", "OPENAI_MAX_RETRIES")

    passthrough_fields = (
        ("api_key", "OPENAI_API_KEY"),
        ("org_id", "OPENAI_ORG_ID"),
        ("project_id", "OPENAI_PROJECT_ID"),
        ("base_url", "OPENAI_BASE_URL"),
        ("default_model", "OPENAI_MODEL"),
    )
    values: dict[str, Any] = {
        field: pick(field, secret_name)
        for field, secret_name in passthrough_fields
    }
    values["timeout"] = coerce_optional_float(timeout_raw)
    values["max_retries"] = coerce_optional_int(max_retries_raw)
    values["extra_client_kwargs"] = coerce_dict(
        overrides.get("extra_client_kwargs")
    )

    settings = cls(**values)
    if settings.api_key:
        return settings
    raise ValueError(
        "OPENAI_API_KEY is required to configure the OpenAI client"
        " and was not found in secrets."
    )
|
|
257
|
+
|
|
193
258
|
def client_kwargs(self) -> dict[str, Any]:
|
|
194
259
|
"""Return keyword arguments for constructing an OpenAI client.
|
|
195
260
|
|
|
@@ -80,8 +80,10 @@ from .classification import (
|
|
|
80
80
|
ClassificationResult,
|
|
81
81
|
ClassificationStep,
|
|
82
82
|
ClassificationStopReason,
|
|
83
|
+
Taxonomy,
|
|
83
84
|
TaxonomyNode,
|
|
84
85
|
flatten_taxonomy,
|
|
86
|
+
taxonomy_enum_path,
|
|
85
87
|
)
|
|
86
88
|
from .extraction import (
|
|
87
89
|
AnnotatedDocumentStructure,
|
|
@@ -108,8 +110,10 @@ __all__ = [
|
|
|
108
110
|
"ClassificationResult",
|
|
109
111
|
"ClassificationStep",
|
|
110
112
|
"ClassificationStopReason",
|
|
113
|
+
"Taxonomy",
|
|
111
114
|
"TaxonomyNode",
|
|
112
115
|
"flatten_taxonomy",
|
|
116
|
+
"taxonomy_enum_path",
|
|
113
117
|
"TaskStructure",
|
|
114
118
|
"PlanStructure",
|
|
115
119
|
"create_plan",
|
|
@@ -134,9 +134,21 @@ def _ensure_items_have_schema(target: Any) -> None:
|
|
|
134
134
|
|
|
135
135
|
def _ensure_schema_has_type(schema: dict[str, Any]) -> None:
|
|
136
136
|
"""Ensure a schema dictionary includes a type entry when possible."""
|
|
137
|
+
any_of = schema.get("anyOf")
|
|
138
|
+
if isinstance(any_of, list):
|
|
139
|
+
for entry in any_of:
|
|
140
|
+
if isinstance(entry, dict):
|
|
141
|
+
_ensure_schema_has_type(entry)
|
|
142
|
+
properties = schema.get("properties")
|
|
143
|
+
if isinstance(properties, dict):
|
|
144
|
+
for value in properties.values():
|
|
145
|
+
if isinstance(value, dict):
|
|
146
|
+
_ensure_schema_has_type(value)
|
|
147
|
+
items = schema.get("items")
|
|
148
|
+
if isinstance(items, dict):
|
|
149
|
+
_ensure_schema_has_type(items)
|
|
137
150
|
if "type" in schema or "$ref" in schema:
|
|
138
151
|
return
|
|
139
|
-
any_of = schema.get("anyOf")
|
|
140
152
|
if isinstance(any_of, list):
|
|
141
153
|
inferred_types: set[str] = set()
|
|
142
154
|
for entry in any_of:
|
|
@@ -162,6 +174,68 @@ def _ensure_schema_has_type(schema: dict[str, Any]) -> None:
|
|
|
162
174
|
schema.update(_build_any_value_schema())
|
|
163
175
|
|
|
164
176
|
|
|
177
|
+
def _hydrate_ref_types(schema: dict[str, Any]) -> None:
|
|
178
|
+
"""Attach explicit types to $ref nodes when available.
|
|
179
|
+
|
|
180
|
+
Parameters
|
|
181
|
+
----------
|
|
182
|
+
schema : dict[str, Any]
|
|
183
|
+
Schema dictionary to hydrate in place.
|
|
184
|
+
"""
|
|
185
|
+
definitions = schema.get("$defs") or schema.get("definitions") or {}
|
|
186
|
+
if not isinstance(definitions, dict):
|
|
187
|
+
definitions = {}
|
|
188
|
+
|
|
189
|
+
def _infer_enum_type(values: list[Any]) -> list[str] | str | None:
|
|
190
|
+
type_map = {
|
|
191
|
+
str: "string",
|
|
192
|
+
int: "integer",
|
|
193
|
+
float: "number",
|
|
194
|
+
bool: "boolean",
|
|
195
|
+
type(None): "null",
|
|
196
|
+
}
|
|
197
|
+
inferred: set[str] = set()
|
|
198
|
+
for value in values:
|
|
199
|
+
inferred_type = type_map.get(type(value))
|
|
200
|
+
if inferred_type is not None:
|
|
201
|
+
inferred.add(inferred_type)
|
|
202
|
+
if not inferred:
|
|
203
|
+
return None
|
|
204
|
+
if len(inferred) == 1:
|
|
205
|
+
return next(iter(inferred))
|
|
206
|
+
return sorted(inferred)
|
|
207
|
+
|
|
208
|
+
def _resolve_ref_type(ref: str) -> list[str] | str | None:
|
|
209
|
+
prefixes = ("#/$defs/", "#/definitions/")
|
|
210
|
+
if not ref.startswith(prefixes):
|
|
211
|
+
return None
|
|
212
|
+
key = ref.split("/", maxsplit=2)[-1]
|
|
213
|
+
definition = definitions.get(key)
|
|
214
|
+
if not isinstance(definition, dict):
|
|
215
|
+
return None
|
|
216
|
+
ref_type = definition.get("type")
|
|
217
|
+
if isinstance(ref_type, (str, list)):
|
|
218
|
+
return ref_type
|
|
219
|
+
enum_values = definition.get("enum")
|
|
220
|
+
if isinstance(enum_values, list):
|
|
221
|
+
return _infer_enum_type(enum_values)
|
|
222
|
+
return None
|
|
223
|
+
|
|
224
|
+
def _walk(node: Any) -> None:
|
|
225
|
+
if isinstance(node, dict):
|
|
226
|
+
if "$ref" in node and "type" not in node:
|
|
227
|
+
ref_type = _resolve_ref_type(node["$ref"])
|
|
228
|
+
if ref_type is not None:
|
|
229
|
+
node["type"] = ref_type
|
|
230
|
+
for value in node.values():
|
|
231
|
+
_walk(value)
|
|
232
|
+
elif isinstance(node, list):
|
|
233
|
+
for item in node:
|
|
234
|
+
_walk(item)
|
|
235
|
+
|
|
236
|
+
_walk(schema)
|
|
237
|
+
|
|
238
|
+
|
|
165
239
|
class StructureBase(BaseModelJSONSerializable):
|
|
166
240
|
"""Base class for structured output models with schema generation.
|
|
167
241
|
|
|
@@ -471,7 +545,7 @@ class StructureBase(BaseModelJSONSerializable):
|
|
|
471
545
|
if isinstance(obj, dict):
|
|
472
546
|
if "$ref" in obj:
|
|
473
547
|
for key in list(obj.keys()):
|
|
474
|
-
if key
|
|
548
|
+
if key not in {"$ref", "type"}:
|
|
475
549
|
obj.pop(key, None)
|
|
476
550
|
for v in obj.values():
|
|
477
551
|
clean_refs(v)
|
|
@@ -482,60 +556,10 @@ class StructureBase(BaseModelJSONSerializable):
|
|
|
482
556
|
|
|
483
557
|
cleaned_schema = cast(dict[str, Any], clean_refs(schema))
|
|
484
558
|
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
root: dict[str, Any],
|
|
488
|
-
seen: set[str],
|
|
489
|
-
) -> dict[str, Any] | None:
|
|
490
|
-
if not ref.startswith("#/"):
|
|
491
|
-
return None
|
|
492
|
-
if ref in seen:
|
|
493
|
-
return None
|
|
494
|
-
seen.add(ref)
|
|
495
|
-
|
|
496
|
-
current: Any = root
|
|
497
|
-
for part in ref.lstrip("#/").split("/"):
|
|
498
|
-
part = part.replace("~1", "/").replace("~0", "~")
|
|
499
|
-
if isinstance(current, dict) and part in current:
|
|
500
|
-
current = current[part]
|
|
501
|
-
else:
|
|
502
|
-
seen.discard(ref)
|
|
503
|
-
return None
|
|
504
|
-
if isinstance(current, dict):
|
|
505
|
-
resolved = cast(dict[str, Any], json.loads(json.dumps(current)))
|
|
506
|
-
else:
|
|
507
|
-
resolved = None
|
|
508
|
-
seen.discard(ref)
|
|
509
|
-
return resolved
|
|
510
|
-
|
|
511
|
-
def _inline_anyof_refs(obj: Any, root: dict[str, Any], seen: set[str]) -> Any:
|
|
512
|
-
if isinstance(obj, dict):
|
|
513
|
-
updated: dict[str, Any] = {}
|
|
514
|
-
for key, value in obj.items():
|
|
515
|
-
if key == "anyOf" and isinstance(value, list):
|
|
516
|
-
updated_items = []
|
|
517
|
-
for item in value:
|
|
518
|
-
if (
|
|
519
|
-
isinstance(item, dict)
|
|
520
|
-
and "$ref" in item
|
|
521
|
-
and "type" not in item
|
|
522
|
-
):
|
|
523
|
-
resolved = _resolve_ref(item["$ref"], root, seen)
|
|
524
|
-
if resolved is not None:
|
|
525
|
-
item = resolved
|
|
526
|
-
updated_items.append(_inline_anyof_refs(item, root, seen))
|
|
527
|
-
updated[key] = updated_items
|
|
528
|
-
else:
|
|
529
|
-
updated[key] = _inline_anyof_refs(value, root, seen)
|
|
530
|
-
return updated
|
|
531
|
-
if isinstance(obj, list):
|
|
532
|
-
return [_inline_anyof_refs(item, root, seen) for item in obj]
|
|
533
|
-
return obj
|
|
534
|
-
|
|
535
|
-
cleaned_schema = cast(
|
|
536
|
-
dict[str, Any], _inline_anyof_refs(cleaned_schema, schema, set())
|
|
537
|
-
)
|
|
559
|
+
cleaned_schema = cast(dict[str, Any], cleaned_schema)
|
|
560
|
+
_hydrate_ref_types(cleaned_schema)
|
|
538
561
|
_ensure_items_have_schema(cleaned_schema)
|
|
562
|
+
_ensure_schema_has_type(cleaned_schema)
|
|
539
563
|
|
|
540
564
|
nullable_fields = {
|
|
541
565
|
name
|