openai-sdk-helpers 0.5.2__py3-none-any.whl → 0.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openai_sdk_helpers/agent/__init__.py +2 -0
- openai_sdk_helpers/agent/base.py +13 -5
- openai_sdk_helpers/agent/classifier.py +848 -0
- openai_sdk_helpers/prompt/classifier.jinja +31 -0
- openai_sdk_helpers/response/base.py +26 -7
- openai_sdk_helpers/settings.py +65 -0
- openai_sdk_helpers/structure/__init__.py +12 -0
- openai_sdk_helpers/structure/base.py +79 -55
- openai_sdk_helpers/structure/classification.py +453 -0
- openai_sdk_helpers/structure/plan/enum.py +4 -0
- {openai_sdk_helpers-0.5.2.dist-info → openai_sdk_helpers-0.6.1.dist-info}/METADATA +12 -1
- {openai_sdk_helpers-0.5.2.dist-info → openai_sdk_helpers-0.6.1.dist-info}/RECORD +15 -12
- {openai_sdk_helpers-0.5.2.dist-info → openai_sdk_helpers-0.6.1.dist-info}/WHEEL +0 -0
- {openai_sdk_helpers-0.5.2.dist-info → openai_sdk_helpers-0.6.1.dist-info}/entry_points.txt +0 -0
- {openai_sdk_helpers-0.5.2.dist-info → openai_sdk_helpers-0.6.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
You are a taxonomy classification assistant.
|
|
2
|
+
|
|
3
|
+
Instructions:
|
|
4
|
+
- Review the text and select all matching taxonomy nodes from the list.
|
|
5
|
+
- Populate selected_nodes as a list of taxonomy node ids for multi-class matches.
|
|
6
|
+
- Use selected_node when a single best match is appropriate.
|
|
7
|
+
- Provide a confidence score between 0 and 1 for the selections; higher means more certain.
|
|
8
|
+
- Use only taxonomy identifiers from the candidate list for any selections.
|
|
9
|
+
- Use the stop_reason enum values only: "continue", "stop", "no_match", "max_depth", "no_children".
|
|
10
|
+
- If a child level should be explored, set stop_reason to "continue".
|
|
11
|
+
- If no appropriate node exists, set stop_reason to "no_match" and leave selections empty.
|
|
12
|
+
- If you are confident this is the final level, set stop_reason to "stop".
|
|
13
|
+
- Provide a concise rationale in one or two sentences.
|
|
14
|
+
|
|
15
|
+
Current depth: {{ depth }}
|
|
16
|
+
|
|
17
|
+
Previous path:
|
|
18
|
+
{% if path %}
|
|
19
|
+
{% for step in path %}
|
|
20
|
+
- {{ step.selected_node }} (confidence={{ step.confidence }}, stop_reason={{ step.stop_reason }})
|
|
21
|
+
{% endfor %}
|
|
22
|
+
{% else %}
|
|
23
|
+
- None
|
|
24
|
+
{% endif %}
|
|
25
|
+
|
|
26
|
+
Candidate taxonomy nodes:
|
|
27
|
+
{% for node in taxonomy_nodes %}
|
|
28
|
+
- identifier: {{ node.identifier }}
|
|
29
|
+
label: {{ node.label }}
|
|
30
|
+
description: {{ node.description or "None" }}
|
|
31
|
+
{% endfor %}
|
|
@@ -509,6 +509,7 @@ class ResponseBase(Generic[T]):
|
|
|
509
509
|
content: str | list[str],
|
|
510
510
|
files: str | list[str] | None = None,
|
|
511
511
|
use_vector_store: bool = False,
|
|
512
|
+
save_messages: bool = True,
|
|
512
513
|
) -> T | str:
|
|
513
514
|
"""Generate a response asynchronously from the OpenAI API.
|
|
514
515
|
|
|
@@ -531,6 +532,9 @@ class ResponseBase(Generic[T]):
|
|
|
531
532
|
use_vector_store : bool, default False
|
|
532
533
|
If True, non-image files are uploaded to a vector store
|
|
533
534
|
for RAG-enabled search instead of inline base64 encoding.
|
|
535
|
+
save_messages : bool, default True
|
|
536
|
+
When True, persist the message history after each response or
|
|
537
|
+
tool call.
|
|
534
538
|
|
|
535
539
|
Returns
|
|
536
540
|
-------
|
|
@@ -621,7 +625,8 @@ class ResponseBase(Generic[T]):
|
|
|
621
625
|
self.messages.add_tool_message(
|
|
622
626
|
content=response_output, output=tool_output
|
|
623
627
|
)
|
|
624
|
-
|
|
628
|
+
if save_messages:
|
|
629
|
+
self.save()
|
|
625
630
|
except Exception as exc:
|
|
626
631
|
log(
|
|
627
632
|
f"Error executing tool handler '{tool_name}': {exc}",
|
|
@@ -646,7 +651,8 @@ class ResponseBase(Generic[T]):
|
|
|
646
651
|
self.messages.add_assistant_message(
|
|
647
652
|
response_output, metadata=kwargs
|
|
648
653
|
)
|
|
649
|
-
|
|
654
|
+
if save_messages:
|
|
655
|
+
self.save()
|
|
650
656
|
if hasattr(response, "output_text") and response.output_text:
|
|
651
657
|
raw_text = response.output_text
|
|
652
658
|
log("No tool call. Parsing output_text.")
|
|
@@ -682,6 +688,7 @@ class ResponseBase(Generic[T]):
|
|
|
682
688
|
*,
|
|
683
689
|
files: str | list[str] | None = None,
|
|
684
690
|
use_vector_store: bool = False,
|
|
691
|
+
save_messages: bool = True,
|
|
685
692
|
) -> T | str:
|
|
686
693
|
"""Execute run_async synchronously with proper event loop handling.
|
|
687
694
|
|
|
@@ -704,6 +711,9 @@ class ResponseBase(Generic[T]):
|
|
|
704
711
|
use_vector_store : bool, default False
|
|
705
712
|
If True, non-image files are uploaded to a vector store
|
|
706
713
|
for RAG-enabled search instead of inline base64 encoding.
|
|
714
|
+
save_messages : bool, default True
|
|
715
|
+
When True, persist the message history after each response or
|
|
716
|
+
tool call.
|
|
707
717
|
|
|
708
718
|
Returns
|
|
709
719
|
-------
|
|
@@ -739,6 +749,7 @@ class ResponseBase(Generic[T]):
|
|
|
739
749
|
content=content,
|
|
740
750
|
files=files,
|
|
741
751
|
use_vector_store=use_vector_store,
|
|
752
|
+
save_messages=save_messages,
|
|
742
753
|
)
|
|
743
754
|
|
|
744
755
|
try:
|
|
@@ -871,9 +882,11 @@ class ResponseBase(Generic[T]):
|
|
|
871
882
|
|
|
872
883
|
Notes
|
|
873
884
|
-----
|
|
874
|
-
If no filepath is provided, the save operation
|
|
875
|
-
|
|
876
|
-
|
|
885
|
+
If no filepath is provided, the save operation writes to the
|
|
886
|
+
session data path. If the configured data path already ends with
|
|
887
|
+
the response name, it writes to data_path / uuid.json. Otherwise,
|
|
888
|
+
it writes to data_path / name / uuid.json. The data path is
|
|
889
|
+
configured during initialization and defaults to get_data_path().
|
|
877
890
|
|
|
878
891
|
Raises
|
|
879
892
|
------
|
|
@@ -889,7 +902,7 @@ class ResponseBase(Generic[T]):
|
|
|
889
902
|
target = Path(filepath)
|
|
890
903
|
else:
|
|
891
904
|
filename = f"{str(self.uuid).lower()}.json"
|
|
892
|
-
target = self.
|
|
905
|
+
target = self._session_path(filename)
|
|
893
906
|
|
|
894
907
|
checked = check_filepath(filepath=target)
|
|
895
908
|
self.messages.to_json_file(str(checked))
|
|
@@ -919,12 +932,18 @@ class ResponseBase(Generic[T]):
|
|
|
919
932
|
traceback.format_exception(type(exc), exc, exc.__traceback__)
|
|
920
933
|
)
|
|
921
934
|
filename = f"{str(self.uuid).lower()}_error.txt"
|
|
922
|
-
target = self.
|
|
935
|
+
target = self._session_path(filename)
|
|
923
936
|
checked = check_filepath(filepath=target)
|
|
924
937
|
checked.write_text(error_text, encoding="utf-8")
|
|
925
938
|
log(f"Saved error details to {checked}")
|
|
926
939
|
return checked
|
|
927
940
|
|
|
941
|
+
def _session_path(self, filename: str) -> Path:
    """Build the path of a session file inside the response's data folder.

    Parameters
    ----------
    filename : str
        Name of the file to place in the session folder.

    Returns
    -------
    Path
        ``data_path / filename`` when the final component of the data path
        equals the response name, otherwise ``data_path / name / filename``.
    """
    session_dir = self._data_path
    # Only append the response name when the data path does not already
    # end with it, so the name never appears twice in a row.
    if session_dir.name != self._name:
        session_dir = session_dir / self._name
    return session_dir / filename
|
|
946
|
+
|
|
928
947
|
def __repr__(self) -> str:
|
|
929
948
|
"""Return a detailed string representation of the response session.
|
|
930
949
|
|
openai_sdk_helpers/settings.py
CHANGED
|
@@ -48,6 +48,8 @@ class OpenAISettings(BaseModel):
|
|
|
48
48
|
-------
|
|
49
49
|
from_env(dotenv_path, **overrides)
|
|
50
50
|
Build settings from environment variables and optional overrides.
|
|
51
|
+
from_secrets(secrets, **overrides)
|
|
52
|
+
Build settings from a secrets mapping and optional overrides.
|
|
51
53
|
client_kwargs()
|
|
52
54
|
Return keyword arguments for ``OpenAI`` initialization.
|
|
53
55
|
create_client()
|
|
@@ -190,6 +192,69 @@ class OpenAISettings(BaseModel):
|
|
|
190
192
|
|
|
191
193
|
return settings
|
|
192
194
|
|
|
195
|
+
@classmethod
def from_secrets(
    cls,
    secrets: Mapping[str, Any] | None = None,
    **overrides: Any,
) -> OpenAISettings:
    """Load settings from a secrets mapping and optional overrides.

    Parameters
    ----------
    secrets : Mapping[str, Any] or None, optional
        Mapping of secret values keyed by environment variable names
        (``OPENAI_API_KEY``, ``OPENAI_ORG_ID``, ``OPENAI_PROJECT_ID``,
        ``OPENAI_BASE_URL``, ``OPENAI_MODEL``, ``OPENAI_TIMEOUT``,
        ``OPENAI_MAX_RETRIES``). When None, ``os.environ`` is used. A
        mapping that is passed explicitly is used as given, even when it
        is empty.
    overrides : Any
        Keyword overrides applied on top of secret values. An override
        whose value is None is ignored and the secret value is used
        instead. ``extra_client_kwargs`` is read from overrides only.

    Returns
    -------
    OpenAISettings
        Settings instance populated from secret values and overrides.

    Raises
    ------
    ValueError
        If no API key is supplied by the overrides or the secrets mapping.
    """
    # Fall back to the process environment only when no mapping was given.
    # ``secrets or os.environ`` would also discard an explicit empty mapping
    # and silently pick up credentials the caller did not ask for.
    secret_values: Mapping[str, Any] = os.environ if secrets is None else secrets

    def resolve_value(override_key: str, secret_key: str) -> Any:
        # Overrides win, but only when they carry an actual value.
        override = overrides.get(override_key)
        if override is not None:
            return override
        return secret_values.get(secret_key)

    timeout_raw = resolve_value("timeout", "OPENAI_TIMEOUT")
    max_retries_raw = resolve_value("max_retries", "OPENAI_MAX_RETRIES")

    values: dict[str, Any] = {
        "api_key": resolve_value("api_key", "OPENAI_API_KEY"),
        "org_id": resolve_value("org_id", "OPENAI_ORG_ID"),
        "project_id": resolve_value("project_id", "OPENAI_PROJECT_ID"),
        "base_url": resolve_value("base_url", "OPENAI_BASE_URL"),
        "default_model": resolve_value("default_model", "OPENAI_MODEL"),
        "timeout": coerce_optional_float(timeout_raw),
        "max_retries": coerce_optional_int(max_retries_raw),
        "extra_client_kwargs": coerce_dict(overrides.get("extra_client_kwargs")),
    }

    settings = cls(**values)
    # Checked after construction so an empty-string key is rejected too.
    if not settings.api_key:
        raise ValueError(
            "OPENAI_API_KEY is required to configure the OpenAI client"
            " and was not found in secrets."
        )

    return settings
|
|
257
|
+
|
|
193
258
|
def client_kwargs(self) -> dict[str, Any]:
|
|
194
259
|
"""Return keyword arguments for constructing an OpenAI client.
|
|
195
260
|
|
|
@@ -76,6 +76,13 @@ from __future__ import annotations
|
|
|
76
76
|
|
|
77
77
|
from .agent_blueprint import AgentBlueprint
|
|
78
78
|
from .base import *
|
|
79
|
+
from .classification import (
|
|
80
|
+
ClassificationResult,
|
|
81
|
+
ClassificationStep,
|
|
82
|
+
ClassificationStopReason,
|
|
83
|
+
TaxonomyNode,
|
|
84
|
+
flatten_taxonomy,
|
|
85
|
+
)
|
|
79
86
|
from .extraction import (
|
|
80
87
|
AnnotatedDocumentStructure,
|
|
81
88
|
AttributeStructure,
|
|
@@ -98,6 +105,11 @@ __all__ = [
|
|
|
98
105
|
"spec_field",
|
|
99
106
|
"AgentBlueprint",
|
|
100
107
|
"AgentEnum",
|
|
108
|
+
"ClassificationResult",
|
|
109
|
+
"ClassificationStep",
|
|
110
|
+
"ClassificationStopReason",
|
|
111
|
+
"TaxonomyNode",
|
|
112
|
+
"flatten_taxonomy",
|
|
101
113
|
"TaskStructure",
|
|
102
114
|
"PlanStructure",
|
|
103
115
|
"create_plan",
|
|
@@ -134,9 +134,21 @@ def _ensure_items_have_schema(target: Any) -> None:
|
|
|
134
134
|
|
|
135
135
|
def _ensure_schema_has_type(schema: dict[str, Any]) -> None:
|
|
136
136
|
"""Ensure a schema dictionary includes a type entry when possible."""
|
|
137
|
+
any_of = schema.get("anyOf")
|
|
138
|
+
if isinstance(any_of, list):
|
|
139
|
+
for entry in any_of:
|
|
140
|
+
if isinstance(entry, dict):
|
|
141
|
+
_ensure_schema_has_type(entry)
|
|
142
|
+
properties = schema.get("properties")
|
|
143
|
+
if isinstance(properties, dict):
|
|
144
|
+
for value in properties.values():
|
|
145
|
+
if isinstance(value, dict):
|
|
146
|
+
_ensure_schema_has_type(value)
|
|
147
|
+
items = schema.get("items")
|
|
148
|
+
if isinstance(items, dict):
|
|
149
|
+
_ensure_schema_has_type(items)
|
|
137
150
|
if "type" in schema or "$ref" in schema:
|
|
138
151
|
return
|
|
139
|
-
any_of = schema.get("anyOf")
|
|
140
152
|
if isinstance(any_of, list):
|
|
141
153
|
inferred_types: set[str] = set()
|
|
142
154
|
for entry in any_of:
|
|
@@ -162,6 +174,68 @@ def _ensure_schema_has_type(schema: dict[str, Any]) -> None:
|
|
|
162
174
|
schema.update(_build_any_value_schema())
|
|
163
175
|
|
|
164
176
|
|
|
177
|
+
def _hydrate_ref_types(schema: dict[str, Any]) -> None:
    """Attach explicit types to $ref nodes when available.

    Walks the schema in place. Every dictionary that has a ``$ref`` entry
    but no ``type`` entry receives a ``type`` taken from the referenced
    definition: the definition's own ``type`` when it is a string or list,
    otherwise a type inferred from the definition's ``enum`` values. Nodes
    whose reference cannot be resolved are left untouched.

    Parameters
    ----------
    schema : dict[str, Any]
        Schema dictionary to hydrate in place. Definitions are read from
        its ``$defs`` entry, or from ``definitions`` when ``$defs`` is
        missing or empty.
    """
    definitions = schema.get("$defs") or schema.get("definitions") or {}
    if not isinstance(definitions, dict):
        definitions = {}

    # Looked up by exact type so that ``bool`` values are reported as
    # "boolean" rather than "integer" (``bool`` subclasses ``int``).
    enum_type_names = {
        str: "string",
        int: "integer",
        float: "number",
        bool: "boolean",
        type(None): "null",
    }

    def _infer_enum_type(values: list[Any]) -> list[str] | str | None:
        # Values of unrecognised Python types are ignored.
        inferred = {
            enum_type_names[type(value)]
            for value in values
            if type(value) in enum_type_names
        }
        if not inferred:
            return None
        if len(inferred) == 1:
            return next(iter(inferred))
        # Sorted so that mixed enums yield a deterministic type list.
        return sorted(inferred)

    def _resolve_ref_type(ref: Any) -> list[str] | str | None:
        # A mapping of property names can hold a key literally called
        # "$ref" whose value is a sub-schema, not a pointer string.
        if not isinstance(ref, str):
            return None
        if not ref.startswith(("#/$defs/", "#/definitions/")):
            return None
        key = ref.split("/", maxsplit=2)[-1]
        definition = definitions.get(key)
        if definition is None and "~" in key:
            # JSON Pointer escapes: "~1" encodes "/" and "~0" encodes "~".
            # Tried only after the raw key so existing lookups are unchanged.
            decoded = key.replace("~1", "/").replace("~0", "~")
            definition = definitions.get(decoded)
        if not isinstance(definition, dict):
            return None
        ref_type = definition.get("type")
        if isinstance(ref_type, (str, list)):
            return ref_type
        enum_values = definition.get("enum")
        if isinstance(enum_values, list):
            return _infer_enum_type(enum_values)
        return None

    def _walk(node: Any) -> None:
        if isinstance(node, dict):
            if "$ref" in node and "type" not in node:
                ref_type = _resolve_ref_type(node["$ref"])
                if ref_type is not None:
                    node["type"] = ref_type
            for value in node.values():
                _walk(value)
        elif isinstance(node, list):
            for item in node:
                _walk(item)

    _walk(schema)
|
|
237
|
+
|
|
238
|
+
|
|
165
239
|
class StructureBase(BaseModelJSONSerializable):
|
|
166
240
|
"""Base class for structured output models with schema generation.
|
|
167
241
|
|
|
@@ -471,7 +545,7 @@ class StructureBase(BaseModelJSONSerializable):
|
|
|
471
545
|
if isinstance(obj, dict):
|
|
472
546
|
if "$ref" in obj:
|
|
473
547
|
for key in list(obj.keys()):
|
|
474
|
-
if key
|
|
548
|
+
if key not in {"$ref", "type"}:
|
|
475
549
|
obj.pop(key, None)
|
|
476
550
|
for v in obj.values():
|
|
477
551
|
clean_refs(v)
|
|
@@ -482,60 +556,10 @@ class StructureBase(BaseModelJSONSerializable):
|
|
|
482
556
|
|
|
483
557
|
cleaned_schema = cast(dict[str, Any], clean_refs(schema))
|
|
484
558
|
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
root: dict[str, Any],
|
|
488
|
-
seen: set[str],
|
|
489
|
-
) -> dict[str, Any] | None:
|
|
490
|
-
if not ref.startswith("#/"):
|
|
491
|
-
return None
|
|
492
|
-
if ref in seen:
|
|
493
|
-
return None
|
|
494
|
-
seen.add(ref)
|
|
495
|
-
|
|
496
|
-
current: Any = root
|
|
497
|
-
for part in ref.lstrip("#/").split("/"):
|
|
498
|
-
part = part.replace("~1", "/").replace("~0", "~")
|
|
499
|
-
if isinstance(current, dict) and part in current:
|
|
500
|
-
current = current[part]
|
|
501
|
-
else:
|
|
502
|
-
seen.discard(ref)
|
|
503
|
-
return None
|
|
504
|
-
if isinstance(current, dict):
|
|
505
|
-
resolved = cast(dict[str, Any], json.loads(json.dumps(current)))
|
|
506
|
-
else:
|
|
507
|
-
resolved = None
|
|
508
|
-
seen.discard(ref)
|
|
509
|
-
return resolved
|
|
510
|
-
|
|
511
|
-
def _inline_anyof_refs(obj: Any, root: dict[str, Any], seen: set[str]) -> Any:
|
|
512
|
-
if isinstance(obj, dict):
|
|
513
|
-
updated: dict[str, Any] = {}
|
|
514
|
-
for key, value in obj.items():
|
|
515
|
-
if key == "anyOf" and isinstance(value, list):
|
|
516
|
-
updated_items = []
|
|
517
|
-
for item in value:
|
|
518
|
-
if (
|
|
519
|
-
isinstance(item, dict)
|
|
520
|
-
and "$ref" in item
|
|
521
|
-
and "type" not in item
|
|
522
|
-
):
|
|
523
|
-
resolved = _resolve_ref(item["$ref"], root, seen)
|
|
524
|
-
if resolved is not None:
|
|
525
|
-
item = resolved
|
|
526
|
-
updated_items.append(_inline_anyof_refs(item, root, seen))
|
|
527
|
-
updated[key] = updated_items
|
|
528
|
-
else:
|
|
529
|
-
updated[key] = _inline_anyof_refs(value, root, seen)
|
|
530
|
-
return updated
|
|
531
|
-
if isinstance(obj, list):
|
|
532
|
-
return [_inline_anyof_refs(item, root, seen) for item in obj]
|
|
533
|
-
return obj
|
|
534
|
-
|
|
535
|
-
cleaned_schema = cast(
|
|
536
|
-
dict[str, Any], _inline_anyof_refs(cleaned_schema, schema, set())
|
|
537
|
-
)
|
|
559
|
+
cleaned_schema = cast(dict[str, Any], cleaned_schema)
|
|
560
|
+
_hydrate_ref_types(cleaned_schema)
|
|
538
561
|
_ensure_items_have_schema(cleaned_schema)
|
|
562
|
+
_ensure_schema_has_type(cleaned_schema)
|
|
539
563
|
|
|
540
564
|
nullable_fields = {
|
|
541
565
|
name
|