openai-sdk-helpers 0.5.2__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,31 @@
1
+ You are a taxonomy classification assistant.
2
+
3
+ Instructions:
4
+ - Review the text and select all matching taxonomy nodes from the list.
5
+ - Populate selected_nodes as a list of taxonomy node ids for multi-class matches.
6
+ - Use selected_node when a single best match is appropriate.
7
+ - Provide a confidence score between 0 and 1 for the selections; higher means more certain.
8
+ - Use only taxonomy identifiers from the candidate list for any selections.
9
+ - Use the stop_reason enum values only: "continue", "stop", "no_match", "max_depth", "no_children".
10
+ - If a child level should be explored, set stop_reason to "continue".
11
+ - If no appropriate node exists, set stop_reason to "no_match" and leave selections empty.
12
+ - If you are confident this is the final level, set stop_reason to "stop".
13
+ - Provide a concise rationale in one or two sentences.
14
+
15
+ Current depth: {{ depth }}
16
+
17
+ Previous path:
18
+ {% if path %}
19
+ {% for step in path %}
20
+ - {{ step.selected_node }} (confidence={{ step.confidence }}, stop_reason={{ step.stop_reason }})
21
+ {% endfor %}
22
+ {% else %}
23
+ - None
24
+ {% endif %}
25
+
26
+ Candidate taxonomy nodes:
27
+ {% for node in taxonomy_nodes %}
28
+ - identifier: {{ node.identifier }}
29
+ label: {{ node.label }}
30
+ description: {{ node.description or "None" }}
31
+ {% endfor %}
@@ -509,6 +509,7 @@ class ResponseBase(Generic[T]):
509
509
  content: str | list[str],
510
510
  files: str | list[str] | None = None,
511
511
  use_vector_store: bool = False,
512
+ save_messages: bool = True,
512
513
  ) -> T | str:
513
514
  """Generate a response asynchronously from the OpenAI API.
514
515
 
@@ -531,6 +532,9 @@ class ResponseBase(Generic[T]):
531
532
  use_vector_store : bool, default False
532
533
  If True, non-image files are uploaded to a vector store
533
534
  for RAG-enabled search instead of inline base64 encoding.
535
+ save_messages : bool, default True
536
+ When True, persist the message history after each response or
537
+ tool call.
534
538
 
535
539
  Returns
536
540
  -------
@@ -621,7 +625,8 @@ class ResponseBase(Generic[T]):
621
625
  self.messages.add_tool_message(
622
626
  content=response_output, output=tool_output
623
627
  )
624
- self.save()
628
+ if save_messages:
629
+ self.save()
625
630
  except Exception as exc:
626
631
  log(
627
632
  f"Error executing tool handler '{tool_name}': {exc}",
@@ -646,7 +651,8 @@ class ResponseBase(Generic[T]):
646
651
  self.messages.add_assistant_message(
647
652
  response_output, metadata=kwargs
648
653
  )
649
- self.save()
654
+ if save_messages:
655
+ self.save()
650
656
  if hasattr(response, "output_text") and response.output_text:
651
657
  raw_text = response.output_text
652
658
  log("No tool call. Parsing output_text.")
@@ -682,6 +688,7 @@ class ResponseBase(Generic[T]):
682
688
  *,
683
689
  files: str | list[str] | None = None,
684
690
  use_vector_store: bool = False,
691
+ save_messages: bool = True,
685
692
  ) -> T | str:
686
693
  """Execute run_async synchronously with proper event loop handling.
687
694
 
@@ -704,6 +711,9 @@ class ResponseBase(Generic[T]):
704
711
  use_vector_store : bool, default False
705
712
  If True, non-image files are uploaded to a vector store
706
713
  for RAG-enabled search instead of inline base64 encoding.
714
+ save_messages : bool, default True
715
+ When True, persist the message history after each response or
716
+ tool call.
707
717
 
708
718
  Returns
709
719
  -------
@@ -739,6 +749,7 @@ class ResponseBase(Generic[T]):
739
749
  content=content,
740
750
  files=files,
741
751
  use_vector_store=use_vector_store,
752
+ save_messages=save_messages,
742
753
  )
743
754
 
744
755
  try:
@@ -871,9 +882,11 @@ class ResponseBase(Generic[T]):
871
882
 
872
883
  Notes
873
884
  -----
874
- If no filepath is provided, the save operation always writes to
875
- the session data path (data_path / name / uuid.json). The data path
876
- is configured during initialization and defaults to get_data_path().
885
+ If no filepath is provided, the save operation writes to the
886
+ session data path. If the configured data path already ends with
887
+ the response name, it writes to data_path / uuid.json. Otherwise,
888
+ it writes to data_path / name / uuid.json. The data path is
889
+ configured during initialization and defaults to get_data_path().
877
890
 
878
891
  Raises
879
892
  ------
@@ -889,7 +902,7 @@ class ResponseBase(Generic[T]):
889
902
  target = Path(filepath)
890
903
  else:
891
904
  filename = f"{str(self.uuid).lower()}.json"
892
- target = self._data_path / self._name / filename
905
+ target = self._session_path(filename)
893
906
 
894
907
  checked = check_filepath(filepath=target)
895
908
  self.messages.to_json_file(str(checked))
@@ -919,12 +932,18 @@ class ResponseBase(Generic[T]):
919
932
  traceback.format_exception(type(exc), exc, exc.__traceback__)
920
933
  )
921
934
  filename = f"{str(self.uuid).lower()}_error.txt"
922
- target = self._data_path / self._name / filename
935
+ target = self._session_path(filename)
923
936
  checked = check_filepath(filepath=target)
924
937
  checked.write_text(error_text, encoding="utf-8")
925
938
  log(f"Saved error details to {checked}")
926
939
  return checked
927
940
 
def _session_path(self, filename: str) -> Path:
    """Build the path of a session artifact inside the session directory.

    Parameters
    ----------
    filename : str
        Name of the file to place in the session directory.

    Returns
    -------
    Path
        ``data_path / filename`` when the final component of the data path
        already equals the response name, otherwise
        ``data_path / name / filename``.
    """
    base_dir = self._data_path
    # Avoid a doubled ``name/name`` segment when the configured data path
    # already points at the per-response directory.
    session_dir = base_dir if base_dir.name == self._name else base_dir / self._name
    return session_dir / filename
946
+
928
947
  def __repr__(self) -> str:
929
948
  """Return a detailed string representation of the response session.
930
949
 
@@ -48,6 +48,8 @@ class OpenAISettings(BaseModel):
48
48
  -------
49
49
  from_env(dotenv_path, **overrides)
50
50
  Build settings from environment variables and optional overrides.
51
+ from_secrets(secrets, **overrides)
52
+ Build settings from a secrets mapping and optional overrides.
51
53
  client_kwargs()
52
54
  Return keyword arguments for ``OpenAI`` initialization.
53
55
  create_client()
@@ -190,6 +192,69 @@ class OpenAISettings(BaseModel):
190
192
 
191
193
  return settings
192
194
 
@classmethod
def from_secrets(
    cls,
    secrets: Mapping[str, Any] | None = None,
    **overrides: Any,
) -> OpenAISettings:
    """Load settings from a secrets mapping and optional overrides.

    Parameters
    ----------
    secrets : Mapping[str, Any] or None, optional
        Mapping of secret values keyed by environment variable names
        (``OPENAI_API_KEY``, ``OPENAI_ORG_ID``, ``OPENAI_PROJECT_ID``,
        ``OPENAI_BASE_URL``, ``OPENAI_MODEL``, ``OPENAI_TIMEOUT`` and
        ``OPENAI_MAX_RETRIES``). When None, ``os.environ`` is used. A
        mapping that is passed explicitly is always honoured, even when
        it is empty.
    overrides : Any
        Keyword overrides applied on top of secret values. An override
        whose value is None falls through to the secret value.
        ``extra_client_kwargs`` is read from the overrides only.

    Returns
    -------
    OpenAISettings
        Settings instance populated from secret values and overrides.

    Raises
    ------
    ValueError
        If no API key is found in the overrides or the secrets mapping.
    """
    # Compare against None rather than relying on truthiness: an empty
    # mapping supplied by the caller must not silently fall back to the
    # process environment.
    secret_values: Mapping[str, Any] = os.environ if secrets is None else secrets

    def resolve_value(override_key: str, secret_key: str) -> Any:
        # Overrides win; a missing or None override defers to the secret.
        override = overrides.get(override_key)
        if override is not None:
            return override
        return secret_values.get(secret_key)

    timeout_raw = resolve_value("timeout", "OPENAI_TIMEOUT")
    max_retries_raw = resolve_value("max_retries", "OPENAI_MAX_RETRIES")

    values: dict[str, Any] = {
        "api_key": resolve_value("api_key", "OPENAI_API_KEY"),
        "org_id": resolve_value("org_id", "OPENAI_ORG_ID"),
        "project_id": resolve_value("project_id", "OPENAI_PROJECT_ID"),
        "base_url": resolve_value("base_url", "OPENAI_BASE_URL"),
        "default_model": resolve_value("default_model", "OPENAI_MODEL"),
        "timeout": coerce_optional_float(timeout_raw),
        "max_retries": coerce_optional_int(max_retries_raw),
        "extra_client_kwargs": coerce_dict(overrides.get("extra_client_kwargs")),
    }

    settings = cls(**values)
    if not settings.api_key:
        raise ValueError(
            "OPENAI_API_KEY is required to configure the OpenAI client"
            " and was not found in secrets."
        )

    return settings
257
+
193
258
  def client_kwargs(self) -> dict[str, Any]:
194
259
  """Return keyword arguments for constructing an OpenAI client.
195
260
 
@@ -76,6 +76,13 @@ from __future__ import annotations
76
76
 
77
77
  from .agent_blueprint import AgentBlueprint
78
78
  from .base import *
79
+ from .classification import (
80
+ ClassificationResult,
81
+ ClassificationStep,
82
+ ClassificationStopReason,
83
+ TaxonomyNode,
84
+ flatten_taxonomy,
85
+ )
79
86
  from .extraction import (
80
87
  AnnotatedDocumentStructure,
81
88
  AttributeStructure,
@@ -98,6 +105,11 @@ __all__ = [
98
105
  "spec_field",
99
106
  "AgentBlueprint",
100
107
  "AgentEnum",
108
+ "ClassificationResult",
109
+ "ClassificationStep",
110
+ "ClassificationStopReason",
111
+ "TaxonomyNode",
112
+ "flatten_taxonomy",
101
113
  "TaskStructure",
102
114
  "PlanStructure",
103
115
  "create_plan",
@@ -134,9 +134,21 @@ def _ensure_items_have_schema(target: Any) -> None:
134
134
 
135
135
  def _ensure_schema_has_type(schema: dict[str, Any]) -> None:
136
136
  """Ensure a schema dictionary includes a type entry when possible."""
137
+ any_of = schema.get("anyOf")
138
+ if isinstance(any_of, list):
139
+ for entry in any_of:
140
+ if isinstance(entry, dict):
141
+ _ensure_schema_has_type(entry)
142
+ properties = schema.get("properties")
143
+ if isinstance(properties, dict):
144
+ for value in properties.values():
145
+ if isinstance(value, dict):
146
+ _ensure_schema_has_type(value)
147
+ items = schema.get("items")
148
+ if isinstance(items, dict):
149
+ _ensure_schema_has_type(items)
137
150
  if "type" in schema or "$ref" in schema:
138
151
  return
139
- any_of = schema.get("anyOf")
140
152
  if isinstance(any_of, list):
141
153
  inferred_types: set[str] = set()
142
154
  for entry in any_of:
@@ -162,6 +174,68 @@ def _ensure_schema_has_type(schema: dict[str, Any]) -> None:
162
174
  schema.update(_build_any_value_schema())
163
175
 
164
176
 
def _hydrate_ref_types(schema: dict[str, Any]) -> None:
    """Attach explicit types to $ref nodes when available.

    Every dictionary in ``schema`` that has a ``$ref`` but no ``type`` gets
    a ``type`` copied from the referenced definition. When the definition
    has no ``type`` but does have an ``enum``, the type is inferred from the
    enum values. Nodes whose reference cannot be resolved are left as is.

    Parameters
    ----------
    schema : dict[str, Any]
        Schema dictionary to hydrate in place. Definitions are looked up in
        its top-level ``$defs`` (or, failing that, ``definitions``) entry.
    """
    definitions = schema.get("$defs") or schema.get("definitions") or {}
    if not isinstance(definitions, dict):
        definitions = {}

    # Keyed by exact type so that ``bool`` is not reported as "integer".
    type_names = {
        str: "string",
        int: "integer",
        float: "number",
        bool: "boolean",
        type(None): "null",
    }

    def _infer_enum_type(values: list[Any]) -> list[str] | str | None:
        inferred = {
            type_names[type(value)] for value in values if type(value) in type_names
        }
        if not inferred:
            return None
        if len(inferred) == 1:
            return next(iter(inferred))
        # Sorted so the generated schema is deterministic.
        return sorted(inferred)

    def _resolve_ref_type(ref: Any) -> list[str] | str | None:
        # The walk below also visits ``properties`` maps, where a property
        # may literally be named "$ref" and hold a sub-schema rather than a
        # reference string; such values are not references.
        if not isinstance(ref, str):
            return None
        if not ref.startswith(("#/$defs/", "#/definitions/")):
            return None
        key = ref.split("/", maxsplit=2)[-1]
        definition = definitions.get(key)
        if not isinstance(definition, dict):
            return None
        ref_type = definition.get("type")
        if isinstance(ref_type, str):
            return ref_type
        if isinstance(ref_type, list):
            # Copy so the node and the definition never share one list.
            return list(ref_type)
        enum_values = definition.get("enum")
        if isinstance(enum_values, list):
            return _infer_enum_type(enum_values)
        return None

    def _walk(node: Any) -> None:
        if isinstance(node, dict):
            if "$ref" in node and "type" not in node:
                ref_type = _resolve_ref_type(node["$ref"])
                if ref_type is not None:
                    node["type"] = ref_type
            for value in node.values():
                _walk(value)
        elif isinstance(node, list):
            for item in node:
                _walk(item)

    _walk(schema)
237
+
238
+
165
239
  class StructureBase(BaseModelJSONSerializable):
166
240
  """Base class for structured output models with schema generation.
167
241
 
@@ -471,7 +545,7 @@ class StructureBase(BaseModelJSONSerializable):
471
545
  if isinstance(obj, dict):
472
546
  if "$ref" in obj:
473
547
  for key in list(obj.keys()):
474
- if key != "$ref":
548
+ if key not in {"$ref", "type"}:
475
549
  obj.pop(key, None)
476
550
  for v in obj.values():
477
551
  clean_refs(v)
@@ -482,60 +556,10 @@ class StructureBase(BaseModelJSONSerializable):
482
556
 
483
557
  cleaned_schema = cast(dict[str, Any], clean_refs(schema))
484
558
 
485
- def _resolve_ref(
486
- ref: str,
487
- root: dict[str, Any],
488
- seen: set[str],
489
- ) -> dict[str, Any] | None:
490
- if not ref.startswith("#/"):
491
- return None
492
- if ref in seen:
493
- return None
494
- seen.add(ref)
495
-
496
- current: Any = root
497
- for part in ref.lstrip("#/").split("/"):
498
- part = part.replace("~1", "/").replace("~0", "~")
499
- if isinstance(current, dict) and part in current:
500
- current = current[part]
501
- else:
502
- seen.discard(ref)
503
- return None
504
- if isinstance(current, dict):
505
- resolved = cast(dict[str, Any], json.loads(json.dumps(current)))
506
- else:
507
- resolved = None
508
- seen.discard(ref)
509
- return resolved
510
-
511
- def _inline_anyof_refs(obj: Any, root: dict[str, Any], seen: set[str]) -> Any:
512
- if isinstance(obj, dict):
513
- updated: dict[str, Any] = {}
514
- for key, value in obj.items():
515
- if key == "anyOf" and isinstance(value, list):
516
- updated_items = []
517
- for item in value:
518
- if (
519
- isinstance(item, dict)
520
- and "$ref" in item
521
- and "type" not in item
522
- ):
523
- resolved = _resolve_ref(item["$ref"], root, seen)
524
- if resolved is not None:
525
- item = resolved
526
- updated_items.append(_inline_anyof_refs(item, root, seen))
527
- updated[key] = updated_items
528
- else:
529
- updated[key] = _inline_anyof_refs(value, root, seen)
530
- return updated
531
- if isinstance(obj, list):
532
- return [_inline_anyof_refs(item, root, seen) for item in obj]
533
- return obj
534
-
535
- cleaned_schema = cast(
536
- dict[str, Any], _inline_anyof_refs(cleaned_schema, schema, set())
537
- )
559
+ cleaned_schema = cast(dict[str, Any], cleaned_schema)
560
+ _hydrate_ref_types(cleaned_schema)
538
561
  _ensure_items_have_schema(cleaned_schema)
562
+ _ensure_schema_has_type(cleaned_schema)
539
563
 
540
564
  nullable_fields = {
541
565
  name