PyPI - openai-sdk-helpers - Versions diffs - 0.6.1__py3-none-any.whl → 0.6.4__py3-none-any.whl - Mend

openai-sdk-helpers 0.6.1__py3-none-any.whl → 0.6.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

openai_sdk_helpers/agent/classifier.py CHANGED Viewed

@@ -3,19 +3,26 @@
 from __future__ import annotations
 import asyncio
+import threading
 import re
 from dataclasses import dataclass, field
 from enum import Enum
 from pathlib import Path
 from typing import Any, Awaitable, Dict, Iterable, Optional, Sequence, cast
+from agents.model_settings import ModelSettings
 from ..structure import (
     ClassificationResult,
     ClassificationStep,
     ClassificationStopReason,
     StructureBase,
+    Taxonomy,
     TaxonomyNode,
+    format_path_identifier,
+    split_path_identifier,
 )
+from ..utils import ensure_list
 from .base import AgentBase
 from .configuration import AgentConfiguration
@@ -29,11 +36,15 @@ class TaxonomyClassifierAgent(AgentBase):
         Optional template file path for prompt rendering.
     model : str | None, default=None
         Model identifier to use for classification.
+    model_settings : ModelSettings | None, default=None
+        Optional model settings to apply to the classifier agent.
     Methods
     -------
-    run_agent(text, taxonomy, context, max_depth)
-        Classify text by recursively walking the taxonomy tree.
+    run_async(input, context, max_depth, confidence_threshold)
+        Classify text asynchronously using taxonomy traversal.
+    run_sync(input, context, max_depth, confidence_threshold)
+        Classify text synchronously using taxonomy traversal.
     Examples
     --------
@@ -51,6 +62,7 @@ class TaxonomyClassifierAgent(AgentBase):
         *,
         template_path: Path | str | None = None,
         model: str | None = None,
+        model_settings: ModelSettings | None = None,
         taxonomy: TaxonomyNode | Sequence[TaxonomyNode],
     ) -> None:
         """Initialize the taxonomy classifier agent configuration.
@@ -61,6 +73,8 @@ class TaxonomyClassifierAgent(AgentBase):
             Optional template file path for prompt rendering.
         model : str | None, default=None
             Model identifier to use for classification.
+        model_settings : ModelSettings | None, default=None
+            Optional model settings to apply to the classifier agent.
         taxonomy : TaxonomyNode | Sequence[TaxonomyNode]
             Root taxonomy node or list of root nodes.
@@ -85,17 +99,19 @@ class TaxonomyClassifierAgent(AgentBase):
             template_path=resolved_template_path,
             output_structure=ClassificationStep,
             model=model,
+            model_settings=model_settings,
         )
         super().__init__(configuration=configuration)
-    async def run_agent(
+    async def _run_agent(
         self,
         text: str,
         *,
         context: Optional[Dict[str, Any]] = None,
+        file_ids: str | Sequence[str] | None = None,
         max_depth: Optional[int] = None,
         confidence_threshold: float | None = None,
-        single_class: bool = False,
+        session: Optional[Any] = None,
     ) -> ClassificationResult:
         """Classify ``text`` by recursively walking taxonomy levels.
@@ -105,12 +121,14 @@ class TaxonomyClassifierAgent(AgentBase):
             Source text to classify.
         context : dict or None, default=None
             Additional context values to merge into the prompt.
+        file_ids : str or Sequence[str] or None, default=None
+            Optional file IDs to attach to each classification step.
         max_depth : int or None, default=None
             Maximum depth to traverse before stopping.
         confidence_threshold : float or None, default=None
             Minimum confidence required to accept a classification step.
-        single_class : bool, default=False
-            Whether to keep only the highest-priority selection per step.
+        session : Session or None, default=None
+            Optional session for maintaining conversation history across runs.
         Returns
         -------
@@ -125,61 +143,222 @@ class TaxonomyClassifierAgent(AgentBase):
         True
         """
         state = _TraversalState()
+        input_payload = _build_input_payload(text, file_ids)
         await self._classify_nodes(
-            text=text,
+            input_payload=input_payload,
             nodes=list(self._root_nodes),
             depth=0,
             parent_path=[],
             context=context,
+            file_ids=file_ids,
             max_depth=max_depth,
             confidence_threshold=confidence_threshold,
-            single_class=single_class,
+            session=session,
             state=state,
         )
         final_nodes_value = state.final_nodes or None
-        final_node = state.final_nodes[0] if state.final_nodes else None
         stop_reason = _resolve_stop_reason(state)
         return ClassificationResult(
-            final_node=final_node,
             final_nodes=final_nodes_value,
             confidence=state.best_confidence,
             stop_reason=stop_reason,
-            path=state.path,
-            path_nodes=state.path_nodes,
+            steps=state.steps,
+        )
+    async def run_async(
+        self,
+        input: str | list[dict[str, Any]],
+        *,
+        context: Optional[Dict[str, Any]] = None,
+        output_structure: Optional[type[StructureBase]] = None,
+        session: Optional[Any] = None,
+        file_ids: str | Sequence[str] | None = None,
+        max_depth: Optional[int] = None,
+        confidence_threshold: float | None = None,
+    ) -> ClassificationResult:
+        """Classify ``input`` asynchronously with taxonomy traversal.
+        Parameters
+        ----------
+        input : str or list[dict[str, Any]]
+            Source text to classify.
+        context : dict or None, default=None
+            Additional context values to merge into the prompt.
+        output_structure : type[StructureBase] or None, default=None
+            Unused in taxonomy traversal. Present for API compatibility.
+        session : Session or None, default=None
+            Optional session for maintaining conversation history across runs.
+        file_ids : str or Sequence[str] or None, default=None
+            Optional file IDs to attach to each classification step.
+        max_depth : int or None, default=None
+            Maximum depth to traverse before stopping.
+        confidence_threshold : float or None, default=None
+            Minimum confidence required to accept a classification step.
+        Returns
+        -------
+        ClassificationResult
+            Structured classification result describing the traversal.
+        """
+        _ = output_structure
+        if not isinstance(input, str):
+            msg = "TaxonomyClassifierAgent run_async requires text input."
+            raise TypeError(msg)
+        kwargs: Dict[str, Any] = {
+            "context": context,
+            "file_ids": file_ids,
+            "max_depth": max_depth,
+            "confidence_threshold": confidence_threshold,
+        }
+        if session is not None:
+            kwargs["session"] = session
+        return await self._run_agent(input, **kwargs)
+    def run_sync(
+        self,
+        input: str | list[dict[str, Any]],
+        *,
+        context: Optional[Dict[str, Any]] = None,
+        output_structure: Optional[type[StructureBase]] = None,
+        session: Optional[Any] = None,
+        file_ids: str | Sequence[str] | None = None,
+        max_depth: Optional[int] = None,
+        confidence_threshold: float | None = None,
+    ) -> ClassificationResult:
+        """Classify ``input`` synchronously with taxonomy traversal.
+        Parameters
+        ----------
+        input : str or list[dict[str, Any]]
+            Source text to classify.
+        context : dict or None, default=None
+            Additional context values to merge into the prompt.
+        output_structure : type[StructureBase] or None, default=None
+            Unused in taxonomy traversal. Present for API compatibility.
+        session : Session or None, default=None
+            Optional session for maintaining conversation history across runs.
+        file_ids : str or Sequence[str] or None, default=None
+            Optional file IDs to attach to each classification step.
+        max_depth : int or None, default=None
+            Maximum depth to traverse before stopping.
+        confidence_threshold : float or None, default=None
+            Minimum confidence required to accept a classification step.
+        Returns
+        -------
+        ClassificationResult
+            Structured classification result describing the traversal.
+        """
+        _ = output_structure
+        if not isinstance(input, str):
+            msg = "TaxonomyClassifierAgent run_sync requires text input."
+            raise TypeError(msg)
+        kwargs: Dict[str, Any] = {
+            "context": context,
+            "file_ids": file_ids,
+            "max_depth": max_depth,
+            "confidence_threshold": confidence_threshold,
+        }
+        if session is not None:
+            kwargs["session"] = session
+        async def runner() -> ClassificationResult:
+            return await self._run_agent(input, **kwargs)
+        try:
+            asyncio.get_running_loop()
+        except RuntimeError:
+            return asyncio.run(runner())
+        result: ClassificationResult | None = None
+        error: Exception | None = None
+        def _thread_func() -> None:
+            nonlocal error, result
+            try:
+                result = asyncio.run(runner())
+            except Exception as exc:
+                error = exc
+        thread = threading.Thread(target=_thread_func)
+        thread.start()
+        thread.join()
+        if error is not None:
+            raise error
+        if result is None:
+            msg = "Classification did not return a result"
+            raise RuntimeError(msg)
+        return result
+    async def _run_step_async(
+        self,
+        *,
+        input: str | list[dict[str, Any]],
+        context: Optional[Dict[str, Any]] = None,
+        output_structure: Optional[type[StructureBase]] = None,
+        session: Optional[Any] = None,
+    ) -> StructureBase:
+        """Execute a single classification step asynchronously.
+        Parameters
+        ----------
+        input : str or list[dict[str, Any]]
+            Prompt or structured input for the agent.
+        context : dict or None, default=None
+            Optional dictionary passed to the agent.
+        output_structure : type[StructureBase] or None, default=None
+            Optional type used to cast the final output.
+        session : Session or None, default=None
+            Optional session for maintaining conversation history across runs.
+        Returns
+        -------
+        StructureBase
+            Parsed result for the classification step.
+        """
+        return await super().run_async(
+            input=input,
+            context=context,
+            output_structure=output_structure,
+            session=session,
         )
     async def _classify_nodes(
         self,
         *,
-        text: str,
+        input_payload: str | list[dict[str, Any]],
         nodes: list[TaxonomyNode],
         depth: int,
         parent_path: list[str],
         context: Optional[Dict[str, Any]],
+        file_ids: str | Sequence[str] | None,
         max_depth: Optional[int],
         confidence_threshold: float | None,
-        single_class: bool,
+        session: Optional[Any],
         state: "_TraversalState",
     ) -> None:
         """Classify a taxonomy level and recursively traverse children.
         Parameters
         ----------
-        text : str
-            Source text to classify.
+        input_payload : str or list[dict[str, Any]]
+            Input payload used to prompt the agent.
         nodes : list[TaxonomyNode]
             Candidate taxonomy nodes for the current level.
         depth : int
             Current traversal depth.
         context : dict or None
             Additional context values to merge into the prompt.
+        file_ids : str or Sequence[str] or None
+            Optional file IDs attached to each classification step.
         max_depth : int or None
             Maximum traversal depth before stopping.
         confidence_threshold : float or None
             Minimum confidence required to accept a classification step.
-        single_class : bool
-            Whether to keep only the highest-priority selection per step.
+        session : Session or None
+            Optional session for maintaining conversation history across runs.
         state : _TraversalState
             Aggregated traversal state.
         """
@@ -192,18 +371,19 @@ class TaxonomyClassifierAgent(AgentBase):
         node_paths = _build_node_path_map(nodes, parent_path)
         template_context = _build_context(
             node_descriptors=_build_node_descriptors(node_paths),
-            path=state.path,
+            steps=state.steps,
             depth=depth,
             context=context,
         )
         step_structure = _build_step_structure(list(node_paths.keys()))
-        raw_step = await self.run_async(
-            input=text,
+        raw_step = await self._run_step_async(
+            input=input_payload,
             context=template_context,
             output_structure=step_structure,
+            session=session,
         )
         step = _normalize_step_output(raw_step, step_structure)
-        state.path.append(step)
+        state.steps.append(step)
         if (
             confidence_threshold is not None
@@ -213,10 +393,6 @@ class TaxonomyClassifierAgent(AgentBase):
             return
         resolved_nodes = _resolve_nodes(node_paths, step)
-        if resolved_nodes:
-            if single_class:
-                resolved_nodes = resolved_nodes[:1]
-            state.path_nodes.extend(resolved_nodes)
         if step.stop_reason.is_terminal:
             if resolved_nodes:
@@ -230,8 +406,7 @@ class TaxonomyClassifierAgent(AgentBase):
         if not resolved_nodes:
             return
-        base_path_len = len(state.path)
-        base_path_nodes_len = len(state.path_nodes)
+        base_steps_len = len(state.steps)
         child_tasks: list[tuple[Awaitable["_TraversalState"], int]] = []
         for node in resolved_nodes:
             if node.children:
@@ -242,14 +417,15 @@ class TaxonomyClassifierAgent(AgentBase):
                     (
                         self._classify_subtree(
                             sub_agent=sub_agent,
-                            text=text,
+                            input_payload=input_payload,
                             nodes=list(node.children),
                             depth=depth + 1,
                             parent_path=[*parent_path, node.label],
                             context=context,
+                            file_ids=file_ids,
                             max_depth=max_depth,
                             confidence_threshold=confidence_threshold,
-                            single_class=single_class,
+                            session=session,
                             state=sub_state,
                         ),
                         base_final_nodes_len,
@@ -268,8 +444,7 @@ class TaxonomyClassifierAgent(AgentBase):
             for child_state, (_, base_final_nodes_len) in zip(
                 child_states, child_tasks, strict=True
             ):
-                state.path.extend(child_state.path[base_path_len:])
-                state.path_nodes.extend(child_state.path_nodes[base_path_nodes_len:])
+                state.steps.extend(child_state.steps[base_steps_len:])
                 state.final_nodes.extend(child_state.final_nodes[base_final_nodes_len:])
                 state.best_confidence = _max_confidence(
                     state.best_confidence, child_state.best_confidence
@@ -323,23 +498,25 @@ class TaxonomyClassifierAgent(AgentBase):
         sub_agent = TaxonomyClassifierAgent(
             template_path=self._template_path,
             model=self._model,
+            model_settings=self._model_settings,
             taxonomy=list(nodes),
         )
-        sub_agent.run_async = self.run_async
+        sub_agent._run_step_async = self._run_step_async
         return sub_agent
     async def _classify_subtree(
         self,
         *,
         sub_agent: "TaxonomyClassifierAgent",
-        text: str,
+        input_payload: str | list[dict[str, Any]],
         nodes: list[TaxonomyNode],
         depth: int,
         parent_path: list[str],
         context: Optional[Dict[str, Any]],
+        file_ids: str | Sequence[str] | None,
         max_depth: Optional[int],
         confidence_threshold: float | None,
-        single_class: bool,
+        session: Optional[Any],
         state: "_TraversalState",
     ) -> "_TraversalState":
         """Classify a taxonomy subtree and return the traversal state.
@@ -348,8 +525,8 @@ class TaxonomyClassifierAgent(AgentBase):
         ----------
         sub_agent : TaxonomyClassifierAgent
             Sub-agent configured for the subtree traversal.
-        text : str
-            Source text to classify.
+        input_payload : str or list[dict[str, Any]]
+            Input payload used to prompt the agent.
         nodes : list[TaxonomyNode]
             Candidate taxonomy nodes for the subtree.
         depth : int
@@ -358,12 +535,14 @@ class TaxonomyClassifierAgent(AgentBase):
             Path segments leading to the current subtree.
         context : dict or None
             Additional context values to merge into the prompt.
+        file_ids : str or Sequence[str] or None
+            Optional file IDs attached to each classification step.
         max_depth : int or None
             Maximum traversal depth before stopping.
         confidence_threshold : float or None
             Minimum confidence required to accept a classification step.
-        single_class : bool
-            Whether to keep only the highest-priority selection per step.
+        session : Session or None
+            Optional session for maintaining conversation history across runs.
         state : _TraversalState
             Traversal state to populate for the subtree.
@@ -373,14 +552,15 @@ class TaxonomyClassifierAgent(AgentBase):
             Populated traversal state for the subtree.
         """
         await sub_agent._classify_nodes(
-            text=text,
+            input_payload=input_payload,
             nodes=nodes,
             depth=depth,
             parent_path=parent_path,
             context=context,
+            file_ids=file_ids,
             max_depth=max_depth,
             confidence_threshold=confidence_threshold,
-            single_class=single_class,
+            session=session,
             state=state,
         )
         return state
@@ -390,8 +570,7 @@ class TaxonomyClassifierAgent(AgentBase):
 class _TraversalState:
     """Track recursive traversal state."""
-    path: list[ClassificationStep] = field(default_factory=list)
-    path_nodes: list[TaxonomyNode] = field(default_factory=list)
+    steps: list[ClassificationStep] = field(default_factory=list)
     final_nodes: list[TaxonomyNode] = field(default_factory=list)
     best_confidence: float | None = None
     saw_max_depth: bool = False
@@ -413,8 +592,7 @@ def _copy_traversal_state(state: _TraversalState) -> _TraversalState:
         Cloned traversal state with copied collections.
     """
     return _TraversalState(
-        path=list(state.path),
-        path_nodes=list(state.path_nodes),
+        steps=list(state.steps),
         final_nodes=list(state.final_nodes),
         best_confidence=state.best_confidence,
         saw_max_depth=state.saw_max_depth,
@@ -464,6 +642,8 @@ def _normalize_roots(
     list[TaxonomyNode]
         Normalized list of root nodes.
     """
+    if isinstance(taxonomy, Taxonomy):
+        return [node for node in taxonomy.children if node is not None]
     if isinstance(taxonomy, TaxonomyNode):
         return [taxonomy]
     return [node for node in taxonomy if node is not None]
@@ -483,7 +663,7 @@ def _default_template_path() -> Path:
 def _build_context(
     *,
     node_descriptors: Iterable[dict[str, Any]],
-    path: Sequence[ClassificationStep],
+    steps: Sequence[ClassificationStep],
     depth: int,
     context: Optional[Dict[str, Any]],
 ) -> Dict[str, Any]:
@@ -493,7 +673,7 @@ def _build_context(
     ----------
     node_descriptors : Iterable[dict[str, Any]]
         Node descriptors available at the current taxonomy level.
-    path : Sequence[ClassificationStep]
+    steps : Sequence[ClassificationStep]
         Steps recorded so far in the traversal.
     depth : int
         Current traversal depth.
@@ -505,9 +685,14 @@ def _build_context(
     dict[str, Any]
         Context dictionary for prompt rendering.
     """
+    summarized_steps = [
+        step.as_summary()
+        for step in steps
+        if step.selected_nodes and any(node is not None for node in step.selected_nodes)
+    ]
     template_context: Dict[str, Any] = {
         "taxonomy_nodes": list(node_descriptors),
-        "path": [step.as_summary() for step in path],
+        "steps": summarized_steps,
         "depth": depth,
     }
     if context:
@@ -555,7 +740,7 @@ def _build_node_path_map(
     path_map: dict[str, TaxonomyNode] = {}
     seen: dict[str, int] = {}
     for node in nodes:
-        base_path = _format_path_identifier([*parent_path, node.label])
+        base_path = format_path_identifier([*parent_path, node.label])
         count = seen.get(base_path, 0) + 1
         seen[base_path] = count
         path = f"{base_path} ({count})" if count > 1 else base_path
@@ -584,33 +769,12 @@ def _build_node_descriptors(
             {
                 "identifier": path_id,
                 "label": node.label,
-                "description": node.description,
+                "computed_description": node.computed_description,
             }
         )
     return descriptors
-def _format_path_identifier(path_segments: Sequence[str]) -> str:
-    """Format path segments into a safe identifier string.
-    Parameters
-    ----------
-    path_segments : Sequence[str]
-        Path segments to format.
-    Returns
-    -------
-    str
-        Escaped path identifier string.
-    """
-    delimiter = " > "
-    escape_token = "\\>"
-    escaped_segments = [
-        segment.replace(delimiter, escape_token) for segment in path_segments
-    ]
-    return delimiter.join(escaped_segments)
 def _build_taxonomy_enum(name: str, values: Sequence[str]) -> type[Enum]:
     """Build a safe Enum from taxonomy node values.
@@ -635,25 +799,6 @@ def _build_taxonomy_enum(name: str, values: Sequence[str]) -> type[Enum]:
     return cast(type[Enum], Enum(name, members))
-def _split_taxonomy_path(value: str) -> list[str]:
-    """Split a taxonomy identifier into its path segments.
-    Parameters
-    ----------
-    value : str
-        Taxonomy path identifier to split.
-    Returns
-    -------
-    list[str]
-        Path segments with escaped delimiters restored.
-    """
-    delimiter = " > "
-    escape_token = "\\>"
-    segments = value.split(delimiter)
-    return [segment.replace(escape_token, delimiter) for segment in segments]
 def _sanitize_enum_member(
     value: str,
     index: int,
@@ -676,7 +821,7 @@ def _sanitize_enum_member(
         Sanitized enum member name.
     """
     normalized_segments: list[str] = []
-    for segment in _split_taxonomy_path(value):
+    for segment in split_path_identifier(value):
         normalized = re.sub(r"[^0-9a-zA-Z]+", "_", segment).strip("_").upper()
         if not normalized:
             normalized = "VALUE"
@@ -716,6 +861,40 @@ def _normalize_step_output(
     return ClassificationStep.from_json(payload)
+def _build_input_payload(
+    text: str,
+    file_ids: str | Sequence[str] | None,
+) -> str | list[dict[str, Any]]:
+    """Build input payloads with optional file attachments.
+    Parameters
+    ----------
+    text : str
+        Prompt text to send to the agent.
+    file_ids : str or Sequence[str] or None
+        Optional file IDs to include as ``input_file`` attachments.
+    Returns
+    -------
+    str or list[dict[str, Any]]
+        Input payload suitable for the Agents SDK.
+    """
+    normalized_file_ids = [
+        file_id for file_id in dict.fromkeys(ensure_list(file_ids)) if file_id
+    ]
+    if not normalized_file_ids:
+        return text
+    attachments = [
+        {"type": "input_file", "file_id": file_id} for file_id in normalized_file_ids
+    ]
+    return [
+        {
+            "role": "user",
+            "content": [{"type": "input_text", "text": text}, *attachments],
+        }
+    ]
 def _extract_enum_fields(
     step_structure: type[StructureBase],
 ) -> dict[str, type[Enum]]:
@@ -807,17 +986,20 @@ def _selected_nodes(step: ClassificationStep) -> list[str]:
     list[str]
         Selected identifiers in priority order.
     """
-    if step.selected_nodes is not None:
-        selected_nodes = [
-            str(_normalize_enum_value(selected_node, Enum))
-            for selected_node in step.selected_nodes
-            if selected_node
-        ]
-        if selected_nodes:
-            return selected_nodes
-    if step.selected_node:
-        return [str(_normalize_enum_value(step.selected_node, Enum))]
-    return []
+    enum_cls: type[Enum] | None = None
+    step_cls = step.__class__
+    if hasattr(step_cls, "model_fields"):
+        field = step_cls.model_fields.get("selected_nodes")
+        if field is not None:
+            enum_cls = step_cls._extract_enum_class(field.annotation)
+    if enum_cls is None:
+        enum_cls = Enum
+    selected_nodes = [
+        str(_normalize_enum_value(selected_node, enum_cls))
+        for selected_node in step.selected_nodes or []
+        if selected_node
+    ]
+    return selected_nodes
 def _max_confidence(

openai-sdk-helpers 0.6.1__py3-none-any.whl → 0.6.4__py3-none-any.whl

openai-sdk-helpers 0.6.1__py3-none-any.whl → 0.6.4__py3-none-any.whl