openai-sdk-helpers 0.6.0__py3-none-any.whl → 0.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openai_sdk_helpers/agent/base.py +13 -5
- openai_sdk_helpers/agent/classifier.py +673 -93
- openai_sdk_helpers/prompt/classifier.jinja +9 -4
- openai_sdk_helpers/settings.py +65 -0
- openai_sdk_helpers/structure/base.py +79 -55
- openai_sdk_helpers/structure/classification.py +191 -43
- openai_sdk_helpers/structure/plan/enum.py +4 -0
- {openai_sdk_helpers-0.6.0.dist-info → openai_sdk_helpers-0.6.1.dist-info}/METADATA +12 -1
- {openai_sdk_helpers-0.6.0.dist-info → openai_sdk_helpers-0.6.1.dist-info}/RECORD +12 -12
- {openai_sdk_helpers-0.6.0.dist-info → openai_sdk_helpers-0.6.1.dist-info}/WHEEL +0 -0
- {openai_sdk_helpers-0.6.0.dist-info → openai_sdk_helpers-0.6.1.dist-info}/entry_points.txt +0 -0
- {openai_sdk_helpers-0.6.0.dist-info → openai_sdk_helpers-0.6.1.dist-info}/licenses/LICENSE +0 -0

openai_sdk_helpers/prompt/classifier.jinja
CHANGED

@@ -1,9 +1,14 @@
 You are a taxonomy classification assistant.

 Instructions:
-- Review the text and select
+- Review the text and select all matching taxonomy nodes from the list.
+- Populate selected_nodes as a list of taxonomy node ids for multi-class matches.
+- Use selected_node when a single best match is appropriate.
+- Provide a confidence score between 0 and 1 for the selections; higher means more certain.
+- Use only taxonomy identifiers from the candidate list for any selections.
+- Use the stop_reason enum values only: "continue", "stop", "no_match", "max_depth", "no_children".
 - If a child level should be explored, set stop_reason to "continue".
-- If no appropriate node exists, set stop_reason to "no_match" and leave
+- If no appropriate node exists, set stop_reason to "no_match" and leave selections empty.
 - If you are confident this is the final level, set stop_reason to "stop".
 - Provide a concise rationale in one or two sentences.

@@ -12,7 +17,7 @@ Current depth: {{ depth }}
 Previous path:
 {% if path %}
 {% for step in path %}
-- {{ step.
+- {{ step.selected_node }} (confidence={{ step.confidence }}, stop_reason={{ step.stop_reason }})
 {% endfor %}
 {% else %}
 - None
@@ -20,7 +25,7 @@ Previous path:

 Candidate taxonomy nodes:
 {% for node in taxonomy_nodes %}
--
+- identifier: {{ node.identifier }}
   label: {{ node.label }}
   description: {{ node.description or "None" }}
 {% endfor %}
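A minimal rendering sketch for the updated template, assuming plain jinja2 and dict stand-ins for the step and node objects (the package presumably passes its own model instances; the file path assumes a source checkout):

```python
from jinja2 import Template

# Render the classifier prompt with the variables the template expects:
# depth, path (prior steps), and taxonomy_nodes (candidates at this level).
source = open("openai_sdk_helpers/prompt/classifier.jinja").read()
prompt = Template(source).render(
    depth=1,
    path=[{"selected_node": "Root", "confidence": 0.9, "stop_reason": "continue"}],
    taxonomy_nodes=[
        {"identifier": "Root > Billing", "label": "Billing", "description": None},
    ],
)
print(prompt)
```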
openai_sdk_helpers/settings.py
CHANGED

@@ -48,6 +48,8 @@ class OpenAISettings(BaseModel):
     -------
     from_env(dotenv_path, **overrides)
         Build settings from environment variables and optional overrides.
+    from_secrets(secrets, **overrides)
+        Build settings from a secrets mapping and optional overrides.
     client_kwargs()
         Return keyword arguments for ``OpenAI`` initialization.
     create_client()
@@ -190,6 +192,69 @@ class OpenAISettings(BaseModel):

         return settings

+    @classmethod
+    def from_secrets(
+        cls,
+        secrets: Mapping[str, Any] | None = None,
+        **overrides: Any,
+    ) -> OpenAISettings:
+        """Load settings from a secrets mapping and optional overrides.
+
+        Parameters
+        ----------
+        secrets : Mapping[str, Any] or None, optional
+            Mapping of secret values keyed by environment variable names.
+            Defaults to environment variables.
+        overrides : Any
+            Keyword overrides applied on top of secret values.
+
+        Returns
+        -------
+        OpenAISettings
+            Settings instance populated from secret values and overrides.
+
+        Raises
+        ------
+        ValueError
+            If OPENAI_API_KEY is not found in the secrets mapping.
+        """
+        secret_values: Mapping[str, Any] = secrets or os.environ
+
+        def first_non_none(*candidates: Any) -> Any:
+            for candidate in candidates:
+                if candidate is not None:
+                    return candidate
+            return None
+
+        def resolve_value(override_key: str, secret_key: str) -> Any:
+            return first_non_none(
+                overrides.get(override_key),
+                secret_values.get(secret_key),
+            )
+
+        timeout_raw = resolve_value("timeout", "OPENAI_TIMEOUT")
+        max_retries_raw = resolve_value("max_retries", "OPENAI_MAX_RETRIES")
+
+        values: dict[str, Any] = {
+            "api_key": resolve_value("api_key", "OPENAI_API_KEY"),
+            "org_id": resolve_value("org_id", "OPENAI_ORG_ID"),
+            "project_id": resolve_value("project_id", "OPENAI_PROJECT_ID"),
+            "base_url": resolve_value("base_url", "OPENAI_BASE_URL"),
+            "default_model": resolve_value("default_model", "OPENAI_MODEL"),
+            "timeout": coerce_optional_float(timeout_raw),
+            "max_retries": coerce_optional_int(max_retries_raw),
+            "extra_client_kwargs": coerce_dict(overrides.get("extra_client_kwargs")),
+        }
+
+        settings = cls(**values)
+        if not settings.api_key:
+            raise ValueError(
+                "OPENAI_API_KEY is required to configure the OpenAI client"
+                " and was not found in secrets."
+            )
+
+        return settings
+
     def client_kwargs(self) -> dict[str, Any]:
         """Return keyword arguments for constructing an OpenAI client.

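A possible call pattern for the new classmethod, sketched only from the signature and key names shown above; the dict stands in for any secrets mapping, and values beyond those shown are illustrative:

```python
from openai_sdk_helpers.settings import OpenAISettings

secrets = {
    "OPENAI_API_KEY": "sk-...",     # required; ValueError is raised if missing
    "OPENAI_MODEL": "gpt-4o-mini",  # optional, becomes default_model
}

# Keyword overrides take precedence over values in the secrets mapping.
settings = OpenAISettings.from_secrets(secrets, max_retries=2)
client = settings.create_client()
```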

openai_sdk_helpers/structure/base.py
CHANGED

@@ -134,9 +134,21 @@ def _ensure_items_have_schema(target: Any) -> None:

 def _ensure_schema_has_type(schema: dict[str, Any]) -> None:
     """Ensure a schema dictionary includes a type entry when possible."""
+    any_of = schema.get("anyOf")
+    if isinstance(any_of, list):
+        for entry in any_of:
+            if isinstance(entry, dict):
+                _ensure_schema_has_type(entry)
+    properties = schema.get("properties")
+    if isinstance(properties, dict):
+        for value in properties.values():
+            if isinstance(value, dict):
+                _ensure_schema_has_type(value)
+    items = schema.get("items")
+    if isinstance(items, dict):
+        _ensure_schema_has_type(items)
     if "type" in schema or "$ref" in schema:
         return
-    any_of = schema.get("anyOf")
     if isinstance(any_of, list):
         inferred_types: set[str] = set()
         for entry in any_of:
@@ -162,6 +174,68 @@ def _ensure_schema_has_type(schema: dict[str, Any]) -> None:
         schema.update(_build_any_value_schema())


+def _hydrate_ref_types(schema: dict[str, Any]) -> None:
+    """Attach explicit types to $ref nodes when available.
+
+    Parameters
+    ----------
+    schema : dict[str, Any]
+        Schema dictionary to hydrate in place.
+    """
+    definitions = schema.get("$defs") or schema.get("definitions") or {}
+    if not isinstance(definitions, dict):
+        definitions = {}
+
+    def _infer_enum_type(values: list[Any]) -> list[str] | str | None:
+        type_map = {
+            str: "string",
+            int: "integer",
+            float: "number",
+            bool: "boolean",
+            type(None): "null",
+        }
+        inferred: set[str] = set()
+        for value in values:
+            inferred_type = type_map.get(type(value))
+            if inferred_type is not None:
+                inferred.add(inferred_type)
+        if not inferred:
+            return None
+        if len(inferred) == 1:
+            return next(iter(inferred))
+        return sorted(inferred)
+
+    def _resolve_ref_type(ref: str) -> list[str] | str | None:
+        prefixes = ("#/$defs/", "#/definitions/")
+        if not ref.startswith(prefixes):
+            return None
+        key = ref.split("/", maxsplit=2)[-1]
+        definition = definitions.get(key)
+        if not isinstance(definition, dict):
+            return None
+        ref_type = definition.get("type")
+        if isinstance(ref_type, (str, list)):
+            return ref_type
+        enum_values = definition.get("enum")
+        if isinstance(enum_values, list):
+            return _infer_enum_type(enum_values)
+        return None
+
+    def _walk(node: Any) -> None:
+        if isinstance(node, dict):
+            if "$ref" in node and "type" not in node:
+                ref_type = _resolve_ref_type(node["$ref"])
+                if ref_type is not None:
+                    node["type"] = ref_type
+            for value in node.values():
+                _walk(value)
+        elif isinstance(node, list):
+            for item in node:
+                _walk(item)
+
+    _walk(schema)
+
+
 class StructureBase(BaseModelJSONSerializable):
     """Base class for structured output models with schema generation.

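A small before/after illustration of the new hydration pass, built only from the logic shown above (importing the private helper is for demonstration; the package calls it internally):

```python
from openai_sdk_helpers.structure.base import _hydrate_ref_types

schema = {
    "$defs": {
        "StopReason": {"enum": ["stop", "continue"]},  # no "type", only enum values
        "Node": {"type": "object", "properties": {}},
    },
    "properties": {
        "stop_reason": {"$ref": "#/$defs/StopReason"},
        "node": {"$ref": "#/$defs/Node"},
    },
}

_hydrate_ref_types(schema)

# Each $ref now carries the type of its definition; enum-only definitions
# get a type inferred from their member values.
assert schema["properties"]["stop_reason"]["type"] == "string"
assert schema["properties"]["node"]["type"] == "object"
```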
@@ -471,7 +545,7 @@ class StructureBase(BaseModelJSONSerializable):
             if isinstance(obj, dict):
                 if "$ref" in obj:
                     for key in list(obj.keys()):
-                        if key
+                        if key not in {"$ref", "type"}:
                             obj.pop(key, None)
                 for v in obj.values():
                     clean_refs(v)
@@ -482,60 +556,10 @@ class StructureBase(BaseModelJSONSerializable):

         cleaned_schema = cast(dict[str, Any], clean_refs(schema))

-
-
-            root: dict[str, Any],
-            seen: set[str],
-        ) -> dict[str, Any] | None:
-            if not ref.startswith("#/"):
-                return None
-            if ref in seen:
-                return None
-            seen.add(ref)
-
-            current: Any = root
-            for part in ref.lstrip("#/").split("/"):
-                part = part.replace("~1", "/").replace("~0", "~")
-                if isinstance(current, dict) and part in current:
-                    current = current[part]
-                else:
-                    seen.discard(ref)
-                    return None
-            if isinstance(current, dict):
-                resolved = cast(dict[str, Any], json.loads(json.dumps(current)))
-            else:
-                resolved = None
-            seen.discard(ref)
-            return resolved
-
-        def _inline_anyof_refs(obj: Any, root: dict[str, Any], seen: set[str]) -> Any:
-            if isinstance(obj, dict):
-                updated: dict[str, Any] = {}
-                for key, value in obj.items():
-                    if key == "anyOf" and isinstance(value, list):
-                        updated_items = []
-                        for item in value:
-                            if (
-                                isinstance(item, dict)
-                                and "$ref" in item
-                                and "type" not in item
-                            ):
-                                resolved = _resolve_ref(item["$ref"], root, seen)
-                                if resolved is not None:
-                                    item = resolved
-                            updated_items.append(_inline_anyof_refs(item, root, seen))
-                        updated[key] = updated_items
-                    else:
-                        updated[key] = _inline_anyof_refs(value, root, seen)
-                return updated
-            if isinstance(obj, list):
-                return [_inline_anyof_refs(item, root, seen) for item in obj]
-            return obj
-
-        cleaned_schema = cast(
-            dict[str, Any], _inline_anyof_refs(cleaned_schema, schema, set())
-        )
+        cleaned_schema = cast(dict[str, Any], cleaned_schema)
+        _hydrate_ref_types(cleaned_schema)
         _ensure_items_have_schema(cleaned_schema)
+        _ensure_schema_has_type(cleaned_schema)

         nullable_fields = {
             name

openai_sdk_helpers/structure/classification.py
CHANGED

@@ -3,7 +3,7 @@
 from __future__ import annotations

 from enum import Enum
-from typing import Any, Iterable, Optional
+from typing import Any, Iterable, Optional, cast

 from .base import StructureBase, spec_field

@@ -13,11 +13,9 @@ class TaxonomyNode(StructureBase):

     Attributes
     ----------
-    id : str
-        Unique identifier for the taxonomy node.
     label : str
         Human-readable label for the taxonomy node.
-    description : str
+    description : str | None
         Optional description of the node.
     children : list[TaxonomyNode]
         Child nodes in the taxonomy.
@@ -30,15 +28,14 @@ class TaxonomyNode(StructureBase):
         Return the computed path for the node.
     is_leaf
         Return True when the taxonomy node has no children.
-
-        Return the child node matching the provided
+    child_by_path(path)
+        Return the child node matching the provided path.
     """

-    id: str = spec_field("id", description="Unique identifier for the taxonomy.")
     label: str = spec_field(
         "label", description="Human-readable label for the taxonomy node."
     )
-    description:
+    description: str | None = spec_field(
         "description",
         description="Optional description of the taxonomy node.",
         default=None,
@@ -88,22 +85,53 @@ class TaxonomyNode(StructureBase):
         """
         return self.build_path()

-    def
-
+    def child_by_path(
+        self, path: Iterable[str] | str | None
+    ) -> Optional["TaxonomyNode"]:
+        """Return the child node matching the provided path.

         Parameters
         ----------
-
-
+        path : Iterable[str] or str or None
+            Path segments or a delimited path string to locate.

         Returns
         -------
         TaxonomyNode or None
             Matching child node, if found.
         """
-        if
+        if path is None:
             return None
-
+        if isinstance(path, str):
+            path_segments = _split_path_identifier(path)
+        else:
+            path_segments = list(path)
+        last_segment = path_segments[-1] if path_segments else None
+        if not last_segment:
+            return None
+        return next(
+            (child for child in self.children if child.label == last_segment),
+            None,
+        )
+
+
+def _split_path_identifier(path: str) -> list[str]:
+    """Split a path identifier into label segments.
+
+    Parameters
+    ----------
+    path : str
+        Path identifier to split.
+
+    Returns
+    -------
+    list[str]
+        Label segments extracted from the path identifier.
+    """
+    delimiter = " > "
+    escape_token = "\\>"
+    segments = path.split(delimiter) if path else []
+    return [segment.replace(escape_token, delimiter) for segment in segments]


 class ClassificationStopReason(str, Enum):
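A usage sketch of the new lookup helpers, relying only on behaviour visible in the hunk above (the module path is assumed from the file name; child_by_path compares the last path segment against child labels, and _split_path_identifier splits on " > "):

```python
from openai_sdk_helpers.structure.classification import TaxonomyNode

root = TaxonomyNode(
    label="Root",
    children=[TaxonomyNode(label="Billing"), TaxonomyNode(label="Shipping")],
)

# Only the last segment of the path is matched against the children's labels.
assert root.child_by_path("Root > Billing").label == "Billing"
assert root.child_by_path(["Root", "Shipping"]).label == "Shipping"
assert root.child_by_path(None) is None
```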
@@ -139,14 +167,14 @@ class ClassificationStopReason(str, Enum):


 class ClassificationStep(StructureBase):
-    """Represent a
+    """Represent a classification step constrained to taxonomy node enums.

     Attributes
     ----------
-
-
-
-
+    selected_node : Enum or None
+        Enum value of the selected taxonomy node.
+    selected_nodes : list[Enum] or None
+        Enum values of selected taxonomy nodes for multi-class classification.
     confidence : float or None
         Confidence score between 0 and 1.
     stop_reason : ClassificationStopReason
@@ -156,18 +184,34 @@ class ClassificationStep(StructureBase):

     Methods
     -------
+    build_for_enum(enum_cls)
+        Build a ClassificationStep subclass with enum-constrained selections.
     as_summary()
         Return a dictionary summary of the classification step.
+
+    Examples
+    --------
+    Create a multi-class step and summarize the selections:
+
+    >>> NodeEnum = Enum("NodeEnum", {"BILLING": "billing"})
+    >>> StepEnum = ClassificationStep.build_for_enum(NodeEnum)
+    >>> step = StepEnum(
+    ...     selected_nodes=[NodeEnum.BILLING],
+    ...     confidence=0.82,
+    ...     stop_reason=ClassificationStopReason.STOP,
+    ... )
+    >>> step.as_summary()["selected_nodes"]
+    [<NodeEnum.BILLING: 'billing'>]
     """

-
-    "
-        description="
+    selected_node: Enum | None = spec_field(
+        "selected_node",
+        description="Path identifier of the selected taxonomy node.",
         default=None,
     )
-
-    "
-        description="
+    selected_nodes: list[Enum] | None = spec_field(
+        "selected_nodes",
+        description="Path identifiers of selected taxonomy nodes.",
         default=None,
     )
     confidence: Optional[float] = spec_field(
@@ -179,6 +223,7 @@ class ClassificationStep(StructureBase):
         "stop_reason",
         description="Reason for stopping or continuing traversal.",
         default=ClassificationStopReason.STOP,
+        allow_null=False,
     )
     rationale: Optional[str] = spec_field(
         "rationale",
@@ -186,6 +231,38 @@ class ClassificationStep(StructureBase):
         default=None,
     )

+    @classmethod
+    def build_for_enum(cls, enum_cls: type[Enum]) -> type["ClassificationStep"]:
+        """Build a ClassificationStep subclass with enum-constrained fields.
+
+        Parameters
+        ----------
+        enum_cls : type[Enum]
+            Enum type to use for node selections.
+
+        Returns
+        -------
+        type[ClassificationStep]
+            Specialized ClassificationStep class bound to the enum.
+        """
+        namespace: dict[str, Any] = {
+            "__annotations__": {
+                "selected_node": enum_cls | None,
+                "selected_nodes": list[enum_cls] | None,
+            },
+            "selected_node": spec_field(
+                "selected_node",
+                description="Path identifier of the selected taxonomy node.",
+                default=None,
+            ),
+            "selected_nodes": spec_field(
+                "selected_nodes",
+                description="Path identifiers of selected taxonomy nodes.",
+                default=None,
+            ),
+        }
+        return cast(type["ClassificationStep"], type("BoundStep", (cls,), namespace))
+
     def as_summary(self) -> dict[str, Any]:
         """Return a dictionary summary of the classification step.

@@ -193,47 +270,93 @@ class ClassificationStep(StructureBase):
         -------
         dict[str, Any]
             Summary data for logging or inspection.
+
+        Examples
+        --------
+        >>> NodeEnum = Enum("NodeEnum", {"ROOT": "root"})
+        >>> StepEnum = ClassificationStep.build_for_enum(NodeEnum)
+        >>> step = StepEnum(selected_node=NodeEnum.ROOT)
+        >>> step.as_summary()["selected_node"]
+        <NodeEnum.ROOT: 'root'>
         """
+        selected_node = _normalize_enum_value(self.selected_node)
+        selected_nodes = [
+            _normalize_enum_value(item) for item in self.selected_nodes or []
+        ]
         return {
-            "
-            "
+            "selected_node": selected_node,
+            "selected_nodes": selected_nodes or None,
             "confidence": self.confidence,
             "stop_reason": self.stop_reason.value,
         }


+def _normalize_enum_value(value: Any) -> Any:
+    """Normalize enum values into raw primitives.
+
+    Parameters
+    ----------
+    value : Any
+        Value to normalize.
+
+    Returns
+    -------
+    Any
+        Primitive value suitable for summaries.
+    """
+    if isinstance(value, Enum):
+        return value.value
+    return value
+
+
 class ClassificationResult(StructureBase):
     """Represent the final result of taxonomy traversal.

     Attributes
     ----------
-
-
-
-
+    final_node : TaxonomyNode or None
+        Resolved taxonomy node for the final selection.
+    final_nodes : list[TaxonomyNode] or None
+        Resolved taxonomy nodes for the final selections across branches.
     confidence : float or None
         Confidence score for the final selection.
     stop_reason : ClassificationStopReason
         Reason the traversal ended.
     path : list[ClassificationStep]
         Ordered list of classification steps.
+    path_nodes : list[TaxonomyNode]
+        Resolved taxonomy nodes selected across the path.

     Methods
     -------
     depth
         Return the number of classification steps recorded.
-
-        Return the
+    path_identifiers
+        Return the identifiers selected at each step.
+
+    Examples
+    --------
+    Summarize single and multi-class output:
+
+    >>> node = TaxonomyNode(label="Tax")
+    >>> result = ClassificationResult(
+    ...     final_node=node,
+    ...     final_nodes=[node],
+    ...     confidence=0.91,
+    ...     stop_reason=ClassificationStopReason.STOP,
+    ... )
+    >>> result.final_nodes
+    [TaxonomyNode(label='Tax', description=None, children=[])]
     """

-
-    "
-        description="
+    final_node: TaxonomyNode | None = spec_field(
+        "final_node",
+        description="Resolved taxonomy node for the final selection.",
         default=None,
     )
-
-    "
-        description="
+    final_nodes: list[TaxonomyNode] | None = spec_field(
+        "final_nodes",
+        description="Resolved taxonomy nodes for the final selections.",
         default=None,
     )
     confidence: Optional[float] = spec_field(
@@ -251,6 +374,11 @@ class ClassificationResult(StructureBase):
         description="Ordered list of classification steps.",
         default_factory=list,
     )
+    path_nodes: list[TaxonomyNode] = spec_field(
+        "path_nodes",
+        description="Resolved taxonomy nodes selected across the path.",
+        default_factory=list,
+    )

     @property
     def depth(self) -> int:
@@ -264,15 +392,35 @@ class ClassificationResult(StructureBase):
         return len(self.path)

     @property
-    def
-        """Return the
+    def path_identifiers(self) -> list[str]:
+        """Return the identifiers selected at each step.

         Returns
         -------
         list[str]
-
+            Identifiers selected at each classification step.
+
+        Examples
+        --------
+        >>> steps = [
+        ...     ClassificationStep(selected_node="Root"),
+        ...     ClassificationStep(selected_nodes=["Root > Leaf", "Root > Branch"]),
+        ... ]
+        >>> ClassificationResult(
+        ...     stop_reason=ClassificationStopReason.STOP,
+        ...     path=steps,
+        ... ).path_identifiers
+        ['Root', 'Root > Leaf', 'Root > Branch']
         """
-
+        identifiers: list[str] = []
+        for step in self.path:
+            if step.selected_nodes:
+                identifiers.extend(
+                    _normalize_enum_value(value) for value in step.selected_nodes
+                )
+            elif step.selected_node:
+                identifiers.append(_normalize_enum_value(step.selected_node))
+        return [identifier for identifier in identifiers if identifier]


 def flatten_taxonomy(nodes: Iterable[TaxonomyNode]) -> list[TaxonomyNode]:

openai_sdk_helpers/structure/plan/enum.py
CHANGED

@@ -29,6 +29,8 @@ class AgentEnum(CrosswalkJSONEnum):
         Translation agent for language conversion.
     VALIDATOR : str
         Validation agent for checking constraints and guardrails.
+    CLASSIFIER : str
+        Taxonomy classifier agent for structured label selection.
     PLANNER : str
         Meta-planning agent for generating execution plans.
     DESIGNER : str
@@ -58,6 +60,7 @@ class AgentEnum(CrosswalkJSONEnum):
     SUMMARIZER = "SummarizerAgent"
     TRANSLATOR = "TranslatorAgent"
     VALIDATOR = "ValidatorAgent"
+    CLASSIFIER = "TaxonomyClassifierAgent"
     PLANNER = "MetaPlanner"
     DESIGNER = "AgentDesigner"
     BUILDER = "AgentBuilder"
@@ -89,6 +92,7 @@ class AgentEnum(CrosswalkJSONEnum):
         "SUMMARIZER": {"value": "SummarizerAgent"},
         "TRANSLATOR": {"value": "TranslatorAgent"},
         "VALIDATOR": {"value": "ValidatorAgent"},
+        "CLASSIFIER": {"value": "TaxonomyClassifierAgent"},
         "PLANNER": {"value": "MetaPlanner"},
         "DESIGNER": {"value": "AgentDesigner"},
         "BUILDER": {"value": "AgentBuilder"},
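A quick check of the new agent entry, assuming the module path follows the file name above:

```python
from openai_sdk_helpers.structure.plan.enum import AgentEnum

# The new member maps the CLASSIFIER key onto the TaxonomyClassifierAgent value
# used in the crosswalk data shown in the last hunk.
assert AgentEnum.CLASSIFIER.value == "TaxonomyClassifierAgent"
```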