data-designer-engine 0.4.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. data_designer/engine/__init__.py +2 -0
  2. data_designer/engine/_version.py +34 -0
  3. data_designer/engine/analysis/column_profilers/base.py +49 -0
  4. data_designer/engine/analysis/column_profilers/judge_score_profiler.py +153 -0
  5. data_designer/engine/analysis/column_profilers/registry.py +22 -0
  6. data_designer/engine/analysis/column_statistics.py +145 -0
  7. data_designer/engine/analysis/dataset_profiler.py +149 -0
  8. data_designer/engine/analysis/errors.py +9 -0
  9. data_designer/engine/analysis/utils/column_statistics_calculations.py +234 -0
  10. data_designer/engine/analysis/utils/judge_score_processing.py +132 -0
  11. data_designer/engine/column_generators/__init__.py +2 -0
  12. data_designer/engine/column_generators/generators/__init__.py +2 -0
  13. data_designer/engine/column_generators/generators/base.py +122 -0
  14. data_designer/engine/column_generators/generators/embedding.py +35 -0
  15. data_designer/engine/column_generators/generators/expression.py +55 -0
  16. data_designer/engine/column_generators/generators/llm_completion.py +116 -0
  17. data_designer/engine/column_generators/generators/samplers.py +69 -0
  18. data_designer/engine/column_generators/generators/seed_dataset.py +144 -0
  19. data_designer/engine/column_generators/generators/validation.py +140 -0
  20. data_designer/engine/column_generators/registry.py +60 -0
  21. data_designer/engine/column_generators/utils/errors.py +15 -0
  22. data_designer/engine/column_generators/utils/generator_classification.py +43 -0
  23. data_designer/engine/column_generators/utils/judge_score_factory.py +58 -0
  24. data_designer/engine/column_generators/utils/prompt_renderer.py +100 -0
  25. data_designer/engine/compiler.py +97 -0
  26. data_designer/engine/configurable_task.py +71 -0
  27. data_designer/engine/dataset_builders/artifact_storage.py +283 -0
  28. data_designer/engine/dataset_builders/column_wise_builder.py +354 -0
  29. data_designer/engine/dataset_builders/errors.py +15 -0
  30. data_designer/engine/dataset_builders/multi_column_configs.py +46 -0
  31. data_designer/engine/dataset_builders/utils/__init__.py +2 -0
  32. data_designer/engine/dataset_builders/utils/concurrency.py +212 -0
  33. data_designer/engine/dataset_builders/utils/config_compiler.py +62 -0
  34. data_designer/engine/dataset_builders/utils/dag.py +62 -0
  35. data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +200 -0
  36. data_designer/engine/dataset_builders/utils/errors.py +15 -0
  37. data_designer/engine/dataset_builders/utils/progress_tracker.py +122 -0
  38. data_designer/engine/errors.py +51 -0
  39. data_designer/engine/model_provider.py +77 -0
  40. data_designer/engine/models/__init__.py +2 -0
  41. data_designer/engine/models/errors.py +300 -0
  42. data_designer/engine/models/facade.py +284 -0
  43. data_designer/engine/models/factory.py +42 -0
  44. data_designer/engine/models/litellm_overrides.py +179 -0
  45. data_designer/engine/models/parsers/__init__.py +2 -0
  46. data_designer/engine/models/parsers/errors.py +34 -0
  47. data_designer/engine/models/parsers/parser.py +235 -0
  48. data_designer/engine/models/parsers/postprocessors.py +93 -0
  49. data_designer/engine/models/parsers/tag_parsers.py +62 -0
  50. data_designer/engine/models/parsers/types.py +84 -0
  51. data_designer/engine/models/recipes/base.py +81 -0
  52. data_designer/engine/models/recipes/response_recipes.py +293 -0
  53. data_designer/engine/models/registry.py +151 -0
  54. data_designer/engine/models/telemetry.py +362 -0
  55. data_designer/engine/models/usage.py +73 -0
  56. data_designer/engine/models/utils.py +101 -0
  57. data_designer/engine/processing/ginja/__init__.py +2 -0
  58. data_designer/engine/processing/ginja/ast.py +65 -0
  59. data_designer/engine/processing/ginja/environment.py +463 -0
  60. data_designer/engine/processing/ginja/exceptions.py +56 -0
  61. data_designer/engine/processing/ginja/record.py +32 -0
  62. data_designer/engine/processing/gsonschema/__init__.py +2 -0
  63. data_designer/engine/processing/gsonschema/exceptions.py +15 -0
  64. data_designer/engine/processing/gsonschema/schema_transformers.py +83 -0
  65. data_designer/engine/processing/gsonschema/types.py +10 -0
  66. data_designer/engine/processing/gsonschema/validators.py +202 -0
  67. data_designer/engine/processing/processors/base.py +13 -0
  68. data_designer/engine/processing/processors/drop_columns.py +42 -0
  69. data_designer/engine/processing/processors/registry.py +25 -0
  70. data_designer/engine/processing/processors/schema_transform.py +71 -0
  71. data_designer/engine/processing/utils.py +169 -0
  72. data_designer/engine/registry/base.py +99 -0
  73. data_designer/engine/registry/data_designer_registry.py +39 -0
  74. data_designer/engine/registry/errors.py +12 -0
  75. data_designer/engine/resources/managed_dataset_generator.py +39 -0
  76. data_designer/engine/resources/managed_dataset_repository.py +197 -0
  77. data_designer/engine/resources/managed_storage.py +65 -0
  78. data_designer/engine/resources/resource_provider.py +77 -0
  79. data_designer/engine/resources/seed_reader.py +154 -0
  80. data_designer/engine/sampling_gen/column.py +91 -0
  81. data_designer/engine/sampling_gen/constraints.py +100 -0
  82. data_designer/engine/sampling_gen/data_sources/base.py +217 -0
  83. data_designer/engine/sampling_gen/data_sources/errors.py +12 -0
  84. data_designer/engine/sampling_gen/data_sources/sources.py +347 -0
  85. data_designer/engine/sampling_gen/entities/__init__.py +2 -0
  86. data_designer/engine/sampling_gen/entities/assets/zip_area_code_map.parquet +0 -0
  87. data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +90 -0
  88. data_designer/engine/sampling_gen/entities/email_address_utils.py +171 -0
  89. data_designer/engine/sampling_gen/entities/errors.py +10 -0
  90. data_designer/engine/sampling_gen/entities/national_id_utils.py +102 -0
  91. data_designer/engine/sampling_gen/entities/person.py +144 -0
  92. data_designer/engine/sampling_gen/entities/phone_number.py +128 -0
  93. data_designer/engine/sampling_gen/errors.py +26 -0
  94. data_designer/engine/sampling_gen/generator.py +122 -0
  95. data_designer/engine/sampling_gen/jinja_utils.py +64 -0
  96. data_designer/engine/sampling_gen/people_gen.py +199 -0
  97. data_designer/engine/sampling_gen/person_constants.py +56 -0
  98. data_designer/engine/sampling_gen/schema.py +147 -0
  99. data_designer/engine/sampling_gen/schema_builder.py +61 -0
  100. data_designer/engine/sampling_gen/utils.py +46 -0
  101. data_designer/engine/secret_resolver.py +82 -0
  102. data_designer/engine/testing/__init__.py +12 -0
  103. data_designer/engine/testing/stubs.py +133 -0
  104. data_designer/engine/testing/utils.py +20 -0
  105. data_designer/engine/validation.py +367 -0
  106. data_designer/engine/validators/__init__.py +19 -0
  107. data_designer/engine/validators/base.py +38 -0
  108. data_designer/engine/validators/local_callable.py +39 -0
  109. data_designer/engine/validators/python.py +254 -0
  110. data_designer/engine/validators/remote.py +89 -0
  111. data_designer/engine/validators/sql.py +65 -0
  112. data_designer_engine-0.4.0.dist-info/METADATA +50 -0
  113. data_designer_engine-0.4.0.dist-info/RECORD +114 -0
  114. data_designer_engine-0.4.0.dist-info/WHEEL +4 -0
data_designer/engine/models/errors.py
@@ -0,0 +1,300 @@
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ # SPDX-License-Identifier: Apache-2.0
+
+ from __future__ import annotations
+
+ import logging
+ from collections.abc import Callable
+ from functools import wraps
+ from typing import TYPE_CHECKING, Any
+
+ from pydantic import BaseModel
+
+ from data_designer.engine.errors import DataDesignerError
+ from data_designer.lazy_heavy_imports import litellm
+
+ if TYPE_CHECKING:
+     import litellm
+
+ logger = logging.getLogger(__name__)
+
+
+ def get_exception_primary_cause(exception: BaseException) -> BaseException:
+     """Return the primary cause of an exception by walking backwards.
+
+     This recursive walkback halts when it arrives at an exception that
+     has no provided __cause__ (i.e., __cause__ is None).
+
+     Args:
+         exception (BaseException): An exception to start from.
+
+     Returns:
+         The innermost exception in the __cause__ chain.
+
+     Raises:
+         RecursionError: If the exception causes somehow form a cycle
+             (seems impossible in practice).
+     """
+     if exception.__cause__ is None:
+         return exception
+     else:
+         return get_exception_primary_cause(exception.__cause__)
+
+
+ class GenerationValidationFailureError(Exception): ...
+
+
+ class ModelRateLimitError(DataDesignerError): ...
+
+
+ class ModelTimeoutError(DataDesignerError): ...
+
+
+ class ModelContextWindowExceededError(DataDesignerError): ...
+
+
+ class ModelAuthenticationError(DataDesignerError): ...
+
+
+ class ModelPermissionDeniedError(DataDesignerError): ...
+
+
+ class ModelNotFoundError(DataDesignerError): ...
+
+
+ class ModelUnsupportedParamsError(DataDesignerError): ...
+
+
+ class ModelBadRequestError(DataDesignerError): ...
+
+
+ class ModelInternalServerError(DataDesignerError): ...
+
+
+ class ModelAPIError(DataDesignerError): ...
+
+
+ class ModelUnprocessableEntityError(DataDesignerError): ...
+
+
+ class ModelAPIConnectionError(DataDesignerError): ...
+
+
+ class ModelStructuredOutputError(DataDesignerError): ...
+
+
+ class ModelGenerationValidationFailureError(DataDesignerError): ...
+
+
+ class FormattedLLMErrorMessage(BaseModel):
+     cause: str
+     solution: str
+
+     def __str__(self) -> str:
+         return "\n".join(
+             [
+                 " |----------",
+                 f" | Cause: {self.cause}",
+                 f" | Solution: {self.solution}",
+                 " |----------",
+             ]
+         )
+
+
+ def handle_llm_exceptions(
+     exception: Exception, model_name: str, model_provider_name: str, purpose: str | None = None
+ ) -> None:
+     """Handle LLM-related exceptions and convert them to appropriate DataDesignerError errors.
+
+     This function centralizes the exception-handling logic for LLM operations,
+     making it reusable across different contexts.
+
+     Args:
+         exception: The exception that was raised.
+         model_name: Name of the model that was being used.
+         model_provider_name: Name of the model provider that was being used.
+         purpose: The purpose of the model usage, shown as context in the error message.
+
+     Raises:
+         DataDesignerError: A more user-friendly error with an appropriate error type and message.
+     """
+     purpose = purpose or "running generation"
+     authentication_error = FormattedLLMErrorMessage(
+         cause=f"The API key provided for model {model_name!r} was found to be invalid or expired while {purpose}.",
+         solution=f"Verify your API key for model provider {model_provider_name!r} and update it in your settings.",
+     )
+     err_msg_parser = DownstreamLLMExceptionMessageParser(model_name, model_provider_name, purpose)
+     match exception:
+         # Common errors that can come from LiteLLM
+         case litellm.exceptions.APIError():
+             raise err_msg_parser.parse_api_error(exception, authentication_error) from None
+
+         case litellm.exceptions.APIConnectionError():
+             raise ModelAPIConnectionError(
+                 FormattedLLMErrorMessage(
+                     cause=f"Connection to model {model_name!r} hosted on model provider {model_provider_name!r} failed while {purpose}.",
+                     solution="Check your network/proxy/firewall settings.",
+                 )
+             ) from None
+
+         case litellm.exceptions.AuthenticationError():
+             raise ModelAuthenticationError(authentication_error) from None
+
+         case litellm.exceptions.ContextWindowExceededError():
+             raise err_msg_parser.parse_context_window_exceeded_error(exception) from None
+
+         case litellm.exceptions.UnsupportedParamsError():
+             raise ModelUnsupportedParamsError(
+                 FormattedLLMErrorMessage(
+                     cause=f"One or more of the parameters you provided are unsupported by model {model_name!r} while {purpose}.",
+                     solution=f"Review the documentation for model provider {model_provider_name!r} and adjust your request.",
+                 )
+             ) from None
+
+         case litellm.exceptions.BadRequestError():
+             raise err_msg_parser.parse_bad_request_error(exception) from None
+
+         case litellm.exceptions.InternalServerError():
+             raise ModelInternalServerError(
+                 FormattedLLMErrorMessage(
+                     cause=f"Model {model_name!r} is currently experiencing internal server issues while {purpose}.",
+                     solution=f"Try again in a few moments. Check with your model provider {model_provider_name!r} if the issue persists.",
+                 )
+             ) from None
+
+         case litellm.exceptions.NotFoundError():
+             raise ModelNotFoundError(
+                 FormattedLLMErrorMessage(
+                     cause=f"The specified model {model_name!r} could not be found while {purpose}.",
+                     solution=f"Check that the model name is correct and supported by your model provider {model_provider_name!r} and try again.",
+                 )
+             ) from None
+
+         case litellm.exceptions.PermissionDeniedError():
+             raise ModelPermissionDeniedError(
+                 FormattedLLMErrorMessage(
+                     cause=f"Your API key lacks the necessary permissions to use model {model_name!r} while {purpose}.",
+                     solution=f"Use an API key with the right permissions for this model, or switch to a model that your current API key can access on model provider {model_provider_name!r}.",
+                 )
+             ) from None
+
+         case litellm.exceptions.RateLimitError():
+             raise ModelRateLimitError(
+                 FormattedLLMErrorMessage(
+                     cause=f"You have exceeded the rate limit for model {model_name!r} while {purpose}.",
+                     solution="Wait and try again in a few moments.",
+                 )
+             ) from None
+
+         case litellm.exceptions.Timeout():
+             raise ModelTimeoutError(
+                 FormattedLLMErrorMessage(
+                     cause=f"The request to model {model_name!r} timed out while {purpose}.",
+                     solution="Check your connection and try again. You may need to increase the timeout setting for the model.",
+                 )
+             ) from None
+
+         case litellm.exceptions.UnprocessableEntityError():
+             raise ModelUnprocessableEntityError(
+                 FormattedLLMErrorMessage(
+                     cause=f"The request to model {model_name!r} failed despite a correct request format while {purpose}.",
+                     solution="This is most likely temporary. Try again in a few moments.",
+                 )
+             ) from None
+
+         # Parsing and validation errors
+         case GenerationValidationFailureError():
+             raise ModelGenerationValidationFailureError(
+                 FormattedLLMErrorMessage(
+                     cause=f"Responses from model {model_name!r} could not be parsed into the provided output schema while {purpose}.",
+                     solution="This is most likely temporary, as additional attempts are made automatically. If it persists, simplify or modify the output schema for structured output and try again. If you are attempting token-intensive tasks such as generations with high reasoning effort, ensure that max_tokens in the model config is high enough to reach completion.",
+                 )
+             ) from None
+
+         case DataDesignerError():
+             raise exception from None
+
+         case _:
+             raise DataDesignerError(
+                 FormattedLLMErrorMessage(
+                     cause=f"An unexpected error occurred while {purpose}.",
+                     solution=f"Review the stack trace for more details: {exception}",
+                 )
+             ) from exception
+
+
+ def catch_llm_exceptions(func: Callable) -> Callable:
+     """Decorate any `ModelFacade` method that could raise exceptions
+     that should be turned into upstream user-facing errors.
+     """
+
+     @wraps(func)
+     def wrapper(model_facade: Any, *args, **kwargs):
+         try:
+             return func(model_facade, *args, **kwargs)
+         except Exception as e:
+             logger.debug(
+                 "\n".join(
+                     [
+                         "",
+                         "|----------",
+                         f"| Caught an exception downstream of type {type(e)!r}. Re-raising it below as a custom error with more context.",
+                         "|----------",
+                     ]
+                 ),
+                 exc_info=True,
+                 stack_info=True,
+             )
+             handle_llm_exceptions(
+                 e, model_facade.model_name, model_facade.model_provider_name, purpose=kwargs.get("purpose")
+             )
+
+     return wrapper
+
+
+ class DownstreamLLMExceptionMessageParser:
+     def __init__(self, model_name: str, model_provider_name: str, purpose: str):
+         self.model_name = model_name
+         self.model_provider_name = model_provider_name
+         self.purpose = purpose
+
+     def parse_bad_request_error(self, exception: litellm.exceptions.BadRequestError) -> DataDesignerError:
+         err_msg = FormattedLLMErrorMessage(
+             cause=f"The request for model {self.model_name!r} was malformed or missing required parameters while {self.purpose}.",
+             solution="Check your request parameters and try again.",
+         )
+         if "is not a multimodal model" in str(exception):
+             err_msg = FormattedLLMErrorMessage(
+                 cause=f"Model {self.model_name!r} is not a multimodal model, but it looks like you are trying to provide multimodal context while {self.purpose}.",
+                 solution="Check your request parameters and try again.",
+             )
+         return ModelBadRequestError(err_msg)
+
+     def parse_context_window_exceeded_error(
+         self, exception: litellm.exceptions.ContextWindowExceededError
+     ) -> DataDesignerError:
+         cause = f"The input data for model {self.model_name!r} exceeds its supported context window while {self.purpose}."
+         try:
+             if "OpenAIException - This model's maximum context length is " in str(exception):
+                 openai_exception_cause = (
+                     str(exception).split("OpenAIException - ")[1].split("\n")[0].split(" Please reduce ")[0]
+                 )
+                 cause = f"{cause} {openai_exception_cause}"
+         except Exception:
+             # Best effort only: fall back to the generic cause message.
+             pass
+         return ModelContextWindowExceededError(
+             FormattedLLMErrorMessage(
+                 cause=cause,
+                 solution="Check the model's maximum supported context window. Adjust the length of your input and requested completions and try again.",
+             )
+         )
+
+     def parse_api_error(
+         self, exception: litellm.exceptions.APIError, auth_error_msg: FormattedLLMErrorMessage
+     ) -> DataDesignerError:
+         if "Error code: 403" in str(exception):
+             return ModelAuthenticationError(auth_error_msg)
+
+         return ModelAPIError(
+             FormattedLLMErrorMessage(
+                 cause=f"An unexpected API error occurred with model {self.model_name!r} while {self.purpose}.",
+                 solution=f"Try again in a few moments. Check with your model provider {self.model_provider_name!r} if the issue persists.",
+             )
+         )
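
A note on two building blocks above. `get_exception_primary_cause` walks the explicit `__cause__` chain (set by `raise ... from ...`) down to the innermost exception, and `FormattedLLMErrorMessage.__str__` renders the boxed cause/solution block. A minimal sketch of both, assuming only that the module above is importable:

    from data_designer.engine.models.errors import (
        FormattedLLMErrorMessage,
        get_exception_primary_cause,
    )

    try:
        try:
            raise ValueError("root cause")
        except ValueError as inner:
            raise RuntimeError("wrapper") from inner
    except RuntimeError as outer:
        root = get_exception_primary_cause(outer)
        assert isinstance(root, ValueError)  # walkback stops where __cause__ is None

    msg = FormattedLLMErrorMessage(cause="API key expired.", solution="Rotate the key.")
    print(msg)  # renders the " | Cause: ... | Solution: ..." box shown above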
data_designer/engine/models/facade.py
@@ -0,0 +1,284 @@
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ # SPDX-License-Identifier: Apache-2.0
+
+ from __future__ import annotations
+
+ import logging
+ from collections.abc import Callable
+ from copy import deepcopy
+ from typing import TYPE_CHECKING, Any
+
+ from data_designer.config.models import GenerationType, ModelConfig, ModelProvider
+ from data_designer.engine.model_provider import ModelProviderRegistry
+ from data_designer.engine.models.errors import (
+     GenerationValidationFailureError,
+     catch_llm_exceptions,
+     get_exception_primary_cause,
+ )
+ from data_designer.engine.models.litellm_overrides import CustomRouter, LiteLLMRouterDefaultKwargs
+ from data_designer.engine.models.parsers.errors import ParserException
+ from data_designer.engine.models.usage import ModelUsageStats, RequestUsageStats, TokenUsageStats
+ from data_designer.engine.models.utils import ChatMessage, prompt_to_messages
+ from data_designer.engine.secret_resolver import SecretResolver
+ from data_designer.lazy_heavy_imports import litellm
+
+ if TYPE_CHECKING:
+     import litellm
+
+ logger = logging.getLogger(__name__)
+
+
+ class ModelFacade:
+     def __init__(
+         self,
+         model_config: ModelConfig,
+         secret_resolver: SecretResolver,
+         model_provider_registry: ModelProviderRegistry,
+     ):
+         self._model_config = model_config
+         self._secret_resolver = secret_resolver
+         self._model_provider_registry = model_provider_registry
+         self._litellm_deployment = self._get_litellm_deployment(model_config)
+         self._router = CustomRouter([self._litellm_deployment], **LiteLLMRouterDefaultKwargs().model_dump())
+         self._usage_stats = ModelUsageStats()
+
+     @property
+     def model_name(self) -> str:
+         return self._model_config.model
+
+     @property
+     def model_provider(self) -> ModelProvider:
+         return self._model_provider_registry.get_provider(self._model_config.provider)
+
+     @property
+     def model_generation_type(self) -> GenerationType:
+         return self._model_config.generation_type
+
+     @property
+     def model_provider_name(self) -> str:
+         return self.model_provider.name
+
+     @property
+     def model_alias(self) -> str:
+         return self._model_config.alias
+
+     @property
+     def usage_stats(self) -> ModelUsageStats:
+         return self._usage_stats
+
+     def completion(
+         self, messages: list[ChatMessage], skip_usage_tracking: bool = False, **kwargs
+     ) -> litellm.ModelResponse:
+         message_payloads = [message.to_dict() for message in messages]
+         logger.debug(
+             f"Prompting model {self.model_name!r}...",
+             extra={"model": self.model_name, "messages": message_payloads},
+         )
+         response = None
+         kwargs = self.consolidate_kwargs(**kwargs)
+         try:
+             response = self._router.completion(model=self.model_name, messages=message_payloads, **kwargs)
+             logger.debug(
+                 f"Received completion from model {self.model_name!r}",
+                 extra={
+                     "model": self.model_name,
+                     "response": response,
+                     "text": response.choices[0].message.content,
+                     "usage": self._usage_stats.model_dump(),
+                 },
+             )
+             return response
+         finally:
+             if not skip_usage_tracking:
+                 # _track_usage records a failed request when response is still None.
+                 self._track_usage(response)
+
+     def consolidate_kwargs(self, **kwargs) -> dict[str, Any]:
+         # Remove purpose from kwargs to avoid passing it to the model
+         kwargs.pop("purpose", None)
+         kwargs = {**self._model_config.inference_parameters.generate_kwargs, **kwargs}
+         if self.model_provider.extra_body:
+             kwargs["extra_body"] = {**kwargs.get("extra_body", {}), **self.model_provider.extra_body}
+         if self.model_provider.extra_headers:
+             kwargs["extra_headers"] = self.model_provider.extra_headers
+         return kwargs
+
+     @catch_llm_exceptions
+     def generate_text_embeddings(
+         self, input_texts: list[str], skip_usage_tracking: bool = False, **kwargs
+     ) -> list[list[float]]:
+         logger.debug(
+             f"Generating embeddings with model {self.model_name!r}...",
+             extra={
+                 "model": self.model_name,
+                 "input_count": len(input_texts),
+             },
+         )
+         kwargs = self.consolidate_kwargs(**kwargs)
+         response = None
+         try:
+             response = self._router.embedding(model=self.model_name, input=input_texts, **kwargs)
+             logger.debug(
+                 f"Received embeddings from model {self.model_name!r}",
+                 extra={
+                     "model": self.model_name,
+                     "embedding_count": len(response.data) if response.data else 0,
+                     "usage": self._usage_stats.model_dump(),
+                 },
+             )
+             if response.data and len(response.data) == len(input_texts):
+                 return [data["embedding"] for data in response.data]
+             else:
+                 raise ValueError(
+                     f"Expected {len(input_texts)} embeddings, but received "
+                     f"{len(response.data) if response.data else 0}"
+                 )
+         finally:
+             if not skip_usage_tracking:
+                 # _track_usage_from_embedding records a failed request when response is still None.
+                 self._track_usage_from_embedding(response)
+
+     @catch_llm_exceptions
+     def generate(
+         self,
+         prompt: str,
+         *,
+         parser: Callable[[str], Any],
+         system_prompt: str | None = None,
+         multi_modal_context: list[dict[str, Any]] | None = None,
+         max_correction_steps: int = 0,
+         max_conversation_restarts: int = 0,
+         skip_usage_tracking: bool = False,
+         purpose: str | None = None,
+         **kwargs,
+     ) -> tuple[Any, list[ChatMessage]]:
+         """Generate a parsed output with correction steps.
+
+         This generation call attempts to produce an output that is
+         valid according to the specified parser, where "valid" means
+         that the parser can process the LLM response without raising
+         an exception.
+
+         `ParserException`s are routed back to the LLM as new rounds in the
+         conversation: the LLM is given its earlier response along with a
+         "user" message containing the exception string (not the traceback).
+         This continues for the number of rounds specified by
+         `max_correction_steps`.
+
+         Args:
+             prompt (str): Task prompt.
+             parser (func(str) -> Any): A function applied to the LLM response that processes
+                 the response into some output object.
+             system_prompt (str, optional): Optional system instructions. If not specified,
+                 no system message is provided and the model should use its default system
+                 prompt.
+             multi_modal_context (list[dict], optional): Optional multimodal content to include
+                 in the user message.
+             max_correction_steps (int): Maximum number of correction rounds permitted
+                 within a single conversation. Note that many rounds can grow the
+                 context without necessarily improving performance; small language
+                 models can enter repeated cycles that more steps will not fix.
+                 Default: `0` (no correction).
+             max_conversation_restarts (int): Maximum number of full conversation restarts permitted
+                 if generation fails. Default: `0` (no restarts).
+             skip_usage_tracking (bool): Whether to skip usage tracking. Default: `False`.
+             purpose (str): The purpose of the model usage, shown as context in the error message.
+                 It is expected to be used by the @catch_llm_exceptions decorator.
+             **kwargs: Additional arguments to pass to the model.
+
+         Returns:
+             A tuple containing:
+             - The parsed output object from the parser.
+             - The full trace of ChatMessage entries in the conversation, including any
+               corrections and reasoning traces. Callers can decide whether to store this.
+
+         Raises:
+             GenerationValidationFailureError: If the correction-step and restart budgets
+                 are exhausted and the final response still fails generation validation.
+         """
+         output_obj = None
+         curr_num_correction_steps = 0
+         curr_num_restarts = 0
+
+         starting_messages = prompt_to_messages(
+             user_prompt=prompt, system_prompt=system_prompt, multi_modal_context=multi_modal_context
+         )
+         messages: list[ChatMessage] = deepcopy(starting_messages)
+
+         while True:
+             completion_response = self.completion(messages, skip_usage_tracking=skip_usage_tracking, **kwargs)
+             response = completion_response.choices[0].message.content or ""
+             reasoning_trace = getattr(completion_response.choices[0].message, "reasoning_content", None)
+             messages.append(ChatMessage.as_assistant(content=response, reasoning_content=reasoning_trace or None))
+             curr_num_correction_steps += 1
+
+             try:
+                 output_obj = parser(response)  # type: ignore - a non-string response raises a ParserException below
+                 break
+             except ParserException as exc:
+                 if max_correction_steps == 0 and max_conversation_restarts == 0:
+                     raise GenerationValidationFailureError(
+                         "Unsuccessful generation attempt. No retries were attempted."
+                     ) from exc
+
+                 if curr_num_correction_steps <= max_correction_steps:
+                     # Add a user message with the error for correction
+                     messages.append(ChatMessage.as_user(content=str(get_exception_primary_cause(exc))))
+
+                 elif curr_num_restarts < max_conversation_restarts:
+                     curr_num_correction_steps = 0
+                     curr_num_restarts += 1
+                     messages = deepcopy(starting_messages)
+
+                 else:
+                     raise GenerationValidationFailureError(
+                         f"Unsuccessful generation despite {max_correction_steps} correction steps "
+                         f"and {max_conversation_restarts} conversation restarts."
+                     ) from exc
+
+         return output_obj, messages
+
+     def _get_litellm_deployment(self, model_config: ModelConfig) -> litellm.DeploymentTypedDict:
+         provider = self._model_provider_registry.get_provider(model_config.provider)
+         api_key = None
+         if provider.api_key:
+             api_key = self._secret_resolver.resolve(provider.api_key)
+         api_key = api_key or "not-used-but-required"
+
+         litellm_params = litellm.LiteLLM_Params(
+             model=f"{provider.provider_type}/{model_config.model}",
+             api_base=provider.endpoint,
+             api_key=api_key,
+         )
+         return {
+             "model_name": model_config.model,
+             "litellm_params": litellm_params.model_dump(),
+         }
+
+     def _track_usage(self, response: litellm.types.utils.ModelResponse | None) -> None:
+         if response is None:
+             self._usage_stats.extend(request_usage=RequestUsageStats(successful_requests=0, failed_requests=1))
+             return
+         if (
+             response.usage is not None
+             and response.usage.prompt_tokens is not None
+             and response.usage.completion_tokens is not None
+         ):
+             self._usage_stats.extend(
+                 token_usage=TokenUsageStats(
+                     input_tokens=response.usage.prompt_tokens,
+                     output_tokens=response.usage.completion_tokens,
+                 ),
+                 request_usage=RequestUsageStats(successful_requests=1, failed_requests=0),
+             )
+
+     def _track_usage_from_embedding(self, response: litellm.types.utils.EmbeddingResponse | None) -> None:
+         if response is None:
+             self._usage_stats.extend(request_usage=RequestUsageStats(successful_requests=0, failed_requests=1))
+             return
+         if response.usage is not None and response.usage.prompt_tokens is not None:
+             self._usage_stats.extend(
+                 token_usage=TokenUsageStats(
+                     input_tokens=response.usage.prompt_tokens,
+                     output_tokens=0,
+                 ),
+                 request_usage=RequestUsageStats(successful_requests=1, failed_requests=0),
+             )
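
How the correction loop in `generate` is meant to be driven: the parser raises `ParserException` on invalid output, and the facade feeds the exception text back to the model as a new user turn. A hypothetical usage sketch; the `facade` instance and `json_parser` are illustrative rather than part of the package, and it assumes `ParserException` accepts a message string:

    import json

    from data_designer.engine.models.parsers.errors import ParserException

    def json_parser(text: str) -> dict:
        try:
            return json.loads(text)
        except json.JSONDecodeError as exc:
            # The exception string becomes the correction message sent back to the model.
            raise ParserException(f"Response was not valid JSON: {exc}") from exc

    output, trace = facade.generate(
        prompt="Return a JSON object with a 'city' key.",
        parser=json_parser,
        max_correction_steps=2,       # up to 2 in-conversation corrections per attempt
        max_conversation_restarts=1,  # then 1 fresh conversation before raising
    )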
data_designer/engine/models/factory.py
@@ -0,0 +1,42 @@
+ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ # SPDX-License-Identifier: Apache-2.0
+
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING
+
+ from data_designer.config.models import ModelConfig
+ from data_designer.engine.model_provider import ModelProviderRegistry
+ from data_designer.engine.secret_resolver import SecretResolver
+
+ if TYPE_CHECKING:
+     from data_designer.engine.models.registry import ModelRegistry
+
+
+ def create_model_registry(
+     *,
+     model_configs: list[ModelConfig] | None = None,
+     secret_resolver: SecretResolver,
+     model_provider_registry: ModelProviderRegistry,
+ ) -> ModelRegistry:
+     """Factory function for creating a ModelRegistry instance.
+
+     Heavy dependencies (litellm, httpx) are deferred until this function is called.
+     This is a factory function pattern: imports inside factories are idiomatic Python
+     for lazy initialization.
+     """
+     from data_designer.engine.models.facade import ModelFacade
+     from data_designer.engine.models.litellm_overrides import apply_litellm_patches
+     from data_designer.engine.models.registry import ModelRegistry
+
+     apply_litellm_patches()
+
+     def model_facade_factory(model_config, secret_resolver, model_provider_registry):
+         return ModelFacade(model_config, secret_resolver, model_provider_registry)
+
+     return ModelRegistry(
+         model_configs=model_configs,
+         secret_resolver=secret_resolver,
+         model_provider_registry=model_provider_registry,
+         model_facade_factory=model_facade_factory,
+     )
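
The docstring's point about deferred heavy imports can be illustrated with a generic sketch (the names below are illustrative, not from this package): importing inside the factory means the cost of a module like `httpx` is paid on the first call rather than at module import time.

    def create_client(base_url: str):
        import httpx  # deferred: imported on first call, not at module import

        return httpx.Client(base_url=base_url)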