nvidia-nat 1.3.0a20250910__py3-none-any.whl → 1.3.0a20250917__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. nat/agent/base.py +9 -4
  2. nat/agent/prompt_optimizer/prompt.py +68 -0
  3. nat/agent/prompt_optimizer/register.py +149 -0
  4. nat/agent/react_agent/agent.py +1 -1
  5. nat/agent/react_agent/register.py +15 -5
  6. nat/agent/reasoning_agent/reasoning_agent.py +6 -1
  7. nat/agent/register.py +2 -0
  8. nat/agent/rewoo_agent/agent.py +4 -2
  9. nat/agent/rewoo_agent/register.py +8 -3
  10. nat/agent/router_agent/__init__.py +0 -0
  11. nat/agent/router_agent/agent.py +329 -0
  12. nat/agent/router_agent/prompt.py +48 -0
  13. nat/agent/router_agent/register.py +97 -0
  14. nat/agent/tool_calling_agent/agent.py +69 -7
  15. nat/agent/tool_calling_agent/register.py +11 -3
  16. nat/builder/builder.py +27 -4
  17. nat/builder/component_utils.py +7 -3
  18. nat/builder/function.py +167 -0
  19. nat/builder/function_info.py +1 -1
  20. nat/builder/workflow.py +5 -0
  21. nat/builder/workflow_builder.py +213 -16
  22. nat/cli/commands/optimize.py +90 -0
  23. nat/cli/commands/workflow/templates/config.yml.j2 +0 -1
  24. nat/cli/commands/workflow/workflow_commands.py +4 -7
  25. nat/cli/entrypoint.py +2 -0
  26. nat/cli/register_workflow.py +38 -4
  27. nat/cli/type_registry.py +71 -0
  28. nat/data_models/component.py +2 -0
  29. nat/data_models/component_ref.py +11 -0
  30. nat/data_models/config.py +40 -16
  31. nat/data_models/function.py +34 -0
  32. nat/data_models/function_dependencies.py +8 -0
  33. nat/data_models/optimizable.py +119 -0
  34. nat/data_models/optimizer.py +149 -0
  35. nat/data_models/temperature_mixin.py +4 -3
  36. nat/data_models/top_p_mixin.py +4 -3
  37. nat/embedder/nim_embedder.py +1 -1
  38. nat/embedder/openai_embedder.py +1 -1
  39. nat/eval/config.py +1 -1
  40. nat/eval/evaluate.py +5 -1
  41. nat/eval/register.py +4 -0
  42. nat/eval/runtime_evaluator/__init__.py +14 -0
  43. nat/eval/runtime_evaluator/evaluate.py +123 -0
  44. nat/eval/runtime_evaluator/register.py +100 -0
  45. nat/experimental/test_time_compute/functions/plan_select_execute_function.py +5 -1
  46. nat/front_ends/fastapi/dask_client_mixin.py +43 -0
  47. nat/front_ends/fastapi/fastapi_front_end_config.py +14 -3
  48. nat/front_ends/fastapi/fastapi_front_end_plugin.py +111 -3
  49. nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py +243 -228
  50. nat/front_ends/fastapi/job_store.py +518 -99
  51. nat/front_ends/fastapi/main.py +11 -19
  52. nat/front_ends/fastapi/utils.py +57 -0
  53. nat/front_ends/mcp/mcp_front_end_plugin_worker.py +3 -2
  54. nat/llm/aws_bedrock_llm.py +14 -3
  55. nat/llm/nim_llm.py +14 -3
  56. nat/llm/openai_llm.py +8 -1
  57. nat/observability/exporter/processing_exporter.py +29 -55
  58. nat/observability/mixin/redaction_config_mixin.py +5 -4
  59. nat/observability/mixin/tagging_config_mixin.py +26 -14
  60. nat/observability/mixin/type_introspection_mixin.py +401 -107
  61. nat/observability/processor/processor.py +3 -0
  62. nat/observability/processor/redaction/__init__.py +24 -0
  63. nat/observability/processor/redaction/contextual_redaction_processor.py +125 -0
  64. nat/observability/processor/redaction/contextual_span_redaction_processor.py +66 -0
  65. nat/observability/processor/redaction/redaction_processor.py +177 -0
  66. nat/observability/processor/redaction/span_header_redaction_processor.py +92 -0
  67. nat/observability/processor/span_tagging_processor.py +21 -14
  68. nat/profiler/decorators/framework_wrapper.py +9 -6
  69. nat/profiler/parameter_optimization/__init__.py +0 -0
  70. nat/profiler/parameter_optimization/optimizable_utils.py +93 -0
  71. nat/profiler/parameter_optimization/optimizer_runtime.py +67 -0
  72. nat/profiler/parameter_optimization/parameter_optimizer.py +149 -0
  73. nat/profiler/parameter_optimization/parameter_selection.py +108 -0
  74. nat/profiler/parameter_optimization/pareto_visualizer.py +380 -0
  75. nat/profiler/parameter_optimization/prompt_optimizer.py +384 -0
  76. nat/profiler/parameter_optimization/update_helpers.py +66 -0
  77. nat/profiler/utils.py +3 -1
  78. nat/tool/chat_completion.py +4 -1
  79. nat/tool/github_tools.py +450 -0
  80. nat/tool/register.py +2 -7
  81. nat/utils/callable_utils.py +70 -0
  82. nat/utils/exception_handlers/automatic_retries.py +103 -48
  83. nat/utils/type_utils.py +4 -0
  84. {nvidia_nat-1.3.0a20250910.dist-info → nvidia_nat-1.3.0a20250917.dist-info}/METADATA +8 -1
  85. {nvidia_nat-1.3.0a20250910.dist-info → nvidia_nat-1.3.0a20250917.dist-info}/RECORD +91 -71
  86. nat/observability/processor/header_redaction_processor.py +0 -123
  87. nat/observability/processor/redaction_processor.py +0 -77
  88. nat/tool/github_tools/create_github_commit.py +0 -133
  89. nat/tool/github_tools/create_github_issue.py +0 -87
  90. nat/tool/github_tools/create_github_pr.py +0 -106
  91. nat/tool/github_tools/get_github_file.py +0 -106
  92. nat/tool/github_tools/get_github_issue.py +0 -166
  93. nat/tool/github_tools/get_github_pr.py +0 -256
  94. nat/tool/github_tools/update_github_issue.py +0 -100
  95. /nat/{tool/github_tools → agent/prompt_optimizer}/__init__.py +0 -0
  96. {nvidia_nat-1.3.0a20250910.dist-info → nvidia_nat-1.3.0a20250917.dist-info}/WHEEL +0 -0
  97. {nvidia_nat-1.3.0a20250910.dist-info → nvidia_nat-1.3.0a20250917.dist-info}/entry_points.txt +0 -0
  98. {nvidia_nat-1.3.0a20250910.dist-info → nvidia_nat-1.3.0a20250917.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
  99. {nvidia_nat-1.3.0a20250910.dist-info → nvidia_nat-1.3.0a20250917.dist-info}/licenses/LICENSE.md +0 -0
  100. {nvidia_nat-1.3.0a20250910.dist-info → nvidia_nat-1.3.0a20250917.dist-info}/top_level.txt +0 -0
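
The hunks below are from nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py (+243 −228), which moves background evaluation and async-generation jobs from in-process FastAPI background tasks to an optional Dask-backed JobStore.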
@@ -14,9 +14,9 @@
 # limitations under the License.
 
 import asyncio
+import json
 import logging
 import os
-import time
 import typing
 from abc import ABC
 from abc import abstractmethod
@@ -25,7 +25,6 @@ from collections.abc import Callable
 from contextlib import asynccontextmanager
 from pathlib import Path
 
-from fastapi import BackgroundTasks
 from fastapi import Body
 from fastapi import FastAPI
 from fastapi import Request
@@ -58,18 +57,30 @@ from nat.front_ends.fastapi.fastapi_front_end_config import EvaluateRequest
 from nat.front_ends.fastapi.fastapi_front_end_config import EvaluateResponse
 from nat.front_ends.fastapi.fastapi_front_end_config import EvaluateStatusResponse
 from nat.front_ends.fastapi.fastapi_front_end_config import FastApiFrontEndConfig
-from nat.front_ends.fastapi.job_store import JobInfo
-from nat.front_ends.fastapi.job_store import JobStore
 from nat.front_ends.fastapi.message_handler import WebSocketMessageHandler
 from nat.front_ends.fastapi.response_helpers import generate_single_response
 from nat.front_ends.fastapi.response_helpers import generate_streaming_response_as_str
 from nat.front_ends.fastapi.response_helpers import generate_streaming_response_full_as_str
 from nat.front_ends.fastapi.step_adaptor import StepAdaptor
+from nat.front_ends.fastapi.utils import get_config_file_path
 from nat.object_store.models import ObjectStoreItem
+from nat.runtime.loader import load_workflow
 from nat.runtime.session import SessionManager
 
 logger = logging.getLogger(__name__)
 
+_DASK_AVAILABLE = False
+
+try:
+    from nat.front_ends.fastapi.job_store import JobInfo
+    from nat.front_ends.fastapi.job_store import JobStatus
+    from nat.front_ends.fastapi.job_store import JobStore
+    _DASK_AVAILABLE = True
+except ImportError:
+    JobInfo = None
+    JobStatus = None
+    JobStore = None
+
 
 class FastApiFrontEndPluginWorkerBase(ABC):
 
@@ -80,10 +91,29 @@ class FastApiFrontEndPluginWorkerBase(ABC):
                           FastApiFrontEndConfig), ("Front end config is not FastApiFrontEndConfig")
 
         self._front_end_config = config.general.front_end
-
-        self._cleanup_tasks: list[str] = []
-        self._cleanup_tasks_lock = asyncio.Lock()
+        self._dask_available = False
+        self._job_store = None
         self._http_flow_handler: HTTPAuthenticationFlowHandler | None = HTTPAuthenticationFlowHandler()
+        self._scheduler_address = os.environ.get("NAT_DASK_SCHEDULER_ADDRESS")
+        self._db_url = os.environ.get("NAT_JOB_STORE_DB_URL")
+        self._config_file_path = get_config_file_path()
+
+        if self._scheduler_address is not None:
+            if not _DASK_AVAILABLE:
+                raise RuntimeError("Dask is not available, please install it to use the FastAPI front end with Dask.")
+
+            if self._db_url is None:
+                raise RuntimeError(
+                    "NAT_JOB_STORE_DB_URL must be set when using Dask (configure a persistent JobStore database).")
+
+            try:
+                self._job_store = JobStore(scheduler_address=self._scheduler_address, db_url=self._db_url)
+                self._dask_available = True
+                logger.debug("Connected to Dask scheduler at %s", self._scheduler_address)
+            except Exception as e:
+                raise RuntimeError(f"Failed to connect to Dask scheduler at {self._scheduler_address}: {e}") from e
+        else:
+            logger.debug("No Dask scheduler address provided, running without Dask support.")
 
     @property
     def config(self) -> Config:
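
The worker now wires up Dask entirely from the environment. As a minimal sketch of opting in before the server starts (not part of the package source; it assumes the dask.distributed package is installed, and the scheduler port, worker count, and database URL are placeholders):

    import os

    from dask.distributed import LocalCluster

    # Start a local scheduler; any reachable Dask scheduler address works.
    cluster = LocalCluster(n_workers=2, scheduler_port=8786)

    # These two variables are read in FastApiFrontEndPluginWorkerBase.__init__ above;
    # without them the worker logs a debug message and runs without Dask support.
    os.environ["NAT_DASK_SCHEDULER_ADDRESS"] = cluster.scheduler_address  # e.g. "tcp://127.0.0.1:8786"
    os.environ["NAT_JOB_STORE_DB_URL"] = "sqlite:///nat_jobs.db"  # hypothetical persistent job database
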
@@ -107,20 +137,6 @@ class FastApiFrontEndPluginWorkerBase(ABC):
 
             yield
 
-            # If a cleanup task is running, cancel it
-            async with self._cleanup_tasks_lock:
-
-                # Cancel all cleanup tasks
-                for task_name in self._cleanup_tasks:
-                    cleanup_task: asyncio.Task | None = getattr(starting_app.state, task_name, None)
-                    if cleanup_task is not None:
-                        logger.info("Cancelling %s cleanup task", task_name)
-                        cleanup_task.cancel()
-                    else:
-                        logger.warning("No cleanup task found for %s", task_name)
-
-                self._cleanup_tasks.clear()
-
             logger.debug("Closing NAT server from process %s", os.getpid())
 
         nat_app = FastAPI(lifespan=lifespan)
@@ -208,32 +224,6 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
         self._outstanding_flows: dict[str, FlowState] = {}
         self._outstanding_flows_lock = asyncio.Lock()
 
-    @staticmethod
-    async def _periodic_cleanup(name: str, job_store: JobStore, sleep_time_sec: int = 300):
-        while True:
-            try:
-                job_store.cleanup_expired_jobs()
-                logger.debug("Expired %s jobs cleaned up", name)
-            except Exception as e:
-                logger.exception("Error during %s job cleanup: %s", name, e)
-            await asyncio.sleep(sleep_time_sec)
-
-    async def create_cleanup_task(self, app: FastAPI, name: str, job_store: JobStore, sleep_time_sec: int = 300):
-        # Schedule periodic cleanup of expired jobs on first job creation
-        attr_name = f"{name}_cleanup_task"
-
-        # Cheap check, if it doesn't exist, we will need to re-check after we acquire the lock
-        if not hasattr(app.state, attr_name):
-            async with self._cleanup_tasks_lock:
-                if not hasattr(app.state, attr_name):
-                    logger.info("Starting %s periodic cleanup task", name)
-                    setattr(
-                        app.state,
-                        attr_name,
-                        asyncio.create_task(
-                            self._periodic_cleanup(name=name, job_store=job_store, sleep_time_sec=sleep_time_sec)))
-                    self._cleanup_tasks.append(attr_name)
-
     def get_step_adaptor(self) -> StepAdaptor:
 
         return StepAdaptor(self.front_end_config.step_adaptor)
@@ -276,52 +266,72 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
             },
         }
 
-        # Create job store for tracking evaluation jobs
-        job_store = JobStore()
-        # Don't run multiple evaluations at the same time
-        evaluation_lock = asyncio.Lock()
-
-        async def run_evaluation(job_id: str, config_file: str, reps: int, session_manager: SessionManager):
+        # TODO: Find another way to limit the number of concurrent evaluations
+        async def run_evaluation(scheduler_address: str,
+                                 db_url: str,
+                                 workflow_config_file_path: str,
+                                 job_id: str,
+                                 eval_config_file: str,
+                                 reps: int):
             """Background task to run the evaluation."""
-            async with evaluation_lock:
-                try:
-                    # Create EvaluationRunConfig using the CLI defaults
-                    eval_config = EvaluationRunConfig(config_file=Path(config_file), dataset=None, reps=reps)
-
-                    # Create a new EvaluationRun with the evaluation-specific config
-                    job_store.update_status(job_id, "running")
-                    eval_runner = EvaluationRun(eval_config)
-                    output: EvaluationRunOutput = await eval_runner.run_and_evaluate(session_manager=session_manager,
-                                                                                     job_id=job_id)
-                    if output.workflow_interrupted:
-                        job_store.update_status(job_id, "interrupted")
-                    else:
-                        parent_dir = os.path.dirname(
-                            output.workflow_output_file) if output.workflow_output_file else None
-
-                        job_store.update_status(job_id, "success", output_path=str(parent_dir))
-                except Exception as e:
-                    logger.exception("Error in evaluation job %s: %s", job_id, str(e))
-                    job_store.update_status(job_id, "failure", error=str(e))
-
-        async def start_evaluation(request: EvaluateRequest, background_tasks: BackgroundTasks, http_request: Request):
+            job_store = JobStore(scheduler_address=scheduler_address, db_url=db_url)
+
+            try:
+                # We have two config files, one for the workflow and one for the evaluation
+                # Create EvaluationRunConfig using the CLI defaults
+                eval_config = EvaluationRunConfig(config_file=Path(eval_config_file), dataset=None, reps=reps)
+
+                # Create a new EvaluationRun with the evaluation-specific config
+                await job_store.update_status(job_id, JobStatus.RUNNING)
+                eval_runner = EvaluationRun(eval_config)
+
+                async with load_workflow(workflow_config_file_path) as local_session_manager:
+                    output: EvaluationRunOutput = await eval_runner.run_and_evaluate(
+                        session_manager=local_session_manager, job_id=job_id)
+
+                if output.workflow_interrupted:
+                    await job_store.update_status(job_id, JobStatus.INTERRUPTED)
+                else:
+                    parent_dir = os.path.dirname(output.workflow_output_file) if output.workflow_output_file else None
+
+                    await job_store.update_status(job_id, JobStatus.SUCCESS, output_path=str(parent_dir))
+            except Exception as e:
+                logger.exception("Error in evaluation job %s", job_id)
+                await job_store.update_status(job_id, JobStatus.FAILURE, error=str(e))
+
+        async def start_evaluation(request: EvaluateRequest, http_request: Request):
             """Handle evaluation requests."""
 
             async with session_manager.session(http_connection=http_request):
 
                 # if job_id is present and already exists return the job info
+                # There is a race condition between this check and the actual job submission, however if the client is
+                # supplying their own job_ids, then it is their responsibility to ensure that the job_id is unique.
                 if request.job_id:
-                    job = job_store.get_job(request.job_id)
-                    if job:
-                        return EvaluateResponse(job_id=job.job_id, status=job.status)
-
-                job_id = job_store.create_job(request.config_file, request.job_id, request.expiry_seconds)
-                await self.create_cleanup_task(app=app, name="async_evaluation", job_store=job_store)
-                background_tasks.add_task(run_evaluation, job_id, request.config_file, request.reps, session_manager)
-
-                return EvaluateResponse(job_id=job_id, status="submitted")
-
-        def translate_job_to_response(job: JobInfo) -> EvaluateStatusResponse:
+                    job_status = await self._job_store.get_status(request.job_id)
+                    if job_status != JobStatus.NOT_FOUND:
+                        return EvaluateResponse(job_id=request.job_id, status=job_status)
+
+                job_id = self._job_store.ensure_job_id(request.job_id)
+
+                await self._job_store.submit_job(job_id=job_id,
+                                                 config_file=request.config_file,
+                                                 expiry_seconds=request.expiry_seconds,
+                                                 job_fn=run_evaluation,
+                                                 job_args=[
+                                                     self._scheduler_address,
+                                                     self._db_url,
+                                                     self._config_file_path,
+                                                     job_id,
+                                                     request.config_file,
+                                                     request.reps
+                                                 ])
+
+                logger.info("Submitted evaluation job %s with config %s", job_id, request.config_file)
+
+                return EvaluateResponse(job_id=job_id, status=JobStatus.SUBMITTED)
+
+        def translate_job_to_response(job: "JobInfo") -> EvaluateStatusResponse:
            """Translate a JobInfo object to an EvaluateStatusResponse."""
            return EvaluateStatusResponse(job_id=job.job_id,
                                          status=job.status,
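
With submission routed through the Dask-backed JobStore, an evaluation request now returns immediately with a job id that is polled via the job routes registered further down in this diff. A client-side sketch, assuming a server at localhost:8000 with evaluate.path set to /evaluate (host, port, and config file name are placeholders):

    import requests  # any HTTP client works; requests is assumed here

    BASE = "http://localhost:8000"

    # Submit an evaluation job; the response carries a job_id and a "submitted" status.
    resp = requests.post(f"{BASE}/evaluate", json={"config_file": "eval_config.yml", "reps": 1})
    job_id = resp.json()["job_id"]

    # Poll the job-status route for the result location.
    status = requests.get(f"{BASE}/evaluate/job/{job_id}").json()
    print(status["status"], status.get("output_path"))
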
@@ -330,7 +340,7 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
                                           output_path=str(job.output_path),
                                           created_at=job.created_at,
                                           updated_at=job.updated_at,
-                                          expires_at=job_store.get_expires_at(job))
+                                          expires_at=self._job_store.get_expires_at(job))
 
         async def get_job_status(job_id: str, http_request: Request) -> EvaluateStatusResponse:
             """Get the status of an evaluation job."""
@@ -338,7 +348,7 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
 
             async with session_manager.session(http_connection=http_request):
 
-                job = job_store.get_job(job_id)
+                job = await self._job_store.get_job(job_id)
                 if not job:
                     logger.warning("Job %s not found", job_id)
                     raise HTTPException(status_code=404, detail=f"Job {job_id} not found")
@@ -351,7 +361,7 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
 
             async with session_manager.session(http_connection=http_request):
 
-                job = job_store.get_last_job()
+                job = await self._job_store.get_last_job()
                 if not job:
                     logger.warning("No jobs found when requesting last job status")
                     raise HTTPException(status_code=404, detail="No jobs found")
@@ -365,61 +375,65 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
 
             if status is None:
                 logger.info("Getting all jobs")
-                jobs = job_store.get_all_jobs()
+                jobs = await self._job_store.get_all_jobs()
             else:
                 logger.info("Getting jobs with status %s", status)
-                jobs = job_store.get_jobs_by_status(status)
+                jobs = await self._job_store.get_jobs_by_status(JobStatus(status))
+
             logger.info("Found %d jobs", len(jobs))
             return [translate_job_to_response(job) for job in jobs]
 
         if self.front_end_config.evaluate.path:
-            # Add last job endpoint first (most specific)
-            app.add_api_route(
-                path=f"{self.front_end_config.evaluate.path}/job/last",
-                endpoint=get_last_job_status,
-                methods=["GET"],
-                response_model=EvaluateStatusResponse,
-                description="Get the status of the last created evaluation job",
-                responses={
-                    404: {
-                        "description": "No jobs found"
-                    }, 500: response_500
-                },
-            )
-
-            # Add specific job endpoint (least specific)
-            app.add_api_route(
-                path=f"{self.front_end_config.evaluate.path}/job/{{job_id}}",
-                endpoint=get_job_status,
-                methods=["GET"],
-                response_model=EvaluateStatusResponse,
-                description="Get the status of an evaluation job",
-                responses={
-                    404: {
-                        "description": "Job not found"
-                    }, 500: response_500
-                },
-            )
-
-            # Add jobs endpoint with optional status query parameter
-            app.add_api_route(
-                path=f"{self.front_end_config.evaluate.path}/jobs",
-                endpoint=get_jobs,
-                methods=["GET"],
-                response_model=list[EvaluateStatusResponse],
-                description="Get all jobs, optionally filtered by status",
-                responses={500: response_500},
-            )
+            if self._dask_available:
+                # Add last job endpoint first (most specific)
+                app.add_api_route(
+                    path=f"{self.front_end_config.evaluate.path}/job/last",
+                    endpoint=get_last_job_status,
+                    methods=["GET"],
+                    response_model=EvaluateStatusResponse,
+                    description="Get the status of the last created evaluation job",
+                    responses={
+                        404: {
+                            "description": "No jobs found"
+                        }, 500: response_500
+                    },
+                )
 
-            # Add HTTP endpoint for evaluation
-            app.add_api_route(
-                path=self.front_end_config.evaluate.path,
-                endpoint=start_evaluation,
-                methods=[self.front_end_config.evaluate.method],
-                response_model=EvaluateResponse,
-                description=self.front_end_config.evaluate.description,
-                responses={500: response_500},
-            )
+                # Add specific job endpoint (least specific)
+                app.add_api_route(
+                    path=f"{self.front_end_config.evaluate.path}/job/{{job_id}}",
+                    endpoint=get_job_status,
+                    methods=["GET"],
+                    response_model=EvaluateStatusResponse,
+                    description="Get the status of an evaluation job",
+                    responses={
+                        404: {
+                            "description": "Job not found"
+                        }, 500: response_500
+                    },
+                )
+
+                # Add jobs endpoint with optional status query parameter
+                app.add_api_route(
+                    path=f"{self.front_end_config.evaluate.path}/jobs",
+                    endpoint=get_jobs,
+                    methods=["GET"],
+                    response_model=list[EvaluateStatusResponse],
+                    description="Get all jobs, optionally filtered by status",
+                    responses={500: response_500},
+                )
+
+                # Add HTTP endpoint for evaluation
+                app.add_api_route(
+                    path=self.front_end_config.evaluate.path,
+                    endpoint=start_evaluation,
+                    methods=[self.front_end_config.evaluate.method],
+                    response_model=EvaluateResponse,
+                    description=self.front_end_config.evaluate.description,
+                    responses={500: response_500},
+                )
+            else:
+                logger.warning("Dask is not available, evaluation endpoints will not be added.")
 
     async def add_static_files_route(self, app: FastAPI, builder: WorkflowBuilder):
 
@@ -526,21 +540,23 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
         GenerateStreamResponseType = workflow.streaming_output_schema
         GenerateSingleResponseType = workflow.single_output_schema
 
-        # Append job_id and expiry_seconds to the input schema, this effectively makes these reserved keywords
-        # Consider prefixing these with "nat_" to avoid conflicts
-        class AsyncGenerateRequest(GenerateBodyType):
-            job_id: str | None = Field(default=None, description="Unique identifier for the evaluation job")
-            sync_timeout: int = Field(
-                default=0,
-                ge=0,
-                le=300,
-                description="Attempt to perform the job synchronously up until `sync_timeout` sectonds, "
-                "if the job hasn't been completed by then a job_id will be returned with a status code of 202.")
-            expiry_seconds: int = Field(default=JobStore.DEFAULT_EXPIRY,
-                                        ge=JobStore.MIN_EXPIRY,
-                                        le=JobStore.MAX_EXPIRY,
-                                        description="Optional time (in seconds) before the job expires. "
-                                        "Clamped between 600 (10 min) and 86400 (24h).")
+        if self._dask_available:
+            # Append job_id and expiry_seconds to the input schema, this effectively makes these reserved keywords
+            # Consider prefixing these with "nat_" to avoid conflicts
+
+            class AsyncGenerateRequest(GenerateBodyType):
+                job_id: str | None = Field(default=None, description="Unique identifier for the evaluation job")
+                sync_timeout: int = Field(
+                    default=0,
+                    ge=0,
+                    le=300,
+                    description="Attempt to perform the job synchronously up until `sync_timeout` sectonds, "
+                    "if the job hasn't been completed by then a job_id will be returned with a status code of 202.")
+                expiry_seconds: int = Field(default=JobStore.DEFAULT_EXPIRY,
+                                            ge=JobStore.MIN_EXPIRY,
+                                            le=JobStore.MAX_EXPIRY,
+                                            description="Optional time (in seconds) before the job expires. "
+                                            "Clamped between 600 (10 min) and 86400 (24h).")
 
         # Ensure that the input is in the body. POD types are treated as query parameters
         if (not issubclass(GenerateBodyType, BaseModel)):
@@ -560,12 +576,6 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
             },
         }
 
-        # Create job store for tracking async generation jobs
-        job_store = JobStore()
-
-        # Run up to max_running_async_jobs jobs at the same time
-        async_job_concurrency = asyncio.Semaphore(self._front_end_config.max_running_async_jobs)
-
        def get_single_endpoint(result_type: type | None):
 
            async def get_single(response: Response, request: Request):
@@ -726,34 +736,44 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
 
             return post_openai_api_compatible
 
-        async def run_generation(job_id: str, payload: typing.Any, session_manager: SessionManager, result_type: type):
-            """Background task to run the evaluation."""
-            async with async_job_concurrency:
-                try:
-                    result = await generate_single_response(payload=payload,
-                                                            session_manager=session_manager,
-                                                            result_type=result_type)
-                    job_store.update_status(job_id, "success", output=result)
-                except Exception as e:
-                    logger.exception("Error in evaluation job %s: %s", job_id, e)
-                    job_store.update_status(job_id, "failure", error=str(e))
-
-        def _job_status_to_response(job: JobInfo) -> AsyncGenerationStatusResponse:
+        def _job_status_to_response(job: "JobInfo") -> AsyncGenerationStatusResponse:
             job_output = job.output
             if job_output is not None:
-                job_output = job_output.model_dump()
+                try:
+                    job_output = json.loads(job_output)
+                except json.JSONDecodeError:
+                    logger.error("Failed to parse job output as JSON: %s", job_output)
+                    job_output = {"error": "Output parsing failed"}
+
             return AsyncGenerationStatusResponse(job_id=job.job_id,
                                                  status=job.status,
                                                  error=job.error,
                                                  output=job_output,
                                                  created_at=job.created_at,
                                                  updated_at=job.updated_at,
-                                                 expires_at=job_store.get_expires_at(job))
+                                                 expires_at=self._job_store.get_expires_at(job))
+
+        async def run_generation(scheduler_address: str,
+                                 db_url: str,
+                                 config_file_path: str,
+                                 job_id: str,
+                                 payload: typing.Any):
+            """Background task to run the workflow."""
+            job_store = JobStore(scheduler_address=scheduler_address, db_url=db_url)
+            try:
+                async with load_workflow(config_file_path) as local_session_manager:
+                    result = await generate_single_response(
+                        payload, local_session_manager, result_type=local_session_manager.workflow.single_output_schema)
 
-        def post_async_generation(request_type: type, final_result_type: type):
+                    await job_store.update_status(job_id, JobStatus.SUCCESS, output=result)
+            except Exception as e:
+                logger.exception("Error in async job %s", job_id)
+                await job_store.update_status(job_id, JobStatus.FAILURE, error=str(e))
+
+        def post_async_generation(request_type: type):
 
             async def start_async_generation(
-                    request: request_type, background_tasks: BackgroundTasks, response: Response,
+                    request: request_type, response: Response,
                     http_request: Request) -> AsyncGenerateResponse | AsyncGenerationStatusResponse:
                 """Handle async generation requests."""
@@ -761,41 +781,29 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
 
                 # if job_id is present and already exists return the job info
                 if request.job_id:
-                    job = job_store.get_job(request.job_id)
+                    job = await self._job_store.get_job(request.job_id)
                     if job:
                         return AsyncGenerateResponse(job_id=job.job_id, status=job.status)
 
-                job_id = job_store.create_job(job_id=request.job_id, expiry_seconds=request.expiry_seconds)
-                await self.create_cleanup_task(app=app, name="async_generation", job_store=job_store)
-
-                # The fastapi/starlette background tasks won't begin executing until after the response is sent
-                # to the client, so we need to wrap the task in a function, alowing us to start the task now,
-                # and allowing the background task function to await the results.
-                task = asyncio.create_task(
-                    run_generation(job_id=job_id,
-                                   payload=request,
-                                   session_manager=session_manager,
-                                   result_type=final_result_type))
-
-                async def wrapped_task(t: asyncio.Task):
-                    return await t
-
-                background_tasks.add_task(wrapped_task, task)
-
-                now = time.time()
-                sync_timeout = now + request.sync_timeout
-                while time.time() < sync_timeout:
-                    job = job_store.get_job(job_id)
-                    if job is not None and job.status not in job_store.ACTIVE_STATUS:
-                        # If the job is done, return the result
-                        response.status_code = 200
-                        return _job_status_to_response(job)
-
-                    # Sleep for a short time before checking again
-                    await asyncio.sleep(0.1)
+                job_id = self._job_store.ensure_job_id(request.job_id)
+                (_, job) = await self._job_store.submit_job(job_id=job_id,
+                                                            expiry_seconds=request.expiry_seconds,
+                                                            job_fn=run_generation,
+                                                            sync_timeout=request.sync_timeout,
+                                                            job_args=[
+                                                                self._scheduler_address,
+                                                                self._db_url,
+                                                                self._config_file_path,
+                                                                job_id,
+                                                                request.model_dump(mode="json")
+                                                            ])
+
+                if job is not None:
+                    response.status_code = 200
+                    return _job_status_to_response(job)
 
                 response.status_code = 202
-                return AsyncGenerateResponse(job_id=job_id, status="submitted")
+                return AsyncGenerateResponse(job_id=job_id, status=JobStatus.SUBMITTED)
 
             return start_async_generation
 
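When the job finishes within sync_timeout, submit_job returns the completed JobInfo and the route answers 200 with the result; otherwise the client gets a 202 and a job id to poll. A sketch of that contract from the client side, assuming a workflow endpoint at /generate, an input_message field in its schema, and lowercase JobStatus string values (all placeholders, not confirmed by this diff):

    import time

    import requests  # assumes the requests package

    BASE = "http://localhost:8000"

    # Ask the server to wait up to 10 seconds for a synchronous result.
    resp = requests.post(f"{BASE}/generate/async", json={"input_message": "hello", "sync_timeout": 10})
    body = resp.json()

    if resp.status_code == 202:  # not finished within sync_timeout; poll until terminal
        while body["status"] in ("submitted", "running"):
            time.sleep(1.0)
            body = requests.get(f"{BASE}/generate/async/job/{body['job_id']}").json()

    print(body["status"], body.get("output"))
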
@@ -805,8 +813,8 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
 
             async with session_manager.session(http_connection=http_request):
 
-                job = job_store.get_job(job_id)
-                if not job:
+                job = await self._job_store.get_job(job_id)
+                if job is None:
                     logger.warning("Job %s not found", job_id)
                     raise HTTPException(status_code=404, detail=f"Job {job_id} not found")
 
@@ -934,30 +942,33 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
                 responses={500: response_500},
             )
 
-            app.add_api_route(
-                path=f"{endpoint.path}/async",
-                endpoint=post_async_generation(request_type=AsyncGenerateRequest,
-                                               final_result_type=GenerateSingleResponseType),
-                methods=[endpoint.method],
-                response_model=AsyncGenerateResponse | AsyncGenerationStatusResponse,
-                description="Start an async generate job",
-                responses={500: response_500},
-            )
+            if self._dask_available:
+                app.add_api_route(
+                    path=f"{endpoint.path}/async",
+                    endpoint=post_async_generation(request_type=AsyncGenerateRequest),
+                    methods=[endpoint.method],
+                    response_model=AsyncGenerateResponse | AsyncGenerationStatusResponse,
+                    description="Start an async generate job",
+                    responses={500: response_500},
+                )
+            else:
+                logger.warning("Dask is not available, async generation endpoints will not be added.")
         else:
             raise ValueError(f"Unsupported method {endpoint.method}")
 
-        app.add_api_route(
-            path=f"{endpoint.path}/async/job/{{job_id}}",
-            endpoint=get_async_job_status,
-            methods=["GET"],
-            response_model=AsyncGenerationStatusResponse,
-            description="Get the status of an async job",
-            responses={
-                404: {
-                    "description": "Job not found"
-                }, 500: response_500
-            },
-        )
+        if self._dask_available:
+            app.add_api_route(
+                path=f"{endpoint.path}/async/job/{{job_id}}",
+                endpoint=get_async_job_status,
+                methods=["GET"],
+                response_model=AsyncGenerationStatusResponse,
+                description="Get the status of an async job",
+                responses={
+                    404: {
+                        "description": "Job not found"
+                    }, 500: response_500
+                },
+            )
 
         if (endpoint.openai_api_path):
             if (endpoint.method == "GET"):
@@ -1084,3 +1095,7 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
     async def _remove_flow(self, state: str):
         async with self._outstanding_flows_lock:
             del self._outstanding_flows[state]
+
+
+# Prevent Sphinx from documenting items not a part of the public API
+__all__ = ["FastApiFrontEndPluginWorkerBase", "FastApiFrontEndPluginWorker", "RouteInfo"]