PyPI - aiqtoolkit - Versions diffs - 1.2.0a20250626__py3-none-any.whl → 1.2.0a20250628__py3-none-any.whl - Mend

aiqtoolkit 1.2.0a20250626__py3-none-any.whl → 1.2.0a20250628__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of aiqtoolkit might be problematic. Click here for more details.

Files changed (20) hide show

aiq/data_models/dataset_handler.py CHANGED Viewed

@@ -30,7 +30,8 @@ from aiq.data_models.common import TypedBaseModel
 class EvalS3Config(BaseModel):
-    endpoint_url: str
+    endpoint_url: str | None = None
+    region_name: str | None = None
     bucket: str
     access_key: str
     secret_key: str

aiq/data_models/evaluate.py CHANGED Viewed

@@ -72,6 +72,10 @@ class EvalOutputConfig(BaseModel):
 class EvalGeneralConfig(BaseModel):
     max_concurrency: int = 8
+    # Workflow alias for displaying in evaluation UI, if not provided,
+    # the workflow type will be used
+    workflow_alias: str | None = None
     # Output directory for the workflow and evaluation results
     output_dir: Path = Path("/tmp/aiq/examples/default/")

aiq/data_models/profiler.py CHANGED Viewed

@@ -42,6 +42,7 @@ class PrefixSpanConfig(BaseModel):
 class ProfilerConfig(BaseModel):
+    base_metrics: bool = False
     token_usage_forecast: bool = False
     token_uniqueness_forecast: bool = False
     workflow_runtime_forecast: bool = False

aiq/eval/dataset_handler/dataset_handler.py CHANGED Viewed

@@ -152,6 +152,16 @@ class DatasetHandler:
         allow re-running evaluation using the original config file and '--skip_workflow' option.
         """
+        def parse_if_json_string(value):
+            if isinstance(value, str):
+                try:
+                    return json.loads(value)
+                except json.JSONDecodeError:
+                    return value
+            if hasattr(value, "model_dump"):
+                return value.model_dump()
+            return value
         indent = 2
         if self.is_structured_input():
             # Extract structured data from EvalInputItems
@@ -165,6 +175,6 @@ class DatasetHandler:
             } for item in eval_input.eval_input_items]
         else:
             # Unstructured case: return only raw output objects as a JSON array
-            data = [json.loads(item.output_obj) for item in eval_input.eval_input_items]
+            data = [parse_if_json_string(item.output_obj) for item in eval_input.eval_input_items]
         return json.dumps(data, indent=indent, ensure_ascii=False, default=str)

aiq/eval/evaluate.py CHANGED Viewed

@@ -31,8 +31,12 @@ from aiq.eval.dataset_handler.dataset_handler import DatasetHandler
 from aiq.eval.evaluator.evaluator_model import EvalInput
 from aiq.eval.evaluator.evaluator_model import EvalInputItem
 from aiq.eval.evaluator.evaluator_model import EvalOutput
+from aiq.eval.usage_stats import UsageStats
+from aiq.eval.usage_stats import UsageStatsItem
+from aiq.eval.usage_stats import UsageStatsLLM
 from aiq.eval.utils.output_uploader import OutputUploader
 from aiq.eval.utils.weave_eval import WeaveEvaluationIntegration
+from aiq.profiler.data_models import ProfilerResults
 from aiq.runtime.session import AIQSessionManager
 logger = logging.getLogger(__name__)
@@ -63,12 +67,46 @@ class EvaluationRun:  # pylint: disable=too-many-public-methods
         # evaluation_results is list of tuples (evaluator_name, EvalOutput)
         self.evaluation_results: list[tuple[str, EvalOutput]] = []
+        # usage stats
+        self.usage_stats: UsageStats = UsageStats()
         # workflow output file
         self.workflow_output_file: Path | None = None
         # evaluation output files
         self.evaluator_output_files: list[Path] = []
+    def _compute_usage_stats(self, item: EvalInputItem):
+        """Compute usage stats for a single item using the intermediate steps"""
+        # get the prompt and completion tokens from the intermediate steps
+        from aiq.profiler.intermediate_property_adapter import IntermediatePropertyAdaptor
+        steps = [IntermediatePropertyAdaptor.from_intermediate_step(step) for step in item.trajectory]
+        usage_stats_per_llm = {}
+        total_tokens = 0
+        for step in steps:
+            if step.event_type == "LLM_END":
+                llm_name = step.llm_name
+                if llm_name not in usage_stats_per_llm:
+                    usage_stats_per_llm[llm_name] = UsageStatsLLM()
+                usage_stats_per_llm[llm_name].prompt_tokens += step.token_usage.prompt_tokens
+                usage_stats_per_llm[llm_name].completion_tokens += step.token_usage.completion_tokens
+                usage_stats_per_llm[llm_name].total_tokens += step.token_usage.total_tokens
+                total_tokens += step.token_usage.total_tokens
+        # find min and max event timestamps
+        if item.trajectory:
+            min_timestamp = min(step.event_timestamp for step in item.trajectory)
+            max_timestamp = max(step.event_timestamp for step in item.trajectory)
+            runtime = max_timestamp - min_timestamp
+        else:
+            runtime = 0.0
+        # add the usage stats to the usage stats dict
+        self.usage_stats.usage_stats_items[item.id] = UsageStatsItem(usage_stats_per_llm=usage_stats_per_llm,
+                                                                     runtime=runtime,
+                                                                     total_tokens=total_tokens)
+        return self.usage_stats.usage_stats_items[item.id]
     async def run_workflow_local(self, session_manager: AIQSessionManager):
         '''
         Launch the workflow with the specified questions and extract the output using the jsonpath
@@ -138,8 +176,10 @@ class EvaluationRun:  # pylint: disable=too-many-public-methods
                 item.output_obj = output
                 item.trajectory = self.intermediate_step_adapter.validate_intermediate_steps(intermediate_steps)
+                usage_stats_item = self._compute_usage_stats(item)
                 self.weave_eval.log_prediction(item, output)
+                await self.weave_eval.log_usage_stats(item, usage_stats_item)
         async def wrapped_run(item: EvalInputItem) -> None:
             await run_one(item)
@@ -161,15 +201,19 @@ class EvaluationRun:  # pylint: disable=too-many-public-methods
         from aiq.eval.remote_workflow import EvaluationRemoteWorkflowHandler
         handler = EvaluationRemoteWorkflowHandler(self.config, self.eval_config.general.max_concurrency)
         await handler.run_workflow_remote(self.eval_input)
+        for item in self.eval_input.eval_input_items:
+            usage_stats_item = self._compute_usage_stats(item)
+            self.weave_eval.log_prediction(item, item.output_obj)
+            await self.weave_eval.log_usage_stats(item, usage_stats_item)
-    async def profile_workflow(self):
+    async def profile_workflow(self) -> ProfilerResults:
         """
         Profile a dataset
         """
         if not self.eval_config.general.profiler:
             logger.info("Profiler is not enabled. Skipping profiling.")
-            return
+            return ProfilerResults()
         from aiq.profiler.profile_runner import ProfilerRunner
@@ -179,7 +223,7 @@ class EvaluationRun:  # pylint: disable=too-many-public-methods
         profiler_runner = ProfilerRunner(self.eval_config.general.profiler, self.eval_config.general.output_dir)
-        await profiler_runner.run(all_stats)
+        return await profiler_runner.run(all_stats)
     def cleanup_output_directory(self):
         '''Remove contents of the output directory if it exists'''
@@ -238,7 +282,7 @@ class EvaluationRun:  # pylint: disable=too-many-public-methods
             except Exception as e:
                 logger.exception("Failed to delete old job directory: %s: %s", dir_to_delete, e, exc_info=True)
-    def write_output(self, dataset_handler: DatasetHandler):
+    def write_output(self, dataset_handler: DatasetHandler, profiler_results: ProfilerResults):
         workflow_output_file = self.eval_config.general.output_dir / "workflow_output.json"
         workflow_output_file.parent.mkdir(parents=True, exist_ok=True)
@@ -271,7 +315,7 @@ class EvaluationRun:  # pylint: disable=too-many-public-methods
                    "`eval` with the --skip_completed_entries flag.")
             logger.warning(msg)
-        self.weave_eval.log_summary(self.evaluation_results)
+        self.weave_eval.log_summary(self.usage_stats, self.evaluation_results, profiler_results)
     async def run_single_evaluator(self, evaluator_name: str, evaluator: Any):
         """Run a single evaluator and store its results."""
@@ -314,6 +358,16 @@ class EvaluationRun:  # pylint: disable=too-many-public-methods
         config = validate_schema(config_dict, AIQConfig)
         return config
+    def _get_workflow_alias(self, workflow_type: str | None = None):
+        """Get the workflow alias for displaying in evaluation UI."""
+        if self.eval_config.general.workflow_alias:
+            return self.eval_config.general.workflow_alias
+        if not workflow_type or workflow_type == "EmptyFunctionConfig":
+            return "aiqtoolkit-eval"
+        return workflow_type
     async def run_and_evaluate(self,
                                session_manager: AIQSessionManager | None = None,
                                job_id: str | None = None) -> EvaluationRunOutput:
@@ -331,7 +385,8 @@ class EvaluationRun:  # pylint: disable=too-many-public-methods
         else:
             config = load_config(self.config.config_file)
         self.eval_config = config.eval
-        logger.debug("Loaded evaluation configuration: %s", self.eval_config)
+        workflow_alias = self._get_workflow_alias(config.workflow.type)
+        logger.debug("Loaded %s evaluation configuration: %s", workflow_alias, self.eval_config)
         # Cleanup the output directory
         if self.eval_config.general.output:
@@ -373,10 +428,9 @@ class EvaluationRun:  # pylint: disable=too-many-public-methods
         # Run workflow and evaluate
         async with WorkflowEvalBuilder.from_config(config=config) as eval_workflow:
             # Initialize Weave integration
-            self.weave_eval.initialize_client()
-            if self.weave_eval.client:
-                self.weave_eval.initialize_logger(self.eval_input, config)
+            self.weave_eval.initialize_logger(workflow_alias, self.eval_input, config)
+            # Run workflow
             if self.config.endpoint:
                 await self.run_workflow_remote()
             else:
@@ -391,10 +445,10 @@ class EvaluationRun:  # pylint: disable=too-many-public-methods
             await self.run_evaluators(evaluators)
         # Profile the workflow
-        await self.profile_workflow()
+        profiler_results = await self.profile_workflow()
         # Write the results to the output directory
-        self.write_output(dataset_handler)
+        self.write_output(dataset_handler, profiler_results)
         # Run custom scripts and upload evaluation outputs to S3
         if self.eval_config.general.output:

aiq/eval/usage_stats.py ADDED Viewed

@@ -0,0 +1,35 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import typing
+from pydantic import BaseModel
+class UsageStatsLLM(BaseModel):
+    prompt_tokens: int = 0
+    completion_tokens: int = 0
+    total_tokens: int = 0
+class UsageStatsItem(BaseModel):
+    usage_stats_per_llm: dict[str, UsageStatsLLM]
+    total_tokens: int | None = None
+    runtime: float = 0.0
+class UsageStats(BaseModel):
+    # key is the id or input_obj from EvalInputItem
+    usage_stats_items: dict[typing.Any, UsageStatsItem] = {}

aiq/eval/utils/output_uploader.py CHANGED Viewed

@@ -78,9 +78,18 @@ class OutputUploader:
         session = aioboto3.Session()
         try:
+            if self.s3_config.endpoint_url:
+                region_name = None
+                endpoint_url = self.s3_config.endpoint_url
+            elif self.s3_config.region_name:
+                region_name = self.s3_config.region_name
+                endpoint_url = None
+            else:
+                raise ValueError("No endpoint_url or region_name provided in the config: eval.general.output.s3")
             async with session.client(
                     "s3",
-                    endpoint_url=self.s3_config.endpoint_url,
+                    endpoint_url=endpoint_url,
+                    region_name=region_name,
                     aws_access_key_id=self.s3_config.access_key,
                     aws_secret_access_key=self.s3_config.secret_key,
             ) as s3_client:

aiq/eval/utils/weave_eval.py CHANGED Viewed

@@ -16,11 +16,13 @@
 import asyncio
 import logging
 from typing import Any
-from typing import List
 from aiq.eval.evaluator.evaluator_model import EvalInput
 from aiq.eval.evaluator.evaluator_model import EvalInputItem
 from aiq.eval.evaluator.evaluator_model import EvalOutput
+from aiq.eval.usage_stats import UsageStats
+from aiq.eval.usage_stats import UsageStatsItem
+from aiq.profiler.data_models import ProfilerResults
 logger = logging.getLogger(__name__)
@@ -61,23 +63,35 @@ class WeaveEvaluationIntegration:  # pylint: disable=too-many-public-methods
             self.client = None
             return False
-    def initialize_logger(self, eval_input: EvalInput, config: Any):
+    def _get_prediction_inputs(self, item: EvalInputItem):
+        """Get the inputs for displaying in the UI.
+        The following fields are excluded as they are too large to display in the UI:
+        - full_dataset_entry
+        - expected_trajectory
+        - trajectory
+        output_obj is excluded because it is displayed separately.
+        """
+        include = {"id", "input_obj", "expected_output_obj"}
+        return item.model_dump(include=include)
+    def _get_weave_dataset(self, eval_input: EvalInput):
+        """Get the full dataset for Weave."""
+        return [item.full_dataset_entry for item in eval_input.eval_input_items]
+    def initialize_logger(self, workflow_alias: str, eval_input: EvalInput, config: Any):
         """Initialize the Weave evaluation logger."""
-        if not self.client:
+        if not self.client and not self.initialize_client():
+            # lazy init the client
             return False
         try:
-            weave_dataset = [
-                item.model_dump(exclude={"output_obj", "trajectory"}) for item in eval_input.eval_input_items
-            ]
+            weave_dataset = self._get_weave_dataset(eval_input)
             config_dict = config.model_dump(mode="json")
-            # TODO: make this configurable
-            config_dict["name"] = "aiqtoolkit-eval"
+            config_dict["name"] = workflow_alias
             self.eval_logger = self.EvaluationLogger(model=config_dict, dataset=weave_dataset)
             self.pred_loggers = {}
-            del weave_dataset
-            del config_dict
             return True
         except Exception as e:
             self.eval_logger = None
@@ -90,21 +104,37 @@ class WeaveEvaluationIntegration:  # pylint: disable=too-many-public-methods
         if not self.eval_logger:
             return
-        pred_logger = self.eval_logger.log_prediction(inputs=item.model_dump(exclude={"output_obj", "trajectory"}),
-                                                      output=output)
+        pred_logger = self.eval_logger.log_prediction(inputs=self._get_prediction_inputs(item), output=output)
         self.pred_loggers[item.id] = pred_logger
+    async def log_usage_stats(self, item: EvalInputItem, usage_stats_item: UsageStatsItem):
+        """Log usage stats to Weave."""
+        if not self.eval_logger:
+            return
+        # log each usage stat as a score
+        await self.pred_loggers[item.id].alog_score(scorer="wf_runtime", score=usage_stats_item.runtime)
+        # log the total tokens for this item, per-llm tokens can be exported later if needed
+        await self.pred_loggers[item.id].alog_score(scorer="wf_tokens", score=usage_stats_item.total_tokens)
     async def alog_score(self, eval_output: EvalOutput, evaluator_name: str):
         """Log scores for evaluation outputs."""
         if not self.eval_logger:
             return
+        # Create coroutines for all score logging operations
+        coros = []
         for eval_output_item in eval_output.eval_output_items:
             if eval_output_item.id in self.pred_loggers:
-                await self.pred_loggers[eval_output_item.id].alog_score(
+                coros.append(self.pred_loggers[eval_output_item.id].alog_score(
                     scorer=evaluator_name,
                     score=eval_output_item.score,
-                )
+                ))
+        # Execute all coroutines concurrently
+        if coros:
+            await asyncio.gather(*coros)
     async def afinish_loggers(self):
         """Finish all prediction loggers."""
@@ -114,22 +144,37 @@ class WeaveEvaluationIntegration:  # pylint: disable=too-many-public-methods
         async def _finish_one(pred_logger):
             if hasattr(pred_logger, '_has_finished') and not pred_logger._has_finished:
                 return
-            # run the *blocking* finish() in a thread so we don’t nest loops
+            # run the *blocking* finish() in a thread so we don't nest loops
             await asyncio.to_thread(pred_logger.finish)
         await asyncio.gather(*[_finish_one(pl) for pl in self.pred_loggers.values()])
-    def log_summary(self, evaluation_results: List[tuple[str, EvalOutput]]):
+    def _log_profiler_metrics(self, profiler_results: ProfilerResults, usage_stats: UsageStats) -> dict[str, Any]:
+        """Log profiler metrics to Weave."""
+        profile_metrics = {}
+        if profiler_results.workflow_runtime_metrics:
+            profile_metrics["wf_p95_runtime"] = profiler_results.workflow_runtime_metrics.p95
+        # TODO:get the LLM tokens from the usage stats and log them
+        return profile_metrics
+    def log_summary(self,
+                    usage_stats: UsageStats,
+                    evaluation_results: list[tuple[str, EvalOutput]],
+                    profiler_results: ProfilerResults):
         """Log summary statistics to Weave."""
         if not self.eval_logger:
             return
         summary = {}
+        # add evaluation results to the summary
         for evaluator_name, eval_output in evaluation_results:
-            # Calculate average score for this evaluator
-            scores = [item.score for item in eval_output.eval_output_items if item.score is not None]
-            if scores:
-                summary[f"{evaluator_name}_avg"] = sum(scores) / len(scores)
+            summary[evaluator_name] = eval_output.average_score
+        # add profiler metrics to the summary
+        profile_metrics = self._log_profiler_metrics(profiler_results, usage_stats)
+        summary.update(profile_metrics)
-        # Log the summary to finish the evaluation
-        self.eval_logger.log_summary(summary)
+        # Log the summary to finish the evaluation, disable auto-summarize
+        # as we will be adding profiler metrics to the summary
+        self.eval_logger.log_summary(summary, auto_summarize=False)

aiq/profiler/data_models.py ADDED Viewed

@@ -0,0 +1,22 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from pydantic import BaseModel
+from aiq.profiler.inference_optimization.data_models import WorkflowRuntimeMetrics
+class ProfilerResults(BaseModel):
+    workflow_runtime_metrics: WorkflowRuntimeMetrics | None = None

aiq/profiler/inference_metrics_model.py CHANGED Viewed

@@ -23,3 +23,6 @@ class InferenceMetricsModel(BaseModel):
     ninetieth_interval: tuple[float, float] = Field(default=(0, 0), description="90% confidence interval")
     ninety_fifth_interval: tuple[float, float] = Field(default=(0, 0), description="95% confidence interval")
     ninety_ninth_interval: tuple[float, float] = Field(default=(0, 0), description="99% confidence interval")
+    p90: float = Field(default=0, description="90th percentile of the samples")
+    p95: float = Field(default=0, description="95th percentile of the samples")
+    p99: float = Field(default=0, description="99th percentile of the samples")

aiq/profiler/profile_runner.py CHANGED Viewed

@@ -25,6 +25,7 @@ from pydantic import BaseModel
 from aiq.data_models.evaluate import ProfilerConfig
 from aiq.data_models.intermediate_step import IntermediateStep
+from aiq.profiler.data_models import ProfilerResults
 from aiq.profiler.forecasting.model_trainer import ModelTrainer
 from aiq.profiler.inference_metrics_model import InferenceMetricsModel
 from aiq.profiler.utils import create_standardized_dataframe
@@ -80,7 +81,7 @@ class ProfilerRunner:
         # Ensure output directory
         os.makedirs(output_dir, exist_ok=True)
-    async def run(self, all_steps: list[list[IntermediateStep]]):
+    async def run(self, all_steps: list[list[IntermediateStep]]) -> ProfilerResults:
         """
         Main entrypoint: Works on Input DataFrame generated from eval to fit forecasting model,
         writes out combined requests JSON, then computes and saves additional metrics,
@@ -171,7 +172,7 @@ class ProfilerRunner:
             uniqueness = compute_inter_query_token_uniqueness_by_llm(all_steps)
             token_uniqueness_results = uniqueness
-        if self.profile_config.workflow_runtime_forecast:
+        if self.profile_config.workflow_runtime_forecast or self.profile_config.base_metrics:
             # ------------------------------------------------------------
             # Compute and save workflow runtime metrics
             # ------------------------------------------------------------
@@ -275,7 +276,7 @@ class ProfilerRunner:
                 logger.info("Fitted model for forecasting.")
             except Exception as e:
                 logger.exception("Fitting model failed. %s", e, exc_info=True)
-                return
+                return ProfilerResults()
             os.makedirs(self.output_dir, exist_ok=True)
@@ -285,6 +286,8 @@ class ProfilerRunner:
             logger.info("Saved fitted model to disk.")
+        return ProfilerResults(workflow_runtime_metrics=workflow_runtimes_results)
     # -------------------------------------------------------------------
     # Confidence Intervals / Metrics
     # -------------------------------------------------------------------
@@ -391,7 +394,8 @@ class ProfilerRunner:
     def _compute_confidence_intervals(self, data: list[float], metric_name: str) -> InferenceMetricsModel:
         """
-        Helper to compute 90, 95, 99% confidence intervals for the mean of a dataset.
+        Helper to compute 90, 95, 99% confidence intervals for the mean of a dataset
+        **and** the empirical 90th/95th/99th percentiles (p90/p95/p99) of its samples.
         Uses a z-score from the normal approximation for large samples.
         Returns a dict like::
@@ -409,11 +413,16 @@ class ProfilerRunner:
         n = len(data)
         mean_val = statistics.mean(data)
         if n <= 1:
-            return InferenceMetricsModel(n=n,
-                                         mean=mean_val,
-                                         ninetieth_interval=(mean_val, mean_val),
-                                         ninety_fifth_interval=(mean_val, mean_val),
-                                         ninety_ninth_interval=(mean_val, mean_val))
+            return InferenceMetricsModel(
+                n=n,
+                mean=mean_val,
+                ninetieth_interval=(mean_val, mean_val),
+                ninety_fifth_interval=(mean_val, mean_val),
+                ninety_ninth_interval=(mean_val, mean_val),
+                p90=mean_val,
+                p95=mean_val,
+                p99=mean_val,
+            )
         stdev_val = statistics.pstdev(data)  # population stdev or use stdev for sample
         # standard error
@@ -430,4 +439,32 @@ class ProfilerRunner:
         # Optionally, store more info
         intervals["n"] = n
         intervals["mean"] = mean_val
+        # ------------------------------------------------------------------
+        # Percentiles
+        # ------------------------------------------------------------------
+        sorted_data = sorted(data)
+        def _percentile(arr: list[float], pct: float) -> float:
+            """
+            Linear interpolation between closest ranks.
+            pct is given from 0‑100 (e.g. 90 for p90).
+            """
+            if not arr:
+                return 0.0
+            k = (len(arr) - 1) * (pct / 100.0)
+            f = math.floor(k)
+            c = math.ceil(k)
+            if f == c:
+                return arr[int(k)]
+            return arr[f] + (arr[c] - arr[f]) * (k - f)
+        p90_val = _percentile(sorted_data, 90)
+        p95_val = _percentile(sorted_data, 95)
+        p99_val = _percentile(sorted_data, 99)
+        intervals["p90"] = p90_val
+        intervals["p95"] = p95_val
+        intervals["p99"] = p99_val
         return InferenceMetricsModel(**intervals)

aiq/tool/mcp/mcp_client.py CHANGED Viewed

@@ -68,6 +68,16 @@ def model_from_mcp_schema(name: str, mcp_input_schema: dict) -> type[BaseModel]:
             else:
                 item_type = _type_map.get(item_properties.get("type", "string"), Any)
             field_type = list[item_type]
+        elif isinstance(json_type, list):
+            field_type = None
+            for t in json_type:
+                mapped = _type_map.get(t, Any)
+                field_type = mapped if field_type is None else field_type | mapped
+            return field_type, Field(
+                default=field_properties.get("default", None if "null" in json_type else ...),
+                description=field_properties.get("description", "")
+            )
         else:
             field_type = _type_map.get(json_type, Any)

aiq/tool/mcp/mcp_tool.py CHANGED Viewed

@@ -75,7 +75,8 @@ async def mcp_tool(config: MCPToolConfig, builder: Builder):  # pylint: disable=
                 return await tool.acall(args)
             _ = tool.input_schema.model_validate(kwargs)
-            return await tool.acall(kwargs)
+            filtered_kwargs = {k: v for k, v in kwargs.items() if v is not None}
+            return await tool.acall(filtered_kwargs)
         except Exception as e:
             if config.return_exception:
                 if tool_input:

{aiqtoolkit-1.2.0a20250626.dist-info → aiqtoolkit-1.2.0a20250628.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aiqtoolkit
-Version: 1.2.0a20250626
+Version: 1.2.0a20250628
 Summary: NVIDIA Agent Intelligence toolkit
 Author: NVIDIA Corporation
 Maintainer: NVIDIA Corporation

{aiqtoolkit-1.2.0a20250626.dist-info → aiqtoolkit-1.2.0a20250628.dist-info}/RECORD RENAMED Viewed

@@ -79,10 +79,10 @@ aiq/data_models/common.py,sha256=G63rUXvDAtK6p1SrRyH0VlHGqrDgCZVVjbnzgGSl2Ic,421
 aiq/data_models/component.py,sha256=x6jm1Fhn1k1hGu-5AjM0ywuyvs6ztaZfapD8bLUXSqc,1469
 aiq/data_models/component_ref.py,sha256=GyyIf4k80aUIn6LV9r84m5imbiVhpdaY7uKMMpYpbzU,3872
 aiq/data_models/config.py,sha256=ERLjZY0iqexZ-gSXsCSN1UqgNeiwkEjWdYJEdKqeYTY,14116
-aiq/data_models/dataset_handler.py,sha256=SifWhFHtxTMEjrXaXOYQgBOSKfWOzkc6OtOoPJ39pD4,3978
+aiq/data_models/dataset_handler.py,sha256=liMB3xRohkr4VTMmNWPvWi9qhbhlJQfQK36g5Rknweo,4027
 aiq/data_models/discovery_metadata.py,sha256=OcITQc5VeML4bTHurrsMNiK_oB3z7wudMxcyN7LI8pY,12785
 aiq/data_models/embedder.py,sha256=0v917IiohVA_7zdF7hoO_zQcmNe4hQEFhh4fxRiYBbk,940
-aiq/data_models/evaluate.py,sha256=tLL-AidxW6-VnEpIDYqGpvIdcNXnDee7Ooze9_bzXeY,4557
+aiq/data_models/evaluate.py,sha256=WBeABZsIa6W04MPj24SRu4s-ty2PkJ7_4SLojXmj5Pk,4704
 aiq/data_models/evaluator.py,sha256=bd2njsyQB2t6ClJ66gJiCjYHsQpWZwPD7rsU0J109TI,939
 aiq/data_models/front_end.py,sha256=z8k6lSWjt1vMOYFbjWQxodpwAqPeuGS0hRBjsriDW2s,932
 aiq/data_models/function.py,sha256=M_duXVXL5MvYe0WVLvqEgEzXs0UAYNSMfy9ZTpxuKPA,1013
@@ -93,7 +93,7 @@ aiq/data_models/invocation_node.py,sha256=nDRylgzBfJduGA-lme9xN4P6BdOYj0L6ytLHnT
 aiq/data_models/llm.py,sha256=McbDdUUtWfp9WCdMMJA2xh7mvlmyNdGDCH8P_7l2iKU,920
 aiq/data_models/logging.py,sha256=1QtVjIQ99PgMYUuzw4h1FAoPRteZY7uf3oFTqV3ONgA,940
 aiq/data_models/memory.py,sha256=RYwmE8I0PJ-h1GD-689abgt5DDi7JlWANeXpOsvWT9E,932
-aiq/data_models/profiler.py,sha256=99KBOnFDJWtmTUIscivk-hHYvbNax-QPe7mQwTCgu88,1750
+aiq/data_models/profiler.py,sha256=z3IlEhj-veB4Yz85271bTkScSUkVwK50tR3dwlDRgcE,1781
 aiq/data_models/registry_handler.py,sha256=g1rFaz4uSydMJn7qpdX-DNHJd_rNf8tXYN49dLDYHPo,968
 aiq/data_models/retriever.py,sha256=UOfss4sru5ku5E8YZYN5qz4MVbFi2VwvpNUPVp9hsnQ,1202
 aiq/data_models/step_adaptor.py,sha256=h7nVAwdgbuHd1e1-SR5jY9nkDMBDGqzTzrl-4lBQX7o,2615
@@ -107,15 +107,16 @@ aiq/embedder/openai_embedder.py,sha256=5FO3xsyNvEmbLBsZb3xsCpbN1Soxio4yf4b5gTPVx
 aiq/embedder/register.py,sha256=3MTZrfNQKp6AZTbfaA-PpTnyXiMyu-8HH9JnDCC0v9o,978
 aiq/eval/__init__.py,sha256=Xs1JQ16L9btwreh4pdGKwskffAw1YFO48jKrU4ib_7c,685
 aiq/eval/config.py,sha256=IlOr2o618kbkXP0G1F-AklZfsKYVos9UB4Dvlxf66xk,1431
-aiq/eval/evaluate.py,sha256=VdVdB_CV842gIV4diHciJ1qrof5_N3H8I16WwracCsQ,17940
+aiq/eval/evaluate.py,sha256=zv2AQfcf-aaQO0Tx7VV5Qc7KZ6DMniKPjXG-BUrtlMA,20983
 aiq/eval/intermediate_step_adapter.py,sha256=4cSsGgFBvNjXnclk5FvZnQaFEdeulp7VEdRWKLcREAQ,4498
 aiq/eval/register.py,sha256=QOHJqA2CQixeWMC9InyKbzXo1jByvrntD_m9-2Mvg9k,1076
 aiq/eval/remote_workflow.py,sha256=Fb7Z6gdP2L_gqyWB--AEWfcXe9xPpQ_hPsf9lmqGXjI,5524
 aiq/eval/runtime_event_subscriber.py,sha256=2VM8MqmPc_EWPxxrDDR9naiioZirkJUfGwzbXQqbdZA,1906
+aiq/eval/usage_stats.py,sha256=izIIoHElo3mvysq_Z3hw9YPcxhR6G_zaIF4CzyPdJR4,1135
 aiq/eval/dataset_handler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 aiq/eval/dataset_handler/dataset_downloader.py,sha256=Zvfbd-fPOhB9n8ZiCBaBKW0y-5v97mQAy3dkBL0OFZ0,4553
 aiq/eval/dataset_handler/dataset_filter.py,sha256=mop6wa4P_QtQ5QkfXv-hVBm3EMerfNECSTJGGDB1YWE,2115
-aiq/eval/dataset_handler/dataset_handler.py,sha256=z4trKYPnqSrLvsKctU9d5WrQW7ddbZZx0zOrYVLqbAA,7847
+aiq/eval/dataset_handler/dataset_handler.py,sha256=sJhjZrasAZiDI_B2GM3czb6HTY0xSfqHV8386jmjCjI,8194
 aiq/eval/evaluator/__init__.py,sha256=GUJrgGtpvyMUCjUBvR3faAdv-tZzbU9W-izgx9aMEQg,680
 aiq/eval/evaluator/base_evaluator.py,sha256=5kqOcTYNecnh9us_XvV58pj5tZI82NGkVN4tg9-R_ZE,3040
 aiq/eval/evaluator/evaluator_model.py,sha256=5cxe3mqznlNGzv29v_VseYU7OzoT1eTf7hgSPQxytsM,1440
@@ -132,9 +133,9 @@ aiq/eval/tunable_rag_evaluator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
 aiq/eval/tunable_rag_evaluator/evaluate.py,sha256=f4jfn9VVLmkOg631TQr2wy7hPwGMJMsQa4kmXsu0-Uc,13069
 aiq/eval/tunable_rag_evaluator/register.py,sha256=q4p2rFyMzWmaINJc961ZV4jzIlAN4GfWsoImHo0ovsY,2558
 aiq/eval/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-aiq/eval/utils/output_uploader.py,sha256=SaQbZPkw-Q0H7t5yG60Kh-p1cflR7gPklVkilC4uPbU,5141
+aiq/eval/utils/output_uploader.py,sha256=lkV63Jr97YuG1vr04uOZDvs9e1pGP4FbJykRxS2d7a4,5579
 aiq/eval/utils/tqdm_position_registry.py,sha256=9CtpCk1wtYCSyieHPaSp8nlZu6EcNUOaUz2RTqfekrA,1286
-aiq/eval/utils/weave_eval.py,sha256=yIdlp4UdCPgwFYJNJon5eZD1d99E-6dcmfVg6B-4RKE,5076
+aiq/eval/utils/weave_eval.py,sha256=l9NTkgLTb30wBnfiHI_yefPFVNyIBrNdbPNq2o58HO4,7088
 aiq/front_ends/__init__.py,sha256=Xs1JQ16L9btwreh4pdGKwskffAw1YFO48jKrU4ib_7c,685
 aiq/front_ends/register.py,sha256=OKv1xi-g8WHtUMuIPhwjG6wOYqaGDD-Q9vDtKtT9d1Y,889
 aiq/front_ends/console/__init__.py,sha256=Xs1JQ16L9btwreh4pdGKwskffAw1YFO48jKrU4ib_7c,685
@@ -181,9 +182,10 @@ aiq/observability/register.py,sha256=mejMBVr3dHHfShIiyn1fIbA0Gb6z9Ayg8WRMgB0wf5E
 aiq/plugins/.namespace,sha256=Gace0pOC3ETEJf-TBVuNw0TQV6J_KtOPpEiSzMH-odo,215
 aiq/profiler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 aiq/profiler/data_frame_row.py,sha256=vudqk1ZzZtlZln2Ir43mPl3nwNc0pQlhwbtdY9oSKtI,1755
-aiq/profiler/inference_metrics_model.py,sha256=e_M0ApsyDgPMrOIOnm1beHtNeHKwOh5CAxu-OiJaEzQ,1241
+aiq/profiler/data_models.py,sha256=I8k1zjg8KaLjjpc1SxMgaEC2h_jW2bv1cENl1BsTcG8,899
+aiq/profiler/inference_metrics_model.py,sha256=Thz3OHBDzGrpPYaOm8m8_pNeEA_q0yDlUUDHFkQ3U90,1481
 aiq/profiler/intermediate_property_adapter.py,sha256=XZ_A8f2S5M-EJSkErY6I750Y8HAZPdXsr6Cpb1wXlNM,3537
-aiq/profiler/profile_runner.py,sha256=ltADgYhZvcsYtgYahFXW6FtTLm9DSepJUE2U2w0ZU-A,20855
+aiq/profiler/profile_runner.py,sha256=Xyh0wl2aeRJtRBzvvkMYkFvqUptB7XUfYJ7jdbBCPuE,22102
 aiq/profiler/utils.py,sha256=hNh_JfxXDrACIp4usXtlriTfVuYUkk3Pv-x74K34MQg,8180
 aiq/profiler/callbacks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 aiq/profiler/callbacks/agno_callback_handler.py,sha256=aDAUY6GDIUtly6KowXXKUqLc7NbE6khg1aXT1AritaA,14930
@@ -279,8 +281,8 @@ aiq/tool/github_tools/get_github_issue.py,sha256=vwLNkNOszLlymkQju0cR8BNvfdH4Enm
 aiq/tool/github_tools/get_github_pr.py,sha256=b7eCOqrVoejGjRwmUVdU45uF07ihbY8lRacMYOSgMrY,9716
 aiq/tool/github_tools/update_github_issue.py,sha256=TUElxUuzjZr_QldL_48RcqSx0A9b23NB_lA82QwFjkM,4103
 aiq/tool/mcp/__init__.py,sha256=GUJrgGtpvyMUCjUBvR3faAdv-tZzbU9W-izgx9aMEQg,680
-aiq/tool/mcp/mcp_client.py,sha256=lYbf669ATqGKkL0jjd76r0aAtAFnWeruWw-lOPsmYu8,8103
-aiq/tool/mcp/mcp_tool.py,sha256=rQQcaCT-GHQcDmG5weX-2Y-HxBPX-0cC73LjL1u0FUU,4009
+aiq/tool/mcp/mcp_client.py,sha256=bTZGh_Y3mRJA9BGbbmfVNRddTIcK251jKlPL7kAjFK0,8553
+aiq/tool/mcp/mcp_tool.py,sha256=0L2Zj1CBwrvv5P9A8-lj_Ao_oBaC6aYRJXw9q5Et4uo,4099
 aiq/tool/memory_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 aiq/tool/memory_tools/add_memory_tool.py,sha256=9EjB3DpYhxwasz7o3O8Rq__Ys5986fciv44ahC6mVCo,3349
 aiq/tool/memory_tools/delete_memory_tool.py,sha256=wdB_I8y-1D1OpNtBi6ZOg36vvNkbaxp-yvdqFMc2Suk,2532
@@ -310,10 +312,10 @@ aiq/utils/reactive/base/observer_base.py,sha256=UAlyAY_ky4q2t0P81RVFo2Bs_R7z5Nde
 aiq/utils/reactive/base/subject_base.py,sha256=Ed-AC6P7cT3qkW1EXjzbd5M9WpVoeN_9KCe3OM3FLU4,2521
 aiq/utils/settings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 aiq/utils/settings/global_settings.py,sha256=U9TCLdoZsKq5qOVGjREipGVv9e-FlStzqy5zv82_VYk,7454
-aiqtoolkit-1.2.0a20250626.dist-info/licenses/LICENSE-3rd-party.txt,sha256=8o7aySJa9CBvFshPcsRdJbczzdNyDGJ8b0J67WRUQ2k,183936
-aiqtoolkit-1.2.0a20250626.dist-info/licenses/LICENSE.md,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
-aiqtoolkit-1.2.0a20250626.dist-info/METADATA,sha256=37IlijO2OTc7Oi5tW3vdnnI9OjBFUM5xzjwKd2RBYtU,20274
-aiqtoolkit-1.2.0a20250626.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-aiqtoolkit-1.2.0a20250626.dist-info/entry_points.txt,sha256=gRlPfR5g21t328WNEQ4CcEz80S1sJNS8A7rMDYnzl4A,452
-aiqtoolkit-1.2.0a20250626.dist-info/top_level.txt,sha256=fo7AzYcNhZ_tRWrhGumtxwnxMew4xrT1iwouDy_f0Kc,4
-aiqtoolkit-1.2.0a20250626.dist-info/RECORD,,
+aiqtoolkit-1.2.0a20250628.dist-info/licenses/LICENSE-3rd-party.txt,sha256=8o7aySJa9CBvFshPcsRdJbczzdNyDGJ8b0J67WRUQ2k,183936
+aiqtoolkit-1.2.0a20250628.dist-info/licenses/LICENSE.md,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+aiqtoolkit-1.2.0a20250628.dist-info/METADATA,sha256=kGslYo0xYh5ERzp0dvetOiCsiTWFe__dUyWxvxWkIiM,20274
+aiqtoolkit-1.2.0a20250628.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+aiqtoolkit-1.2.0a20250628.dist-info/entry_points.txt,sha256=gRlPfR5g21t328WNEQ4CcEz80S1sJNS8A7rMDYnzl4A,452
+aiqtoolkit-1.2.0a20250628.dist-info/top_level.txt,sha256=fo7AzYcNhZ_tRWrhGumtxwnxMew4xrT1iwouDy_f0Kc,4
+aiqtoolkit-1.2.0a20250628.dist-info/RECORD,,

{aiqtoolkit-1.2.0a20250626.dist-info → aiqtoolkit-1.2.0a20250628.dist-info}/WHEEL RENAMED Viewed

File without changes

{aiqtoolkit-1.2.0a20250626.dist-info → aiqtoolkit-1.2.0a20250628.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{aiqtoolkit-1.2.0a20250626.dist-info → aiqtoolkit-1.2.0a20250628.dist-info}/licenses/LICENSE-3rd-party.txt RENAMED Viewed

File without changes

{aiqtoolkit-1.2.0a20250626.dist-info → aiqtoolkit-1.2.0a20250628.dist-info}/licenses/LICENSE.md RENAMED Viewed

File without changes

{aiqtoolkit-1.2.0a20250626.dist-info → aiqtoolkit-1.2.0a20250628.dist-info}/top_level.txt RENAMED Viewed

File without changes

aiqtoolkit 1.2.0a20250626__py3-none-any.whl → 1.2.0a20250628__py3-none-any.whl

Potentially problematic release.

aiqtoolkit 1.2.0a20250626py3-none-any.whl → 1.2.0a20250628py3-none-any.whl