aiqtoolkit 1.2.0a20250616__py3-none-any.whl → 1.2.0a20250617__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiq/builder/function.py +12 -8
- aiq/builder/workflow_builder.py +1 -1
- aiq/data_models/evaluate.py +19 -0
- aiq/eval/evaluate.py +63 -5
- aiq/eval/tunable_rag_evaluator/evaluate.py +121 -168
- aiq/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +8 -0
- {aiqtoolkit-1.2.0a20250616.dist-info → aiqtoolkit-1.2.0a20250617.dist-info}/METADATA +1 -1
- {aiqtoolkit-1.2.0a20250616.dist-info → aiqtoolkit-1.2.0a20250617.dist-info}/RECORD +13 -13
- {aiqtoolkit-1.2.0a20250616.dist-info → aiqtoolkit-1.2.0a20250617.dist-info}/WHEEL +0 -0
- {aiqtoolkit-1.2.0a20250616.dist-info → aiqtoolkit-1.2.0a20250617.dist-info}/entry_points.txt +0 -0
- {aiqtoolkit-1.2.0a20250616.dist-info → aiqtoolkit-1.2.0a20250617.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
- {aiqtoolkit-1.2.0a20250616.dist-info → aiqtoolkit-1.2.0a20250617.dist-info}/licenses/LICENSE.md +0 -0
- {aiqtoolkit-1.2.0a20250616.dist-info → aiqtoolkit-1.2.0a20250617.dist-info}/top_level.txt +0 -0
aiq/builder/function.py
CHANGED
@@ -48,7 +48,8 @@ class Function(FunctionBase[InputT, StreamingOutputT, SingleOutputT], ABC):
                  input_schema: type[BaseModel] | None = None,
                  streaming_output_schema: type[BaseModel] | type[None] | None = None,
                  single_output_schema: type[BaseModel] | type[None] | None = None,
-                 converters: list[Callable[[typing.Any], typing.Any]] | None = None
+                 converters: list[Callable[[typing.Any], typing.Any]] | None = None,
+                 instance_name: str | None = None):
 
         super().__init__(input_schema=input_schema,
                          streaming_output_schema=streaming_output_schema,
@@ -57,6 +58,7 @@ class Function(FunctionBase[InputT, StreamingOutputT, SingleOutputT], ABC):
 
         self.config = config
         self.description = description
+        self.instance_name = instance_name or config.type
         self._context = AIQContext.get()
 
     def convert(self, value: typing.Any, to_type: type[_T]) -> _T:
@@ -110,7 +112,7 @@ class Function(FunctionBase[InputT, StreamingOutputT, SingleOutputT], ABC):
             The output of the function optionally converted to the specified type.
         """
 
-        with self._context.push_active_function(self.
+        with self._context.push_active_function(self.instance_name,
                                                 input_data=value) as manager:  # Set the current invocation context
             try:
                 converted_input: InputT = self._convert_input(value)  # type: ignore
@@ -254,17 +256,17 @@ class Function(FunctionBase[InputT, StreamingOutputT, SingleOutputT], ABC):
 
 class LambdaFunction(Function[InputT, StreamingOutputT, SingleOutputT]):
 
-    def __init__(self, *, config: FunctionBaseConfig, info: FunctionInfo):
+    def __init__(self, *, config: FunctionBaseConfig, info: FunctionInfo, instance_name: str | None = None):
 
         super().__init__(config=config,
                          description=info.description,
                          input_schema=info.input_schema,
                          streaming_output_schema=info.stream_output_schema,
                          single_output_schema=info.single_output_schema,
-                         converters=info.converters
+                         converters=info.converters,
+                         instance_name=instance_name)
 
         self._info = info
-
         self._ainvoke_fn: _InvokeFnT = info.single_fn
         self._astream_fn: _StreamFnT = info.stream_fn
 
@@ -284,8 +286,10 @@ class LambdaFunction(Function[InputT, StreamingOutputT, SingleOutputT]):
             yield x
 
     @staticmethod
-    def from_info(*,
-
+    def from_info(*,
+                  config: FunctionBaseConfig,
+                  info: FunctionInfo,
+                  instance_name: str | None = None) -> 'LambdaFunction[InputT, StreamingOutputT, SingleOutputT]':
 
         input_type: type = info.input_type
         streaming_output_type = info.stream_output_type
@@ -294,4 +298,4 @@ class LambdaFunction(Function[InputT, StreamingOutputT, SingleOutputT]):
     class FunctionImpl(LambdaFunction[input_type, streaming_output_type, single_output_type]):
         pass
 
-    return FunctionImpl(config=config, info=info)
+    return FunctionImpl(config=config, info=info, instance_name=instance_name)
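The change above threads an `instance_name` through `Function`, `LambdaFunction`, and `LambdaFunction.from_info()`, falling back to `config.type` when no name is supplied, so the active-function context is pushed under a per-instance name rather than the shared type name. A minimal sketch of that fallback behavior, using hypothetical stand-in classes rather than the real `FunctionBaseConfig`/`FunctionInfo`:

from dataclasses import dataclass


@dataclass
class StubConfig:
    # Stand-in for FunctionBaseConfig; only the `type` field used by the fallback is modeled here.
    type: str


class StubFunction:
    def __init__(self, config: StubConfig, instance_name: str | None = None):
        # Mirrors the new Function.__init__ behavior: prefer the explicit instance
        # name, otherwise fall back to the registered config type.
        self.instance_name = instance_name or config.type


# Two instances of the same function type can now be told apart by name.
print(StubFunction(StubConfig(type="web_search")).instance_name)                            # web_search
print(StubFunction(StubConfig(type="web_search"), instance_name="search_a").instance_name)  # search_a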
aiq/builder/workflow_builder.py
CHANGED
@@ -333,7 +333,7 @@ class WorkflowBuilder(Builder, AbstractAsyncContextManager):
 
         if (isinstance(build_result, FunctionInfo)):
            # Create the function object
-            build_result = LambdaFunction.from_info(config=config, info=build_result)
+            build_result = LambdaFunction.from_info(config=config, info=build_result, instance_name=name)
 
         if (not isinstance(build_result, Function)):
            raise ValueError("Expected a function, FunctionInfo object, or FunctionBase object to be "
aiq/data_models/evaluate.py
CHANGED
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 import typing
+from enum import Enum
 from pathlib import Path
 
 from pydantic import BaseModel
@@ -28,6 +29,12 @@ from aiq.data_models.intermediate_step import IntermediateStepType
 from aiq.data_models.profiler import ProfilerConfig
 
 
+class JobEvictionPolicy(str, Enum):
+    """Policy for evicting old jobs when max_jobs is exceeded."""
+    TIME_CREATED = "time_created"
+    TIME_MODIFIED = "time_modified"
+
+
 class EvalCustomScriptConfig(BaseModel):
     # Path to the script to run
     script: Path
@@ -35,6 +42,16 @@ class EvalCustomScriptConfig(BaseModel):
     kwargs: dict[str, str] = {}
 
 
+class JobManagementConfig(BaseModel):
+    # Whether to append a unique job ID to the output directory for each run
+    append_job_id_to_output_dir: bool = False
+    # Maximum number of jobs to keep in the output directory. Oldest jobs will be evicted.
+    # A value of 0 means no limit.
+    max_jobs: int = 0
+    # Policy for evicting old jobs. Defaults to using time_created.
+    eviction_policy: JobEvictionPolicy = JobEvictionPolicy.TIME_CREATED
+
+
 class EvalOutputConfig(BaseModel):
     # Output directory for the workflow and evaluation results
     dir: Path = Path("/tmp/aiq/examples/default/")
@@ -46,6 +63,8 @@ class EvalOutputConfig(BaseModel):
     s3: EvalS3Config | None = None
     # Whether to cleanup the output directory before running the workflow
     cleanup: bool = True
+    # Job management configuration (job id, eviction, etc.)
+    job_management: JobManagementConfig = JobManagementConfig()
     # Filter for the workflow output steps
     workflow_output_step_filter: list[IntermediateStepType] | None = None
 
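The new `JobEvictionPolicy` enum and `JobManagementConfig` model control per-run job directories and how old runs are evicted. A small sketch of how the added fields compose, using condensed stand-ins for the real pydantic models in aiq/data_models/evaluate.py (the real `EvalOutputConfig` carries additional fields not repeated here):

from enum import Enum

from pydantic import BaseModel


class JobEvictionPolicy(str, Enum):
    TIME_CREATED = "time_created"
    TIME_MODIFIED = "time_modified"


class JobManagementConfig(BaseModel):
    append_job_id_to_output_dir: bool = False
    max_jobs: int = 0  # 0 disables eviction
    eviction_policy: JobEvictionPolicy = JobEvictionPolicy.TIME_CREATED


# Keep only the five most recently modified job directories and give each run
# its own job-id subdirectory under <output dir>/jobs/.
job_cfg = JobManagementConfig(append_job_id_to_output_dir=True,
                              max_jobs=5,
                              eviction_policy=JobEvictionPolicy.TIME_MODIFIED)
print(job_cfg)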
aiq/eval/evaluate.py
CHANGED
@@ -18,11 +18,13 @@ import logging
 import shutil
 from pathlib import Path
 from typing import Any
+from uuid import uuid4
 
 from pydantic import BaseModel
 from tqdm import tqdm
 
 from aiq.data_models.evaluate import EvalConfig
+from aiq.data_models.evaluate import JobEvictionPolicy
 from aiq.eval.config import EvaluationRunConfig
 from aiq.eval.config import EvaluationRunOutput
 from aiq.eval.dataset_handler.dataset_handler import DatasetHandler
@@ -178,10 +180,60 @@ class EvaluationRun:  # pylint: disable=too-many-public-methods
 
     def cleanup_output_directory(self):
         '''Remove contents of the output directory if it exists'''
-
-
-
-
+        output_config = self.eval_config.general.output
+        output_dir = output_config.dir
+
+        if not (output_config and output_dir.exists()):
+            return
+
+        # If cleanup is true, remove the entire directory and we are done
+        if output_config.cleanup:
+            logger.info("Cleaning up entire output directory: %s", output_config.dir)
+            shutil.rmtree(output_config.dir)
+            return
+
+        if output_config.job_management.max_jobs == 0:
+            # No eviction policy
+            return
+
+        base_dir = output_dir / "jobs"
+        if not base_dir.exists():
+            return
+
+        # Get all subdirectories, which represent individual job runs
+        job_dirs = [d for d in base_dir.iterdir() if d.is_dir()]
+        if len(job_dirs) <= output_config.job_management.max_jobs:
+            return
+
+        # Determine sort key based on eviction_policy, defaulting to creation time
+        if output_config.job_management.eviction_policy == JobEvictionPolicy.TIME_MODIFIED:
+
+            def sort_key(x):
+                return x.stat().st_mtime
+
+            logger.info("Using last modified time for job eviction policy.")
+        else:
+
+            def sort_key(x):
+                return x.stat().st_ctime
+
+            logger.info("Using creation time for job eviction policy.")
+
+        # Sort directories (oldest first)
+        job_dirs.sort(key=sort_key)
+        num_to_delete = len(job_dirs) - output_config.job_management.max_jobs
+
+        logger.info("Found %d jobs, exceeding limit of %d. Removing %d oldest jobs.",
+                    len(job_dirs),
+                    output_config.job_management.max_jobs,
+                    num_to_delete)
+
+        for dir_to_delete in job_dirs[:num_to_delete]:
+            try:
+                logger.info("Deleting old job directory: %s", dir_to_delete)
+                shutil.rmtree(dir_to_delete)
+            except Exception as e:
+                logger.exception("Failed to delete old job directory: %s: %s", dir_to_delete, e, exc_info=True)
 
     def write_output(self, dataset_handler: DatasetHandler):
         workflow_output_file = self.eval_config.general.output_dir / "workflow_output.json"
@@ -272,9 +324,15 @@ class EvaluationRun:  # pylint: disable=too-many-public-methods
         logger.debug("Loaded evaluation configuration: %s", self.eval_config)
 
         # Cleanup the output directory
-        if self.eval_config.general.output
+        if self.eval_config.general.output:
             self.cleanup_output_directory()
 
+        # Generate a job_id if append_job_id_to_output_dir is enabled and no job_id provided
+        if (self.eval_config.general.output
+                and self.eval_config.general.output.job_management.append_job_id_to_output_dir and not job_id):
+            job_id = "job_" + str(uuid4())
+            logger.info("Generated job ID for output directory: %s", job_id)
+
         # If a job id is provided keep the data per-job
         if job_id:
             self.eval_config.general.output_dir = self.eval_config.general.output_dir / f"jobs/{job_id}"
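The reworked `cleanup_output_directory()` either wipes the whole output directory (when `cleanup` is true) or, when `max_jobs` is set, evicts the oldest job subdirectories under `<output dir>/jobs` according to the configured eviction policy; `run()` now also generates a `job_<uuid4>` identifier when `append_job_id_to_output_dir` is enabled and no job id was supplied. A standalone sketch of just the eviction step, assuming the jobs path and limits are passed in directly (the real method reads them from `eval_config.general.output`):

import shutil
from pathlib import Path


def evict_old_jobs(jobs_dir: Path, max_jobs: int, use_mtime: bool = False) -> None:
    """Keep at most `max_jobs` job directories under jobs_dir, deleting the oldest first."""
    if max_jobs == 0 or not jobs_dir.exists():
        return  # max_jobs == 0 disables eviction, matching the new config default

    job_dirs = [d for d in jobs_dir.iterdir() if d.is_dir()]
    if len(job_dirs) <= max_jobs:
        return

    # TIME_MODIFIED sorts on st_mtime, TIME_CREATED on st_ctime; oldest first
    job_dirs.sort(key=lambda d: d.stat().st_mtime if use_mtime else d.stat().st_ctime)

    for old_dir in job_dirs[:len(job_dirs) - max_jobs]:
        shutil.rmtree(old_dir)


# Example: keep the five newest runs under the default output directory
evict_old_jobs(Path("/tmp/aiq/examples/default/jobs"), max_jobs=5)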
aiq/eval/tunable_rag_evaluator/evaluate.py
CHANGED
@@ -25,11 +25,9 @@ from langchain_core.language_models import BaseChatModel
 from langchain_core.runnables import RunnableLambda
 from tqdm import tqdm
 
-from aiq.eval.evaluator.
+from aiq.eval.evaluator.base_evaluator import BaseEvaluator
 from aiq.eval.evaluator.evaluator_model import EvalInputItem
-from aiq.eval.evaluator.evaluator_model import EvalOutput
 from aiq.eval.evaluator.evaluator_model import EvalOutputItem
-from aiq.eval.utils.tqdm_position_registry import TqdmPositionRegistry
 
 logger = logging.getLogger(__name__)
 
@@ -96,7 +94,7 @@ def runnable_with_retries(original_fn: Callable, llm_retry_control_params: dict
     )
 
 
-class TunableRagEvaluator:
+class TunableRagEvaluator(BaseEvaluator):
     '''Tunable RAG evaluator class with customizable LLM prompt for scoring.'''
 
     def __init__(self,
@@ -106,187 +104,142 @@ class TunableRagEvaluator:
                  max_concurrency: int,
                  default_scoring: bool,
                  default_score_weights: dict):
+        super().__init__(max_concurrency=max_concurrency, tqdm_desc="Evaluating RAG")
         self.llm = llm
-        self.max_concurrency = max_concurrency
         self.judge_llm_prompt = judge_llm_prompt
         self.llm_retry_control_params = llm_retry_control_params
-        self.semaphore = asyncio.Semaphore(self.max_concurrency)
         self.default_scoring = default_scoring
         # Use user-provided weights if available; otherwise, set equal weights for each score
         self.default_score_weights = default_score_weights if default_score_weights else {
             "coverage": 1 / 3, "correctness": 1 / 3, "relevance": 1 / 3
         }
 
-    async def
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                name="reasoning",
-                description=
-                "1-2 summarized sentences of reasoning for the scores. Ex. 'The generated answer covers all critical aspects mentioned in the expected answer, is correct, and is relevant to the question.'",
-                type="string"),
-        ]
-
-        custom_evaluation_schema = [
-            ResponseSchema(name="score", description="Score for the generated answer. Ex. 0.5", type="float"),
-            ResponseSchema(
-                name="reasoning",
-                description=
-                "1-2 sentence reasoning for the score. Ex. 'The generated answer is exactly the same as the description of the expected answer.'",
-                type="string"),
-        ]
+    async def evaluate_item(self, item: EvalInputItem) -> EvalOutputItem:
+        """Compute RAG evaluation for an individual item and return EvalOutputItem"""
+        question = item.input_obj
+        answer_description = item.expected_output_obj
+        generated_answer = item.output_obj
+
+        # Call judge LLM to generate score
+        score = 0.0
+
+        default_evaluation_schema = [
+            ResponseSchema(
+                name="coverage_score",
+                description="Score for the coverage of all critical aspects mentioned in the expected answer. Ex. 0.5",
+                type="float"),
+            ResponseSchema(
+                name="correctness_score",
+                description="Score for the accuracy of the generated answer compared to the expected answer. Ex. 0.5",
+                type="float"),
+            ResponseSchema(name="relevance_score",
+                           description="Score for the relevance of the generated answer to the question. Ex. 0.5",
+                           type="float"),
+            ResponseSchema(
+                name="reasoning",
+                description=
+                "1-2 summarized sentences of reasoning for the scores. Ex. 'The generated answer covers all critical aspects mentioned in the expected answer, is correct, and is relevant to the question.'",
+                type="string"),
+        ]
 
-
-
-
-
+        custom_evaluation_schema = [
+            ResponseSchema(name="score", description="Score for the generated answer. Ex. 0.5", type="float"),
+            ResponseSchema(
+                name="reasoning",
+                description=
+                "1-2 sentence reasoning for the score. Ex. 'The generated answer is exactly the same as the description of the expected answer.'",
+                type="string"),
+        ]
 
-
-
+        if self.default_scoring:
+            evaluation_schema = default_evaluation_schema
+        else:
+            evaluation_schema = custom_evaluation_schema
 
-
-
-            answer_description=answer_description,
-            generated_answer=generated_answer,
-            format_instructions=format_instructions,
-            default_scoring=self.default_scoring)
+        llm_input_response_parser = StructuredOutputParser.from_response_schemas(evaluation_schema)
+        format_instructions = llm_input_response_parser.get_format_instructions()
 
-
-
-
+        eval_prompt = evaluation_prompt(judge_llm_prompt=self.judge_llm_prompt,
+                                        question=question,
+                                        answer_description=answer_description,
+                                        generated_answer=generated_answer,
+                                        format_instructions=format_instructions,
+                                        default_scoring=self.default_scoring)
 
-
+        messages = [SystemMessage(content="You must respond only in JSON format."), HumanMessage(content=eval_prompt)]
 
-
-        coverage_score = 0.0
-        correctness_score = 0.0
-        relevance_score = 0.0
-        reasoning = "Error in evaluator from parsing judge LLM response."
+        response = await runnable_with_retries(self.llm.ainvoke, self.llm_retry_control_params).ainvoke(messages)
 
-
-
-
-
-
-                correctness_score = parsed_response["correctness_score"]
-                relevance_score = parsed_response["relevance_score"]
-                reasoning = parsed_response["reasoning"]
-            except KeyError as e:
-                logger.error("Missing required keys in default scoring response: %s",
-                             ", ".join(str(arg) for arg in e.args))
-                reasoning = f"Error in evaluator from parsing judge LLM response. Missing required key(s): {', '.join(str(arg) for arg in e.args)}"
-
-            coverage_weight = self.default_score_weights.get("coverage", 1 / 3)
-            correctness_weight = self.default_score_weights.get("correctness", 1 / 3)
-            relevance_weight = self.default_score_weights.get("relevance", 1 / 3)
-
-            # Calculate score
-            total_weight = coverage_weight + correctness_weight + relevance_weight
-            coverage_weight = coverage_weight / total_weight
-            correctness_weight = correctness_weight / total_weight
-            relevance_weight = relevance_weight / total_weight
-
-            if round(coverage_weight + correctness_weight + relevance_weight, 2) != 1:
-                logger.warning("The sum of the default score weights is not 1. The weights will be normalized.")
-                coverage_weight = coverage_weight / (coverage_weight + correctness_weight + relevance_weight)
-                correctness_weight = correctness_weight / (coverage_weight + correctness_weight +
-                                                           relevance_weight)
-                relevance_weight = relevance_weight / (coverage_weight + correctness_weight + relevance_weight)
-
-            score = (coverage_weight * coverage_score + correctness_weight * correctness_score +
-                     relevance_weight * relevance_score)
-
-        else:
-            try:
-                score = parsed_response["score"]
-                reasoning = parsed_response["reasoning"]
-            except KeyError as e:
-                logger.error("Missing required keys in custom scoring response: %s",
-                             ", ".join(str(arg) for arg in e.args))
-                reasoning = f"Error in evaluator from parsing judge LLM response. Missing required key(s): {', '.join(str(arg) for arg in e.args)}"
-                raise
-        except (KeyError, ValueError) as e:
-            logger.error("Error parsing judge LLM response: %s", e)
-            score = 0.0
-            reasoning = "Error in evaluator from parsing judge LLM response."
+        # Initialize default values to handle service errors
+        coverage_score = 0.0
+        correctness_score = 0.0
+        relevance_score = 0.0
+        reasoning = "Error in evaluator from parsing judge LLM response."
 
+        try:
+            parsed_response = llm_input_response_parser.parse(response.content)
            if self.default_scoring:
-
-                "
-                "
-                "
-                "
-
-
-
-            }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            # Update the progress bar
-            pbar.update(1)
-            return result
+                try:
+                    coverage_score = parsed_response["coverage_score"]
+                    correctness_score = parsed_response["correctness_score"]
+                    relevance_score = parsed_response["relevance_score"]
+                    reasoning = parsed_response["reasoning"]
+                except KeyError as e:
+                    logger.error("Missing required keys in default scoring response: %s",
+                                 ", ".join(str(arg) for arg in e.args))
+                    reasoning = f"Error in evaluator from parsing judge LLM response. Missing required key(s): {', '.join(str(arg) for arg in e.args)}"
+
+                coverage_weight = self.default_score_weights.get("coverage", 1 / 3)
+                correctness_weight = self.default_score_weights.get("correctness", 1 / 3)
+                relevance_weight = self.default_score_weights.get("relevance", 1 / 3)
+
+                # Calculate score
+                total_weight = coverage_weight + correctness_weight + relevance_weight
+                coverage_weight = coverage_weight / total_weight
+                correctness_weight = correctness_weight / total_weight
+                relevance_weight = relevance_weight / total_weight
+
+                if round(coverage_weight + correctness_weight + relevance_weight, 2) != 1:
+                    logger.warning("The sum of the default score weights is not 1. The weights will be normalized.")
+                    coverage_weight = coverage_weight / (coverage_weight + correctness_weight + relevance_weight)
+                    correctness_weight = correctness_weight / (coverage_weight + correctness_weight + relevance_weight)
+                    relevance_weight = relevance_weight / (coverage_weight + correctness_weight + relevance_weight)
+
+                score = (coverage_weight * coverage_score + correctness_weight * correctness_score +
+                         relevance_weight * relevance_score)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            # Compute average score
-            avg_score = round(sum(sample_scores) / len(sample_scores), 2) if sample_scores else 0.0
-
-            # Construct EvalOutputItems
-            eval_output_items = [
-                EvalOutputItem(id=item.id, score=score, reasoning=reasoning)
-                for item, score, reasoning in zip(eval_input.eval_input_items, sample_scores, sample_reasonings)
-            ]
+            else:
+                try:
+                    score = parsed_response["score"]
+                    reasoning = parsed_response["reasoning"]
+                except KeyError as e:
+                    logger.error("Missing required keys in custom scoring response: %s",
+                                 ", ".join(str(arg) for arg in e.args))
+                    reasoning = f"Error in evaluator from parsing judge LLM response. Missing required key(s): {', '.join(str(arg) for arg in e.args)}"
+                    raise
+        except (KeyError, ValueError) as e:
+            logger.error("Error parsing judge LLM response: %s", e)
+            score = 0.0
+            reasoning = "Error in evaluator from parsing judge LLM response."
 
-
+        if self.default_scoring:
+            reasoning = {
+                "question": question,
+                "answer_description": answer_description,
+                "generated_answer": generated_answer,
+                "score_breakdown": {
+                    "coverage_score": coverage_score,
+                    "correctness_score": correctness_score,
+                    "relevance_score": relevance_score,
+                },
+                "reasoning": reasoning,
+            }
+        else:
+            reasoning = {
+                "question": question,
+                "answer_description": answer_description,
+                "generated_answer": generated_answer,
+                "reasoning": reasoning
+            }
+
+        return EvalOutputItem(id=item.id, score=score, reasoning=reasoning)
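With the move onto `BaseEvaluator`, the evaluator no longer manages its own semaphore, progress bar, or batch loop; it only implements `evaluate_item()` and lets the base class handle concurrency. The default-scoring path parses the judge LLM's JSON and combines the three sub-scores with normalized weights. A standalone sketch of just that arithmetic, assuming the scores have already been parsed from the judge response (the real code also carries the reasoning text and score breakdown into the output item):

def weighted_rag_score(scores: dict[str, float], weights: dict[str, float] | None = None) -> float:
    """Combine coverage/correctness/relevance into a single score, as in default scoring."""
    # Equal weights by default, mirroring the evaluator's fallback of 1/3 each
    weights = weights or {"coverage": 1 / 3, "correctness": 1 / 3, "relevance": 1 / 3}
    cov_w = weights.get("coverage", 1 / 3)
    cor_w = weights.get("correctness", 1 / 3)
    rel_w = weights.get("relevance", 1 / 3)

    # Normalize so the weights always sum to 1
    total = cov_w + cor_w + rel_w
    cov_w, cor_w, rel_w = cov_w / total, cor_w / total, rel_w / total

    return (cov_w * scores["coverage_score"] + cor_w * scores["correctness_score"] +
            rel_w * scores["relevance_score"])


# Example judge output: full coverage, mostly correct, fully relevant
print(weighted_rag_score({"coverage_score": 1.0, "correctness_score": 0.8, "relevance_score": 1.0}))
# ~0.933 with equal weights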
aiq/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py
CHANGED
@@ -69,13 +69,20 @@ def build_call_tree_for_example(example_df: pd.DataFrame) -> list[CallNode]:
             return "LLM"
         if evt.startswith("TOOL_"):
             return "TOOL"
+        if evt.startswith("FUNCTION_"):
+            return "FUNCTION"
+        if evt.startswith("SPAN_"):
+            return "FUNCTION"
         return None
 
     def get_op_name(row: pd.Series, op_type: str) -> str:
         if op_type == "LLM":
             return row.get("llm_name") or "unknown_llm"
+        if op_type == "FUNCTION":
+            return row.get("function_name") or "unknown_function"
         if op_type == "TOOL":
             return row.get("tool_name") or "unknown_tool"
+
         return "unknown_op"
 
     for _, row in example_df.iterrows():
@@ -309,6 +316,7 @@ def save_gantt_chart(all_nodes: list[CallNode], output_path: str) -> None:
     color_map = {
         "LLM": "tab:blue",
         "TOOL": "tab:green",
+        "FUNCTION": "tab:orange",
     }
     default_color = "tab:gray"
 
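The profiler's bottleneck analysis now recognizes function-level events: `FUNCTION_*` and `SPAN_*` intermediate steps are classified as `FUNCTION` operations, named from the `function_name` column and drawn in `tab:orange` on the Gantt chart. A minimal sketch of the updated classification helper, with made-up event strings for illustration:

def classify_event(event_type: str) -> str | None:
    """Map an intermediate-step event type to an operation class, as in build_call_tree_for_example()."""
    if event_type.startswith("LLM_"):
        return "LLM"
    if event_type.startswith("TOOL_"):
        return "TOOL"
    if event_type.startswith("FUNCTION_") or event_type.startswith("SPAN_"):
        return "FUNCTION"
    return None


# Hypothetical event types, just to show the mapping
events = ["LLM_START", "TOOL_END", "FUNCTION_START", "SPAN_END", "CUSTOM_EVENT"]
print([classify_event(e) for e in events])
# ['LLM', 'TOOL', 'FUNCTION', 'FUNCTION', None]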
{aiqtoolkit-1.2.0a20250616.dist-info → aiqtoolkit-1.2.0a20250617.dist-info}/RECORD
RENAMED
@@ -25,7 +25,7 @@ aiq/builder/eval_builder.py,sha256=UnNgtQiDAUfT3yuwjZQVerenI09-4q0Cse9uwLjk3Fg,4
 aiq/builder/evaluator.py,sha256=O6Gu0cUwQkrPxPX29Vf_-RopgijxPnhy7mhg_j-9A84,1162
 aiq/builder/framework_enum.py,sha256=eYwHQifZ86dx-OTubVA3qhCLRqhB4ElMBYBGA0gYtic,885
 aiq/builder/front_end.py,sha256=Xhvfi4VcDh5EoCtLr6AlLQfbRm8_TyugUc_IRfirN6Y,2225
-aiq/builder/function.py,sha256=
+aiq/builder/function.py,sha256=74mZuDemcgpuRAKfn_aSOz1Pqjem3x_9nR0Oh61Tai8,11727
 aiq/builder/function_base.py,sha256=AF5a56y-Nw9OpWsP8IFukUKM2FtP8758qYQW6EfObO0,13109
 aiq/builder/function_info.py,sha256=pGPIAL0tjVqLOJymIRB0boI9pzJGdXiPK3KiZvXQsqM,25266
 aiq/builder/intermediate_step_manager.py,sha256=aKjOK7Gk9XbKhKvRMQTylRGDFZJU7rwqSuiZYaPfwjA,7830
@@ -33,7 +33,7 @@ aiq/builder/llm.py,sha256=DcoYCyschsRjkW_yGsa_Ci7ELSpk5KRbi9778Dm_B9c,951
 aiq/builder/retriever.py,sha256=GM7L1T4NdNZKerFZiCfLcQOwsGoX0NRlF8my7SMq3l4,970
 aiq/builder/user_interaction_manager.py,sha256=OXr-RxWf1sEZjzQH_jt0nmqrLBtYLHGEZEcfDYYFV88,2913
 aiq/builder/workflow.py,sha256=UOjrXmu1sxWTxTjygszqYgK0gK65r_beLKUwOpxtXuc,5894
-aiq/builder/workflow_builder.py,sha256=
+aiq/builder/workflow_builder.py,sha256=0bRcNdjMCIfY60oaMu4hvH-mldIcxEDkm_lKRCBywnU,30412
 aiq/cli/__init__.py,sha256=Xs1JQ16L9btwreh4pdGKwskffAw1YFO48jKrU4ib_7c,685
 aiq/cli/entrypoint.py,sha256=BJsCZgC2nVyAWj7tBXwW67OIteg833xAI54R-e9O8oc,4757
 aiq/cli/main.py,sha256=yVTX5-5-21OOfG8qAdcK3M1fCQUxdr3G37Mb5OldPQc,1772
@@ -82,7 +82,7 @@ aiq/data_models/config.py,sha256=ERLjZY0iqexZ-gSXsCSN1UqgNeiwkEjWdYJEdKqeYTY,141
 aiq/data_models/dataset_handler.py,sha256=SifWhFHtxTMEjrXaXOYQgBOSKfWOzkc6OtOoPJ39pD4,3978
 aiq/data_models/discovery_metadata.py,sha256=OcITQc5VeML4bTHurrsMNiK_oB3z7wudMxcyN7LI8pY,12785
 aiq/data_models/embedder.py,sha256=0v917IiohVA_7zdF7hoO_zQcmNe4hQEFhh4fxRiYBbk,940
-aiq/data_models/evaluate.py,sha256=
+aiq/data_models/evaluate.py,sha256=tLL-AidxW6-VnEpIDYqGpvIdcNXnDee7Ooze9_bzXeY,4557
 aiq/data_models/evaluator.py,sha256=bd2njsyQB2t6ClJ66gJiCjYHsQpWZwPD7rsU0J109TI,939
 aiq/data_models/front_end.py,sha256=z8k6lSWjt1vMOYFbjWQxodpwAqPeuGS0hRBjsriDW2s,932
 aiq/data_models/function.py,sha256=M_duXVXL5MvYe0WVLvqEgEzXs0UAYNSMfy9ZTpxuKPA,1013
@@ -107,7 +107,7 @@ aiq/embedder/openai_embedder.py,sha256=5FO3xsyNvEmbLBsZb3xsCpbN1Soxio4yf4b5gTPVx
 aiq/embedder/register.py,sha256=3MTZrfNQKp6AZTbfaA-PpTnyXiMyu-8HH9JnDCC0v9o,978
 aiq/eval/__init__.py,sha256=Xs1JQ16L9btwreh4pdGKwskffAw1YFO48jKrU4ib_7c,685
 aiq/eval/config.py,sha256=IlOr2o618kbkXP0G1F-AklZfsKYVos9UB4Dvlxf66xk,1431
-aiq/eval/evaluate.py,sha256=
+aiq/eval/evaluate.py,sha256=AGEvmagd43jLq0aE_yNs_FFPFxVJEx49cu6Fl3WeQqA,17270
 aiq/eval/intermediate_step_adapter.py,sha256=4cSsGgFBvNjXnclk5FvZnQaFEdeulp7VEdRWKLcREAQ,4498
 aiq/eval/register.py,sha256=QOHJqA2CQixeWMC9InyKbzXo1jByvrntD_m9-2Mvg9k,1076
 aiq/eval/remote_workflow.py,sha256=Fb7Z6gdP2L_gqyWB--AEWfcXe9xPpQ_hPsf9lmqGXjI,5524
@@ -129,7 +129,7 @@ aiq/eval/trajectory_evaluator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5N
 aiq/eval/trajectory_evaluator/evaluate.py,sha256=Y51KMhJ9t8AoYWrQlrwipc2CtgIXA9IUGZTbKegtsnw,3257
 aiq/eval/trajectory_evaluator/register.py,sha256=kktT4fu5_1Cou-iohD3YhQevsWiR3TA5NpFSweVz0eQ,1709
 aiq/eval/tunable_rag_evaluator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-aiq/eval/tunable_rag_evaluator/evaluate.py,sha256=
+aiq/eval/tunable_rag_evaluator/evaluate.py,sha256=f4jfn9VVLmkOg631TQr2wy7hPwGMJMsQa4kmXsu0-Uc,13069
 aiq/eval/tunable_rag_evaluator/register.py,sha256=q4p2rFyMzWmaINJc961ZV4jzIlAN4GfWsoImHo0ovsY,2558
 aiq/eval/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 aiq/eval/utils/output_uploader.py,sha256=SaQbZPkw-Q0H7t5yG60Kh-p1cflR7gPklVkilC4uPbU,5141
@@ -208,7 +208,7 @@ aiq/profiler/inference_optimization/prompt_caching.py,sha256=LGfxJG4R2y4vMFoiFzt
 aiq/profiler/inference_optimization/token_uniqueness.py,sha256=OCNlVmemMLS2kt0OZIXOGt8MbrTy5mbdhSMPYHs31a4,4571
 aiq/profiler/inference_optimization/workflow_runtimes.py,sha256=lnGa0eTpHiDEbx9rX-tcx100qSd6amePLlgb4Gx7JBc,2664
 aiq/profiler/inference_optimization/bottleneck_analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-aiq/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py,sha256=
+aiq/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py,sha256=yr81PsQ4TcrEnuPDlRwhL9Hcox3gO855DsS-BDo00u0,16732
 aiq/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py,sha256=VZLBgsIUGOkY0ZUCLHQM4LpBQpJBM5JKRTUBGyoOFWU,11100
 aiq/profiler/inference_optimization/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 aiq/profiler/inference_optimization/experimental/concurrency_spike_analysis.py,sha256=J-oMRCEnd6I1XFXiyLUu8VPR745ptnzgzvn0Opsi208,16953
@@ -309,10 +309,10 @@ aiq/utils/reactive/base/observer_base.py,sha256=UAlyAY_ky4q2t0P81RVFo2Bs_R7z5Nde
 aiq/utils/reactive/base/subject_base.py,sha256=Ed-AC6P7cT3qkW1EXjzbd5M9WpVoeN_9KCe3OM3FLU4,2521
 aiq/utils/settings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 aiq/utils/settings/global_settings.py,sha256=U9TCLdoZsKq5qOVGjREipGVv9e-FlStzqy5zv82_VYk,7454
-aiqtoolkit-1.2.
-aiqtoolkit-1.2.
-aiqtoolkit-1.2.
-aiqtoolkit-1.2.
-aiqtoolkit-1.2.
-aiqtoolkit-1.2.
-aiqtoolkit-1.2.
+aiqtoolkit-1.2.0a20250617.dist-info/licenses/LICENSE-3rd-party.txt,sha256=8o7aySJa9CBvFshPcsRdJbczzdNyDGJ8b0J67WRUQ2k,183936
+aiqtoolkit-1.2.0a20250617.dist-info/licenses/LICENSE.md,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+aiqtoolkit-1.2.0a20250617.dist-info/METADATA,sha256=7U1QTdteJlIPEhjypLhDj_VsNOp3dcGC5GyK4sPecD8,20274
+aiqtoolkit-1.2.0a20250617.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+aiqtoolkit-1.2.0a20250617.dist-info/entry_points.txt,sha256=gRlPfR5g21t328WNEQ4CcEz80S1sJNS8A7rMDYnzl4A,452
+aiqtoolkit-1.2.0a20250617.dist-info/top_level.txt,sha256=fo7AzYcNhZ_tRWrhGumtxwnxMew4xrT1iwouDy_f0Kc,4
+aiqtoolkit-1.2.0a20250617.dist-info/RECORD,,
{aiqtoolkit-1.2.0a20250616.dist-info → aiqtoolkit-1.2.0a20250617.dist-info}/WHEEL
RENAMED
File without changes
{aiqtoolkit-1.2.0a20250616.dist-info → aiqtoolkit-1.2.0a20250617.dist-info}/entry_points.txt
RENAMED
File without changes
{aiqtoolkit-1.2.0a20250616.dist-info → aiqtoolkit-1.2.0a20250617.dist-info}/licenses/LICENSE-3rd-party.txt
RENAMED
File without changes
{aiqtoolkit-1.2.0a20250616.dist-info → aiqtoolkit-1.2.0a20250617.dist-info}/licenses/LICENSE.md
RENAMED
File without changes
{aiqtoolkit-1.2.0a20250616.dist-info → aiqtoolkit-1.2.0a20250617.dist-info}/top_level.txt
RENAMED
File without changes