PyPI - aiqtoolkit - Versions diffs - 1.2.0a20250621__py3-none-any.whl → 1.2.0a20250623__py3-none-any.whl - Mend

aiqtoolkit 1.2.0a20250621__py3-none-any.whl → 1.2.0a20250623__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of aiqtoolkit might be problematic. Click here for more details.

Files changed (9) hide show

aiq/eval/evaluate.py CHANGED Viewed

@@ -32,6 +32,7 @@ from aiq.eval.evaluator.evaluator_model import EvalInput
 from aiq.eval.evaluator.evaluator_model import EvalInputItem
 from aiq.eval.evaluator.evaluator_model import EvalOutput
 from aiq.eval.utils.output_uploader import OutputUploader
+from aiq.eval.utils.weave_eval import WeaveEvaluationIntegration
 from aiq.runtime.session import AIQSessionManager
 logger = logging.getLogger(__name__)
@@ -54,7 +55,7 @@ class EvaluationRun:  # pylint: disable=too-many-public-methods
         # Helpers
         self.intermediate_step_adapter: IntermediateStepAdapter = IntermediateStepAdapter()
+        self.weave_eval: WeaveEvaluationIntegration = WeaveEvaluationIntegration()
         # Metadata
         self.eval_input: EvalInput | None = None
         self.workflow_interrupted: bool = False
@@ -138,6 +139,8 @@ class EvaluationRun:  # pylint: disable=too-many-public-methods
                 item.output_obj = output
                 item.trajectory = self.intermediate_step_adapter.validate_intermediate_steps(intermediate_steps)
+                self.weave_eval.log_prediction(item, output)
         async def wrapped_run(item: EvalInputItem) -> None:
             await run_one(item)
             pbar.update(1)
@@ -268,11 +271,15 @@ class EvaluationRun:  # pylint: disable=too-many-public-methods
                    "`eval` with the --skip_completed_entries flag.")
             logger.warning(msg)
+        self.weave_eval.log_summary(self.evaluation_results)
     async def run_single_evaluator(self, evaluator_name: str, evaluator: Any):
         """Run a single evaluator and store its results."""
         try:
             eval_output = await evaluator.evaluate_fn(self.eval_input)
             self.evaluation_results.append((evaluator_name, eval_output))
+            await self.weave_eval.alog_score(eval_output, evaluator_name)
         except Exception as e:
             logger.exception("An error occurred while running evaluator %s: %s", evaluator_name, e, exc_info=True)
@@ -289,6 +296,9 @@ class EvaluationRun:  # pylint: disable=too-many-public-methods
         except Exception as e:
             logger.exception("An error occurred while running evaluators: %s", e, exc_info=True)
             raise
+        finally:
+            # Finish prediction loggers in Weave
+            await self.weave_eval.afinish_loggers()
     def apply_overrides(self):
         from aiq.cli.cli_utils.config_override import load_and_override_config
@@ -362,6 +372,11 @@ class EvaluationRun:  # pylint: disable=too-many-public-methods
         # Run workflow and evaluate
         async with WorkflowEvalBuilder.from_config(config=config) as eval_workflow:
+            # Initialize Weave integration
+            self.weave_eval.initialize_client()
+            if self.weave_eval.client:
+                self.weave_eval.initialize_logger(self.eval_input, config)
             if self.config.endpoint:
                 await self.run_workflow_remote()
             else:

aiq/eval/utils/weave_eval.py ADDED Viewed

@@ -0,0 +1,135 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import asyncio
+import logging
+from typing import Any
+from typing import List
+from aiq.eval.evaluator.evaluator_model import EvalInput
+from aiq.eval.evaluator.evaluator_model import EvalInputItem
+from aiq.eval.evaluator.evaluator_model import EvalOutput
+logger = logging.getLogger(__name__)
+class WeaveEvaluationIntegration:  # pylint: disable=too-many-public-methods
+    """
+    Best-effort bridge between an evaluation run and Weave's imperative evaluation loggers.
+
+    Weave is an optional dependency. When it cannot be imported, when no Weave client is
+    found, or when the `EvaluationLogger` cannot be created, every logging method of this
+    class is a no-op. Failures while logging a prediction or finishing prediction loggers
+    are reported as warnings instead of being raised, so telemetry problems do not abort
+    the evaluation itself.
+    """
+    def __init__(self):
+        self.available = False
+        self.client = None
+        self.eval_logger = None
+        # Maps an eval item id to the prediction logger Weave returned for that item.
+        self.pred_loggers = {}
+        # Weave entry points; they stay None when the optional `weave` package is missing,
+        # so attribute access never fails on an instance created without Weave.
+        self.EvaluationLogger = None
+        self.ScoreLogger = None
+        self.weave_client_context = None
+        try:
+            from weave.flow.eval_imperative import EvaluationLogger
+            from weave.flow.eval_imperative import ScoreLogger
+            from weave.trace.context import weave_client_context
+        except ImportError:
+            # Weave is not installed: leave `available` False, which disables everything else.
+            return
+        self.EvaluationLogger = EvaluationLogger
+        self.ScoreLogger = ScoreLogger
+        self.weave_client_context = weave_client_context
+        self.available = True
+    @staticmethod
+    def _item_inputs(item: EvalInputItem) -> dict:
+        """Dump an eval item without its `output_obj` and `trajectory` fields."""
+        return item.model_dump(exclude={"output_obj", "trajectory"})
+    def initialize_client(self):
+        """
+        Look up the Weave client and store it in `self.client`.
+
+        Returns:
+            True when Weave is importable and a client was obtained, False otherwise.
+        """
+        if not self.available:
+            return False
+        try:
+            self.client = self.weave_client_context.require_weave_client()
+        except Exception:  # pylint: disable=broad-except
+            # Presumably raised when no Weave client has been initialized; logging stays disabled.
+            self.client = None
+            return False
+        return self.client is not None
+    def initialize_logger(self, eval_input: EvalInput, config: Any):
+        """
+        Create the Weave `EvaluationLogger` for this run and reset the prediction loggers.
+
+        Args:
+            eval_input: Evaluation input whose items form the dataset passed to Weave.
+            config: Object exposing `model_dump(mode="json")`; the dump is passed to Weave as the model.
+
+        Returns:
+            True when the logger was created; False when there is no client or creation failed.
+        """
+        if not self.client:
+            return False
+        try:
+            weave_dataset = [self._item_inputs(item) for item in eval_input.eval_input_items]
+            config_dict = config.model_dump(mode="json")
+            # TODO: make this configurable
+            config_dict["name"] = "aiqtoolkit-eval"
+            self.eval_logger = self.EvaluationLogger(model=config_dict, dataset=weave_dataset)
+        except Exception as e:  # pylint: disable=broad-except
+            self.eval_logger = None
+            logger.warning("Failed to initialize Weave `EvaluationLogger`: %s", e)
+            return False
+        self.pred_loggers = {}
+        return True
+    def log_prediction(self, item: EvalInputItem, output: Any):
+        """
+        Log one workflow output to Weave and remember the returned prediction logger by item id.
+
+        A failure inside Weave is logged as a warning and otherwise ignored; the item simply
+        gets no prediction logger, so later score logging skips it.
+        """
+        if not self.eval_logger:
+            return
+        try:
+            pred_logger = self.eval_logger.log_prediction(inputs=self._item_inputs(item), output=output)
+        except Exception as e:  # pylint: disable=broad-except
+            logger.warning("Failed to log prediction for item %s to Weave: %s", item.id, e)
+            return
+        self.pred_loggers[item.id] = pred_logger
+    async def alog_score(self, eval_output: EvalOutput, evaluator_name: str):
+        """
+        Log each output item's score on the prediction logger registered for the same item id.
+
+        Items without a registered prediction logger are skipped.
+        """
+        if not self.eval_logger:
+            return
+        for eval_output_item in eval_output.eval_output_items:
+            pred_logger = self.pred_loggers.get(eval_output_item.id)
+            if pred_logger is None:
+                continue
+            await pred_logger.alog_score(
+                scorer=evaluator_name,
+                score=eval_output_item.score,
+            )
+    async def afinish_loggers(self):
+        """
+        Finish every prediction logger that has not been finished yet.
+
+        All loggers are attempted even if some fail; each failure is logged as a warning so
+        that this cleanup step cannot mask an error raised by the code that triggered it.
+        """
+        if not self.eval_logger:
+            return
+        async def _finish_one(pred_logger):
+            # Skip loggers already marked finished. Loggers that do not expose the private
+            # `_has_finished` flag are treated as unfinished and finished unconditionally.
+            if getattr(pred_logger, "_has_finished", False):
+                return
+            # run the *blocking* finish() in a thread so we do not nest loops
+            await asyncio.to_thread(pred_logger.finish)
+        results = await asyncio.gather(*(_finish_one(pl) for pl in self.pred_loggers.values()),
+                                       return_exceptions=True)
+        for result in results:
+            if isinstance(result, BaseException):
+                logger.warning("Failed to finish a Weave prediction logger: %s", result)
+    def log_summary(self, evaluation_results: List[tuple[str, EvalOutput]]):
+        """
+        Log the per-evaluator average score to Weave under the key `<evaluator_name>_avg`.
+
+        Items whose score is None are ignored. An evaluator with no scores, or with scores
+        that cannot be averaged, contributes no entry to the summary.
+        """
+        if not self.eval_logger:
+            return
+        summary = {}
+        for evaluator_name, eval_output in evaluation_results:
+            scores = [item.score for item in eval_output.eval_output_items if item.score is not None]
+            if not scores:
+                continue
+            try:
+                summary[f"{evaluator_name}_avg"] = sum(scores) / len(scores)
+            except TypeError:
+                # Scores that are not numeric (for example strings or dicts) cannot be averaged.
+                logger.warning("Skipping Weave summary for evaluator %s: scores are not numeric", evaluator_name)
+        # Log the summary to finish the evaluation
+        self.eval_logger.log_summary(summary)

{aiqtoolkit-1.2.0a20250621.dist-info → aiqtoolkit-1.2.0a20250623.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aiqtoolkit
-Version: 1.2.0a20250621
+Version: 1.2.0a20250623
 Summary: NVIDIA Agent Intelligence toolkit
 Author: NVIDIA Corporation
 Maintainer: NVIDIA Corporation

{aiqtoolkit-1.2.0a20250621.dist-info → aiqtoolkit-1.2.0a20250623.dist-info}/RECORD RENAMED Viewed

@@ -107,7 +107,7 @@ aiq/embedder/openai_embedder.py,sha256=5FO3xsyNvEmbLBsZb3xsCpbN1Soxio4yf4b5gTPVx
 aiq/embedder/register.py,sha256=3MTZrfNQKp6AZTbfaA-PpTnyXiMyu-8HH9JnDCC0v9o,978
 aiq/eval/__init__.py,sha256=Xs1JQ16L9btwreh4pdGKwskffAw1YFO48jKrU4ib_7c,685
 aiq/eval/config.py,sha256=IlOr2o618kbkXP0G1F-AklZfsKYVos9UB4Dvlxf66xk,1431
-aiq/eval/evaluate.py,sha256=AGEvmagd43jLq0aE_yNs_FFPFxVJEx49cu6Fl3WeQqA,17270
+aiq/eval/evaluate.py,sha256=VdVdB_CV842gIV4diHciJ1qrof5_N3H8I16WwracCsQ,17940
 aiq/eval/intermediate_step_adapter.py,sha256=4cSsGgFBvNjXnclk5FvZnQaFEdeulp7VEdRWKLcREAQ,4498
 aiq/eval/register.py,sha256=QOHJqA2CQixeWMC9InyKbzXo1jByvrntD_m9-2Mvg9k,1076
 aiq/eval/remote_workflow.py,sha256=Fb7Z6gdP2L_gqyWB--AEWfcXe9xPpQ_hPsf9lmqGXjI,5524
@@ -134,6 +134,7 @@ aiq/eval/tunable_rag_evaluator/register.py,sha256=q4p2rFyMzWmaINJc961ZV4jzIlAN4G
 aiq/eval/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 aiq/eval/utils/output_uploader.py,sha256=SaQbZPkw-Q0H7t5yG60Kh-p1cflR7gPklVkilC4uPbU,5141
 aiq/eval/utils/tqdm_position_registry.py,sha256=9CtpCk1wtYCSyieHPaSp8nlZu6EcNUOaUz2RTqfekrA,1286
+aiq/eval/utils/weave_eval.py,sha256=yIdlp4UdCPgwFYJNJon5eZD1d99E-6dcmfVg6B-4RKE,5076
 aiq/front_ends/__init__.py,sha256=Xs1JQ16L9btwreh4pdGKwskffAw1YFO48jKrU4ib_7c,685
 aiq/front_ends/register.py,sha256=OKv1xi-g8WHtUMuIPhwjG6wOYqaGDD-Q9vDtKtT9d1Y,889
 aiq/front_ends/console/__init__.py,sha256=Xs1JQ16L9btwreh4pdGKwskffAw1YFO48jKrU4ib_7c,685
@@ -309,10 +310,10 @@ aiq/utils/reactive/base/observer_base.py,sha256=UAlyAY_ky4q2t0P81RVFo2Bs_R7z5Nde
 aiq/utils/reactive/base/subject_base.py,sha256=Ed-AC6P7cT3qkW1EXjzbd5M9WpVoeN_9KCe3OM3FLU4,2521
 aiq/utils/settings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 aiq/utils/settings/global_settings.py,sha256=U9TCLdoZsKq5qOVGjREipGVv9e-FlStzqy5zv82_VYk,7454
-aiqtoolkit-1.2.0a20250621.dist-info/licenses/LICENSE-3rd-party.txt,sha256=8o7aySJa9CBvFshPcsRdJbczzdNyDGJ8b0J67WRUQ2k,183936
-aiqtoolkit-1.2.0a20250621.dist-info/licenses/LICENSE.md,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
-aiqtoolkit-1.2.0a20250621.dist-info/METADATA,sha256=D14wpPMZp_1mnHcSmKNtKAky2uL_D_5b2i0rs39bkiM,20274
-aiqtoolkit-1.2.0a20250621.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-aiqtoolkit-1.2.0a20250621.dist-info/entry_points.txt,sha256=gRlPfR5g21t328WNEQ4CcEz80S1sJNS8A7rMDYnzl4A,452
-aiqtoolkit-1.2.0a20250621.dist-info/top_level.txt,sha256=fo7AzYcNhZ_tRWrhGumtxwnxMew4xrT1iwouDy_f0Kc,4
-aiqtoolkit-1.2.0a20250621.dist-info/RECORD,,
+aiqtoolkit-1.2.0a20250623.dist-info/licenses/LICENSE-3rd-party.txt,sha256=8o7aySJa9CBvFshPcsRdJbczzdNyDGJ8b0J67WRUQ2k,183936
+aiqtoolkit-1.2.0a20250623.dist-info/licenses/LICENSE.md,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+aiqtoolkit-1.2.0a20250623.dist-info/METADATA,sha256=M98GDq-TQ5hxx-6C2mgkvNJD-NPsksihuT96qOukjEE,20274
+aiqtoolkit-1.2.0a20250623.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+aiqtoolkit-1.2.0a20250623.dist-info/entry_points.txt,sha256=gRlPfR5g21t328WNEQ4CcEz80S1sJNS8A7rMDYnzl4A,452
+aiqtoolkit-1.2.0a20250623.dist-info/top_level.txt,sha256=fo7AzYcNhZ_tRWrhGumtxwnxMew4xrT1iwouDy_f0Kc,4
+aiqtoolkit-1.2.0a20250623.dist-info/RECORD,,

{aiqtoolkit-1.2.0a20250621.dist-info → aiqtoolkit-1.2.0a20250623.dist-info}/WHEEL RENAMED Viewed

File without changes

{aiqtoolkit-1.2.0a20250621.dist-info → aiqtoolkit-1.2.0a20250623.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{aiqtoolkit-1.2.0a20250621.dist-info → aiqtoolkit-1.2.0a20250623.dist-info}/licenses/LICENSE-3rd-party.txt RENAMED Viewed

File without changes

{aiqtoolkit-1.2.0a20250621.dist-info → aiqtoolkit-1.2.0a20250623.dist-info}/licenses/LICENSE.md RENAMED Viewed

File without changes

{aiqtoolkit-1.2.0a20250621.dist-info → aiqtoolkit-1.2.0a20250623.dist-info}/top_level.txt RENAMED Viewed

File without changes

aiqtoolkit 1.2.0a20250621__py3-none-any.whl → 1.2.0a20250623__py3-none-any.whl

Potentially problematic release.

aiqtoolkit 1.2.0a20250621__py3-none-any.whl → 1.2.0a20250623__py3-none-any.whl