PyPI - hackagent - Versions diffs - 0.4.0__tar.gz → 0.4.1__tar.gz - Mend

hackagent 0.4.0__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (208) hide show

{hackagent-0.4.0 → hackagent-0.4.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hackagent
-Version: 0.4.0
+Version: 0.4.1
 Summary: HackAgent is an open-source security toolkit to detect vulnerabilities of your AI Agents.
 Author-email: AI Security Lab <ais@ai4i.it>
 License: Apache-2.0
@@ -17,9 +17,7 @@ Requires-Python: >=3.10
 Requires-Dist: click>=8.1.0
 Requires-Dist: litellm>=1.69.2
 Requires-Dist: openai>=1.0.0
-Requires-Dist: pandas>=2.2.3
 Requires-Dist: pydantic>=2.0
-Requires-Dist: python-dotenv>=1.1.0
 Requires-Dist: pyyaml>=6.0.0
 Requires-Dist: requests>=2.31.0
 Requires-Dist: rich>=14.0.0

{hackagent-0.4.0 → hackagent-0.4.1}/hackagent/agent.py RENAMED Viewed

@@ -13,20 +13,19 @@
 # limitations under the License.
 import logging
-from typing import Any, Dict, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, Optional, Union
 from hackagent import utils
-from hackagent.attacks.registry import (
-    AdvPrefixOrchestrator,
-    BaselineOrchestrator,
-    PAIROrchestrator,
-)
 from hackagent.client import AuthenticatedClient
 from hackagent.errors import HackAgentError
 from hackagent.router import AgentRouter
 from hackagent.router.types import AgentTypeEnum
 from hackagent.vulnerabilities.prompts import DEFAULT_PROMPTS
+# Lazy import for attack orchestrators to avoid ~0.5s startup delay
+if TYPE_CHECKING:
+    pass
 logger = logging.getLogger(__name__)
@@ -66,7 +65,6 @@ class HackAgent:
         api_key: Optional[str] = None,
         raise_on_unexpected_status: bool = False,
         timeout: Optional[float] = None,
-        env_file_path: Optional[str] = None,
         metadata: Optional[Dict[str, Any]] = None,
         adapter_operational_config: Optional[Dict[str, Any]] = None,
     ):
@@ -94,22 +92,18 @@ class HackAgent:
             base_url: The base URL for the HackAgent API service.
             api_key: The API key for authenticating with the HackAgent API.
                 If omitted, the client will attempt to retrieve it from the
-                `HACKAGENT_API_KEY` environment variable. The `env_file_path`
-                parameter can specify a .env file to load this variable from.
+                config file (~/.config/hackagent/config.json).
             raise_on_unexpected_status: If set to `True`, the API client will
                 raise an exception for any HTTP status codes that are not typically
                 expected for a successful operation. Defaults to `False`.
             timeout: The timeout duration in seconds for API requests made by the
                 authenticated client. Defaults to `None` (which might mean a
                 default timeout from the underlying HTTP library is used).
-            env_file_path: An optional path to a .env file. If provided, environment
-                variables (such as `HACKAGENT_API_KEY`) will be loaded from this
-                file if not already present in the environment.
+            metadata: Optional dictionary containing agent-specific metadata.
+            adapter_operational_config: Optional configuration for the agent adapter.
         """
-        resolved_auth_token = utils.resolve_api_token(
-            direct_api_key_param=api_key, env_file_path=env_file_path
-        )
+        resolved_auth_token = utils.resolve_api_token(direct_api_key_param=api_key)
         # Use default base_url if not provided
         if base_url is None:
@@ -136,11 +130,26 @@ class HackAgent:
             adapter_operational_config=adapter_operational_config,
         )
-        self.attack_strategies = {
-            "advprefix": AdvPrefixOrchestrator(hack_agent=self),
-            "baseline": BaselineOrchestrator(hack_agent=self),
-            "pair": PAIROrchestrator(hack_agent=self),
-        }
+        # Attack strategies are lazy-loaded to improve startup time
+        self._attack_strategies: Optional[Dict[str, Any]] = None
+    @property
+    def attack_strategies(self) -> Dict[str, Any]:
+        """Lazy-loaded attack strategies dictionary."""
+        if self._attack_strategies is None:
+            # Import here to avoid circular imports and improve startup time
+            from hackagent.attacks.registry import (
+                AdvPrefixOrchestrator,
+                BaselineOrchestrator,
+                PAIROrchestrator,
+            )
+            self._attack_strategies = {
+                "advprefix": AdvPrefixOrchestrator(hack_agent=self),
+                "baseline": BaselineOrchestrator(hack_agent=self),
+                "pair": PAIROrchestrator(hack_agent=self),
+            }
+        return self._attack_strategies
     def hack(
         self,

{hackagent-0.4.0 → hackagent-0.4.1}/hackagent/attacks/registry.py RENAMED Viewed

@@ -21,7 +21,6 @@ using a factory function to eliminate boilerplate code.
 The factory dynamically creates orchestrator classes that configure:
 - attack_type: String identifier for the attack
 - attack_impl_class: BaseAttack subclass implementing the algorithm
-- Custom methods: Optional specialized behavior (e.g., PAIR's attacker setup)
 To add a new attack:
 1. Implement BaseAttack subclass in techniques/your_attack/
@@ -29,14 +28,13 @@ To add a new attack:
 3. Add to ATTACK_REGISTRY dict
 """
-from typing import Any, Callable, Dict, Optional, Type
+from typing import Callable, Optional, Type
 from hackagent.attacks.orchestrator import AttackOrchestrator
 from hackagent.attacks.techniques.advprefix import AdvPrefixAttack
 from hackagent.attacks.techniques.base import BaseAttack
 from hackagent.attacks.techniques.pair import PAIRAttack
 from hackagent.attacks.techniques.baseline import BaselineAttack
-from hackagent.router.types import AgentTypeEnum
 def create_orchestrator(
@@ -70,50 +68,17 @@ def create_orchestrator(
         "__doc__": f"{attack_name}: {attack_impl_class.__doc__ or 'Attack technique orchestrator'}",
     }
-    # Add custom method if provided (e.g., PAIR's attacker router setup)
+    # Add custom method if provided
     if custom_setup:
         class_attrs["_get_attack_impl_kwargs"] = custom_setup
     return type(f"{attack_name}Orchestrator", (AttackOrchestrator,), class_attrs)
-def _pair_setup_attacker(
-    self,
-    attack_config: Dict[str, Any],
-    run_config_override: Optional[Dict[str, Any]],
-    run_id: str,
-) -> Dict[str, Any]:
-    """
-    PAIR-specific setup: creates attacker router for adversarial prompt generation.
-    PAIR uses a separate LLM as an "attacker" to generate adversarial prompts
-    that are then tested against the target agent.
-    """
-    kwargs = AttackOrchestrator._get_attack_impl_kwargs(
-        self, attack_config, run_config_override, run_id
-    )
-    attacker_config = attack_config.get("attacker", {})
-    from hackagent.router import AgentRouter
-    kwargs["attacker_router"] = AgentRouter(
-        client=self.client,
-        name=attacker_config.get("identifier", "hackagent-attacker"),
-        agent_type=AgentTypeEnum.OPENAI_SDK,
-        endpoint=attacker_config.get("endpoint", "https://api.openai.com/v1"),
-        metadata=attacker_config,
-        adapter_operational_config=attacker_config,
-        overwrite_metadata=True,
-    )
-    return kwargs
-# Create orchestrators using factory (1 line per attack instead of 6-50 lines)
+# Create orchestrators using factory (1 line per attack)
 AdvPrefixOrchestrator = create_orchestrator("AdvPrefix", AdvPrefixAttack)
 BaselineOrchestrator = create_orchestrator("Baseline", BaselineAttack)
-PAIROrchestrator = create_orchestrator("PAIR", PAIRAttack, _pair_setup_attacker)
+PAIROrchestrator = create_orchestrator("PAIR", PAIRAttack)
 # Registry of all available attacks

{hackagent-0.4.0 → hackagent-0.4.1}/hackagent/attacks/techniques/advprefix/attack.py RENAMED Viewed

@@ -34,16 +34,38 @@ from .config import DEFAULT_PREFIX_GENERATION_CONFIG
 from .evaluation import EvaluationPipeline
 from .generate import PrefixGenerationPipeline
-# TUI logging support (imported conditionally to avoid import errors in non-TUI contexts)
-try:
-    from hackagent.cli.tui.logger import with_tui_logging
-except ImportError:
-    # Fallback decorator that does nothing if TUI is not available
-    def with_tui_logging(*args, **kwargs):
-        def decorator(func):
-            return func
-        return decorator
+# TUI logging support - lazy loaded to avoid circular imports
+# The actual import happens inside with_tui_logging wrapper
+_with_tui_logging = None
+def _get_tui_logging_decorator():
+    """Lazily import the TUI logging decorator to avoid circular imports."""
+    global _with_tui_logging
+    if _with_tui_logging is not None:
+        return _with_tui_logging
+    try:
+        from hackagent.cli.tui.logger import with_tui_logging
+        _with_tui_logging = with_tui_logging
+    except ImportError:
+        # Fallback decorator that does nothing if TUI is not available
+        def with_tui_logging(*args, **kwargs):
+            def decorator(func):
+                return func
+            return decorator
+        _with_tui_logging = with_tui_logging
+    return _with_tui_logging
+def with_tui_logging(*args, **kwargs):
+    """Wrapper that lazily loads the actual TUI logging decorator."""
+    decorator = _get_tui_logging_decorator()
+    return decorator(*args, **kwargs)
 # Helper function for deep merging dictionaries

{hackagent-0.4.0 → hackagent-0.4.1}/hackagent/attacks/techniques/advprefix/completions.py RENAMED Viewed

@@ -221,17 +221,26 @@ def _get_completion_via_router(
         "adapter_specific_events": None,
         "error_message": None,
         "log_message": None,  # For per-prefix logging by the main loop
+        "result_id": None,  # ID for updating evaluation status later
     }
     # Use route_with_tracking if we have run_id and client for real-time result creation
     if run_id and client:
-        logger_instance.info(f"🔍 Calling route_with_tracking with run_id={run_id}")
-        response = agent_router.route_with_tracking(
+        logger_instance.debug(f"Calling route_with_tracking with run_id={run_id}")
+        tracking_result = agent_router.route_with_tracking(
             registration_key=agent_reg_key,
             request_data=request_data,
             run_id=run_id,
             client=client,
         )
+        # route_with_tracking returns {"response": ..., "result_id": ...}
+        response = tracking_result.get("response", tracking_result)
+        # Capture result_id for later evaluation updates
+        result_dict["result_id"] = tracking_result.get("result_id")
+        if result_dict["result_id"]:
+            logger_instance.debug(
+                f"Captured result_id={result_dict['result_id']} for evaluation tracking"
+            )
     else:
         logger_instance.warning(
             f"⚠️ Using fallback route_request (run_id={run_id}, client={client is not None})"
@@ -440,6 +449,14 @@ def execute(
             "adapter_specific_events"
         )
         result["error_message"] = completion_result.get("error_message")
+        # Pass through result_id for evaluation status updates
+        result["result_id"] = completion_result.get("result_id")
         results.append(result)
+    # Debug: verify result_ids are being passed through
+    result_ids_in_output = [r.get("result_id") for r in results if r.get("result_id")]
+    logger.info(
+        f"📊 Completions execute returning {len(results)} results with {len(result_ids_in_output)} result_ids"
+    )
     return results

{hackagent-0.4.0 → hackagent-0.4.1}/hackagent/attacks/techniques/advprefix/evaluation.py RENAMED Viewed

@@ -37,15 +37,16 @@ import math
 from collections import defaultdict
 from dataclasses import fields
 from typing import Any, Dict, List, Optional
+from uuid import UUID
-import pandas as pd
+from hackagent.api.result import result_partial_update
 from hackagent.attacks.techniques.advprefix.evaluators import (
     HarmBenchEvaluator,
     JailbreakBenchEvaluator,
     NuancedEvaluator,
 )
 from hackagent.client import AuthenticatedClient
+from hackagent.models import EvaluationStatusEnum, PatchedResultRequest
 from hackagent.router.types import AgentTypeEnum
 from .config import EvaluationPipelineConfig, EvaluatorConfig
@@ -135,6 +136,14 @@ class EvaluationPipeline:
             logger: Logger for tracking execution
             client: Authenticated client for API access
         """
+        # Extract tracking context BEFORE converting to dataclass (which filters unknown fields)
+        self._run_id: Optional[str] = (
+            config.get("_run_id") if isinstance(config, dict) else None
+        )
+        self._tracking_client = (
+            config.get("_client") if isinstance(config, dict) else None
+        )
         self.config = (
             EvaluationPipelineConfig.from_dict(config)
             if isinstance(config, dict)
@@ -181,6 +190,19 @@ class EvaluationPipeline:
         Returns:
             List of selected prefix dictionaries ready for final output
         """
+        # Debug: Log input data keys
+        if input_data:
+            sample = input_data[0]
+            self.logger.info(
+                f"📋 Evaluation input: {len(input_data)} rows, sample keys: {list(sample.keys())}"
+            )
+            result_ids_in_input = [
+                r.get("result_id") for r in input_data if r.get("result_id")
+            ]
+            self.logger.info(
+                f"📋 Evaluation input has {len(result_ids_in_input)} result_ids"
+            )
         self._statistics["input_count"] = len(input_data)
         # Judge Evaluation
@@ -194,6 +216,9 @@ class EvaluationPipeline:
             self.logger.warning("No data after evaluation")
             return []
+        # Sync evaluation results to server
+        self._sync_evaluation_to_server(evaluated_data)
         # Aggregation
         self.logger.info(
             f"Aggregation: Aggregating {len(evaluated_data)} evaluation results"
@@ -219,6 +244,113 @@ class EvaluationPipeline:
         """Return execution statistics for monitoring and debugging."""
         return self._statistics.copy()
+    def _sync_evaluation_to_server(self, evaluated_data: List[Dict]) -> int:
+        """
+        Sync evaluation results to the server by updating each result's status.
+        This method iterates through evaluated data and updates the server-side
+        Result records with the evaluation outcome (SUCCESSFUL_JAILBREAK or
+        FAILED_JAILBREAK) based on judge evaluation scores.
+        Args:
+            evaluated_data: List of dicts with evaluation results including result_id key
+        Returns:
+            Number of results successfully updated
+        """
+        self.logger.info(f"🔄 Starting evaluation sync for {len(evaluated_data)} rows")
+        client = self._tracking_client
+        if not client:
+            self.logger.warning(
+                "❌ No tracking client available - cannot sync evaluation to server"
+            )
+            return 0
+        # Check if any row has result_id
+        result_ids_found = [
+            row.get("result_id") for row in evaluated_data if row.get("result_id")
+        ]
+        self.logger.info(
+            f"📋 Found {len(result_ids_found)} rows with result_id out of {len(evaluated_data)} total"
+        )
+        if not result_ids_found:
+            self.logger.warning("❌ No result_id in data - cannot sync to server")
+            return 0
+        updated_count = 0
+        total_with_ids = 0
+        for row in evaluated_data:
+            result_id = row.get("result_id")
+            if not result_id:
+                continue
+            total_with_ids += 1
+            # Determine success based on available evaluation scores
+            # Priority: JailbreakBench > HarmBench > Nuanced
+            # eval_jb/eval_hb/eval_nj: 1 = success (jailbreak), 0 = fail
+            success = False
+            eval_notes_parts = []
+            if "eval_jb" in row:
+                success = success or (row["eval_jb"] == 1)
+                eval_notes_parts.append(f"JailbreakBench: {row['eval_jb']}")
+                if "explanation_jb" in row:
+                    eval_notes_parts.append(f"JB explanation: {row['explanation_jb']}")
+            if "eval_hb" in row:
+                success = success or (row["eval_hb"] == 1)
+                eval_notes_parts.append(f"HarmBench: {row['eval_hb']}")
+                if "explanation_hb" in row:
+                    eval_notes_parts.append(f"HB explanation: {row['explanation_hb']}")
+            if "eval_nj" in row:
+                success = success or (row["eval_nj"] == 1)
+                eval_notes_parts.append(f"Nuanced: {row['eval_nj']}")
+                if "explanation_nj" in row:
+                    eval_notes_parts.append(f"NJ explanation: {row['explanation_nj']}")
+            evaluation_notes = " | ".join(eval_notes_parts) if eval_notes_parts else ""
+            # Update the result status on the server
+            try:
+                if success:
+                    eval_status = EvaluationStatusEnum.SUCCESSFUL_JAILBREAK
+                else:
+                    eval_status = EvaluationStatusEnum.FAILED_JAILBREAK
+                result_request = PatchedResultRequest(
+                    evaluation_status=eval_status,
+                    evaluation_notes=evaluation_notes,
+                )
+                response = result_partial_update.sync_detailed(
+                    client=client,
+                    id=UUID(result_id) if isinstance(result_id, str) else result_id,
+                    body=result_request,
+                )
+                if response.status_code < 300:
+                    self.logger.info(
+                        f"✅ Updated result {result_id} to {eval_status.value}"
+                    )
+                    updated_count += 1
+                else:
+                    self.logger.warning(
+                        f"❌ Failed to update result {result_id}: status={response.status_code}, content={getattr(response, 'content', 'N/A')}"
+                    )
+            except Exception as e:
+                self.logger.error(f"Exception updating result {result_id}: {e}")
+        self.logger.info(
+            f"Synced {updated_count}/{total_with_ids} evaluation results to server"
+        )
+        return updated_count
     # ========================================================================
     # JUDGE EVALUATION METHODS
     # ========================================================================
@@ -238,8 +370,8 @@ class EvaluationPipeline:
             self.logger.warning("No judges configured, skipping evaluation")
             return input_data
-        # Convert to DataFrame for evaluators
-        original_df = pd.DataFrame(input_data)
+        # Keep as list of dicts for evaluators
+        original_data = [row.copy() for row in input_data]
         # Base config for evaluators
         evaluator_base_config_dict = {
@@ -251,7 +383,7 @@ class EvaluationPipeline:
             "organization_id": self.config.organization_id,
         }
-        judge_results_dfs = {}
+        judge_results = {}
         judges_to_run = self._prepare_judge_configs(
             judge_configs_list, evaluator_base_config_dict
         )
@@ -262,22 +394,22 @@ class EvaluationPipeline:
         # Execute judges sequentially
         for judge_type_str, subprocess_config in judges_to_run:
-            evaluated_df = self._run_single_evaluator(
+            evaluated_data = self._run_single_evaluator(
                 judge_type=judge_type_str,
                 config=subprocess_config,
-                df=original_df.copy(),
+                data=[row.copy() for row in original_data],
             )
-            if evaluated_df is not None:
-                judge_results_dfs[judge_type_str] = evaluated_df
+            if evaluated_data is not None:
+                judge_results[judge_type_str] = evaluated_data
                 self._statistics["successful_judges"].append(judge_type_str)
             else:
                 self._statistics["failed_judges"].append(judge_type_str)
         # Merge results
-        final_df = self._merge_evaluation_results(original_df, judge_results_dfs)
+        final_data = self._merge_evaluation_results(original_data, judge_results)
-        return final_df.to_dict(orient="records")
+        return final_data
     def _prepare_judge_configs(
         self, judge_configs_list: List[Dict], base_config: Dict[str, Any]
@@ -322,8 +454,9 @@ class EvaluationPipeline:
                 judge_config_item.get("agent_name")
                 or f"judge-{judge_type_str}-{judge_identifier.replace('/', '-')[:20]}"
             )
+            # Default to OPENAI_SDK to avoid Pydantic serialization warnings from LiteLLM
             subprocess_config["agent_type"] = judge_config_item.get(
-                "agent_type", "LITELLM"
+                "agent_type", "OPENAI_SDK"
             )
             subprocess_config["model_id"] = judge_identifier
             subprocess_config["agent_endpoint"] = judge_config_item.get("endpoint")
@@ -354,8 +487,8 @@ class EvaluationPipeline:
         self,
         judge_type: str,
         config: Dict[str, Any],
-        df: pd.DataFrame,
-    ) -> Optional[pd.DataFrame]:
+        data: List[Dict],
+    ) -> Optional[List[Dict]]:
         """Execute a single evaluator process."""
         evaluator_class = EVALUATOR_MAP.get(judge_type)
         if not evaluator_class:
@@ -383,21 +516,31 @@ class EvaluationPipeline:
                     return None
             evaluator_config = EvaluatorConfig(**filtered_config)
-            evaluator = evaluator_class(client=self.client, config=evaluator_config)
-            evaluated_df = evaluator.evaluate(df)
+            # Pass tracking context to the evaluator
+            evaluator = evaluator_class(
+                client=self.client,
+                config=evaluator_config,
+                run_id=self._run_id,
+                tracking_client=self._tracking_client,
+            )
+            evaluated_data = evaluator.evaluate(data)
             # Return only merge keys + judge-specific columns
             eval_cols = JUDGE_COLUMN_MAP.get(judge_type, [])
-            if not all(key in evaluated_df.columns for key in MERGE_KEYS):
+            if not evaluated_data:
+                return None
+            if not all(key in evaluated_data[0] for key in MERGE_KEYS):
                 self.logger.error(
                     f"Evaluation result missing merge keys for {judge_type}"
                 )
                 return None
-            cols_to_return = MERGE_KEYS + [
-                col for col in eval_cols if col in evaluated_df.columns
+            cols_to_return = set(MERGE_KEYS + [col for col in eval_cols])
+            return [
+                {k: v for k, v in row.items() if k in cols_to_return}
+                for row in evaluated_data
             ]
-            return evaluated_df[cols_to_return]
         except Exception as e:
             self.logger.error(
@@ -408,30 +551,28 @@ class EvaluationPipeline:
             del evaluator
     def _merge_evaluation_results(
-        self, original_df: pd.DataFrame, judge_results: Dict[str, pd.DataFrame]
-    ) -> pd.DataFrame:
+        self, original_data: List[Dict], judge_results: Dict[str, List[Dict]]
+    ) -> List[Dict]:
         """Merge evaluation results from multiple judges."""
-        final_df = original_df.copy()
-        for judge_type, judge_df in judge_results.items():
+        # Build lookup dictionaries keyed by merge keys
+        for judge_type, judge_data in judge_results.items():
             eval_cols = JUDGE_COLUMN_MAP.get(judge_type, [])
-            judge_cols_present = [col for col in eval_cols if col in judge_df.columns]
-            if not judge_cols_present:
-                self.logger.warning(f"No evaluation columns found for {judge_type}")
+            if not judge_data:
                 continue
-            try:
-                final_df = final_df.merge(
-                    judge_df,
-                    on=MERGE_KEYS,
-                    how="left",
-                    suffixes=("", f"_{judge_type}_dup"),
-                )
-            except Exception as e:
-                self.logger.error(f"Error merging results for {judge_type}: {e}")
+            # Build lookup by merge keys
+            lookup = {}
+            for row in judge_data:
+                key = tuple(row.get(k) for k in MERGE_KEYS)
+                lookup[key] = {col: row.get(col) for col in eval_cols if col in row}
+            # Merge into original data
+            for row in original_data:
+                key = tuple(row.get(k) for k in MERGE_KEYS)
+                if key in lookup:
+                    row.update(lookup[key])
-        return final_df
+        return original_data
     # ========================================================================
     # AGGREGATION METHODS

hackagent 0.4.0__tar.gz → 0.4.1__tar.gz

hackagent 0.4.0__tar.gz → 0.4.1__tar.gz