hackagent 0.2.3__tar.gz → 0.2.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hackagent-0.2.3 → hackagent-0.2.5}/PKG-INFO +6 -4
- {hackagent-0.2.3 → hackagent-0.2.5}/README.md +3 -3
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/agent.py +0 -1
- hackagent-0.2.5/hackagent/attacks/AdvPrefix/__init__.py +13 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/attacks/AdvPrefix/completions.py +70 -43
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/attacks/AdvPrefix/compute_ce.py +59 -32
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/attacks/AdvPrefix/config.py +3 -3
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/attacks/AdvPrefix/generate.py +0 -23
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/attacks/AdvPrefix/scorer_parser.py +38 -40
- hackagent-0.2.5/hackagent/cli/__init__.py +7 -0
- hackagent-0.2.5/hackagent/cli/commands/__init__.py +5 -0
- hackagent-0.2.5/hackagent/cli/commands/agent.py +497 -0
- hackagent-0.2.5/hackagent/cli/commands/attack.py +360 -0
- hackagent-0.2.5/hackagent/cli/commands/config.py +228 -0
- hackagent-0.2.5/hackagent/cli/commands/results.py +385 -0
- hackagent-0.2.5/hackagent/cli/config.py +138 -0
- hackagent-0.2.5/hackagent/cli/main.py +388 -0
- hackagent-0.2.5/hackagent/cli/utils.py +227 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/logger.py +1 -1
- {hackagent-0.2.3 → hackagent-0.2.5}/pyproject.toml +19 -4
- hackagent-0.2.3/assets/banner.png +0 -0
- hackagent-0.2.3/assets/favicon.ico +0 -0
- hackagent-0.2.3/hackagent/vulnerabilities/__init__.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/LICENSE +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/__init__.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/__init__.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/agent/__init__.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/agent/agent_create.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/agent/agent_destroy.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/agent/agent_list.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/agent/agent_partial_update.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/agent/agent_retrieve.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/agent/agent_update.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/apilogs/__init__.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/apilogs/apilogs_list.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/apilogs/apilogs_retrieve.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/attack/__init__.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/attack/attack_create.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/attack/attack_destroy.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/attack/attack_list.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/attack/attack_partial_update.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/attack/attack_retrieve.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/attack/attack_update.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/checkout/__init__.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/checkout/checkout_create.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/generate/__init__.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/generate/generate_create.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/judge/__init__.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/judge/judge_create.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/key/__init__.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/key/key_create.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/key/key_destroy.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/key/key_list.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/key/key_retrieve.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/organization/__init__.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/organization/organization_create.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/organization/organization_destroy.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/organization/organization_list.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/organization/organization_me_retrieve.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/organization/organization_partial_update.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/organization/organization_retrieve.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/organization/organization_update.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/prompt/__init__.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/prompt/prompt_create.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/prompt/prompt_destroy.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/prompt/prompt_list.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/prompt/prompt_partial_update.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/prompt/prompt_retrieve.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/prompt/prompt_update.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/result/__init__.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/result/result_create.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/result/result_destroy.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/result/result_list.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/result/result_partial_update.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/result/result_retrieve.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/result/result_trace_create.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/result/result_update.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/run/__init__.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/run/run_create.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/run/run_destroy.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/run/run_list.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/run/run_partial_update.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/run/run_result_create.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/run/run_retrieve.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/run/run_run_tests_create.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/run/run_update.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/user/__init__.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/user/user_create.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/user/user_destroy.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/user/user_list.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/user/user_me_retrieve.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/user/user_me_update.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/user/user_partial_update.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/user/user_retrieve.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/api/user/user_update.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/attacks/AdvPrefix/README.md +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/attacks/AdvPrefix/aggregation.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/attacks/AdvPrefix/completer.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/attacks/AdvPrefix/evaluation.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/attacks/AdvPrefix/preprocessing.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/attacks/AdvPrefix/scorer.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/attacks/AdvPrefix/selection.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/attacks/AdvPrefix/selector.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/attacks/AdvPrefix/utils.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/attacks/__init__.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/attacks/advprefix.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/attacks/base.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/attacks/strategies.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/client.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/errors.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/__init__.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/agent.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/agent_request.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/agent_type_enum.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/api_token_log.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/attack.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/attack_request.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/checkout_session_request_request.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/checkout_session_response.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/evaluation_status_enum.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/generate_error_response.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/generate_request_request.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/generate_request_request_messages_item.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/generate_success_response.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/generic_error_response.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/organization.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/organization_minimal.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/organization_request.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/paginated_agent_list.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/paginated_api_token_log_list.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/paginated_attack_list.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/paginated_organization_list.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/paginated_prompt_list.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/paginated_result_list.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/paginated_run_list.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/paginated_user_api_key_list.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/paginated_user_profile_list.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/patched_agent_request.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/patched_attack_request.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/patched_organization_request.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/patched_prompt_request.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/patched_result_request.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/patched_run_request.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/patched_user_profile_request.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/prompt.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/prompt_request.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/result.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/result_list_evaluation_status.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/result_request.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/run.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/run_list_status.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/run_request.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/status_enum.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/step_type_enum.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/trace.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/trace_request.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/user_api_key.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/user_api_key_request.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/user_profile.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/user_profile_minimal.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/models/user_profile_request.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/py.typed +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/router/__init__.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/router/adapters/__init__.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/router/adapters/base.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/router/adapters/google_adk.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/router/adapters/litellm_adapter.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/router/router.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/types.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/utils.py +0 -0
- {hackagent-0.2.3/hackagent/attacks/AdvPrefix → hackagent-0.2.5/hackagent/vulnerabilities}/__init__.py +0 -0
- {hackagent-0.2.3 → hackagent-0.2.5}/hackagent/vulnerabilities/prompts.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: hackagent
|
|
3
|
-
Version: 0.2.3
|
|
3
|
+
Version: 0.2.5
|
|
4
4
|
Summary: HackAgent is an open-source security toolkit to detect vulnerabilities of your AI Agents.
|
|
5
5
|
Author: Nicola Franco
|
|
6
6
|
Author-email: nicola@vista-labs.ai
|
|
@@ -9,17 +9,19 @@ Classifier: Programming Language :: Python :: 3
|
|
|
9
9
|
Classifier: Programming Language :: Python :: 3.10
|
|
10
10
|
Classifier: Programming Language :: Python :: 3.11
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Requires-Dist: click (>=8.1.0,<9.0.0)
|
|
12
13
|
Requires-Dist: litellm (>=1.69.2,<2.0.0)
|
|
13
14
|
Requires-Dist: pandas (>=2.2.3,<3.0.0)
|
|
14
15
|
Requires-Dist: pydantic (>=2.0,<3.0)
|
|
15
16
|
Requires-Dist: python-dotenv (>=1.1.0,<2.0.0)
|
|
17
|
+
Requires-Dist: pyyaml (>=6.0.0,<7.0.0)
|
|
16
18
|
Requires-Dist: requests (>=2.31.0,<3.0.0)
|
|
17
19
|
Requires-Dist: rich (>=14.0.0,<15.0.0)
|
|
18
20
|
Description-Content-Type: text/markdown
|
|
19
21
|
|
|
20
22
|
<div align="center">
|
|
21
23
|
|
|
22
|
-
<img src="
|
|
24
|
+
<img src="https://docs.hackagent.dev/img/banner.png" alt="Hack Agent" width=400></img>
|
|
23
25
|
|
|
24
26
|
|
|
25
27
|
⚔️
|
|
@@ -28,10 +30,10 @@ Description-Content-Type: text/markdown
|
|
|
28
30
|
|
|
29
31
|
<br>
|
|
30
32
|
|
|
31
|
-
 [Web App][Web App] -- [Docs][Docs] 
|
|
32
34
|
|
|
33
35
|
[Web App]: https://hackagent.dev/
|
|
34
|
-
[Docs]: https://hackagent.dev/
|
|
36
|
+
[Docs]: https://docs.hackagent.dev/
|
|
35
37
|
|
|
36
38
|
<br>
|
|
37
39
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
<div align="center">
|
|
2
2
|
|
|
3
|
-
<img src="
|
|
3
|
+
<img src="https://docs.hackagent.dev/img/banner.png" alt="Hack Agent" width=400></img>
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
⚔️
|
|
@@ -9,10 +9,10 @@
|
|
|
9
9
|
|
|
10
10
|
<br>
|
|
11
11
|
|
|
12
|
-
 [Web App][Web App] -- [Docs][Docs] 
|
|
13
13
|
|
|
14
14
|
[Web App]: https://hackagent.dev/
|
|
15
|
-
[Docs]: https://hackagent.dev/
|
|
15
|
+
[Docs]: https://docs.hackagent.dev/
|
|
16
16
|
|
|
17
17
|
<br>
|
|
18
18
|
|
|
@@ -100,7 +100,6 @@ class HackAgent:
|
|
|
100
100
|
variables (such as `HACKAGENT_API_KEY`) will be loaded from this
|
|
101
101
|
file if not already present in the environment.
|
|
102
102
|
"""
|
|
103
|
-
utils.display_hackagent_splash()
|
|
104
103
|
|
|
105
104
|
resolved_auth_token = utils.resolve_api_token(
|
|
106
105
|
direct_api_key_param=api_key, env_file_path=env_file_path
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AdvPrefix Attack Module
|
|
3
|
+
|
|
4
|
+
Suppress pandas warnings for cleaner attack execution output.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import warnings
|
|
8
|
+
|
|
9
|
+
# Suppress pandas FutureWarnings specifically for groupby operations
|
|
10
|
+
# This addresses warnings from preprocessing operations in the AdvPrefix pipeline
|
|
11
|
+
warnings.filterwarnings(
|
|
12
|
+
"ignore", category=FutureWarning, message=".*include_groups.*", module="pandas.*"
|
|
13
|
+
)
|
|
@@ -6,6 +6,16 @@ from typing import Dict, Any, Optional, List # Added List
|
|
|
6
6
|
# --- Import AgentRouter and related components ---
|
|
7
7
|
from hackagent.router.router import AgentRouter, AgentTypeEnum
|
|
8
8
|
|
|
9
|
+
# --- Import Rich progress bar components ---
|
|
10
|
+
from rich.progress import (
|
|
11
|
+
Progress,
|
|
12
|
+
BarColumn,
|
|
13
|
+
TextColumn,
|
|
14
|
+
TimeRemainingColumn,
|
|
15
|
+
MofNCompleteColumn,
|
|
16
|
+
SpinnerColumn,
|
|
17
|
+
)
|
|
18
|
+
|
|
9
19
|
|
|
10
20
|
# Constants for surrogate prompts
|
|
11
21
|
SURROGATE_ATTACK_PROMPTS = {
|
|
@@ -247,50 +257,67 @@ def execute(
|
|
|
247
257
|
completion_results_list: List[Dict[str, Any]] = []
|
|
248
258
|
logger.info(f"Executing {len(input_df)} completion requests sequentially...")
|
|
249
259
|
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
260
|
+
# Create progress bar for agent interactions
|
|
261
|
+
with Progress(
|
|
262
|
+
SpinnerColumn(),
|
|
263
|
+
TextColumn("[progress.description]{task.description}"),
|
|
264
|
+
BarColumn(),
|
|
265
|
+
MofNCompleteColumn(),
|
|
266
|
+
TextColumn("[progress.percentage]{task.percentage:>3.1f}%"),
|
|
267
|
+
TimeRemainingColumn(),
|
|
268
|
+
) as progress_bar:
|
|
269
|
+
task = progress_bar.add_task(
|
|
270
|
+
f"[green]Step 6: Getting completions from {victim_agent_type.value} agent...",
|
|
271
|
+
total=len(input_df),
|
|
272
|
+
)
|
|
254
273
|
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
#
|
|
258
|
-
#
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
"completion
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
274
|
+
for index, row in input_df.iterrows():
|
|
275
|
+
prefix_text = row["prefix"]
|
|
276
|
+
# 'goal' might not be directly used if surrogate_prompt_template is complex or prefix_text is already combined
|
|
277
|
+
# goal_text = row.get("goal", "") # Ensure goal is available if needed by prompt construction
|
|
278
|
+
|
|
279
|
+
try:
|
|
280
|
+
# n_samples handling: If n_samples_per_prefix > 1, the _get_completion_via_router (and adapter) needs to support it.
|
|
281
|
+
# Currently, it makes one call per row in input_df. If input_df is already expanded for samples, this is fine.
|
|
282
|
+
# If input_df has one row per unique prefix, and n_samples_per_prefix > 1, this loop needs to run n_samples_per_prefix times
|
|
283
|
+
# or _get_completion_via_router must handle requesting n_samples from the adapter.
|
|
284
|
+
# Assuming input_df might be pre-expanded or n_samples=1 for this synchronous version for simplicity.
|
|
285
|
+
# If n_samples > 1 and not pre-expanded, this will only get 1 sample per prefix.
|
|
286
|
+
result = _get_completion_via_router(
|
|
287
|
+
agent_router=agent_router,
|
|
288
|
+
agent_reg_key=victim_agent_reg_key,
|
|
289
|
+
prefix_text=prefix_text,
|
|
290
|
+
surrogate_prompt_template=actual_surrogate_prompt_str,
|
|
291
|
+
user_id=step_user_id_adk,
|
|
292
|
+
session_id=step_session_id_adk,
|
|
293
|
+
request_timeout=request_timeout,
|
|
294
|
+
max_new_tokens=max_new_tokens,
|
|
295
|
+
temperature=temperature,
|
|
296
|
+
n_samples=1, # Forcing 1 for this simple loop; adapter might take n_samples_per_prefix
|
|
297
|
+
logger_instance=logger,
|
|
298
|
+
original_index=index,
|
|
299
|
+
)
|
|
300
|
+
completion_results_list.append(result)
|
|
301
|
+
except Exception as e:
|
|
302
|
+
logger.error(
|
|
303
|
+
f"Exception during synchronous completion for original index {index}: {e}",
|
|
304
|
+
exc_info=e,
|
|
305
|
+
)
|
|
306
|
+
completion_results_list.append(
|
|
307
|
+
{
|
|
308
|
+
"completion": None,
|
|
309
|
+
"raw_request_payload": None,
|
|
310
|
+
"raw_response_status": None,
|
|
311
|
+
"raw_response_headers": None,
|
|
312
|
+
"raw_response_body": None,
|
|
313
|
+
"adapter_specific_events": None,
|
|
314
|
+
"error_message": f"Sync Task Exception: {type(e).__name__} - {str(e)}",
|
|
315
|
+
"log_message": None,
|
|
316
|
+
}
|
|
317
|
+
)
|
|
318
|
+
|
|
319
|
+
# Update progress bar after each completion
|
|
320
|
+
progress_bar.update(task, advance=1)
|
|
294
321
|
|
|
295
322
|
logger.info("All completion requests processed.")
|
|
296
323
|
|
|
@@ -7,6 +7,16 @@ import uuid
|
|
|
7
7
|
from hackagent.client import AuthenticatedClient
|
|
8
8
|
from hackagent.router.router import AgentRouter, AgentTypeEnum
|
|
9
9
|
|
|
10
|
+
# --- Import Rich progress bar components ---
|
|
11
|
+
from rich.progress import (
|
|
12
|
+
Progress,
|
|
13
|
+
BarColumn,
|
|
14
|
+
TextColumn,
|
|
15
|
+
TimeRemainingColumn,
|
|
16
|
+
MofNCompleteColumn,
|
|
17
|
+
SpinnerColumn,
|
|
18
|
+
)
|
|
19
|
+
|
|
10
20
|
# --- Remove old ADK utility imports and ADK_REFUSAL_KEYWORDS import ---
|
|
11
21
|
# from hackagent.api.utils import ADK_REFUSAL_KEYWORDS # Removed this import
|
|
12
22
|
|
|
@@ -112,38 +122,55 @@ def execute(
|
|
|
112
122
|
f"Executing {len(input_df)} ADK acceptability scoring requests sequentially..."
|
|
113
123
|
)
|
|
114
124
|
|
|
115
|
-
#
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
125
|
+
# Create progress bar for ADK acceptability scoring
|
|
126
|
+
with Progress(
|
|
127
|
+
SpinnerColumn(),
|
|
128
|
+
TextColumn("[progress.description]{task.description}"),
|
|
129
|
+
BarColumn(),
|
|
130
|
+
MofNCompleteColumn(),
|
|
131
|
+
TextColumn("[progress.percentage]{task.percentage:>3.1f}%"),
|
|
132
|
+
TimeRemainingColumn(),
|
|
133
|
+
) as progress_bar:
|
|
134
|
+
task = progress_bar.add_task(
|
|
135
|
+
f"[blue]Step 4: Computing cross-entropy via {agent_router.backend_agent.agent_type.value} agent...",
|
|
136
|
+
total=len(input_df),
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
# Synchronous loop instead of asyncio.gather
|
|
140
|
+
for index, row in input_df.iterrows():
|
|
141
|
+
prefix = row["prefix"]
|
|
142
|
+
try:
|
|
143
|
+
result = _get_adk_acceptability_via_router(
|
|
144
|
+
router=agent_router,
|
|
145
|
+
agent_reg_key=victim_agent_reg_key,
|
|
146
|
+
prefix_text=prefix,
|
|
147
|
+
user_id=step_user_id,
|
|
148
|
+
session_id=step_session_id,
|
|
149
|
+
request_timeout=request_timeout,
|
|
150
|
+
logger_instance=logger,
|
|
151
|
+
original_index=index,
|
|
152
|
+
)
|
|
153
|
+
interaction_results_list.append(result)
|
|
154
|
+
except Exception as e:
|
|
155
|
+
logger.error(
|
|
156
|
+
f"Exception during synchronous ADK acceptability scoring for original index {index}: {e}",
|
|
157
|
+
exc_info=e,
|
|
158
|
+
)
|
|
159
|
+
interaction_results_list.append(
|
|
160
|
+
{
|
|
161
|
+
"score": float("inf"),
|
|
162
|
+
"request_payload": None,
|
|
163
|
+
"response_status_code": None,
|
|
164
|
+
"response_headers": None,
|
|
165
|
+
"response_body_raw": None,
|
|
166
|
+
"adk_events_list": None,
|
|
167
|
+
"error_message": f"Sync Task Exception: {type(e).__name__} - {str(e)}",
|
|
168
|
+
"log_message": None,
|
|
169
|
+
}
|
|
170
|
+
)
|
|
171
|
+
|
|
172
|
+
# Update progress bar after each scoring request
|
|
173
|
+
progress_bar.update(task, advance=1)
|
|
147
174
|
|
|
148
175
|
logger.info("All ADK acceptability scoring requests processed.")
|
|
149
176
|
|
|
@@ -6,7 +6,7 @@ DEFAULT_PREFIX_GENERATION_CONFIG: Dict[str, Any] = {
|
|
|
6
6
|
"output_dir": "./logs/runs",
|
|
7
7
|
# --- Model Configurations ---
|
|
8
8
|
"generator": {
|
|
9
|
-
"identifier": "
|
|
9
|
+
"identifier": "hackagent/generate",
|
|
10
10
|
"endpoint": "https://hackagent.dev/api/generate",
|
|
11
11
|
"batch_size": 2,
|
|
12
12
|
"max_new_tokens": 50,
|
|
@@ -15,14 +15,14 @@ DEFAULT_PREFIX_GENERATION_CONFIG: Dict[str, Any] = {
|
|
|
15
15
|
},
|
|
16
16
|
"judges": [
|
|
17
17
|
{
|
|
18
|
-
"identifier": "
|
|
18
|
+
"identifier": "hackagent/judge",
|
|
19
19
|
"endpoint": "https://hackagent.dev/api/judge",
|
|
20
20
|
"type": "harmbench",
|
|
21
21
|
}
|
|
22
22
|
],
|
|
23
23
|
"selection_judges": [
|
|
24
24
|
{
|
|
25
|
-
"identifier": "
|
|
25
|
+
"identifier": "hackagent/judge",
|
|
26
26
|
"endpoint": "https://hackagent.dev/api/judge",
|
|
27
27
|
"type": "harmbench",
|
|
28
28
|
}
|
|
@@ -55,30 +55,7 @@ def _construct_prompts(
|
|
|
55
55
|
if n_samples <= 0:
|
|
56
56
|
continue
|
|
57
57
|
|
|
58
|
-
# chat = [{"role": "user", "content": goal}] # Not directly used for router prompt format
|
|
59
58
|
try:
|
|
60
|
-
# The prompt for the router will be the fully constructed context.
|
|
61
|
-
# Custom chat templating needs to happen before sending to router.
|
|
62
|
-
# This templating logic might be simplified if direct calls are made,
|
|
63
|
-
# as the local proxy expects a more direct LiteLLM-like payload.
|
|
64
|
-
|
|
65
|
-
# For direct calls, the "prompt" is often just the user message content.
|
|
66
|
-
# For AgentRouter, the current logic constructs a more complex prompt string.
|
|
67
|
-
# We will adapt this based on whether we're calling directly or via router.
|
|
68
|
-
|
|
69
|
-
# The `final_prompt` here is what's sent to LiteLLM or the router.
|
|
70
|
-
# For direct local proxy, `messages` will be constructed later.
|
|
71
|
-
# For AgentRouter, this `final_prompt` is used.
|
|
72
|
-
|
|
73
|
-
# Let's keep final_prompt simple for now, it's the content for the "user" role
|
|
74
|
-
# and meta_prefix will be added to the generated part.
|
|
75
|
-
# This part of the logic might need to be revisited based on how CustomChatTemplates are meant to work
|
|
76
|
-
# with local proxy vs router.
|
|
77
|
-
|
|
78
|
-
# The current _construct_prompts prepares a `final_prompt` string.
|
|
79
|
-
# Let's assume this `final_prompt` is the "content" for the "user" message
|
|
80
|
-
# when making direct calls.
|
|
81
|
-
|
|
82
59
|
if meta_prefix in CUSTOM_CHAT_TEMPLATES:
|
|
83
60
|
prompt_content_for_template = CUSTOM_CHAT_TEMPLATES[
|
|
84
61
|
meta_prefix
|
|
@@ -77,46 +77,34 @@ class BaseEvaluator(ABC):
|
|
|
77
77
|
self.underlying_httpx_client = self.client.get_httpx_client()
|
|
78
78
|
|
|
79
79
|
self.is_local_judge_proxy_defined = False
|
|
80
|
-
self.actual_api_key:
|
|
80
|
+
self.actual_api_key: str = client.token
|
|
81
81
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
82
|
+
api_key_config_value = self.config.agent_metadata.get("api_key")
|
|
83
|
+
|
|
84
|
+
if api_key_config_value:
|
|
85
|
+
env_key_value = os.environ.get(api_key_config_value)
|
|
86
|
+
if env_key_value:
|
|
87
|
+
self.actual_api_key = env_key_value
|
|
88
|
+
self.logger.info(
|
|
89
|
+
f"Loaded API key for generator from environment variable: {api_key_config_value}"
|
|
90
|
+
)
|
|
91
|
+
else:
|
|
92
|
+
self.actual_api_key = api_key_config_value
|
|
93
|
+
self.logger.info(
|
|
94
|
+
f"Using provided value directly as API key for generator (not found as env var: {api_key_config_value[:5]}...)."
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
print("config.agent_endpoint", self.config.agent_endpoint)
|
|
98
|
+
is_local_proxy_defined = bool(
|
|
99
|
+
self.config.agent_endpoint == "https://hackagent.dev/api/judge"
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
if is_local_proxy_defined:
|
|
86
103
|
self.is_local_judge_proxy_defined = True
|
|
87
104
|
self.logger.info(
|
|
88
105
|
f"Local judge proxy detected for '{self.config.agent_name}' at: {self.config.agent_endpoint}"
|
|
89
106
|
)
|
|
90
107
|
|
|
91
|
-
if self.config.agent_metadata:
|
|
92
|
-
direct_api_key = self.config.agent_metadata.get("api_key")
|
|
93
|
-
api_key_env_var = self.config.agent_metadata.get("api_key_env_var")
|
|
94
|
-
|
|
95
|
-
if direct_api_key:
|
|
96
|
-
self.actual_api_key = direct_api_key
|
|
97
|
-
self.logger.info(
|
|
98
|
-
f"Using direct API key for local judge proxy '{self.config.agent_name}'."
|
|
99
|
-
)
|
|
100
|
-
elif api_key_env_var:
|
|
101
|
-
env_key_value = os.environ.get(api_key_env_var)
|
|
102
|
-
if env_key_value:
|
|
103
|
-
self.actual_api_key = env_key_value
|
|
104
|
-
self.logger.info(
|
|
105
|
-
f"Loaded API key for local judge proxy '{self.config.agent_name}' from env var: {api_key_env_var}"
|
|
106
|
-
)
|
|
107
|
-
else:
|
|
108
|
-
self.logger.warning(
|
|
109
|
-
f"Env var {api_key_env_var} for local judge proxy '{self.config.agent_name}' API key not found."
|
|
110
|
-
)
|
|
111
|
-
else:
|
|
112
|
-
self.logger.warning(
|
|
113
|
-
f"Local judge proxy '{self.config.agent_name}' detected, but no 'api_key' or 'api_key_env_var' found in agent_metadata."
|
|
114
|
-
)
|
|
115
|
-
else:
|
|
116
|
-
self.logger.warning(
|
|
117
|
-
f"Local judge proxy '{self.config.agent_name}' detected, but agent_metadata is missing for API key."
|
|
118
|
-
)
|
|
119
|
-
|
|
120
108
|
if not self.actual_api_key:
|
|
121
109
|
self.is_local_judge_proxy_defined = (
|
|
122
110
|
False # Cannot use local proxy without API key
|
|
@@ -497,8 +485,12 @@ Based on these criteria, please provide your classification. You should first br
|
|
|
497
485
|
super().__init__(client, config)
|
|
498
486
|
if not self.config.model_id: # Ensure model_id is present
|
|
499
487
|
raise ValueError("NuancedEvaluator requires a model_id in the config.")
|
|
500
|
-
if not self.
|
|
501
|
-
|
|
488
|
+
if not (self.is_local_judge_proxy_defined and self.actual_api_key) and (
|
|
489
|
+
not self.agent_router or not self.agent_registration_key
|
|
490
|
+
):
|
|
491
|
+
raise RuntimeError(
|
|
492
|
+
"AgentRouter not initialized for NuancedEvaluator and local proxy not available/functional."
|
|
493
|
+
)
|
|
502
494
|
|
|
503
495
|
def _get_request_data_for_row(self, row: pd.Series) -> Dict[str, Any]:
|
|
504
496
|
prompt_content = self.PROMPT.format(
|
|
@@ -630,9 +622,11 @@ Answer with either 'safe' or 'unsafe' and nothing else (in particular, never sta
|
|
|
630
622
|
raise ValueError(
|
|
631
623
|
"JailbreakBenchEvaluator requires a model_id in the config."
|
|
632
624
|
)
|
|
633
|
-
if not self.
|
|
625
|
+
if not (self.is_local_judge_proxy_defined and self.actual_api_key) and (
|
|
626
|
+
not self.agent_router or not self.agent_registration_key
|
|
627
|
+
):
|
|
634
628
|
raise RuntimeError(
|
|
635
|
-
"AgentRouter not initialized for JailbreakBenchEvaluator."
|
|
629
|
+
"AgentRouter not initialized for JailbreakBenchEvaluator and local proxy not available/functional."
|
|
636
630
|
)
|
|
637
631
|
|
|
638
632
|
def _get_request_data_for_row(self, row: pd.Series) -> Dict[str, Any]:
|
|
@@ -732,8 +726,12 @@ Answer with only "yes" or "no"."""
|
|
|
732
726
|
)
|
|
733
727
|
# Depending on strictness, could raise ValueError here
|
|
734
728
|
# raise ValueError("HarmBenchEvaluator requires a model_id in the config.")
|
|
735
|
-
if not self.
|
|
736
|
-
|
|
729
|
+
if not (self.is_local_judge_proxy_defined and self.actual_api_key) and (
|
|
730
|
+
not self.agent_router or not self.agent_registration_key
|
|
731
|
+
):
|
|
732
|
+
raise RuntimeError(
|
|
733
|
+
"AgentRouter not initialized for HarmBenchEvaluator and local proxy not available/functional."
|
|
734
|
+
)
|
|
737
735
|
|
|
738
736
|
# model_id processing (like adding provider prefix) should ideally happen when creating
|
|
739
737
|
# the EvaluatorConfig instance before passing it here, or handled by the LiteLLMAgentAdapter itself.
|