dreadnode 1.15.0__tar.gz → 1.15.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dreadnode-1.15.0 → dreadnode-1.15.2}/PKG-INFO +1 -1
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/agent/agent.py +24 -8
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/agent/hooks/__init__.py +2 -0
- dreadnode-1.15.2/dreadnode/agent/hooks/metrics.py +84 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/agent/tools/__init__.py +4 -2
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/agent/tools/base.py +13 -8
- dreadnode-1.15.2/dreadnode/agent/tools/execute.py +111 -0
- dreadnode-1.15.2/dreadnode/agent/tools/memory.py +56 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/agent/tools/planning.py +26 -3
- dreadnode-1.15.2/dreadnode/agent/tools/reporting.py +35 -0
- dreadnode-1.15.2/dreadnode/agent/tools/tasking.py +58 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/cli/agent/cli.py +0 -2
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/cli/attack/cli.py +0 -2
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/cli/eval/cli.py +0 -2
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/cli/study/cli.py +0 -2
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/cli/task/cli.py +2 -1
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/discovery.py +18 -2
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/eval/eval.py +1 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/optimization/study.py +9 -1
- {dreadnode-1.15.0 → dreadnode-1.15.2}/pyproject.toml +1 -1
- dreadnode-1.15.0/dreadnode/agent/tools/reporting.py +0 -35
- dreadnode-1.15.0/dreadnode/agent/tools/tasking.py +0 -50
- {dreadnode-1.15.0 → dreadnode-1.15.2}/.gitignore +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/LICENSE +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/README.md +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/__init__.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/__main__.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/agent/__init__.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/agent/error.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/agent/events.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/agent/format.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/agent/hooks/backoff.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/agent/hooks/base.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/agent/hooks/summarize.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/agent/prompts/__init__.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/agent/prompts/summarize.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/agent/reactions.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/agent/result.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/agent/stop.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/agent/thread.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/agent/tools/fs.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/airt/__init__.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/airt/attack/__init__.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/airt/attack/base.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/airt/attack/goat.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/airt/attack/hop_skip_jump.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/airt/attack/nes.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/airt/attack/prompt.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/airt/attack/simba.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/airt/attack/tap.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/airt/attack/zoo.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/airt/search/__init__.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/airt/search/hop_skip_jump.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/airt/search/image_utils.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/airt/search/nes.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/airt/search/simba.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/airt/search/zoo.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/airt/target/__init__.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/airt/target/base.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/airt/target/custom.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/airt/target/llm.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/api/__init__.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/api/client.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/api/models.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/api/util.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/artifact/__init__.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/artifact/credential_manager.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/artifact/merger.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/artifact/storage.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/artifact/tree_builder.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/cli/__init__.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/cli/agent/__init__.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/cli/api.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/cli/attack/__init__.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/cli/docker.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/cli/eval/__init__.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/cli/github.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/cli/main.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/cli/platform/__init__.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/cli/platform/cli.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/cli/platform/compose.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/cli/platform/constants.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/cli/platform/download.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/cli/platform/env_mgmt.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/cli/platform/tag.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/cli/platform/version.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/cli/profile/__init__.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/cli/profile/cli.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/cli/shared.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/cli/study/__init__.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/cli/task/__init__.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/common_types.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/constants.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/convert.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/data_types/__init__.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/data_types/audio.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/data_types/base.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/data_types/image.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/data_types/object_3d.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/data_types/table.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/data_types/text.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/data_types/video.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/error.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/eval/__init__.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/eval/console.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/eval/dataset.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/eval/events.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/eval/format.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/eval/result.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/eval/sample.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/format.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/integrations/__init__.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/integrations/transformers.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/logging_.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/main.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/meta/__init__.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/meta/config.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/meta/context.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/meta/hydrate.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/meta/introspect.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/metric.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/object.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/optimization/__init__.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/optimization/collectors.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/optimization/console.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/optimization/events.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/optimization/format.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/optimization/result.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/optimization/sampling.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/optimization/search/__init__.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/optimization/search/base.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/optimization/search/boundary.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/optimization/search/graph.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/optimization/search/optuna_.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/optimization/search/random.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/optimization/stop.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/optimization/trial.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/py.typed +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/scorers/__init__.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/scorers/base.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/scorers/classification.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/scorers/consistency.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/scorers/contains.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/scorers/crucible.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/scorers/format.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/scorers/harm.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/scorers/image.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/scorers/json.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/scorers/judge.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/scorers/length.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/scorers/lexical.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/scorers/pii.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/scorers/readability.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/scorers/rigging.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/scorers/sentiment.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/scorers/similarity.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/scorers/util.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/serialization.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/task.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/tracing/__init__.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/tracing/constants.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/tracing/exporters.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/tracing/span.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/transforms/__init__.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/transforms/base.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/transforms/cipher.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/transforms/encoding.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/transforms/image.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/transforms/perturbation.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/transforms/refine.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/transforms/stylistic.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/transforms/substitution.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/transforms/swap.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/transforms/text.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/user_config.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/util.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/dreadnode/version.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/examples/airt/beam_search.ipynb +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/examples/airt/graph_of_attacks_with_pruning.ipynb +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/examples/airt/tap_vs_goat_eval.ipynb +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/examples/airt/tree_of_attacks_with_pruning.ipynb +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/examples/data_export.ipynb +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/examples/log_artifact.ipynb +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/examples/log_object/audio.ipynb +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/examples/log_object/image.ipynb +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/examples/log_object/object3d.ipynb +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/examples/log_object/table.ipynb +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/examples/log_object/video.ipynb +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/examples/model_training.ipynb +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/examples/rigging.ipynb +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/tests/cli/test_config.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/tests/cli/test_docker.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/tests/cli/test_github.py +0 -0
- {dreadnode-1.15.0 → dreadnode-1.15.2}/tests/test_meta.py +0 -0
|
@@ -2,10 +2,11 @@ import inspect
|
|
|
2
2
|
import typing as t
|
|
3
3
|
from contextlib import aclosing, asynccontextmanager
|
|
4
4
|
from copy import deepcopy
|
|
5
|
+
from textwrap import dedent
|
|
5
6
|
|
|
6
7
|
import rigging as rg
|
|
7
8
|
from loguru import logger
|
|
8
|
-
from pydantic import ConfigDict, Field, PrivateAttr, SkipValidation, field_validator
|
|
9
|
+
from pydantic import AfterValidator, ConfigDict, Field, PrivateAttr, SkipValidation, field_validator
|
|
9
10
|
from rigging.message import inject_system_content
|
|
10
11
|
from ulid import ULID # can't access via rg
|
|
11
12
|
|
|
@@ -71,14 +72,18 @@ class Agent(Model):
|
|
|
71
72
|
|
|
72
73
|
name: str
|
|
73
74
|
"""The name of the agent."""
|
|
74
|
-
description: str = ""
|
|
75
|
+
description: t.Annotated[str, AfterValidator(dedent)] = ""
|
|
75
76
|
"""A brief description of the agent's purpose."""
|
|
76
77
|
tags: list[str] = Config(default_factory=lambda: ["agent"])
|
|
77
78
|
"""A list of tags associated with the agent."""
|
|
79
|
+
label: str | None = Config(default=None)
|
|
80
|
+
"""Specific label for tracing, otherwise derived from the name."""
|
|
78
81
|
|
|
79
82
|
model: str | None = Config(default=None)
|
|
80
83
|
"""Inference model (rigging generator identifier)."""
|
|
81
|
-
instructions: str | None = Config(
|
|
84
|
+
instructions: t.Annotated[str | None, AfterValidator(lambda x: dedent(x) if x else x)] = Config(
|
|
85
|
+
default=None
|
|
86
|
+
)
|
|
82
87
|
"""The agent's core instructions."""
|
|
83
88
|
max_steps: int = Config(default=10)
|
|
84
89
|
"""The maximum number of steps (generation + tool calls)."""
|
|
@@ -90,15 +95,15 @@ class Agent(Model):
|
|
|
90
95
|
tool_mode: ToolMode = Config(default="auto", repr=False)
|
|
91
96
|
"""The tool calling mode to use."""
|
|
92
97
|
|
|
93
|
-
hooks: list[Hook] =
|
|
98
|
+
hooks: list[Hook] = Field(default_factory=list, exclude=True, repr=False)
|
|
94
99
|
"""Hooks to run at various points in the agent's lifecycle."""
|
|
95
|
-
stop_conditions: list[StopCondition] =
|
|
100
|
+
stop_conditions: list[StopCondition] = Field(default_factory=list)
|
|
96
101
|
"""The logical condition for successfully stopping a run."""
|
|
97
102
|
thread: Thread = Field(default_factory=Thread, exclude=True, repr=False)
|
|
98
103
|
"""Stateful thread for this agent, for when otherwise not specified during execution."""
|
|
99
|
-
scorers: ScorersLike[AgentResult] =
|
|
104
|
+
scorers: ScorersLike[AgentResult] = Field(default_factory=list)
|
|
100
105
|
"""Scorers to evaluate the agent output."""
|
|
101
|
-
assert_scores: list[str] | t.Literal[True] =
|
|
106
|
+
assert_scores: list[str] | t.Literal[True] = Field(default_factory=list)
|
|
102
107
|
"""Scores to ensure are truthy, otherwise the agent task is marked as failed."""
|
|
103
108
|
|
|
104
109
|
_generator: rg.Generator | None = PrivateAttr(None, init=False)
|
|
@@ -716,14 +721,25 @@ class Agent(Model):
|
|
|
716
721
|
)
|
|
717
722
|
trace_params.update(
|
|
718
723
|
{
|
|
724
|
+
"name": self.name,
|
|
719
725
|
"model": self.model,
|
|
720
726
|
"max_steps": self.max_steps,
|
|
721
727
|
"tool_mode": self.tool_mode,
|
|
728
|
+
"tool_count": len(self.all_tools),
|
|
729
|
+
"instructions_length": len(self.instructions or ""),
|
|
730
|
+
"stop_condition_count": len(self.stop_conditions),
|
|
731
|
+
"message_count": len(messages),
|
|
722
732
|
}
|
|
723
733
|
)
|
|
724
734
|
|
|
725
735
|
last_event: AgentEvent | None = None
|
|
726
|
-
with task_and_run(
|
|
736
|
+
with task_and_run(
|
|
737
|
+
name=self.name,
|
|
738
|
+
tags=self.tags,
|
|
739
|
+
label=self.label,
|
|
740
|
+
inputs=trace_inputs,
|
|
741
|
+
params=trace_params,
|
|
742
|
+
):
|
|
727
743
|
try:
|
|
728
744
|
async with aclosing(self._stream(thread, messages, hooks, commit=commit)) as stream:
|
|
729
745
|
async for event in stream:
|
|
@@ -3,6 +3,7 @@ from dreadnode.agent.hooks.base import (
|
|
|
3
3
|
Hook,
|
|
4
4
|
retry_with_feedback,
|
|
5
5
|
)
|
|
6
|
+
from dreadnode.agent.hooks.metrics import tool_metrics
|
|
6
7
|
from dreadnode.agent.hooks.summarize import summarize_when_long
|
|
7
8
|
|
|
8
9
|
__all__ = [
|
|
@@ -11,4 +12,5 @@ __all__ = [
|
|
|
11
12
|
"backoff_on_ratelimit",
|
|
12
13
|
"retry_with_feedback",
|
|
13
14
|
"summarize_when_long",
|
|
15
|
+
"tool_metrics",
|
|
14
16
|
]
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import typing as t
|
|
2
|
+
|
|
3
|
+
from dreadnode.agent.events import AgentEvent, ToolEnd, ToolStart
|
|
4
|
+
from dreadnode.agent.hooks import Hook
|
|
5
|
+
from dreadnode.meta import Config, component
|
|
6
|
+
|
|
7
|
+
if t.TYPE_CHECKING:
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def tool_metrics(*, detailed: bool = False) -> Hook:
    """
    Build an agent hook that records metrics about tool usage.

    The hook reacts to `ToolStart` and `ToolEnd` events and logs call counts,
    execution time, and success rate via `dreadnode.log_metric`. Any other event
    is ignored.

    Args:
        detailed: When True, each metric is additionally logged under a per-tool name
            (e.g. `tool/time.<tool_name>`). When False, only the aggregate metrics
            across all tools are logged.

    Returns:
        An async hook function that can be registered with an agent.
    """
    # Start timestamps of tool calls that have begun but not yet ended, keyed by tool call id.
    started_at: dict[str, datetime] = {}

    @component
    async def tool_metrics(
        event: AgentEvent,
        *,
        detailed: bool = Config(
            default=detailed,
            help="If True, logs metrics for each specific tool in addition to general stats.",
        ),
    ) -> None:
        """The actual hook implementation that processes agent events."""
        from dreadnode import log_metric

        if isinstance(event, ToolStart):
            log_metric("tool/total_count", 1, step=event.step, mode="count")
            started_at[event.tool_call.id] = event.timestamp

            if detailed:
                name = event.tool_call.name
                log_metric(f"tool/count.{name}", 1, step=event.step, mode="count")
            return

        if not isinstance(event, ToolEnd):
            return

        name = event.tool_call.name
        step = event.step

        # If no start was recorded for this call, fall back to the end timestamp (zero duration).
        began = started_at.pop(event.tool_call.id, event.timestamp)
        elapsed = (event.timestamp - began).total_seconds()

        # A tool call counts as failed when its result message carries an "error" metadata key.
        errored = "error" in event.message.metadata
        outcome = 0 if errored else 1

        # Aggregate metrics across all tools.
        log_metric("tool/total_time", elapsed, step=step, mode="sum")
        log_metric("tool/success_rate", outcome, step=step, mode="avg")
        if errored:
            log_metric("tool/failed_count", 1, step=step, mode="count")

        if not detailed:
            return

        # Per-tool breakdown of the same metrics.
        log_metric(f"tool/time.{name}", elapsed, step=step, mode="sum")
        log_metric(f"tool/avg_time.{name}", elapsed, step=step, mode="avg")
        log_metric(f"tool/success_rate.{name}", outcome, step=step, mode="avg")
        if errored:
            log_metric(f"tool/failed_count.{name}", 1, step=step, mode="count")

    return tool_metrics
|
|
@@ -16,7 +16,7 @@ from dreadnode.agent.tools.base import (
|
|
|
16
16
|
)
|
|
17
17
|
|
|
18
18
|
if t.TYPE_CHECKING:
|
|
19
|
-
from dreadnode.agent.tools import fs, planning, reporting, tasking
|
|
19
|
+
from dreadnode.agent.tools import execute, fs, memory, planning, reporting, tasking
|
|
20
20
|
|
|
21
21
|
__all__ = [
|
|
22
22
|
"AnyTool",
|
|
@@ -28,7 +28,9 @@ __all__ = [
|
|
|
28
28
|
"ToolMode",
|
|
29
29
|
"Toolset",
|
|
30
30
|
"discover_tools_on_obj",
|
|
31
|
+
"execute",
|
|
31
32
|
"fs",
|
|
33
|
+
"memory",
|
|
32
34
|
"planning",
|
|
33
35
|
"reporting",
|
|
34
36
|
"tasking",
|
|
@@ -36,7 +38,7 @@ __all__ = [
|
|
|
36
38
|
"tool_method",
|
|
37
39
|
]
|
|
38
40
|
|
|
39
|
-
__lazy_submodules__: list[str] = ["fs", "planning", "reporting", "tasking"]
|
|
41
|
+
__lazy_submodules__: list[str] = ["fs", "planning", "reporting", "tasking", "execute", "memory"]
|
|
40
42
|
__lazy_components__: dict[str, str] = {}
|
|
41
43
|
|
|
42
44
|
|
|
@@ -4,7 +4,7 @@ from pydantic import ConfigDict
|
|
|
4
4
|
from rigging import tools
|
|
5
5
|
from rigging.tools.base import ToolMethod as RiggingToolMethod
|
|
6
6
|
|
|
7
|
-
from dreadnode.meta import Component,
|
|
7
|
+
from dreadnode.meta import Component, Model
|
|
8
8
|
|
|
9
9
|
Tool = tools.Tool
|
|
10
10
|
ToolMode = tools.ToolMode
|
|
@@ -103,18 +103,18 @@ def tool_method(
|
|
|
103
103
|
description: str | None = None,
|
|
104
104
|
catch: bool | t.Iterable[type[Exception]] | None = None,
|
|
105
105
|
truncate: int | None = None,
|
|
106
|
-
) -> t.Callable[[t.Callable[P, R]], RiggingToolMethod[P, R]]: ...
|
|
106
|
+
) -> t.Callable[[t.Callable[t.Concatenate[t.Any, P], R]], RiggingToolMethod[P, R]]: ...
|
|
107
107
|
|
|
108
108
|
|
|
109
109
|
@t.overload
|
|
110
110
|
def tool_method(
|
|
111
|
-
func: t.Callable[P, R],
|
|
111
|
+
func: t.Callable[t.Concatenate[t.Any, P], R],
|
|
112
112
|
/,
|
|
113
113
|
) -> RiggingToolMethod[P, R]: ...
|
|
114
114
|
|
|
115
115
|
|
|
116
116
|
def tool_method(
|
|
117
|
-
func: t.Callable[P, R] | None = None,
|
|
117
|
+
func: t.Callable[t.Concatenate[t.Any, P], R] | None = None,
|
|
118
118
|
/,
|
|
119
119
|
*,
|
|
120
120
|
variants: list[str] | None = None,
|
|
@@ -122,7 +122,10 @@ def tool_method(
|
|
|
122
122
|
description: str | None = None,
|
|
123
123
|
catch: bool | t.Iterable[type[Exception]] | None = None,
|
|
124
124
|
truncate: int | None = None,
|
|
125
|
-
) ->
|
|
125
|
+
) -> (
|
|
126
|
+
t.Callable[[t.Callable[t.Concatenate[t.Any, P], R]], RiggingToolMethod[P, R]]
|
|
127
|
+
| RiggingToolMethod[P, R]
|
|
128
|
+
):
|
|
126
129
|
"""
|
|
127
130
|
Marks a method on a Toolset as a tool, adding it to specified variants.
|
|
128
131
|
|
|
@@ -143,7 +146,9 @@ def tool_method(
|
|
|
143
146
|
truncate: The maximum number of characters for the tool's output.
|
|
144
147
|
"""
|
|
145
148
|
|
|
146
|
-
def make_tool_method(
|
|
149
|
+
def make_tool_method(
|
|
150
|
+
func: t.Callable[t.Concatenate[t.Any, P], R],
|
|
151
|
+
) -> RiggingToolMethod[P, R]:
|
|
147
152
|
tool_method_descriptor: RiggingToolMethod[P, R] = tools.tool_method(
|
|
148
153
|
name=name,
|
|
149
154
|
description=description,
|
|
@@ -168,7 +173,7 @@ class Toolset(Model):
|
|
|
168
173
|
- A `get_tools` method for discovering methods decorated with `@dreadnode.tool_method`.
|
|
169
174
|
"""
|
|
170
175
|
|
|
171
|
-
variant: str =
|
|
176
|
+
variant: str | None = None
|
|
172
177
|
"""The variant for filtering tools available in this toolset."""
|
|
173
178
|
|
|
174
179
|
model_config = ConfigDict(arbitrary_types_allowed=True, use_attribute_docstrings=True)
|
|
@@ -190,7 +195,7 @@ class Toolset(Model):
|
|
|
190
195
|
continue
|
|
191
196
|
|
|
192
197
|
variants = getattr(class_member, TOOL_VARIANTS_ATTR, [])
|
|
193
|
-
if variant in variants:
|
|
198
|
+
if not variant or not variants or variant in variants:
|
|
194
199
|
bound_tool = t.cast("AnyTool", getattr(self, name))
|
|
195
200
|
tools.append(bound_tool)
|
|
196
201
|
seen_names.add(name)
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import contextlib
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
from loguru import logger
|
|
6
|
+
|
|
7
|
+
from dreadnode.agent.tools.base import tool
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@tool(catch=True)
async def command(
    cmd: list[str],
    *,
    timeout: int = 120,
    cwd: str | None = None,
    env: dict[str, str] | None = None,
) -> str:
    """
    Execute a shell command.

    Use this tool to run system utilities and command-line programs (e.g., `ls`, `cat`, `grep`). \
    It is designed for straightforward, single-shot operations and returns the combined output and error streams.

    ## Best Practices
    - Argument Format: The command and its arguments *must* be provided as a \
    list of strings (e.g., `["ls", "-la", "/tmp"]`), not as a single string.
    - No Shell Syntax: Does not use a shell. Features like pipes (`|`), \
    redirection (`>`), and variable expansion (`$VAR`) are not supported.
    - Error on Failure: The tool will raise a `RuntimeError` if the command returns a non-zero exit code.

    Args:
        cmd: The command to execute, provided as a list of strings.
        timeout: Maximum time in seconds to allow for command execution.
        cwd: The working directory in which to execute the command.
        env: Optional environment variables to set for the command.
    """
    # Built outside the `try` so that every handler below can safely reference it;
    # otherwise a failure here would surface as a NameError inside the handler.
    command_str = " ".join(cmd)
    logger.debug(f"Executing '{command_str}'")

    try:
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            env=env,
            cwd=cwd,
        )
        stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout)
    except asyncio.TimeoutError as e:
        # Only `wait_for` can time out, so `proc` is always bound here.
        logger.warning(f"Command '{command_str}' timed out after {timeout} seconds.")
        with contextlib.suppress(OSError):
            proc.kill()
        # Reap the killed child so it does not linger as a zombie and its transport is closed.
        await proc.wait()
        raise TimeoutError(f"Command timed out after {timeout} seconds") from e
    except Exception as e:
        logger.error(f"Error executing '{command_str}': {e}")
        raise

    # Commands may emit arbitrary bytes; drop undecodable ones instead of failing the tool call.
    output = stdout.decode(errors="ignore") + stderr.decode(errors="ignore")

    if proc.returncode != 0:
        logger.error(f"Command '{command_str}' failed with return code {proc.returncode}: {output}")
        raise RuntimeError(f"Command failed ({proc.returncode}): {output}")

    logger.debug(f"Command '{command_str}':\n{output}")
    return output
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@tool(catch=True)
async def python(code: str, *, timeout: int = 120) -> str:
    """
    Execute Python code.

    This tool is ideal for tasks that require custom logic like loops and conditionals, \
    or for parsing and transforming the output from other tools. Use it to implement a \
    sequence of actions, perform file I/O, or create functionality not covered by other \
    available tools.

    ## Best Practices
    - Capture Output: Your script *must* print results to standard output (`print(...)`) to be captured.
    - Self-Contained: Import all required standard libraries (e.g., `os`, `json`) within the script.
    - Handle Errors: Write robust code. Unhandled exceptions in your script will cause the tool to fail.
    - String-Based I/O: Ensure all printed output can be represented as a string. Use formats like JSON (`json.dumps`) for complex data.

    Args:
        code: The Python code to execute as a string.
        timeout: Maximum time in seconds to allow for code execution.
    """
    try:
        logger.debug(f"Executing python:\n{code}")
        # "-" makes the interpreter read the program from stdin, so the code is never
        # written to disk or passed on the command line.
        proc = await asyncio.create_subprocess_exec(
            sys.executable,
            "-",
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, stderr = await asyncio.wait_for(
            proc.communicate(input=code.encode("utf-8")), timeout=timeout
        )
        output = stdout.decode(errors="ignore") + stderr.decode(errors="ignore")
    except asyncio.TimeoutError as e:
        # Only `wait_for` can time out, so `proc` is always bound here.
        with contextlib.suppress(ProcessLookupError):
            proc.kill()
        # Reap the killed interpreter so it does not linger as a zombie and its transport is closed.
        await proc.wait()
        raise TimeoutError(f"Execution timed out after {timeout} seconds") from e
    except Exception as e:
        logger.error(f"Error executing code in Python: {e}")
        raise

    if proc.returncode != 0:
        logger.error(f"Execution failed with return code {proc.returncode}:\n{output}")
        raise RuntimeError(f"Execution failed ({proc.returncode}):\n{output}")

    logger.debug(f"Execution successful. Output:\n{output}")
    return output
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import typing as t
|
|
2
|
+
|
|
3
|
+
from pydantic import PrivateAttr
|
|
4
|
+
|
|
5
|
+
from dreadnode.agent.tools import Toolset, tool_method
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Memory(Toolset):
    """
    Provides a stateful, in-memory key-value store for the toolset's lifetime.

    This toolset allows the agent to save, retrieve, and manage data, enabling it to
    remember information across multiple steps and tool calls.
    """

    # Backing store for saved values; a private attribute, so each instance gets its own dict.
    _memory: dict[str, str] = PrivateAttr(default_factory=dict)

    @tool_method
    def save_memory(
        self,
        key: t.Annotated[str, "The unique key to store the value under."],
        value: t.Annotated[str, "The string value to store in memory."],
    ) -> str:
        """Saves a value to memory with the specified key, overwriting any existing value."""
        store = self._memory
        store[key] = value
        return f"Value saved to memory key: '{key}'"

    @tool_method(catch=True)
    def retrieve_memory(self, key: t.Annotated[str, "The key of the value to retrieve."]) -> str:
        """Retrieves a value from memory using the specified key."""
        # A missing key raises KeyError on purpose; `catch=True` is declared on the tool.
        stored = self._memory[key]
        return stored

    @tool_method
    def list_memory_keys(self) -> list[str]:
        """Lists all keys currently stored in memory."""
        return [*self._memory]

    @tool_method(catch=True)
    def clear_memory(
        self,
        key: t.Annotated[
            str | None, "The specific key to clear. If not provided, all memory is cleared."
        ] = None,
    ) -> str:
        """
        Clears a specific key from memory, or clears all memory if no key is provided.
        """
        if key is not None:
            # Targeted removal: report a missing key instead of raising.
            try:
                del self._memory[key]
            except KeyError:
                return f"Key '{key}' not found in memory. Nothing to clear."
            return f"Cleared memory for key: '{key}'"

        # No key given: wipe the whole store.
        self._memory.clear()
        return "All memory has been cleared."
|
|
@@ -22,7 +22,7 @@ class TodoItem(BaseModel):
|
|
|
22
22
|
)
|
|
23
23
|
|
|
24
24
|
|
|
25
|
-
@tool
|
|
25
|
+
@tool(catch=True)
|
|
26
26
|
def update_todo(todos: t.Annotated[list[TodoItem], "The full, updated list of todo items."]) -> str:
|
|
27
27
|
"""
|
|
28
28
|
Use this tool to create and manage a structured task list for your current session.
|
|
@@ -30,7 +30,6 @@ def update_todo(todos: t.Annotated[list[TodoItem], "The full, updated list of to
|
|
|
30
30
|
It also helps the user understand the progress of the task and overall progress of their requests.
|
|
31
31
|
|
|
32
32
|
## When to Use This Tool
|
|
33
|
-
Use this tool proactively in these scenarios:
|
|
34
33
|
|
|
35
34
|
1. Complex multi-step tasks - When a task requires 3 or more distinct steps or actions
|
|
36
35
|
2. Non-trivial and complex tasks - Tasks that require careful planning or multiple operations
|
|
@@ -42,7 +41,6 @@ def update_todo(todos: t.Annotated[list[TodoItem], "The full, updated list of to
|
|
|
42
41
|
|
|
43
42
|
## When NOT to Use This Tool
|
|
44
43
|
|
|
45
|
-
Skip using this tool when:
|
|
46
44
|
1. There is only a single, straightforward task
|
|
47
45
|
2. The task is trivial and tracking it provides no organizational benefit
|
|
48
46
|
3. The task can be completed in less than 3 trivial steps
|
|
@@ -111,3 +109,28 @@ def update_todo(todos: t.Annotated[list[TodoItem], "The full, updated list of to
|
|
|
111
109
|
f"{status_counts['in_progress']} in progress, "
|
|
112
110
|
f"{status_counts['pending']} pending."
|
|
113
111
|
)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
@tool
def think(thought: str) -> None:
    """
    Records a thought, reflection, or plan to document your reasoning process.

    This tool acts as your internal monologue, allowing you to articulate your strategy. Use it to:
    - Break down a complex problem into smaller steps.
    - Formulate a multi-step plan before you act.
    - Interpret the results of another tool's output.
    - Document a change in strategy (self-correction).

    A clear chain of thought is essential for explaining your actions.

    ## Best Practices
    - Do Not Substitute for Action: After thinking, you must call the appropriate \
    tool to execute your plan. This tool performs no action on its own.
    - Do Not Repeat Information: Never use this to repeat the output of other tools. \
    Use it to state your *conclusion* or *next step* based on that output.

    Args:
        thought: A clear, concise statement of your thought process or plan.
    """
    # NOTE(review): the docstring above is presumably surfaced to the model as
    # the tool description by `@tool` - confirm before rewording it further.
    # The only effect of this tool is the log record below; nothing is returned.
    logger.info(f"Agent thought: {thought}")
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from loguru import logger
|
|
2
|
+
|
|
3
|
+
from dreadnode.agent.tools.base import tool
|
|
4
|
+
from dreadnode.data_types import Markdown
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@tool(catch=True)
async def highlight_for_review(title: str, interest_level: str, justification: str) -> str:
    """
    Flag a finding for human review. Use this to surface leads that warrant further investigation.

    This tool is essential for escalating findings that appear anomalous, valuable, or potentially
    vulnerable. It creates a "lead" for a human operator to pick up.

    Args:
        title: A brief, descriptive summary of the finding.
        interest_level: The priority of the finding. Must be one of:
            - "high": Urgent. Potential for immediate impact or exploitation. (exposed credentials, pre-authentication vulnerability).
            - "medium": Noteworthy. Suggests a potential weakness or area for deeper investigation. (debug endpoint, verbose error messages, PII exposure).
            - "low": Informational. Provides useful context but is not an immediate risk. (software version disclosure, interesting file path).
        justification: A technical, markdown-formatted explanation. Detail *why* the finding is interesting, what its potential impact is, and suggest next steps for a human analyst.
    """
    # Imported at call time rather than at module import.
    # NOTE(review): presumably to avoid a circular import with the top-level
    # `dreadnode` package - confirm.
    from dreadnode import log_metric, log_output, tag

    # Normalise case and surrounding whitespace before validating.
    level = interest_level.lower().strip()
    if level not in ["high", "medium", "low"]:
        # Unrecognised priorities fall back to "medium" instead of failing the call.
        level = "medium"

    logger.success(f"Area of Interest - '{title}' [{level}]:\n{justification}\n---")

    # Record the finding on the active run: a tag for the priority, the
    # markdown write-up as an output, and a counter metric.
    tag(f"interest/{level}")
    write_up = Markdown(f"# {title} ({level})\n\n{justification}")
    log_output("markdown", write_up)
    log_metric("count", 1, mode="count")

    return "Highlighted."
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from loguru import logger
|
|
2
|
+
|
|
3
|
+
from dreadnode.agent.reactions import Fail, Finish
|
|
4
|
+
from dreadnode.agent.tools.base import tool
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@tool
async def finish_task(success: bool, summary: str) -> None:  # noqa: ARG001, FBT001
    """
    Concludes the task by reporting a final status and a comprehensive summary.

    This is the **final tool** to call when your planned sequence of actions is complete, \
    regardless of whether the outcome was successful. Use it when you have no more \
    steps to take and are ready to present a final report.

    ## Best Practices
    - Honest Status: The `success` flag must accurately reflect the final outcome. \
    If any part of the task failed or objectives were not met, it must be `False`.
    - Comprehensive Summary: The `summary` is your final report. It must be a complete, \
    markdown-formatted document detailing all actions taken, tools used, and the results.

    Args:
        success: True if the task's objectives were fully met, False otherwise.
        summary: A complete markdown-formatted report of all actions and outcomes.
    """
    # Imported at call time rather than at module import.
    from dreadnode import log_metric

    # `summary` is accepted as part of the tool signature but is not read here
    # (hence the ARG001 suppression on the signature).
    message = f"Agent finished the task (success={success})"
    if success:
        logger.success(message)
    else:
        logger.warning(message)

    log_metric("task_success", success)

    # This tool never returns normally: it always ends by raising a reaction.
    if success:
        raise Finish
    raise Fail("Agent marked the task as failed.")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@tool
async def give_up_on_task(reason: str) -> None:
    """
    Aborts the task when you are irrecoverably stuck and cannot make progress.

    This tool is a last resort and should only be used when you have exhausted all \
    possible strategies and alternative approaches. It signals that you were unable \
    to complete your assigned process.

    ## Best Practices
    - Do Not Use for a Failed Outcome: If the `finish_task` tool is available, use it to report failures. \
    This tool is strictly for when you cannot *finish* your work.
    - Provide a Clear Justification: The `reason` must clearly explain why you are stuck. \
    Detail the final obstacle you could not overcome and the approaches you already tried.

    Args:
        reason: A concise explanation of why you are unable to continue the task.
    """
    # Imported at call time rather than at module import.
    from dreadnode import log_metric

    logger.warning(f"Agent gave up on the task: {reason}")
    log_metric("task_give_up", 1)

    # This tool never returns normally: it always ends by raising `Fail`.
    raise Fail("Agent gave up on the task.")
|
|
@@ -139,8 +139,6 @@ async def run( # noqa: PLR0912, PLR0915
|
|
|
139
139
|
agent_cli.__annotations__["config"] = config_annotation
|
|
140
140
|
|
|
141
141
|
help_text = f"Run the '{agent_name}' agent."
|
|
142
|
-
if agent_blueprint.__doc__:
|
|
143
|
-
help_text += "\n\n" + agent_blueprint.__doc__
|
|
144
142
|
if agent_blueprint.description:
|
|
145
143
|
help_text += "\n\n" + agent_blueprint.description
|
|
146
144
|
|
|
@@ -128,8 +128,6 @@ async def run( # noqa: PLR0912, PLR0915
|
|
|
128
128
|
attack_cli.__annotations__["config"] = config_annotation
|
|
129
129
|
|
|
130
130
|
help_text = f"Run the '{attack_name}' attack."
|
|
131
|
-
if attack_blueprint.__doc__:
|
|
132
|
-
help_text += "\n\n" + attack_blueprint.__doc__
|
|
133
131
|
if attack_blueprint.description:
|
|
134
132
|
help_text += "\n\n" + attack_blueprint.description
|
|
135
133
|
|
|
@@ -129,8 +129,6 @@ async def run( # noqa: PLR0912, PLR0915
|
|
|
129
129
|
eval_cli.__annotations__["config"] = config_annotation
|
|
130
130
|
|
|
131
131
|
help_text = f"Run the '{eval_name}' eval."
|
|
132
|
-
if eval_blueprint.__doc__:
|
|
133
|
-
help_text += "\n\n" + eval_blueprint.__doc__
|
|
134
132
|
if eval_blueprint.description:
|
|
135
133
|
help_text += "\n\n" + eval_blueprint.description
|
|
136
134
|
|
|
@@ -130,8 +130,6 @@ async def run( # noqa: PLR0912, PLR0915
|
|
|
130
130
|
study_cli.__annotations__["config"] = config_annotation
|
|
131
131
|
|
|
132
132
|
help_text = f"Run the '{study_name}' study."
|
|
133
|
-
if study_blueprint.__doc__:
|
|
134
|
-
help_text += "\n\n" + study_blueprint.__doc__
|
|
135
133
|
if study_blueprint.description:
|
|
136
134
|
help_text += "\n\n" + study_blueprint.description
|
|
137
135
|
|
|
@@ -4,6 +4,7 @@ import itertools
|
|
|
4
4
|
import typing as t
|
|
5
5
|
from inspect import isawaitable
|
|
6
6
|
from pathlib import Path
|
|
7
|
+
from textwrap import dedent
|
|
7
8
|
|
|
8
9
|
import cyclopts
|
|
9
10
|
import rich
|
|
@@ -131,7 +132,7 @@ async def run( # noqa: PLR0912, PLR0915
|
|
|
131
132
|
|
|
132
133
|
help_text = f"Run the '{task_name}' task."
|
|
133
134
|
if task_blueprint.__doc__:
|
|
134
|
-
help_text += "\n\n" + task_blueprint.__doc__
|
|
135
|
+
help_text += "\n\n" + dedent(task_blueprint.__doc__)
|
|
135
136
|
|
|
136
137
|
task_app = cyclopts.App(
|
|
137
138
|
name=task_name,
|