agentharness-sdk 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentharness_sdk-0.1.0/.gitignore +38 -0
- agentharness_sdk-0.1.0/PKG-INFO +53 -0
- agentharness_sdk-0.1.0/README.md +35 -0
- agentharness_sdk-0.1.0/pyproject.toml +28 -0
- agentharness_sdk-0.1.0/src/agentharness/__init__.py +61 -0
- agentharness_sdk-0.1.0/src/agentharness/agent.py +78 -0
- agentharness_sdk-0.1.0/src/agentharness/py.typed +0 -0
- agentharness_sdk-0.1.0/src/agentharness/testing.py +19 -0
- agentharness_sdk-0.1.0/src/agentharness/tools.py +105 -0
- agentharness_sdk-0.1.0/tests/test_agent.py +71 -0
- agentharness_sdk-0.1.0/tests/test_tools.py +97 -0
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
.eggs/
|
|
6
|
+
build/
|
|
7
|
+
dist/
|
|
8
|
+
*.so
|
|
9
|
+
|
|
10
|
+
# Environments
|
|
11
|
+
.venv/
|
|
12
|
+
venv/
|
|
13
|
+
.env
|
|
14
|
+
|
|
15
|
+
# uv
|
|
16
|
+
.uv/
|
|
17
|
+
|
|
18
|
+
# Docs build output
|
|
19
|
+
site/
|
|
20
|
+
|
|
21
|
+
# Tooling caches
|
|
22
|
+
.pytest_cache/
|
|
23
|
+
.mypy_cache/
|
|
24
|
+
.ruff_cache/
|
|
25
|
+
.hypothesis/
|
|
26
|
+
htmlcov/
|
|
27
|
+
.coverage
|
|
28
|
+
.coverage.*
|
|
29
|
+
|
|
30
|
+
# Codex run artifacts (local only)
|
|
31
|
+
codex-core.log
|
|
32
|
+
codex-core-result.txt
|
|
33
|
+
|
|
34
|
+
# Editors / OS
|
|
35
|
+
.idea/
|
|
36
|
+
.vscode/
|
|
37
|
+
.DS_Store
|
|
38
|
+
Thumbs.db
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: agentharness-sdk
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Ergonomic agent harness on top of agentharness-core: Agent, @tool, and testable, replayable runs. Imports as `agentharness`.
|
|
5
|
+
Project-URL: Homepage, https://github.com/aafre/agentharness
|
|
6
|
+
Project-URL: Source, https://github.com/aafre/agentharness
|
|
7
|
+
Author: AgentHarness contributors
|
|
8
|
+
License-Expression: Apache-2.0
|
|
9
|
+
Keywords: agent,ai,deterministic,llm,replay,state-machine,tools
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Typing :: Typed
|
|
15
|
+
Requires-Python: >=3.12
|
|
16
|
+
Requires-Dist: agentharness-core>=0.1.0
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
|
|
19
|
+
# agentharness
|
|
20
|
+
|
|
21
|
+
The ergonomic, batteries-included layer over [`agentharness-core`](../agentharness-core).
|
|
22
|
+
|
|
23
|
+
```python
|
|
24
|
+
from agentharness import Agent, tool
|
|
25
|
+
|
|
26
|
+
@tool
|
|
27
|
+
def add(a: int, b: int) -> str:
|
|
28
|
+
"""Add two numbers."""
|
|
29
|
+
return str(a + b)
|
|
30
|
+
|
|
31
|
+
agent = Agent(model=my_model, tools=[add], system="You are helpful.")
|
|
32
|
+
run = agent.run("What is 2 + 3?")
|
|
33
|
+
print(run.result) # "2 + 3 = 5."
|
|
34
|
+
run.trace.save("run.jsonl") # the whole run is recorded and replayable
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
- **`@tool`** turns a typed Python function into a tool — the JSON schema is generated from
|
|
38
|
+
type hints, no hand-written schemas.
|
|
39
|
+
- **`Agent`** wraps a model + tools + optional system prompt. `run()` executes to completion;
|
|
40
|
+
`stream()` yields live events; `arun()`/`astream()` are the async variants.
|
|
41
|
+
- Built entirely on the deterministic core, so every agent you build is inspectable and
|
|
42
|
+
replayable for free (`from agentharness_core import replay`).
|
|
43
|
+
|
|
44
|
+
Test agents like ordinary code, with no network:
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
from agentharness.testing import FakeModel, assert_used_tool, assert_answer
|
|
48
|
+
|
|
49
|
+
agent = Agent(model=FakeModel([...]), tools=[add])
|
|
50
|
+
run = agent.run("What is 2 + 3?")
|
|
51
|
+
assert_used_tool(run, "add")
|
|
52
|
+
assert_answer(run, "2 + 3 = 5.")
|
|
53
|
+
```
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# agentharness
|
|
2
|
+
|
|
3
|
+
The ergonomic, batteries-included layer over [`agentharness-core`](../agentharness-core).
|
|
4
|
+
|
|
5
|
+
```python
|
|
6
|
+
from agentharness import Agent, tool
|
|
7
|
+
|
|
8
|
+
@tool
|
|
9
|
+
def add(a: int, b: int) -> str:
|
|
10
|
+
"""Add two numbers."""
|
|
11
|
+
return str(a + b)
|
|
12
|
+
|
|
13
|
+
agent = Agent(model=my_model, tools=[add], system="You are helpful.")
|
|
14
|
+
run = agent.run("What is 2 + 3?")
|
|
15
|
+
print(run.result) # "2 + 3 = 5."
|
|
16
|
+
run.trace.save("run.jsonl") # the whole run is recorded and replayable
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
- **`@tool`** turns a typed Python function into a tool — the JSON schema is generated from
|
|
20
|
+
type hints, no hand-written schemas.
|
|
21
|
+
- **`Agent`** wraps a model + tools + optional system prompt. `run()` executes to completion;
|
|
22
|
+
`stream()` yields live events; `arun()`/`astream()` are the async variants.
|
|
23
|
+
- Built entirely on the deterministic core, so every agent you build is inspectable and
|
|
24
|
+
replayable for free (`from agentharness_core import replay`).
|
|
25
|
+
|
|
26
|
+
Test agents like ordinary code, with no network:
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
from agentharness.testing import FakeModel, assert_used_tool, assert_answer
|
|
30
|
+
|
|
31
|
+
agent = Agent(model=FakeModel([...]), tools=[add])
|
|
32
|
+
run = agent.run("What is 2 + 3?")
|
|
33
|
+
assert_used_tool(run, "add")
|
|
34
|
+
assert_answer(run, "2 + 3 = 5.")
|
|
35
|
+
```
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "agentharness-sdk"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Ergonomic agent harness on top of agentharness-core: Agent, @tool, and testable, replayable runs. Imports as `agentharness`."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.12"
|
|
11
|
+
license = "Apache-2.0"
|
|
12
|
+
authors = [{ name = "AgentHarness contributors" }]
|
|
13
|
+
keywords = ["agent", "llm", "ai", "tools", "state-machine", "deterministic", "replay"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: Apache Software License",
|
|
18
|
+
"Programming Language :: Python :: 3.12",
|
|
19
|
+
"Typing :: Typed",
|
|
20
|
+
]
|
|
21
|
+
dependencies = ["agentharness-core>=0.1.0"]
|
|
22
|
+
|
|
23
|
+
[project.urls]
|
|
24
|
+
Homepage = "https://github.com/aafre/agentharness"
|
|
25
|
+
Source = "https://github.com/aafre/agentharness"
|
|
26
|
+
|
|
27
|
+
[tool.hatch.build.targets.wheel]
|
|
28
|
+
packages = ["src/agentharness"]
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""AgentHarness: the ergonomic, batteries-included layer over ``agentharness-core``.
|
|
2
|
+
|
|
3
|
+
from agentharness import Agent, tool
|
|
4
|
+
|
|
5
|
+
@tool
|
|
6
|
+
def add(a: int, b: int) -> str:
|
|
7
|
+
'''Add two numbers.'''
|
|
8
|
+
return str(a + b)
|
|
9
|
+
|
|
10
|
+
agent = Agent(model=my_model, tools=[add])
|
|
11
|
+
print(agent.run("What is 2 + 3?").result)
|
|
12
|
+
|
|
13
|
+
Everything here is built on the deterministic, replayable core, so any agent you build is
|
|
14
|
+
inspectable and replayable for free.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
# Re-export the core essentials so most users need only ``import agentharness``.
|
|
20
|
+
from agentharness_core import (
|
|
21
|
+
AsyncRun,
|
|
22
|
+
DivergenceError,
|
|
23
|
+
Message,
|
|
24
|
+
Model,
|
|
25
|
+
Run,
|
|
26
|
+
State,
|
|
27
|
+
Tool,
|
|
28
|
+
ToolCall,
|
|
29
|
+
ToolResult,
|
|
30
|
+
Trace,
|
|
31
|
+
arun,
|
|
32
|
+
replay,
|
|
33
|
+
run,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
from .agent import Agent
|
|
37
|
+
from .tools import FunctionTool, tool
|
|
38
|
+
|
|
39
|
+
__version__ = "0.1.0"
|
|
40
|
+
|
|
41
|
+
__all__ = [ # noqa: RUF022 - grouped by concept
|
|
42
|
+
# ergonomic layer
|
|
43
|
+
"Agent",
|
|
44
|
+
"tool",
|
|
45
|
+
"FunctionTool",
|
|
46
|
+
# core re-exports
|
|
47
|
+
"run",
|
|
48
|
+
"arun",
|
|
49
|
+
"replay",
|
|
50
|
+
"Run",
|
|
51
|
+
"AsyncRun",
|
|
52
|
+
"State",
|
|
53
|
+
"Message",
|
|
54
|
+
"ToolCall",
|
|
55
|
+
"ToolResult",
|
|
56
|
+
"Trace",
|
|
57
|
+
"Model",
|
|
58
|
+
"Tool",
|
|
59
|
+
"DivergenceError",
|
|
60
|
+
"__version__",
|
|
61
|
+
]
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""The ``Agent`` ergonomic entry point over the core state machine."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Sequence
|
|
6
|
+
|
|
7
|
+
from agentharness_core import (
|
|
8
|
+
AsyncRun,
|
|
9
|
+
Message,
|
|
10
|
+
Model,
|
|
11
|
+
Run,
|
|
12
|
+
State,
|
|
13
|
+
Tool,
|
|
14
|
+
arun,
|
|
15
|
+
run,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
DEFAULT_MAX_STEPS = 64  # step budget handed to the core loop when the caller gives none


class Agent:
    """Bundle a model with its tools, an optional system prompt and a step budget.

    ``run`` is the eager entry point: it drives the run to completion and hands
    back the finished ``Run``. ``stream`` is the lazy one: it returns the ``Run``
    untouched so the caller can iterate its events. ``arun`` / ``astream`` are
    the asynchronous counterparts.
    """

    def __init__(
        self,
        *,
        model: Model,
        tools: Sequence[Tool] = (),
        system: str | None = None,
        max_steps: int = DEFAULT_MAX_STEPS,
    ) -> None:
        """Store the configuration; ``tools`` is copied into a fresh list."""
        self.model = model
        self.tools = [*tools]
        self.system = system
        self.max_steps = max_steps

    def _initial_state(self, prompt: str) -> State[None]:
        """Build the starting state: optional system message, then the user prompt."""
        preamble: list[Message] = (
            [] if self.system is None else [Message("system", self.system)]
        )
        return State.start([*preamble, Message("user", prompt)])

    def stream(self, prompt: str) -> Run:
        """Create a ``Run`` for ``prompt`` without driving it; iterate it for events."""
        opening = self._initial_state(prompt)
        return run(
            opening,
            model=self.model,
            tools=self.tools,
            max_steps=self.max_steps,
        )

    def run(self, prompt: str) -> Run:
        """Drive a run for ``prompt`` to completion and return it."""
        finished = self.stream(prompt)
        finished.run_to_completion()
        return finished

    def astream(self, prompt: str) -> AsyncRun:
        """Create an ``AsyncRun`` for ``prompt``; the async sibling of :meth:`stream`."""
        opening = self._initial_state(prompt)
        return arun(
            opening,
            model=self.model,
            tools=self.tools,
            max_steps=self.max_steps,
        )

    async def arun(self, prompt: str) -> AsyncRun:
        """Exhaust the async run for ``prompt`` and return it; sibling of :meth:`run`."""
        draining = self.astream(prompt)
        async for _event in draining:
            # Events are discarded; iterating is what drives the run forward.
            pass
        return draining
|
|
File without changes
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Test helpers for agents: deterministic models and readable assertions."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from agentharness_core import Run
|
|
6
|
+
from agentharness_core.testing import FakeModel, ScriptedModel
|
|
7
|
+
|
|
8
|
+
__all__ = ["FakeModel", "ScriptedModel", "assert_answer", "assert_used_tool"]
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def assert_used_tool(run: Run, name: str) -> None:
    """Fail unless a ``"tool"``-role message named ``name`` is present in ``run``.

    The names are collected from ``run.state.messages``, keeping only messages
    whose ``role`` is ``"tool"``.

    Raises:
        AssertionError: if ``name`` is not among the collected tool names. The
            error is raised explicitly instead of through an ``assert``
            statement so the check still runs under ``python -O``.
    """
    used = [m.name for m in run.state.messages if m.role == "tool"]
    if name not in used:
        raise AssertionError(f"expected tool {name!r} to be used; tools used: {used}")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def assert_answer(run: Run, expected: str) -> None:
    """Fail unless ``run.result`` equals ``expected``.

    Raises:
        AssertionError: if the comparison ``run.result == expected`` is false.
            The error is raised explicitly instead of through an ``assert``
            statement so the check still runs under ``python -O``.
    """
    actual = run.result
    if not (actual == expected):
        raise AssertionError(f"expected final answer {expected!r}, got {actual!r}")
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""The ``@tool`` decorator: a typed Python function becomes a core-compatible Tool.
|
|
2
|
+
|
|
3
|
+
The JSON schema is derived from type hints and the ``required`` set from which parameters
|
|
4
|
+
lack defaults, so a tool is declared once, in plain Python, with no hand-written schema.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import inspect
|
|
10
|
+
import types as _pytypes
|
|
11
|
+
from collections.abc import Callable
|
|
12
|
+
from typing import Any, Union, get_args, get_origin, get_type_hints, overload
|
|
13
|
+
|
|
14
|
+
from agentharness_core import ToolResult
|
|
15
|
+
|
|
16
|
+
_JSON_SCALARS: dict[type, str] = {
|
|
17
|
+
int: "integer",
|
|
18
|
+
float: "number",
|
|
19
|
+
str: "string",
|
|
20
|
+
bool: "boolean",
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _type_to_schema(tp: Any) -> dict[str, Any]:
|
|
25
|
+
if tp in _JSON_SCALARS:
|
|
26
|
+
return {"type": _JSON_SCALARS[tp]}
|
|
27
|
+
origin = get_origin(tp)
|
|
28
|
+
if origin is list:
|
|
29
|
+
args = get_args(tp)
|
|
30
|
+
return {"type": "array", "items": _type_to_schema(args[0]) if args else {}}
|
|
31
|
+
if origin is dict:
|
|
32
|
+
return {"type": "object"}
|
|
33
|
+
if origin is Union or origin is _pytypes.UnionType:
|
|
34
|
+
non_none = [a for a in get_args(tp) if a is not type(None)]
|
|
35
|
+
if len(non_none) == 1:
|
|
36
|
+
return _type_to_schema(non_none[0])
|
|
37
|
+
return {} # unknown / unconstrained
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _build_schema(func: Callable[..., Any]) -> dict[str, Any]:
    """Describe ``func``'s parameters as a JSON-schema ``object``.

    A parameter named ``self`` and any ``*args`` / ``**kwargs`` parameters are
    left out. Each remaining parameter becomes a property whose schema comes
    from its resolved type hint (``str`` when it has none), and is listed under
    ``required`` when it has no default value.
    """
    parameters = inspect.signature(func).parameters
    resolved_hints = get_type_hints(func)
    variadic_kinds = (
        inspect.Parameter.VAR_POSITIONAL,
        inspect.Parameter.VAR_KEYWORD,
    )

    # Keep declaration order: both ``properties`` and ``required`` follow it.
    exposed = {
        pname: spec
        for pname, spec in parameters.items()
        if pname != "self" and spec.kind not in variadic_kinds
    }
    properties: dict[str, Any] = {
        pname: _type_to_schema(resolved_hints.get(pname, str)) for pname in exposed
    }
    required: list[str] = [
        pname
        for pname, spec in exposed.items()
        if spec.default is inspect.Parameter.empty
    ]
    return {"type": "object", "properties": properties, "required": required}
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class FunctionTool:
    """Adapter around a callable exposing ``name``, ``description``, ``schema`` and ``call``.

    The instance is itself callable and forwards to the wrapped function, so a
    decorated function can still be invoked directly.
    """

    def __init__(
        self,
        func: Callable[..., Any],
        *,
        name: str | None = None,
        description: str | None = None,
    ) -> None:
        """Wrap ``func``; a falsy ``name`` / ``description`` falls back to the function's own."""
        self._func = func
        self.name = name if name else func.__name__
        if not description:
            # No explicit description: use the cleaned docstring, or "" if absent.
            description = inspect.getdoc(func) or ""
        self.description = description
        self.schema = _build_schema(func)

    def call(self, arguments: dict[str, Any]) -> ToolResult:
        """Invoke the function with ``arguments`` as keywords and return a ``ToolResult``.

        A ``ToolResult`` returned by the function is passed through unchanged;
        any other value becomes the result's ``content`` (non-strings via ``str``).
        """
        outcome = self._func(**arguments)
        if isinstance(outcome, ToolResult):
            return outcome
        text = outcome if isinstance(outcome, str) else str(outcome)
        return ToolResult(content=text)

    def __call__(self, *args: Any, **kwargs: Any) -> Any:
        """Forward a direct call to the wrapped function and return its raw result."""
        return self._func(*args, **kwargs)

    def __repr__(self) -> str:
        """Return ``FunctionTool(name=...)`` using the tool's name."""
        return f"FunctionTool(name={self.name!r})"
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@overload
def tool(func: Callable[..., Any], /) -> FunctionTool: ...
@overload
def tool(
    *, name: str | None = ..., description: str | None = ...
) -> Callable[[Callable[..., Any]], FunctionTool]: ...


def tool(
    func: Callable[..., Any] | None = None,
    /,
    *,
    name: str | None = None,
    description: str | None = None,
) -> FunctionTool | Callable[[Callable[..., Any]], FunctionTool]:
    """Wrap a function in a ``FunctionTool``.

    Works both as a bare decorator (``@tool``) and as a decorator factory
    (``@tool(name=..., description=...)``). With a function it returns the
    wrapped tool; without one it returns a decorator that applies the given
    ``name`` / ``description`` overrides.
    """

    def decorate(target: Callable[..., Any]) -> FunctionTool:
        return FunctionTool(target, name=name, description=description)

    if func is None:
        # Called with keyword options only: hand back the decorator.
        return decorate
    return decorate(func)
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""Contract: Agent is the one-import ergonomic entry point over the core loop."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from agentharness import Agent, tool
|
|
6
|
+
from agentharness.testing import FakeModel, assert_answer, assert_used_tool
|
|
7
|
+
from agentharness_core import Message, RunFinished, ToolCall
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@tool
def add(a: int, b: int) -> str:
    """Add two numbers."""
    # The result is returned as text, matching the declared ``str`` return type.
    total = a + b
    return str(total)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _add_then_answer() -> FakeModel:
    """Build a ``FakeModel`` from two assistant messages.

    The first carries a single ``add(a=2, b=3)`` tool call and no content; the
    second carries the text answer ``"2 + 3 = 5."``.
    """
    add_call = ToolCall(id="c1", name="add", arguments={"a": 2, "b": 3})
    tool_turn = Message(role="assistant", content=None, tool_calls=(add_call,))
    answer_turn = Message(role="assistant", content="2 + 3 = 5.")
    return FakeModel([tool_turn, answer_turn])
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def test_agent_runs_to_completion_and_returns_result() -> None:
    """``Agent.run`` returns a run whose result is the final answer and whose status is done."""
    finished = Agent(model=_add_then_answer(), tools=[add]).run("What is 2 + 3?")

    assert finished.result == "2 + 3 = 5."
    assert finished.state.status == "done"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def test_agent_uses_tools() -> None:
    """A run with the ``add`` tool passes both ``assert_used_tool`` and ``assert_answer``."""
    outcome = Agent(model=_add_then_answer(), tools=[add]).run("What is 2 + 3?")

    assert_used_tool(outcome, "add")
    assert_answer(outcome, "2 + 3 = 5.")
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def test_agent_includes_system_prompt() -> None:
    """The ``system`` string given to ``Agent`` is the first message of the run's state."""
    model = FakeModel([Message(role="assistant", content="hi")])
    finished = Agent(model=model, system="You are terse.").run("hello")

    first_message = finished.state.messages[0]
    assert first_message.role == "system"
    assert first_message.content == "You are terse."
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def test_agent_stream_yields_live_events() -> None:
    """Iterating ``Agent.stream`` yields events, the last of which is ``RunFinished``."""
    live = Agent(model=_add_then_answer(), tools=[add]).stream("What is 2 + 3?")
    collected = [*live]

    assert isinstance(collected[-1], RunFinished)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def test_agent_run_is_replayable() -> None:
    """Replaying a finished run's trace from the same start state gives the same result."""
    from agentharness_core import State, replay

    question = "What is 2 + 3?"
    recorded = Agent(model=_add_then_answer(), tools=[add]).run(question)

    # ``replay`` receives only the start state and the trace: no model, no tools.
    fresh_start = State.start([Message("user", question)])
    replayed = replay(fresh_start, trace=recorded.trace)
    replayed.run_to_completion()

    assert replayed.result == recorded.result
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"""Contract: @tool turns a typed Python function into a core-compatible Tool."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from agentharness import FunctionTool, tool
|
|
6
|
+
from agentharness_core import Tool, ToolResult
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_tool_decorator_produces_a_core_tool() -> None:
    """``@tool`` yields a ``FunctionTool`` that passes ``isinstance(..., Tool)``.

    The tool's name and description come from the function name and docstring.
    """

    @tool
    def add(a: int, b: int) -> str:
        """Add two numbers."""
        return str(a + b)

    assert isinstance(add, FunctionTool)
    # ``Tool`` is the core protocol; this is a structural isinstance check.
    assert isinstance(add, Tool)
    assert add.name == "add"
    assert add.description == "Add two numbers."
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def test_schema_is_generated_from_type_hints() -> None:
    """Two ``int`` parameters without defaults give two required integer properties."""

    @tool
    def add(a: int, b: int) -> str:
        return str(a + b)

    expected_schema = {
        "type": "object",
        "properties": {"a": {"type": "integer"}, "b": {"type": "integer"}},
        "required": ["a", "b"],
    }
    assert add.schema == expected_schema
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def test_call_wraps_return_in_toolresult() -> None:
    """``call`` returns a non-error ``ToolResult`` whose content is the function's string."""

    @tool
    def add(a: int, b: int) -> str:
        return str(a + b)

    outcome = add.call({"a": 2, "b": 3})

    assert isinstance(outcome, ToolResult)
    assert outcome.content == "5"
    assert outcome.is_error is False
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def test_tool_remains_directly_callable() -> None:
    """Calling the decorated object positionally returns the function's own result."""

    @tool
    def add(a: int, b: int) -> str:
        return str(a + b)

    # Direct invocation goes to the wrapped function, not through ``call``.
    direct = add(2, 3)
    assert direct == "5"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def test_non_string_return_is_stringified() -> None:
    """An ``int`` returned by the function appears as its string form in ``content``."""

    @tool
    def count(n: int) -> int:
        return n * 2

    doubled = count.call({"n": 4})
    assert doubled.content == "8"
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def test_toolresult_return_passes_through() -> None:
    """A ``ToolResult`` returned by the function keeps its ``is_error`` and ``content``."""

    @tool
    def risky(x: int) -> ToolResult:
        return ToolResult(content="boom", is_error=True)

    outcome = risky.call({"x": 1})

    assert outcome.is_error is True
    assert outcome.content == "boom"
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def test_decorator_accepts_name_and_description_overrides() -> None:
    """``@tool(name=..., description=...)`` overrides the function-derived values."""

    @tool(name="sum2", description="custom")
    def add(a: int, b: int) -> str:
        return str(a + b)

    # The function is called ``add`` and has no docstring; the overrides win.
    assert add.name == "sum2"
    assert add.description == "custom"
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def test_optional_and_defaulted_params_are_not_required() -> None:
    """Only the parameter without a default is required; ``int | None`` maps to integer."""

    @tool
    def greet(name: str, greeting: str = "hello", times: int | None = None) -> str:
        return f"{greeting} {name}"

    schema = greet.schema
    properties = schema["properties"]

    assert schema["required"] == ["name"]
    assert properties["greeting"] == {"type": "string"}
    assert properties["times"] == {"type": "integer"}
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def test_list_param_schema() -> None:
    """A ``list[int]`` parameter becomes an array schema with integer items."""

    @tool
    def total(values: list[int]) -> str:
        return str(sum(values))

    values_schema = total.schema["properties"]["values"]
    assert values_schema == {
        "type": "array",
        "items": {"type": "integer"},
    }
|