agentharness-sdk 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,38 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ .eggs/
6
+ build/
7
+ dist/
8
+ *.so
9
+
10
+ # Environments
11
+ .venv/
12
+ venv/
13
+ .env
14
+
15
+ # uv
16
+ .uv/
17
+
18
+ # Docs build output
19
+ site/
20
+
21
+ # Tooling caches
22
+ .pytest_cache/
23
+ .mypy_cache/
24
+ .ruff_cache/
25
+ .hypothesis/
26
+ htmlcov/
27
+ .coverage
28
+ .coverage.*
29
+
30
+ # Codex run artifacts (local only)
31
+ codex-core.log
32
+ codex-core-result.txt
33
+
34
+ # Editors / OS
35
+ .idea/
36
+ .vscode/
37
+ .DS_Store
38
+ Thumbs.db
@@ -0,0 +1,53 @@
1
+ Metadata-Version: 2.4
2
+ Name: agentharness-sdk
3
+ Version: 0.1.0
4
+ Summary: Ergonomic agent harness on top of agentharness-core: Agent, @tool, and testable, replayable runs. Imports as `agentharness`.
5
+ Project-URL: Homepage, https://github.com/aafre/agentharness
6
+ Project-URL: Source, https://github.com/aafre/agentharness
7
+ Author: AgentHarness contributors
8
+ License-Expression: Apache-2.0
9
+ Keywords: agent,ai,deterministic,llm,replay,state-machine,tools
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: Apache Software License
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Typing :: Typed
15
+ Requires-Python: >=3.12
16
+ Requires-Dist: agentharness-core>=0.1.0
17
+ Description-Content-Type: text/markdown
18
+
19
+ # agentharness
20
+
21
+ The ergonomic, batteries-included layer over [`agentharness-core`](../agentharness-core).
22
+
23
+ ```python
24
+ from agentharness import Agent, tool
25
+
26
+ @tool
27
+ def add(a: int, b: int) -> str:
28
+ """Add two numbers."""
29
+ return str(a + b)
30
+
31
+ agent = Agent(model=my_model, tools=[add], system="You are helpful.")
32
+ run = agent.run("What is 2 + 3?")
33
+ print(run.result) # "2 + 3 = 5."
34
+ run.trace.save("run.jsonl") # the whole run is recorded and replayable
35
+ ```
36
+
37
+ - **`@tool`** turns a typed Python function into a tool — the JSON schema is generated from
38
+ type hints, no hand-written schemas.
39
+ - **`Agent`** wraps a model + tools + optional system prompt. `run()` executes to completion;
40
+ `stream()` yields live events; `arun()`/`astream()` are the async variants.
41
+ - Built entirely on the deterministic core, so every agent you build is inspectable and
42
+ replayable for free (`from agentharness_core import replay`).
43
+
44
+ Test agents like ordinary code, with no network:
45
+
46
+ ```python
47
+ from agentharness.testing import FakeModel, assert_used_tool, assert_answer
48
+
49
+ agent = Agent(model=FakeModel([...]), tools=[add])
50
+ run = agent.run("What is 2 + 3?")
51
+ assert_used_tool(run, "add")
52
+ assert_answer(run, "2 + 3 = 5.")
53
+ ```
@@ -0,0 +1,35 @@
1
+ # agentharness
2
+
3
+ The ergonomic, batteries-included layer over [`agentharness-core`](../agentharness-core).
4
+
5
+ ```python
6
+ from agentharness import Agent, tool
7
+
8
+ @tool
9
+ def add(a: int, b: int) -> str:
10
+ """Add two numbers."""
11
+ return str(a + b)
12
+
13
+ agent = Agent(model=my_model, tools=[add], system="You are helpful.")
14
+ run = agent.run("What is 2 + 3?")
15
+ print(run.result) # "2 + 3 = 5."
16
+ run.trace.save("run.jsonl") # the whole run is recorded and replayable
17
+ ```
18
+
19
+ - **`@tool`** turns a typed Python function into a tool — the JSON schema is generated from
20
+ type hints, no hand-written schemas.
21
+ - **`Agent`** wraps a model + tools + optional system prompt. `run()` executes to completion;
22
+ `stream()` yields live events; `arun()`/`astream()` are the async variants.
23
+ - Built entirely on the deterministic core, so every agent you build is inspectable and
24
+ replayable for free (`from agentharness_core import replay`).
25
+
26
+ Test agents like ordinary code, with no network:
27
+
28
+ ```python
29
+ from agentharness.testing import FakeModel, assert_used_tool, assert_answer
30
+
31
+ agent = Agent(model=FakeModel([...]), tools=[add])
32
+ run = agent.run("What is 2 + 3?")
33
+ assert_used_tool(run, "add")
34
+ assert_answer(run, "2 + 3 = 5.")
35
+ ```
@@ -0,0 +1,28 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "agentharness-sdk"
7
+ version = "0.1.0"
8
+ description = "Ergonomic agent harness on top of agentharness-core: Agent, @tool, and testable, replayable runs. Imports as `agentharness`."
9
+ readme = "README.md"
10
+ requires-python = ">=3.12"
11
+ license = "Apache-2.0"
12
+ authors = [{ name = "AgentHarness contributors" }]
13
+ keywords = ["agent", "llm", "ai", "tools", "state-machine", "deterministic", "replay"]
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Intended Audience :: Developers",
17
+ "License :: OSI Approved :: Apache Software License",
18
+ "Programming Language :: Python :: 3.12",
19
+ "Typing :: Typed",
20
+ ]
21
+ dependencies = ["agentharness-core>=0.1.0"]
22
+
23
+ [project.urls]
24
+ Homepage = "https://github.com/aafre/agentharness"
25
+ Source = "https://github.com/aafre/agentharness"
26
+
27
+ [tool.hatch.build.targets.wheel]
28
+ packages = ["src/agentharness"]
@@ -0,0 +1,61 @@
1
+ """AgentHarness: the ergonomic, batteries-included layer over ``agentharness-core``.
2
+
3
+ from agentharness import Agent, tool
4
+
5
+ @tool
6
+ def add(a: int, b: int) -> str:
7
+ '''Add two numbers.'''
8
+ return str(a + b)
9
+
10
+ agent = Agent(model=my_model, tools=[add])
11
+ print(agent.run("What is 2 + 3?").result)
12
+
13
+ Everything here is built on the deterministic, replayable core, so any agent you build is
14
+ inspectable and replayable for free.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ # Re-export the core essentials so most users need only ``import agentharness``.
20
+ from agentharness_core import (
21
+ AsyncRun,
22
+ DivergenceError,
23
+ Message,
24
+ Model,
25
+ Run,
26
+ State,
27
+ Tool,
28
+ ToolCall,
29
+ ToolResult,
30
+ Trace,
31
+ arun,
32
+ replay,
33
+ run,
34
+ )
35
+
36
+ from .agent import Agent
37
+ from .tools import FunctionTool, tool
38
+
39
+ __version__ = "0.1.0"
40
+
41
+ __all__ = [ # noqa: RUF022 - grouped by concept
42
+ # ergonomic layer
43
+ "Agent",
44
+ "tool",
45
+ "FunctionTool",
46
+ # core re-exports
47
+ "run",
48
+ "arun",
49
+ "replay",
50
+ "Run",
51
+ "AsyncRun",
52
+ "State",
53
+ "Message",
54
+ "ToolCall",
55
+ "ToolResult",
56
+ "Trace",
57
+ "Model",
58
+ "Tool",
59
+ "DivergenceError",
60
+ "__version__",
61
+ ]
@@ -0,0 +1,78 @@
1
+ """The ``Agent`` ergonomic entry point over the core state machine."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Sequence
6
+
7
+ from agentharness_core import (
8
+ AsyncRun,
9
+ Message,
10
+ Model,
11
+ Run,
12
+ State,
13
+ Tool,
14
+ arun,
15
+ run,
16
+ )
17
+
18
DEFAULT_MAX_STEPS = 64


class Agent:
    """Bundle a model, its tools and an optional system prompt behind one object.

    ``run``/``arun`` drive the underlying run to completion before returning it, while
    ``stream``/``astream`` hand back the run object un-driven so the caller can iterate
    its events as they are produced.
    """

    def __init__(
        self,
        *,
        model: Model,
        tools: Sequence[Tool] = (),
        system: str | None = None,
        max_steps: int = DEFAULT_MAX_STEPS,
    ) -> None:
        self.model = model
        # Copy into a list so the agent owns its own tool collection.
        self.tools = list(tools)
        self.system = system
        self.max_steps = max_steps

    def _initial_state(self, prompt: str) -> State[None]:
        """Build the starting state: optional system message, then the user prompt."""
        preamble: list[Message] = (
            [] if self.system is None else [Message("system", self.system)]
        )
        return State.start([*preamble, Message("user", prompt)])

    def _loop_options(self) -> dict[str, object]:
        """Keyword arguments shared by the sync and async core entry points."""
        return {
            "model": self.model,
            "tools": self.tools,
            "max_steps": self.max_steps,
        }

    def stream(self, prompt: str) -> Run:
        """Create the core ``Run`` for ``prompt`` without driving it."""
        return run(self._initial_state(prompt), **self._loop_options())

    def run(self, prompt: str) -> Run:
        """Drive a fresh run for ``prompt`` to completion and return it."""
        finished = self.stream(prompt)
        finished.run_to_completion()
        return finished

    def astream(self, prompt: str) -> AsyncRun:
        """Create the core ``AsyncRun`` for ``prompt`` without driving it."""
        return arun(self._initial_state(prompt), **self._loop_options())

    async def arun(self, prompt: str) -> AsyncRun:
        """Exhaust the async run for ``prompt`` and return it."""
        finished = self.astream(prompt)
        async for _event in finished:
            continue
        return finished
File without changes
@@ -0,0 +1,19 @@
1
+ """Test helpers for agents: deterministic models and readable assertions."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from agentharness_core import Run
6
+ from agentharness_core.testing import FakeModel, ScriptedModel
7
+
8
+ __all__ = ["FakeModel", "ScriptedModel", "assert_answer", "assert_used_tool"]
9
+
10
+
11
def assert_used_tool(run: Run, name: str) -> None:
    """Check that a tool called ``name`` shows up among the run's tool messages.

    Collects the ``name`` of every message in ``run.state.messages`` whose ``role`` is
    ``"tool"`` and fails if ``name`` is not among them.

    Raises:
        AssertionError: if no tool message carries ``name``. Raised explicitly rather
            than via an ``assert`` statement so the check still runs under ``python -O``.
    """
    used = [message.name for message in run.state.messages if message.role == "tool"]
    if name not in used:
        raise AssertionError(f"expected tool {name!r} to be used; tools used: {used}")
15
+
16
+
17
def assert_answer(run: Run, expected: str) -> None:
    """Check that ``run.result`` equals ``expected``.

    Raises:
        AssertionError: if the two differ. Raised explicitly rather than via an
            ``assert`` statement so the check still runs under ``python -O``.
    """
    if not (run.result == expected):
        raise AssertionError(f"expected final answer {expected!r}, got {run.result!r}")
@@ -0,0 +1,105 @@
1
+ """The ``@tool`` decorator: a typed Python function becomes a core-compatible Tool.
2
+
3
+ The JSON schema is derived from type hints and the ``required`` set from which parameters
4
+ lack defaults, so a tool is declared once, in plain Python, with no hand-written schema.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import inspect
10
+ import types as _pytypes
11
+ from collections.abc import Callable
12
+ from typing import Any, Union, get_args, get_origin, get_type_hints, overload
13
+
14
+ from agentharness_core import ToolResult
15
+
16
+ _JSON_SCALARS: dict[type, str] = {
17
+ int: "integer",
18
+ float: "number",
19
+ str: "string",
20
+ bool: "boolean",
21
+ }
22
+
23
+
24
+ def _type_to_schema(tp: Any) -> dict[str, Any]:
25
+ if tp in _JSON_SCALARS:
26
+ return {"type": _JSON_SCALARS[tp]}
27
+ origin = get_origin(tp)
28
+ if origin is list:
29
+ args = get_args(tp)
30
+ return {"type": "array", "items": _type_to_schema(args[0]) if args else {}}
31
+ if origin is dict:
32
+ return {"type": "object"}
33
+ if origin is Union or origin is _pytypes.UnionType:
34
+ non_none = [a for a in get_args(tp) if a is not type(None)]
35
+ if len(non_none) == 1:
36
+ return _type_to_schema(non_none[0])
37
+ return {} # unknown / unconstrained
38
+
39
+
40
def _build_schema(func: Callable[..., Any]) -> dict[str, Any]:
    """Derive an object schema describing ``func``'s parameters.

    A parameter named ``self`` and any ``*args`` / ``**kwargs`` parameters are left out.
    Each remaining parameter becomes a property whose schema comes from its resolved type
    hint (parameters without a hint are described as strings), and every parameter that
    has no default value is listed under ``required``.
    """
    parameters = inspect.signature(func).parameters
    annotations = get_type_hints(func)
    variadic = {inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD}
    exposed = [
        param
        for key, param in parameters.items()
        if key != "self" and param.kind not in variadic
    ]
    return {
        "type": "object",
        "properties": {
            param.name: _type_to_schema(annotations.get(param.name, str))
            for param in exposed
        },
        "required": [
            param.name for param in exposed if param.default is inspect.Parameter.empty
        ],
    }
55
+
56
+
57
class FunctionTool:
    """Adapter that presents a plain callable through the core ``Tool`` protocol.

    Instances expose ``name``, ``description`` and ``schema`` attributes plus a ``call``
    method, and can still be invoked directly exactly like the wrapped function.
    """

    def __init__(
        self,
        func: Callable[..., Any],
        *,
        name: str | None = None,
        description: str | None = None,
    ) -> None:
        self._func = func
        # Explicit overrides win; otherwise fall back to the function's own metadata.
        self.name = name if name else func.__name__
        self.description = description if description else (inspect.getdoc(func) or "")
        self.schema = _build_schema(func)

    def call(self, arguments: dict[str, Any]) -> ToolResult:
        """Invoke the wrapped function with ``arguments`` as keywords.

        A ``ToolResult`` returned by the function is passed through untouched; any other
        value becomes the ``content`` of a new ``ToolResult`` (stringified unless it is
        already a ``str``).
        """
        outcome = self._func(**arguments)
        if not isinstance(outcome, ToolResult):
            text = outcome if isinstance(outcome, str) else str(outcome)
            outcome = ToolResult(content=text)
        return outcome

    def __call__(self, *args: Any, **kwargs: Any) -> Any:
        """Forward a direct call to the wrapped function and return its raw result."""
        return self._func(*args, **kwargs)

    def __repr__(self) -> str:
        return f"FunctionTool(name={self.name!r})"
83
+
84
+
85
@overload
def tool(func: Callable[..., Any], /) -> FunctionTool: ...
@overload
def tool(
    *, name: str | None = ..., description: str | None = ...
) -> Callable[[Callable[..., Any]], FunctionTool]: ...


def tool(
    func: Callable[..., Any] | None = None,
    /,
    *,
    name: str | None = None,
    description: str | None = None,
) -> FunctionTool | Callable[[Callable[..., Any]], FunctionTool]:
    """Wrap a function in a ``FunctionTool``.

    Works both bare (``@tool``) and parameterised (``@tool(name=..., description=...)``):
    with a function it returns the wrapped tool directly, without one it returns a
    decorator that applies the given overrides.
    """

    def decorate(target: Callable[..., Any]) -> FunctionTool:
        return FunctionTool(target, name=name, description=description)

    if func is None:
        return decorate
    return decorate(func)
@@ -0,0 +1,71 @@
1
+ """Contract: Agent is the one-import ergonomic entry point over the core loop."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from agentharness import Agent, tool
6
+ from agentharness.testing import FakeModel, assert_answer, assert_used_tool
7
+ from agentharness_core import Message, RunFinished, ToolCall
8
+
9
+
10
@tool
def add(a: int, b: int) -> str:
    """Add two numbers."""
    total = a + b
    return str(total)
14
+
15
+
16
def _add_then_answer() -> FakeModel:
    """Script a model that first requests ``add(a=2, b=3)`` and then gives the answer."""
    add_call = ToolCall(id="c1", name="add", arguments={"a": 2, "b": 3})
    tool_request = Message(role="assistant", content=None, tool_calls=(add_call,))
    final_answer = Message(role="assistant", content="2 + 3 = 5.")
    return FakeModel([tool_request, final_answer])
27
+
28
+
29
def test_agent_runs_to_completion_and_returns_result() -> None:
    """``Agent.run`` returns a run whose result and final status are already set."""
    finished = Agent(model=_add_then_answer(), tools=[add]).run("What is 2 + 3?")

    assert finished.result == "2 + 3 = 5."
    assert finished.state.status == "done"
35
+
36
+
37
def test_agent_uses_tools() -> None:
    """The scripted tool call is executed and the final answer is reported."""
    finished = Agent(model=_add_then_answer(), tools=[add]).run("What is 2 + 3?")

    assert_used_tool(finished, "add")
    assert_answer(finished, "2 + 3 = 5.")
43
+
44
+
45
def test_agent_includes_system_prompt() -> None:
    """A configured system prompt becomes the first message of the run's state."""
    scripted = FakeModel([Message(role="assistant", content="hi")])
    agent = Agent(model=scripted, system="You are terse.")

    first_message = agent.run("hello").state.messages[0]

    assert first_message.role == "system"
    assert first_message.content == "You are terse."
53
+
54
+
55
def test_agent_stream_yields_live_events() -> None:
    """Iterating ``Agent.stream`` ends with a ``RunFinished`` event."""
    agent = Agent(model=_add_then_answer(), tools=[add])
    last_event = list(agent.stream("What is 2 + 3?"))[-1]
    assert isinstance(last_event, RunFinished)
59
+
60
+
61
def test_agent_run_is_replayable() -> None:
    """Replaying the recorded trace from the same start state gives the same result."""
    from agentharness_core import State, replay

    prompt = "What is 2 + 3?"
    original = Agent(model=_add_then_answer(), tools=[add]).run(prompt)

    # The recorded run replays identically with no model and no tools.
    replayed = replay(State.start([Message("user", prompt)]), trace=original.trace)
    replayed.run_to_completion()
    assert replayed.result == original.result
@@ -0,0 +1,97 @@
1
+ """Contract: @tool turns a typed Python function into a core-compatible Tool."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from agentharness import FunctionTool, tool
6
+ from agentharness_core import Tool, ToolResult
7
+
8
+
9
def test_tool_decorator_produces_a_core_tool() -> None:
    """A decorated function is a ``FunctionTool`` that also passes the core ``Tool`` check."""

    @tool
    def add(a: int, b: int) -> str:
        """Add two numbers."""
        return str(a + b)

    for expected_type in (FunctionTool, Tool):  # Tool is the structural core protocol
        assert isinstance(add, expected_type)
    assert add.name == "add"
    assert add.description == "Add two numbers."
19
+
20
+
21
def test_schema_is_generated_from_type_hints() -> None:
    """Two ``int`` parameters without defaults give two required integer properties."""

    @tool
    def add(a: int, b: int) -> str:
        return str(a + b)

    integer = {"type": "integer"}
    expected = {
        "type": "object",
        "properties": {"a": integer, "b": integer},
        "required": ["a", "b"],
    }
    assert add.schema == expected
31
+
32
+
33
def test_call_wraps_return_in_toolresult() -> None:
    """``call`` wraps a plain string return value in a non-error ``ToolResult``."""

    @tool
    def add(a: int, b: int) -> str:
        return str(a + b)

    wrapped = add.call({"a": 2, "b": 3})
    assert isinstance(wrapped, ToolResult)
    assert wrapped.content == "5"
    assert wrapped.is_error is False
42
+
43
+
44
def test_tool_remains_directly_callable() -> None:
    """Calling the decorated object directly returns the function's raw result."""

    @tool
    def add(a: int, b: int) -> str:
        return str(a + b)

    direct_result = add(2, 3)  # the original function still works
    assert direct_result == "5"
50
+
51
+
52
def test_non_string_return_is_stringified() -> None:
    """A non-string return value is converted to text for the result content."""

    @tool
    def count(n: int) -> int:
        return n * 2

    outcome = count.call({"n": 4})
    assert outcome.content == "8"
58
+
59
+
60
def test_toolresult_return_passes_through() -> None:
    """A ``ToolResult`` returned by the function keeps its content and error flag."""

    @tool
    def risky(x: int) -> ToolResult:
        return ToolResult(content="boom", is_error=True)

    outcome = risky.call({"x": 1})
    assert outcome.is_error is True
    assert outcome.content == "boom"
68
+
69
+
70
def test_decorator_accepts_name_and_description_overrides() -> None:
    """Keyword overrides replace the function's own name and description."""
    overrides = {"name": "sum2", "description": "custom"}

    @tool(**overrides)
    def add(a: int, b: int) -> str:
        return str(a + b)

    assert add.name == "sum2"
    assert add.description == "custom"
77
+
78
+
79
def test_optional_and_defaulted_params_are_not_required() -> None:
    """Only parameters without defaults are required; ``T | None`` maps to ``T``."""

    @tool
    def greet(name: str, greeting: str = "hello", times: int | None = None) -> str:
        return f"{greeting} {name}"

    schema = greet.schema
    properties = schema["properties"]
    assert schema["required"] == ["name"]
    assert properties["greeting"] == {"type": "string"}
    assert properties["times"] == {"type": "integer"}
87
+
88
+
89
def test_list_param_schema() -> None:
    """A ``list[int]`` parameter is described as an array of integers."""

    @tool
    def total(values: list[int]) -> str:
        return str(sum(values))

    expected = {"type": "array", "items": {"type": "integer"}}
    assert total.schema["properties"]["values"] == expected