TigerHarness 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. tigerharness/__init__.py +14 -0
  2. tigerharness/agent_sdk/README.md +231 -0
  3. tigerharness/agent_sdk/__init__.py +131 -0
  4. tigerharness/agent_sdk/backends/__init__.py +5 -0
  5. tigerharness/agent_sdk/backends/_base.py +82 -0
  6. tigerharness/agent_sdk/backends/anthropic_sdk.py +567 -0
  7. tigerharness/agent_sdk/backends/claude_p.py +657 -0
  8. tigerharness/agent_sdk/backends/openai_sdk.py +43 -0
  9. tigerharness/agent_sdk/docs/HANDOFF.md +539 -0
  10. tigerharness/agent_sdk/docs/agent_sdk_comparison.md +727 -0
  11. tigerharness/agent_sdk/errors.py +38 -0
  12. tigerharness/agent_sdk/examples/__init__.py +0 -0
  13. tigerharness/agent_sdk/examples/basic.py +27 -0
  14. tigerharness/agent_sdk/examples/builtin_tools.py +58 -0
  15. tigerharness/agent_sdk/examples/multi_turn.py +36 -0
  16. tigerharness/agent_sdk/examples/streaming.py +59 -0
  17. tigerharness/agent_sdk/factory.py +86 -0
  18. tigerharness/agent_sdk/retry.py +125 -0
  19. tigerharness/agent_sdk/types.py +353 -0
  20. tigerharness/cli.py +60 -0
  21. tigerharness/init.py +182 -0
  22. tigerharness/py.typed +0 -0
  23. tigerharness/slack_bridge/__init__.py +5 -0
  24. tigerharness/slack_bridge/__main__.py +89 -0
  25. tigerharness/slack_bridge/bridge.py +354 -0
  26. tigerharness/slack_bridge/config.py +91 -0
  27. tigerharness/slack_bridge/downloader.py +163 -0
  28. tigerharness/slack_bridge/notify.py +337 -0
  29. tigerharness/slack_bridge/persistence.py +104 -0
  30. tigerharness/task_runner/__init__.py +6 -0
  31. tigerharness/task_runner/__main__.py +7 -0
  32. tigerharness/task_runner/cli.py +587 -0
  33. tigerharness/task_runner/notifier.py +307 -0
  34. tigerharness/task_runner/personas.py +394 -0
  35. tigerharness/task_runner/registry.py +211 -0
  36. tigerharness/task_runner/runner.py +1051 -0
  37. tigerharness/task_runner/stuck_watchdog.py +567 -0
  38. tigerharness/tiger_memory/__init__.py +11 -0
  39. tigerharness/tiger_memory/briefing.py +356 -0
  40. tigerharness/tiger_memory/cli.py +158 -0
  41. tigerharness/tiger_memory/config.py +336 -0
  42. tigerharness/tiger_memory/drill.py +390 -0
  43. tigerharness/tiger_memory/embedders.py +135 -0
  44. tigerharness/tiger_memory/frontmatter.py +65 -0
  45. tigerharness/tiger_memory/lifecycle.py +951 -0
  46. tigerharness/tiger_memory/must_memorize.py +372 -0
  47. tigerharness/tiger_memory/rag.py +204 -0
  48. tigerharness/tiger_memory/sources/__init__.py +27 -0
  49. tigerharness/tiger_memory/sources/base.py +40 -0
  50. tigerharness/tiger_memory/sources/claude_transcript.py +310 -0
  51. tigerharness/tiger_memory/sources/docs.py +97 -0
  52. tigerharness/tiger_memory/state.py +123 -0
  53. tigerharness/tiger_memory/store.py +330 -0
  54. tigerharness/tiger_memory/summarizers/__init__.py +19 -0
  55. tigerharness/tiger_memory/summarizers/anthropic.py +115 -0
  56. tigerharness/tiger_memory/summarizers/base.py +45 -0
  57. tigerharness/tiger_memory/summarizers/mock.py +28 -0
  58. tigerharness/tiger_memory/summarizers/prompts/default/v1/daily_rollup.md +20 -0
  59. tigerharness/tiger_memory/summarizers/prompts/default/v1/detailed_summary.md +40 -0
  60. tigerharness/tiger_memory/summarizers/prompts/default/v1/longer_memory.md +24 -0
  61. tigerharness/tiger_memory/summarizers/prompts/default/v1/monthly_rollup.md +30 -0
  62. tigerharness/tiger_memory/summarizers/prompts/default/v1/must_memorize_extract.md +41 -0
  63. tigerharness/tiger_memory/summarizers/prompts/default/v1/short_summary.md +30 -0
  64. tigerharness/tiger_memory/summarizers/prompts/default/v1/weekly_rollup.md +27 -0
  65. tigerharness/tiger_memory/templates/briefing_readme.md +71 -0
  66. tigerharness-0.1.3.dist-info/METADATA +191 -0
  67. tigerharness-0.1.3.dist-info/RECORD +70 -0
  68. tigerharness-0.1.3.dist-info/WHEEL +4 -0
  69. tigerharness-0.1.3.dist-info/entry_points.txt +3 -0
  70. tigerharness-0.1.3.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,14 @@
1
+ """tigerharness — a generic Claude Code agent harness.
2
+
3
+ Sub-packages:
4
+ tigerharness.task_runner — iterative task execution loop
5
+ tigerharness.slack_bridge — Slack Socket Mode bridge to Claude
6
+ tigerharness.tiger_memory — persistent memory: archive, journal, briefing
7
+ """
8
+
9
+ from importlib.metadata import PackageNotFoundError, version
10
+
11
+ try:
12
+ __version__ = version("tigerharness")
13
+ except PackageNotFoundError: # pragma: no cover (only hits during in-tree dev without an install)
14
+ __version__ = "0.0.0+unknown"
@@ -0,0 +1,231 @@
1
+ # agent_sdk
2
+
3
+ Backend-agnostic Python interface for running LLM agents. Today it ships with
4
+ working `claude -p` subprocess and official `claude-agent-sdk` backends, plus a
5
+ stub for OpenAI's `openai-agents`; you switch between them by changing one string.
6
+
7
+ ## Why
8
+
9
+ Every agent framework defines its own `Agent`, `Runner`, `Tool`, `Session`,
10
+ and event types. Pin your production code to one of them and you've made
11
+ switching providers expensive. This package extracts the common surface
12
+ (`AgentConfig`, `AgentBackend`, normalized `Event`s) so backends are
13
+ hot-swappable.
14
+
15
+ See [`docs/agent_sdk_comparison.md`](docs/agent_sdk_comparison.md) for the
16
+ design rationale and [`docs/HANDOFF.md`](docs/HANDOFF.md) for the full
17
+ workspace map and open work.
18
+
19
+ ## Install
20
+
21
+ The SDK has no required third-party Python dependencies. To use the
22
+ `claude_p` backend you need the Claude Code CLI on `PATH`. Install
23
+ Claude Code from anthropic.com/claude-code, then verify:
24
+
25
+ ```bash
26
+ claude --version
27
+ ```
28
+
29
+ The project root (one level up from this README, at `agent-sdk/`) ships a
30
+ `pyproject.toml`. From a sibling project in the workspace, depend on it
31
+ with uv:
32
+
33
+ ```toml
34
+ # in your sibling project's pyproject.toml
35
+ [project]
36
+ dependencies = ["agent-sdk"]
37
+
38
+ [tool.uv.sources]
39
+ agent-sdk = { path = "../agent-sdk", editable = true }
40
+ ```
41
+
42
+ Or with pip from the project root: `pip install -e .`.
43
+
44
+ Requires Python 3.10+ (uses PEP 604 union types, `match` statements, and
45
+ `from __future__ import annotations`).
46
+
47
+ ## Quick start
48
+
49
+ ```python
50
+ import asyncio
51
+ from tigerharness.agent_sdk import AgentConfig, get_backend
52
+
53
+
54
+ async def main():
55
+ backend = get_backend("claude_p")
56
+ cfg = AgentConfig(name="qa", instructions="Be concise.")
57
+ result = await backend.run(cfg, "What is 2 + 2?")
58
+ print(result.final_output)
59
+ print(f"cost = ${result.cost_usd}")
60
+
61
+ asyncio.run(main())
62
+ ```
63
+
64
+ ## Backends
65
+
66
+ | Name | Status | Notes |
67
+ |---|---|---|
68
+ | `claude_p` | working | Spawns `claude -p` per call. Always available. Subprocess transport over stream-json. |
69
+ | `anthropic_sdk` | working | Wraps Anthropic's official `claude-agent-sdk`. Install with `pip install tigerharness[anthropic]`. Supports built-in tools, sessions, cancellation, and approval callbacks. |
70
+ | `openai_sdk` | stub | Future: `pip install openai-agents`. Will support function tools, hosted tools, handoffs, and approval-loop wrappers. |
71
+
72
+ Switch backends by changing the factory call — caller code stays identical:
73
+
74
+ ```python
75
+ # Subprocess transport, always available
76
+ backend = get_backend("claude_p")
77
+
78
+ # Same agent code, but now via the official claude-agent-sdk
79
+ backend = get_backend("anthropic_sdk")
80
+
81
+ # (future)
82
+ # backend = get_backend("openai_sdk")
83
+ ```
84
+
85
+ You can also register your own:
86
+
87
+ ```python
88
+ from tigerharness.agent_sdk import register_backend, AgentBackend
89
+
90
+ class MyBackend:
91
+ # implement run, run_stream, open_session
92
+ ...
93
+
94
+ register_backend("mine", lambda **kw: MyBackend(**kw))
95
+ backend = get_backend("mine")
96
+ ```
97
+
98
+ ## Concepts
99
+
100
+ ### AgentConfig
101
+ Declarative agent description: `name`, `instructions`, `model`, `tools`,
102
+ `builtin_tools`, `output_schema`, `max_turns`, plus an `extra: dict` for
103
+ backend-specific knobs.
104
+
105
+ ### Tools
106
+ - `ToolSpec(name, description, input_schema, handler)` — Python-defined tools.
107
+ *Not supported by `claude_p`.*
108
+ - `BuiltinTool(name, config)` — provider-hosted tools (`Bash`, `Read`,
109
+ `WebSearch`, `web_search`, `code_interpreter`, ...).
110
+
111
+ ### Run vs. run_stream
112
+ ```python
113
+ # One-shot:
114
+ result = await backend.run(cfg, prompt)
115
+
116
+ # Streaming — consume to completion:
117
+ handle = backend.run_stream(cfg, prompt)
118
+ async for event in handle:
119
+ ...
120
+ result = handle.result # populated after the stream completes
121
+
122
+ # Streaming — break out early with guaranteed cleanup:
123
+ async with backend.run_stream(cfg, prompt) as handle:
124
+ async for event in handle:
125
+ if some_condition:
126
+ break # __aexit__ kills the subprocess
127
+
128
+ # Or explicit cancel:
129
+ await handle.cancel() # mid-stream cancel; SIGINT to subprocess
130
+ ```
131
+
132
+ If you neither consume the stream to completion nor wrap it in `async with`
133
+ nor call `cancel()`, the underlying subprocess will linger until the OS
134
+ eventually reaps it (typically on the next stdout write, which gets
135
+ SIGPIPE'd). Prefer the `async with` form.
136
+
137
+ ### Events
138
+ Discriminated union: `RunStart`, `TextDelta`, `MessageComplete`, `ToolCall`,
139
+ `ToolResult`, `Thinking`, `AgentChanged`, `ErrorEvent`, `RunDone`. Use
140
+ `match` / `isinstance` to handle each.
141
+
142
+ ### Sessions
143
+ ```python
144
+ session = await backend.open_session()
145
+ await backend.run(cfg, "first turn", session=session)
146
+ await backend.run(cfg, "follow-up", session=session)
147
+ ```
148
+ Sessions are **not** portable across backends. The id is empty until the
149
+ first run populates it.
150
+
151
+ ### Approval (HITL)
152
+ ```python
153
+ async def gate(req: ApprovalRequest) -> ApprovalDecision:
154
+ if req.tool_call.name == "Bash" and "rm " in str(req.tool_call.arguments):
155
+ return ApprovalDecision(allow=False, reason="rm denied")
156
+ return ApprovalDecision(allow=True)
157
+
158
+ await backend.run(cfg, prompt, approval=gate)
159
+ ```
160
+ *Not supported by `claude_p`.* Use `cfg.extra={"permission_mode": ...}` for
161
+ coarse policy instead, or switch to `anthropic_sdk` for inline approval.
162
+
163
+ ## Examples
164
+
165
+ See `examples/` — recommended reading order:
166
+
167
+ 1. `basic.py` — one-shot Q&A
168
+ 2. `streaming.py` — consume streaming events with `async with`
169
+ 3. `multi_turn.py` — session resume across turns
170
+ 4. `builtin_tools.py` — Claude Code's `Bash` and `Read` tools
171
+
172
+ Run any of them with:
173
+
174
+ ```bash
175
+ python -m tigerharness.agent_sdk.examples.basic
176
+ ```
177
+
178
+ ## `claude_p` extras
179
+
180
+ The `claude_p` backend reads a few keys from `cfg.extra`:
181
+
182
+ | Key | Type | Maps to |
183
+ |---|---|---|
184
+ | `permission_mode` | str | `--permission-mode` (default / acceptEdits / plan / bypassPermissions / dontAsk) |
185
+ | `max_budget_usd` | float | `--max-budget-usd` |
186
+ | `add_dirs` | list[str] | one `--add-dir` per entry |
187
+ | `disallowed_tools` | list[str] | `--disallowedTools` |
188
+ | `settings` | str | `--settings` |
189
+ | `cli_args` | dict[str, str \| None] | arbitrary `--<key> <value>` (None values become bare flags) |
190
+
191
+ `AgentConfig.output_schema` is wired to `--json-schema` (accepts a JSON
192
+ Schema dict or a pydantic model — v1 or v2). The CLI populates
193
+ `structured_output` in its result event, which `RunResult.final_output`
194
+ reflects.
195
+
196
+ ## Testing
197
+
198
+ The pytest suite lives at `agent_sdk/tests/` (excluded from the wheel). From
199
+ the project root:
200
+
201
+ ```bash
202
+ # One-time dev setup
203
+ uv sync --group dev
204
+
205
+ # Run the full suite (160 tests, ~3 seconds)
206
+ uv run pytest
207
+
208
+ # With coverage (uses .coveragerc which excludes examples and tests)
209
+ uv run coverage run -m pytest && uv run coverage report -m
210
+
211
+ # Type-check the package
212
+ uv run mypy --python-version 3.10 agent_sdk
213
+ ```
214
+
215
+ The tests use a set of fake `claude` shell scripts as stand-ins for the real
216
+ CLI, so the suite runs without Claude Code installed. Coverage of the
217
+ `agent_sdk/` source is at 100%.
218
+
219
+ ## Limitations of `claude_p`
220
+
221
+ - No user-defined Python tools (raises `BackendNotImplementedError`)
222
+ - No inline approval callbacks (raises `BackendNotImplementedError`)
223
+ - `AgentConfig.temperature` is ignored (the CC CLI doesn't expose it as a
224
+ flag — set it via a settings file passed through `extra={"settings": ...}`)
225
+ - `BuiltinTool(name, config={...})` rejects per-tool config (the CLI
226
+ configures hosted tools via settings, not flags)
227
+ - One subprocess per `run_stream` call; multi-turn happens via `--resume`
228
+ - `cancel()` sends SIGINT; `after_turn=True` is a hint, not a hard guarantee
229
+
230
+ For any of those features, switch to the `anthropic_sdk` backend (the
231
+ interface stays the same).
@@ -0,0 +1,131 @@
1
+ """Backend-agnostic agent SDK.
2
+
3
+ Public API entry points:
4
+
5
+ from tigerharness.agent_sdk import (
6
+ AgentConfig, ToolSpec, BuiltinTool, ToolOutput,
7
+ InputMessage, ApprovalRequest, ApprovalDecision,
8
+ get_backend, register_backend,
9
+ )
10
+
11
+ backend = get_backend("claude_p") # `claude -p` subprocess
12
+ # backend = get_backend("anthropic_sdk") # official claude-agent-sdk
13
+ # backend = get_backend("openai_sdk") # future
14
+
15
+ cfg = AgentConfig(name="qa", instructions="Be concise.")
16
+ result = await backend.run(cfg, "What is 2 + 2?")
17
+ print(result.final_output)
18
+
19
+ The interface is designed so caller code stays identical when you switch
20
+ backends. See ``agent_sdk_comparison.md`` for the design rationale.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ from .errors import (
26
+ AgentSDKError,
27
+ BackendNotImplementedError,
28
+ CLIError,
29
+ StreamNotConsumedError,
30
+ ToolApprovalDenied,
31
+ )
32
+ from .factory import get_backend, list_backends, register_backend
33
+ from .retry import run_with_retry
34
+ from .types import (
35
+ # Backend Protocol
36
+ AgentBackend,
37
+ # Config
38
+ AgentConfig,
39
+ AgentChanged,
40
+ # Approval
41
+ ApprovalCallback,
42
+ ApprovalDecision,
43
+ ApprovalRequest,
44
+ # Tools
45
+ BuiltinTool,
46
+ ContentPart,
47
+ ErrorEvent,
48
+ Event,
49
+ InputMessage,
50
+ MessageComplete,
51
+ NormalizedMessage,
52
+ Role,
53
+ # Result
54
+ RunDone,
55
+ RunResult,
56
+ RunStart,
57
+ # Session / stream
58
+ Session,
59
+ StopReason,
60
+ StreamHandle,
61
+ # Events
62
+ TextDelta,
63
+ TextPart,
64
+ Thinking,
65
+ ThinkingPart,
66
+ ToolCall,
67
+ ToolHandler,
68
+ ToolOutput,
69
+ ToolResult,
70
+ ToolResultPart,
71
+ ToolSpec,
72
+ ToolUsePart,
73
+ )
74
+
75
+
76
+ __version__ = "0.1.0"
77
+
78
+ __all__ = [
79
+ # Version
80
+ "__version__",
81
+ # Errors
82
+ "AgentSDKError",
83
+ "BackendNotImplementedError",
84
+ "CLIError",
85
+ "StreamNotConsumedError",
86
+ "ToolApprovalDenied",
87
+ # Factory
88
+ "get_backend",
89
+ "list_backends",
90
+ "register_backend",
91
+ # Retry
92
+ "run_with_retry",
93
+ # Backend protocol
94
+ "AgentBackend",
95
+ # Config
96
+ "AgentConfig",
97
+ # Content
98
+ "ContentPart",
99
+ "InputMessage",
100
+ "NormalizedMessage",
101
+ "Role",
102
+ "TextPart",
103
+ "ThinkingPart",
104
+ "ToolResultPart",
105
+ "ToolUsePart",
106
+ # Tools
107
+ "BuiltinTool",
108
+ "ToolHandler",
109
+ "ToolOutput",
110
+ "ToolSpec",
111
+ # Approval
112
+ "ApprovalCallback",
113
+ "ApprovalDecision",
114
+ "ApprovalRequest",
115
+ # Events
116
+ "AgentChanged",
117
+ "ErrorEvent",
118
+ "Event",
119
+ "MessageComplete",
120
+ "RunDone",
121
+ "RunStart",
122
+ "StopReason",
123
+ "TextDelta",
124
+ "Thinking",
125
+ "ToolCall",
126
+ "ToolResult",
127
+ # Result, session, stream
128
+ "RunResult",
129
+ "Session",
130
+ "StreamHandle",
131
+ ]
@@ -0,0 +1,5 @@
1
+ """Concrete backend implementations.
2
+
3
+ Don't import from here directly in user code; use ``agent_sdk.get_backend(name)``
4
+ instead so backends stay swappable.
5
+ """
@@ -0,0 +1,82 @@
1
+ """Helpers shared by backend implementations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import AsyncIterator
6
+ from typing import Any
7
+
8
+ from ..errors import StreamNotConsumedError
9
+ from ..types import Event, RunResult, StreamHandle
10
+
11
+
12
class BaseStreamHandle:
    """Convenience base class for backend stream handles.

    Subclasses implement ``_iter()`` as an ``async`` generator that yields
    Events and, before returning, sets ``self._result`` to a populated
    ``RunResult``; the generator is attached with ``_start()``. The base
    class then provides ``__aiter__`` / ``__anext__``, ``async with``
    support, ``.result`` and ``.is_complete``.
    """

    def __init__(self) -> None:
        # Final outcome; stays None until the subclass generator sets it.
        self._result: RunResult | None = None
        # Event generator driven by __anext__; attached via _start().
        self._gen: AsyncIterator[Event] | None = None

    def _start(self, gen: AsyncIterator[Event]) -> None:
        """Attach the event generator that iteration will consume."""
        self._gen = gen

    def __aiter__(self) -> "BaseStreamHandle":
        return self

    async def __anext__(self) -> Event:
        """Return the next event from the attached generator.

        Raises:
            RuntimeError: if no generator was attached with ``_start()``.
            StopAsyncIteration: propagated from the generator when it ends.
        """
        if self._gen is None:
            raise RuntimeError("Stream not started; subclass forgot to call _start().")
        return await self._gen.__anext__()

    async def __aenter__(self) -> "BaseStreamHandle":
        return self

    async def __aexit__(self, exc_type: Any, exc: Any, tb: Any) -> None:
        """Best-effort cleanup when leaving an ``async with`` block.

        If the stream has not completed, the backend is asked to cancel.
        The underlying generator is then closed so that its ``finally``
        block can release whatever it holds (e.g. reap a subprocess).
        Ordinary exceptions from either step are deliberately swallowed:
        cleanup must not mask the exception that ended the block.
        """
        try:
            if not self.is_complete:
                try:
                    await self.cancel()
                except Exception:
                    # Covers NotImplementedError from backends without
                    # cancellation support as well as any cancel failure.
                    pass
        finally:
            # Runs even when cancel() raises a BaseException such as
            # asyncio.CancelledError, so the generator is always closed.
            gen = self._gen
            if gen is not None and hasattr(gen, "aclose"):
                try:
                    await gen.aclose()  # type: ignore[union-attr]
                except Exception:
                    pass

    @property
    def result(self) -> RunResult:
        """The final ``RunResult``.

        Raises:
            StreamNotConsumedError: if the result has not been set yet.
        """
        if self._result is None:
            raise StreamNotConsumedError(
                "Stream has not been fully consumed yet. Iterate to completion "
                "first, or read .result inside the `async with` block after "
                "finishing the loop."
            )
        return self._result

    @property
    def is_complete(self) -> bool:
        """True once the result has been set."""
        return self._result is not None

    async def cancel(self, *, after_turn: bool = False) -> None:  # pragma: no cover
        """Cancel the run; backends that support cancellation override this."""
        raise NotImplementedError("This backend does not support cancellation.")
71
+
72
+
73
+ async def run_via_stream(handle: StreamHandle) -> RunResult:
74
+ """Drain a stream handle and return its final RunResult.
75
+
76
+ Backends usually implement ``run()`` as ``return await
77
+ run_via_stream(self.run_stream(...))`` so the streaming and non-streaming
78
+ paths share one code path.
79
+ """
80
+ async for _ in handle:
81
+ pass
82
+ return handle.result