PyPI - TigerHarness - Versions diffs - 0.1.3__py3-none-any.whl - Mend

TigerHarness 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70) hide show

tigerharness/__init__.py +14 -0
tigerharness/agent_sdk/README.md +231 -0
tigerharness/agent_sdk/__init__.py +131 -0
tigerharness/agent_sdk/backends/__init__.py +5 -0
tigerharness/agent_sdk/backends/_base.py +82 -0
tigerharness/agent_sdk/backends/anthropic_sdk.py +567 -0
tigerharness/agent_sdk/backends/claude_p.py +657 -0
tigerharness/agent_sdk/backends/openai_sdk.py +43 -0
tigerharness/agent_sdk/docs/HANDOFF.md +539 -0
tigerharness/agent_sdk/docs/agent_sdk_comparison.md +727 -0
tigerharness/agent_sdk/errors.py +38 -0
tigerharness/agent_sdk/examples/__init__.py +0 -0
tigerharness/agent_sdk/examples/basic.py +27 -0
tigerharness/agent_sdk/examples/builtin_tools.py +58 -0
tigerharness/agent_sdk/examples/multi_turn.py +36 -0
tigerharness/agent_sdk/examples/streaming.py +59 -0
tigerharness/agent_sdk/factory.py +86 -0
tigerharness/agent_sdk/retry.py +125 -0
tigerharness/agent_sdk/types.py +353 -0
tigerharness/cli.py +60 -0
tigerharness/init.py +182 -0
tigerharness/py.typed +0 -0
tigerharness/slack_bridge/__init__.py +5 -0
tigerharness/slack_bridge/__main__.py +89 -0
tigerharness/slack_bridge/bridge.py +354 -0
tigerharness/slack_bridge/config.py +91 -0
tigerharness/slack_bridge/downloader.py +163 -0
tigerharness/slack_bridge/notify.py +337 -0
tigerharness/slack_bridge/persistence.py +104 -0
tigerharness/task_runner/__init__.py +6 -0
tigerharness/task_runner/__main__.py +7 -0
tigerharness/task_runner/cli.py +587 -0
tigerharness/task_runner/notifier.py +307 -0
tigerharness/task_runner/personas.py +394 -0
tigerharness/task_runner/registry.py +211 -0
tigerharness/task_runner/runner.py +1051 -0
tigerharness/task_runner/stuck_watchdog.py +567 -0
tigerharness/tiger_memory/__init__.py +11 -0
tigerharness/tiger_memory/briefing.py +356 -0
tigerharness/tiger_memory/cli.py +158 -0
tigerharness/tiger_memory/config.py +336 -0
tigerharness/tiger_memory/drill.py +390 -0
tigerharness/tiger_memory/embedders.py +135 -0
tigerharness/tiger_memory/frontmatter.py +65 -0
tigerharness/tiger_memory/lifecycle.py +951 -0
tigerharness/tiger_memory/must_memorize.py +372 -0
tigerharness/tiger_memory/rag.py +204 -0
tigerharness/tiger_memory/sources/__init__.py +27 -0
tigerharness/tiger_memory/sources/base.py +40 -0
tigerharness/tiger_memory/sources/claude_transcript.py +310 -0
tigerharness/tiger_memory/sources/docs.py +97 -0
tigerharness/tiger_memory/state.py +123 -0
tigerharness/tiger_memory/store.py +330 -0
tigerharness/tiger_memory/summarizers/__init__.py +19 -0
tigerharness/tiger_memory/summarizers/anthropic.py +115 -0
tigerharness/tiger_memory/summarizers/base.py +45 -0
tigerharness/tiger_memory/summarizers/mock.py +28 -0
tigerharness/tiger_memory/summarizers/prompts/default/v1/daily_rollup.md +20 -0
tigerharness/tiger_memory/summarizers/prompts/default/v1/detailed_summary.md +40 -0
tigerharness/tiger_memory/summarizers/prompts/default/v1/longer_memory.md +24 -0
tigerharness/tiger_memory/summarizers/prompts/default/v1/monthly_rollup.md +30 -0
tigerharness/tiger_memory/summarizers/prompts/default/v1/must_memorize_extract.md +41 -0
tigerharness/tiger_memory/summarizers/prompts/default/v1/short_summary.md +30 -0
tigerharness/tiger_memory/summarizers/prompts/default/v1/weekly_rollup.md +27 -0
tigerharness/tiger_memory/templates/briefing_readme.md +71 -0
tigerharness-0.1.3.dist-info/METADATA +191 -0
tigerharness-0.1.3.dist-info/RECORD +70 -0
tigerharness-0.1.3.dist-info/WHEEL +4 -0
tigerharness-0.1.3.dist-info/entry_points.txt +3 -0
tigerharness-0.1.3.dist-info/licenses/LICENSE +21 -0

tigerharness/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+"""tigerharness — a generic Claude Code agent harness.
+Sub-packages:
+    tigerharness.agent_sdk     — backend-agnostic interface for running LLM agents
+    tigerharness.task_runner   — iterative task execution loop
+    tigerharness.slack_bridge  — Slack Socket Mode bridge to Claude
+    tigerharness.tiger_memory  — persistent memory: archive, journal, briefing
+"""
+from importlib.metadata import PackageNotFoundError, version
+try:  # resolve the version from the installed distribution's metadata
+    __version__ = version("tigerharness")
+except PackageNotFoundError:  # pragma: no cover  (only hits during in-tree dev without an install)
+    __version__ = "0.0.0+unknown"  # placeholder used when no installed metadata is found

tigerharness/agent_sdk/README.md ADDED Viewed

@@ -0,0 +1,231 @@
+# agent_sdk
+Backend-agnostic Python interface for running LLM agents. Today it ships with
+a working `claude -p` subprocess backend and a wrapper around the official
+`claude-agent-sdk`; an OpenAI `openai-agents` backend is stubbed for later.
+Switching between backends means changing one string.
+## Why
+Every agent framework defines its own `Agent`, `Runner`, `Tool`, `Session`,
+and event types. Pin your production code to one of them and you've made
+switching providers expensive. This package extracts the common surface
+(`AgentConfig`, `AgentBackend`, normalized `Event`s) so backends are
+hot-swappable.
+See [`docs/agent_sdk_comparison.md`](docs/agent_sdk_comparison.md) for the
+design rationale and [`docs/HANDOFF.md`](docs/HANDOFF.md) for the full
+workspace map and open work.
+## Install
+The SDK has no required third-party Python dependencies. To use the
+`claude_p` backend you need the Claude Code CLI on `PATH`. Install
+Claude Code from anthropic.com/claude-code, then verify:
+```bash
+claude --version
+```
+The project root (one level up from this README, at `agent-sdk/`) ships a
+`pyproject.toml`. From a sibling project in the workspace, depend on it
+with uv:
+```toml
+# in your sibling project's pyproject.toml
+[project]
+dependencies = ["agent-sdk"]
+[tool.uv.sources]
+agent-sdk = { path = "../agent-sdk", editable = true }
+```
+Or with pip from the project root: `pip install -e .`.
+Requires Python 3.10+ (uses PEP 604 union types, `match` statements, and
+`from __future__ import annotations`).
+## Quick start
+```python
+import asyncio
+from tigerharness.agent_sdk import AgentConfig, get_backend
+async def main():
+    backend = get_backend("claude_p")
+    cfg = AgentConfig(name="qa", instructions="Be concise.")
+    result = await backend.run(cfg, "What is 2 + 2?")
+    print(result.final_output)
+    print(f"cost = ${result.cost_usd}")
+asyncio.run(main())
+```
+## Backends
+| Name | Status | Notes |
+|---|---|---|
+| `claude_p` | working | Spawns `claude -p` per call. Always available. Subprocess transport over stream-json. |
+| `anthropic_sdk` | working | Wraps Anthropic's official `claude-agent-sdk`. Install with `pip install tigerharness[anthropic]`. Supports built-in tools, sessions, cancellation, and approval callbacks. |
+| `openai_sdk` | stub | Future: `pip install openai-agents`. Will support function tools, hosted tools, handoffs, and approval-loop wrappers. |
+Switch backends by changing the factory call — caller code stays identical:
+```python
+# Subprocess transport, always available
+backend = get_backend("claude_p")
+# Same agent code, but now via the official claude-agent-sdk
+backend = get_backend("anthropic_sdk")
+# (future)
+# backend = get_backend("openai_sdk")
+```
+You can also register your own:
+```python
+from tigerharness.agent_sdk import register_backend, AgentBackend
+class MyBackend:
+    # implement run, run_stream, open_session
+    ...
+register_backend("mine", lambda **kw: MyBackend(**kw))
+backend = get_backend("mine")
+```
+## Concepts
+### AgentConfig
+Declarative agent description: `name`, `instructions`, `model`, `tools`,
+`builtin_tools`, `output_schema`, `max_turns`, plus an `extra: dict` for
+backend-specific knobs.
+### Tools
+- `ToolSpec(name, description, input_schema, handler)` — Python-defined tools.
+  *Not supported by `claude_p`.*
+- `BuiltinTool(name, config)` — provider-hosted tools (`Bash`, `Read`,
+  `WebSearch`, `web_search`, `code_interpreter`, ...).
+### Run vs. run_stream
+```python
+# One-shot:
+result = await backend.run(cfg, prompt)
+# Streaming — consume to completion:
+handle = backend.run_stream(cfg, prompt)
+async for event in handle:
+    ...
+result = handle.result            # populated after the stream completes
+# Streaming — break out early with guaranteed cleanup:
+async with backend.run_stream(cfg, prompt) as handle:
+    async for event in handle:
+        if some_condition:
+            break                  # __aexit__ kills the subprocess
+# Or explicit cancel:
+await handle.cancel()              # mid-stream cancel; SIGINT to subprocess
+```
+If you neither consume the stream to completion nor wrap it in `async with`
+nor call `cancel()`, the underlying subprocess will linger until the OS
+eventually reaps it (typically on the next stdout write, which gets
+SIGPIPE'd). Prefer the `async with` form.
+### Events
+Discriminated union: `RunStart`, `TextDelta`, `MessageComplete`, `ToolCall`,
+`ToolResult`, `Thinking`, `AgentChanged`, `ErrorEvent`, `RunDone`. Use
+`match` / `isinstance` to handle each.
+### Sessions
+```python
+session = await backend.open_session()
+await backend.run(cfg, "first turn", session=session)
+await backend.run(cfg, "follow-up", session=session)
+```
+Sessions are **not** portable across backends. The id is empty until the
+first run populates it.
+### Approval (HITL)
+```python
+async def gate(req: ApprovalRequest) -> ApprovalDecision:
+    if req.tool_call.name == "Bash" and "rm " in str(req.tool_call.arguments):
+        return ApprovalDecision(allow=False, reason="rm denied")
+    return ApprovalDecision(allow=True)
+await backend.run(cfg, prompt, approval=gate)
+```
+*Not supported by `claude_p`.* Use `cfg.extra={"permission_mode": ...}` for
+coarse policy instead, or switch to `anthropic_sdk` for inline approval.
+## Examples
+See `examples/` — recommended reading order:
+1. `basic.py` — one-shot Q&A
+2. `streaming.py` — consume streaming events with `async with`
+3. `multi_turn.py` — session resume across turns
+4. `builtin_tools.py` — Claude Code's `Bash` and `Read` tools
+Run any of them with:
+```bash
+python -m tigerharness.agent_sdk.examples.basic
+```
+## `claude_p` extras
+The `claude_p` backend reads a few keys from `cfg.extra`:
+| Key | Type | Maps to |
+|---|---|---|
+| `permission_mode` | str | `--permission-mode` (default / acceptEdits / plan / bypassPermissions / dontAsk) |
+| `max_budget_usd` | float | `--max-budget-usd` |
+| `add_dirs` | list[str] | one `--add-dir` per entry |
+| `disallowed_tools` | list[str] | `--disallowedTools` |
+| `settings` | str | `--settings` |
+| `cli_args` | dict[str, str \| None] | arbitrary `--<key> <value>` (None values become bare flags) |
+`AgentConfig.output_schema` is wired to `--json-schema` (accepts a JSON
+Schema dict or a pydantic model — v1 or v2). The CLI populates
+`structured_output` in its result event, which `RunResult.final_output`
+reflects.
+## Testing
+The pytest suite lives at `agent_sdk/tests/` (excluded from the wheel). From
+the project root:
+```bash
+# One-time dev setup
+uv sync --group dev
+# Run the full suite (160 tests, ~3 seconds)
+uv run pytest
+# With coverage (uses .coveragerc which excludes examples and tests)
+uv run coverage run -m pytest && uv run coverage report -m
+# Type-check the package
+uv run mypy --python-version 3.10 agent_sdk
+```
+The tests use a set of fake `claude` shell scripts as stand-ins for the real
+CLI, so the suite runs without Claude Code installed. Coverage of the
+`agent_sdk/` source is at 100%.
+## Limitations of `claude_p`
+- No user-defined Python tools (raises `BackendNotImplementedError`)
+- No inline approval callbacks (raises `BackendNotImplementedError`)
+- `AgentConfig.temperature` is ignored (the CC CLI doesn't expose it as a
+  flag — set it via a settings file passed through `extra={"settings": ...}`)
+- `BuiltinTool(name, config={...})` rejects per-tool config (the CLI
+  configures hosted tools via settings, not flags)
+- One subprocess per `run_stream` call; multi-turn happens via `--resume`
+- `cancel()` sends SIGINT; `after_turn=True` is a hint, not a hard guarantee
+For any of those features, switch to the `anthropic_sdk` backend (the
+interface stays the same).

tigerharness/agent_sdk/__init__.py ADDED Viewed

@@ -0,0 +1,131 @@
+"""Backend-agnostic agent SDK.
+Public API entry points:
+    from tigerharness.agent_sdk import (
+        AgentConfig, ToolSpec, BuiltinTool, ToolOutput,
+        InputMessage, ApprovalRequest, ApprovalDecision,
+        get_backend, register_backend,
+    )
+    backend = get_backend("claude_p")              # `claude -p` subprocess
+    # backend = get_backend("anthropic_sdk")       # official claude-agent-sdk
+    # backend = get_backend("openai_sdk")          # future (stub today)
+    cfg = AgentConfig(name="qa", instructions="Be concise.")
+    result = await backend.run(cfg, "What is 2 + 2?")
+    print(result.final_output)
+The interface is designed so caller code stays identical when you switch
+backends. See ``docs/agent_sdk_comparison.md`` for the design rationale.
+"""
+from __future__ import annotations
+from .errors import (
+    AgentSDKError,
+    BackendNotImplementedError,
+    CLIError,
+    StreamNotConsumedError,
+    ToolApprovalDenied,
+)
+from .factory import get_backend, list_backends, register_backend
+from .retry import run_with_retry
+from .types import (
+    # Backend Protocol
+    AgentBackend,
+    # Config
+    AgentConfig,
+    AgentChanged,
+    # Approval
+    ApprovalCallback,
+    ApprovalDecision,
+    ApprovalRequest,
+    # Tools
+    BuiltinTool,
+    ContentPart,
+    ErrorEvent,
+    Event,
+    InputMessage,
+    MessageComplete,
+    NormalizedMessage,
+    Role,
+    # Result
+    RunDone,
+    RunResult,
+    RunStart,
+    # Session / stream
+    Session,
+    StopReason,
+    StreamHandle,
+    # Events
+    TextDelta,
+    TextPart,
+    Thinking,
+    ThinkingPart,
+    ToolCall,
+    ToolHandler,
+    ToolOutput,
+    ToolResult,
+    ToolResultPart,
+    ToolSpec,
+    ToolUsePart,
+)
+__version__ = "0.1.0"
+__all__ = [
+    # Version
+    "__version__",
+    # Errors
+    "AgentSDKError",
+    "BackendNotImplementedError",
+    "CLIError",
+    "StreamNotConsumedError",
+    "ToolApprovalDenied",
+    # Factory
+    "get_backend",
+    "list_backends",
+    "register_backend",
+    # Retry
+    "run_with_retry",
+    # Backend protocol
+    "AgentBackend",
+    # Config
+    "AgentConfig",
+    # Content
+    "ContentPart",
+    "InputMessage",
+    "NormalizedMessage",
+    "Role",
+    "TextPart",
+    "ThinkingPart",
+    "ToolResultPart",
+    "ToolUsePart",
+    # Tools
+    "BuiltinTool",
+    "ToolHandler",
+    "ToolOutput",
+    "ToolSpec",
+    # Approval
+    "ApprovalCallback",
+    "ApprovalDecision",
+    "ApprovalRequest",
+    # Events
+    "AgentChanged",
+    "ErrorEvent",
+    "Event",
+    "MessageComplete",
+    "RunDone",
+    "RunStart",
+    "StopReason",
+    "TextDelta",
+    "Thinking",
+    "ToolCall",
+    "ToolResult",
+    # Result, session, stream
+    "RunResult",
+    "Session",
+    "StreamHandle",
+]

tigerharness/agent_sdk/backends/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Concrete backend implementations.
+Don't import from here directly in user code; use ``agent_sdk.get_backend(name)``
+instead so backends stay swappable.
+"""

tigerharness/agent_sdk/backends/_base.py ADDED Viewed

@@ -0,0 +1,82 @@
+"""Helpers shared by backend implementations."""
+from __future__ import annotations
+from collections.abc import AsyncIterator
+from typing import Any
+from ..errors import StreamNotConsumedError
+from ..types import Event, RunResult, StreamHandle
+class BaseStreamHandle:
+    """Convenience base class for backends.
+    Subclasses implement ``_iter()`` as an ``async`` generator that yields
+    Events and, before returning, sets ``self._result`` to a populated
+    ``RunResult``. The base class wires up ``__aiter__`` / ``__anext__``,
+    ``.result`` and ``.is_complete`` for free.
+    Subclasses must hand that generator to ``_start()`` before the handle is
+    iterated; ``__anext__`` raises ``RuntimeError`` otherwise. The handle is
+    also an async context manager whose ``__aexit__`` performs best-effort
+    cancellation and closes the generator.
+    """
+    def __init__(self) -> None:
+        """Create a handle with no result and no generator attached yet."""
+        # Final result; stays None until something assigns it (per the class
+        # docstring, the subclass's generator does so before returning).
+        self._result: RunResult | None = None
+        # Event source installed by _start(); None means "not started".
+        self._gen: AsyncIterator[Event] | None = None
+    def _start(self, gen: AsyncIterator[Event]) -> None:
+        """Attach the async iterator that ``__anext__`` will delegate to."""
+        self._gen = gen
+    def __aiter__(self) -> "BaseStreamHandle":
+        """Return ``self``; the handle is its own async iterator."""
+        return self
+    async def __anext__(self) -> Event:
+        """Return the next event from the attached generator.
+        Raises ``RuntimeError`` if ``_start()`` was never called. Anything the
+        generator raises (including ``StopAsyncIteration`` on exhaustion)
+        propagates unchanged.
+        """
+        if self._gen is None:
+            raise RuntimeError("Stream not started; subclass forgot to call _start().")
+        return await self._gen.__anext__()
+    async def __aenter__(self) -> "BaseStreamHandle":
+        """Enter the ``async with`` block; returns the handle itself."""
+        return self
+    async def __aexit__(self, exc_type: Any, exc: Any, tb: Any) -> None:
+        """Clean up on leaving ``async with``; never suppresses the body's exception.
+        If no result has been recorded, ``cancel()`` is attempted first. Then
+        the generator, if one was started and exposes ``aclose``, is closed.
+        Any ``Exception`` raised by either step is discarded.
+        """
+        # Best-effort cleanup: ask the backend to cancel, then close the
+        # underlying generator so its `finally` block reaps any subprocess.
+        if not self.is_complete:
+            try:
+                await self.cancel()
+            except NotImplementedError:  # NOTE(review): redundant; `except Exception` below already covers this
+                pass
+            except Exception:  # deliberate swallow: cleanup must not mask the body's outcome
+                pass
+        if self._gen is not None and hasattr(self._gen, "aclose"):  # plain AsyncIterators may lack aclose()
+            try:
+                await self._gen.aclose()  # type: ignore[union-attr]
+            except Exception:
+                pass
+    @property
+    def result(self) -> RunResult:
+        """Return the final ``RunResult``.
+        Raises ``StreamNotConsumedError`` while ``_result`` is still ``None``,
+        i.e. before the stream has produced its result.
+        """
+        if self._result is None:
+            raise StreamNotConsumedError(
+                "Stream has not been fully consumed yet. Iterate to completion "
+                "first, or read .result inside the `async with` block after "
+                "finishing the loop."
+            )
+        return self._result
+    @property
+    def is_complete(self) -> bool:
+        """Return ``True`` once a result has been recorded on the handle."""
+        return self._result is not None
+    async def cancel(self, *, after_turn: bool = False) -> None:  # pragma: no cover
+        """Cancel the in-flight run; this default always raises ``NotImplementedError``.
+        Backends that support cancellation are expected to override it.
+        ``after_turn`` is accepted as part of the signature and unused here.
+        """
+        raise NotImplementedError("This backend does not support cancellation.")
+async def run_via_stream(handle: StreamHandle) -> RunResult:
+    """Drain a stream handle and return its final RunResult.
+    Backends usually implement ``run()`` as ``return await
+    run_via_stream(self.run_stream(...))`` so the streaming and non-streaming
+    paths share one code path.
+    Every event is discarded; only ``handle.result`` is returned. For handles
+    built on ``BaseStreamHandle``, reading ``.result`` raises
+    ``StreamNotConsumedError`` if the stream ended without setting a result.
+    """
+    async for _ in handle:  # events are intentionally ignored; only the final result matters
+        pass
+    return handle.result