PyPI - arbor-agent - Versions diffs - 0.1.0__py3-none-any.whl - Mend

arbor-agent 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (115) hide show

arbor/__init__.py +7 -0
arbor/_app.py +30 -0
arbor/cli/__init__.py +1 -0
arbor/cli/_autodetect.py +101 -0
arbor/cli/_constants.py +81 -0
arbor/cli/app.py +100 -0
arbor/cli/branch_guard.py +128 -0
arbor/cli/chart.py +243 -0
arbor/cli/commands/__init__.py +1 -0
arbor/cli/commands/config_cmd.py +230 -0
arbor/cli/commands/doctor_cmd.py +134 -0
arbor/cli/commands/report_cmd.py +41 -0
arbor/cli/commands/run.py +921 -0
arbor/cli/commands/setup_cmd.py +133 -0
arbor/cli/companion.py +485 -0
arbor/cli/i18n.py +76 -0
arbor/cli/intake/__init__.py +16 -0
arbor/cli/intake/display.py +206 -0
arbor/cli/intake/launch_tool.py +190 -0
arbor/cli/intake/repl.py +744 -0
arbor/cli/intake/system_prompt.py +332 -0
arbor/cli/post_run.py +331 -0
arbor/cli/preflight.py +218 -0
arbor/cli/resume_picker.py +232 -0
arbor/cli/run_dashboard.py +2695 -0
arbor/cli/run_state.py +898 -0
arbor/cli/style.py +196 -0
arbor/cli/user_config.py +50 -0
arbor/coordinator/__init__.py +17 -0
arbor/coordinator/checkpoint.py +277 -0
arbor/coordinator/config.py +516 -0
arbor/coordinator/context_prune.py +219 -0
arbor/coordinator/convergence.py +362 -0
arbor/coordinator/hitl.py +73 -0
arbor/coordinator/idea_tree.py +583 -0
arbor/coordinator/main.py +255 -0
arbor/coordinator/orchestrator.py +1169 -0
arbor/coordinator/prompts.py +781 -0
arbor/coordinator/tools/__init__.py +140 -0
arbor/coordinator/tools/ask_user.py +117 -0
arbor/coordinator/tools/executor_run.py +1307 -0
arbor/coordinator/tools/git_ops.py +576 -0
arbor/coordinator/tools/search_ctx.py +586 -0
arbor/coordinator/tools/tree_ops.py +635 -0
arbor/core/__init__.py +111 -0
arbor/core/agent.py +824 -0
arbor/core/config.py +103 -0
arbor/core/config_cli.py +161 -0
arbor/core/config_resolve.py +309 -0
arbor/core/config_schema.py +388 -0
arbor/core/context.py +420 -0
arbor/core/experiment.py +282 -0
arbor/core/git_artifacts.py +63 -0
arbor/core/llm/__init__.py +13 -0
arbor/core/llm/base.py +203 -0
arbor/core/llm/claude.py +391 -0
arbor/core/llm/litellm_provider.py +182 -0
arbor/core/llm/openai_compat.py +408 -0
arbor/core/llm/openai_responses.py +398 -0
arbor/core/logging_setup.py +39 -0
arbor/core/skill_registry.py +144 -0
arbor/core/tools/__init__.py +74 -0
arbor/core/tools/base.py +106 -0
arbor/core/tools/bash.py +411 -0
arbor/core/tools/executor_tool.py +135 -0
arbor/core/tools/file_edit.py +201 -0
arbor/core/tools/file_read.py +178 -0
arbor/core/tools/file_write.py +69 -0
arbor/core/tools/glob_tool.py +91 -0
arbor/core/tools/grep.py +226 -0
arbor/core/tools/path_guard.py +36 -0
arbor/core/tools/run_training.py +444 -0
arbor/core/tools/skill.py +78 -0
arbor/core/tools/web/__init__.py +11 -0
arbor/core/tools/web/_coerce.py +72 -0
arbor/core/tools/web/prompts.py +20 -0
arbor/core/tools/web/search.py +404 -0
arbor/core/tools/web/visit.py +237 -0
arbor/dashboard.py +781 -0
arbor/events/__init__.py +14 -0
arbor/events/bus.py +126 -0
arbor/events/mock.py +60 -0
arbor/events/payloads.py +133 -0
arbor/events/subscribers/__init__.py +1 -0
arbor/events/subscribers/cli_logger.py +255 -0
arbor/events/subscribers/file_logger.py +58 -0
arbor/events/subscribers/stats_collector.py +111 -0
arbor/events/types.py +64 -0
arbor/executor/__init__.py +6 -0
arbor/executor/main.py +183 -0
arbor/executor/prompts.py +437 -0
arbor/plugins/__init__.py +5 -0
arbor/plugins/base.py +160 -0
arbor/plugins/mle_kaggle.yaml +269 -0
arbor/report/__init__.py +5 -0
arbor/report/generator.py +250 -0
arbor/review.py +325 -0
arbor/run.py +733 -0
arbor/search_agent/__init__.py +20 -0
arbor/search_agent/agent.py +146 -0
arbor/search_agent/main.py +118 -0
arbor/search_agent/prompts.py +130 -0
arbor/skills/first_principles_probe.md +34 -0
arbor/skills/idea_drafting.md +244 -0
arbor/webui/__init__.py +6 -0
arbor/webui/index.html +1036 -0
arbor/webui/launcher.py +50 -0
arbor/webui/server.py +320 -0
arbor/webui/snapshot.py +168 -0
arbor_agent-0.1.0.dist-info/METADATA +458 -0
arbor_agent-0.1.0.dist-info/RECORD +115 -0
arbor_agent-0.1.0.dist-info/WHEEL +5 -0
arbor_agent-0.1.0.dist-info/entry_points.txt +6 -0
arbor_agent-0.1.0.dist-info/licenses/LICENSE +201 -0
arbor_agent-0.1.0.dist-info/top_level.txt +1 -0

arbor/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+"""arbor — AI-powered autonomous research framework.
+Sub-packages:
+- core: Shared infrastructure (Agent, tools, LLM providers, context management)
+- executor: Research executor that implements individual ideas
+- coordinator: arbor-guided orchestrator that manages the Idea Tree
+"""

arbor/_app.py ADDED Viewed

@@ -0,0 +1,30 @@
+"""Single-source-of-truth for the application's brand name.
+Future renames only need to change APP_NAME below; all derived strings
+(CLI command, config dir, config file) update automatically. Do not write
+the literal string "arbor" anywhere else in the codebase.
+"""
+from pathlib import Path
+# Brand name. Every identifier below that mentions the brand is derived from
+# this one value, so a rename is a one-line change.
+APP_NAME = "arbor"
+# The console command is the brand name itself.
+CLI_COMMAND = APP_NAME
+# Product taglines, shown on the splash banner and in `--help`. Kept here as a
+# single source of truth so the two surfaces never drift. TAGLINE is the punchy
+# hero line; TAGLINE_SUB explains what the agent actually does (branch → prune →
+# harvest), mirroring the tree/arbor brand.
+TAGLINE = "Grow evidence, not logs."
+TAGLINE_SUB = "Every hypothesis becomes a branch — pruned if it fails, harvested if it works."
+# Config directory / file names derived from the brand name.
+CONFIG_DIR_NAME = f".{APP_NAME}"
+CONFIG_FILE_NAME = f"{APP_NAME}.yaml"
+# Per-user config under the home directory. The directory reuses
+# CONFIG_DIR_NAME instead of re-deriving the dotted name, so the two cannot
+# drift apart.
+GLOBAL_CONFIG_DIR = Path.home() / CONFIG_DIR_NAME
+GLOBAL_CONFIG_FILE = GLOBAL_CONFIG_DIR / "config.yaml"
+# Legacy paths kept for one release so users with a pre-rename config
+# don't lose their settings. The user_config loader falls back to these.
+LEGACY_GLOBAL_CONFIG_DIR = Path.home() / ".autoresearch"
+LEGACY_GLOBAL_CONFIG_FILE = LEGACY_GLOBAL_CONFIG_DIR / "config.yaml"

arbor/cli/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+"""CLI entry point for the arbor tool."""

arbor/cli/_autodetect.py ADDED Viewed

@@ -0,0 +1,101 @@
+"""Setup-time backend auto-detection for ``provider: auto``.
+When the user picks ``auto`` we resolve it to a *concrete* backend **once**, at
+``arbor setup`` / ``arbor config init`` time, and freeze the result in the config
+file. The runtime stays pure and fast (``resolve_backend`` never touches the
+network); the only network probe happens here, during setup.
+Resolution rules:
+* ``claude*`` → ``anthropic`` (native Messages API: signed thinking blocks +
+  prompt caching), against the official endpoint or a custom ``base_url``.
+* Anything else → **probe** ``{base_url}/responses``. If the endpoint serves the
+  OpenAI Responses API we pick ``openai-responses`` so the reasoning chain is
+  preserved across ReAct turns; otherwise we fall back to ``openai-chat`` (chat
+  completions), which every OpenAI-compatible endpoint supports.
+The probe is best-effort and never raises — an inconclusive result (network
+error, bad key, …) falls back to ``openai-chat``, which the user can always
+override by setting ``provider`` explicitly.
+"""
+from __future__ import annotations
+import logging
+log = logging.getLogger(__name__)
+# How long to wait for the one-shot Responses probe before giving up and
+# falling back to chat completions. Setup is interactive, so keep it snappy.
+_PROBE_TIMEOUT = 10.0
+def probe_responses_api(
+    *,
+    model: str,
+    base_url: str | None,
+    api_key: str | None,
+    timeout: float = _PROBE_TIMEOUT,
+) -> bool:
+    """Report whether ``{base_url}/responses`` really serves ``model``.
+    Sends one tiny Responses-API request and returns ``True`` only when it
+    comes back as an object carrying an ``id``. Every failure — missing
+    ``openai`` package, 404 (no route), 400 (route exists but the model or
+    request is rejected), auth, connection, timeout — returns ``False``; the
+    function never raises.
+    The bias is deliberately conservative: wrongly choosing the Responses API
+    breaks every run, whereas wrongly rejecting it only costs the
+    reasoning-chain upgrade and falls back to chat completions, which still
+    works. A route that merely *exists* is therefore not enough.
+    """
+    try:
+        from openai import OpenAI
+    except Exception:  # pragma: no cover - openai always installed in practice
+        return False
+    try:
+        probe_client = OpenAI(
+            api_key=api_key if api_key else "dummy",
+            base_url=base_url if base_url else None,
+            timeout=timeout,
+            max_retries=0,
+        )
+        # Smallest possible request: no reasoning, a handful of output tokens.
+        reply = probe_client.responses.create(
+            model=model,
+            input="ping",
+            max_output_tokens=16,
+        )
+        # Only an object with an id counts as the real Responses API; a chat
+        # shim echoing JSON or an empty body does not.
+        reply_id = getattr(reply, "id", None)
+        supported = reply_id is not None
+    except Exception as exc:
+        # Inconclusive or negative either way: use the universal chat path.
+        log.debug("responses probe failed for %s @ %s: %s", model, base_url, exc)
+        return False
+    return supported
+def resolve_auto_provider(
+    *,
+    model: str,
+    base_url: str | None,
+    api_key: str | None,
+    timeout: float = _PROBE_TIMEOUT,
+) -> tuple[str, str]:
+    """Turn ``provider: auto`` into a concrete backend at setup time.
+    Returns a ``(provider, reason)`` pair. ``provider`` is ``anthropic``,
+    ``openai-responses`` or ``openai-chat``; ``reason`` is a short
+    human-readable note for the setup output.
+    """
+    # Only the part after the last "/" matters, so routed names such as
+    # "vendor/claude-x" are classified by their final segment.
+    model_tail = (model or "").rpartition("/")[2].lower()
+    if model_tail.startswith(("claude", "anthropic")):
+        return "anthropic", "Claude model → native Anthropic Messages API"
+    has_responses = probe_responses_api(
+        model=model,
+        base_url=base_url,
+        api_key=api_key,
+        timeout=timeout,
+    )
+    if not has_responses:
+        return "openai-chat", "no Responses API on this endpoint → openai-chat (chat completions)"
+    provider = "openai-responses"
+    reason = "endpoint serves the Responses API → openai-responses (reasoning chain preserved)"
+    return provider, reason

arbor/cli/_constants.py ADDED Viewed

@@ -0,0 +1,81 @@
+"""Shared CLI constants and small provider helpers.
+Single source of truth for values the ``run`` / ``config`` commands and the
+intake REPL all need, so they cannot drift apart — e.g. a provider added in one
+command but forgotten in another, or two copies of a "default model" helper that
+quietly disagree.
+"""
+from __future__ import annotations
+# User-facing provider menu (setup wizard + `config init --provider`), in
+# display order. Each is a single-axis value that maps 1:1 onto a backend, so
+# the config file reads the same as the menu. `auto` resolves to one of the
+# concrete three at setup time.
+PROVIDER_CHOICES = ("auto", "openai-responses", "openai-chat", "anthropic")
+# Concrete providers Arbor can store + serve after `auto` is resolved. `litellm`
+# stays a valid backend for back-compat / advanced hand-edited configs, but is
+# no longer advertised in the menu.
+_BACKEND_PROVIDERS = {"anthropic", "openai-responses", "openai-chat", "litellm"}
+# Values of the legacy ``openai_api`` axis (``provider: openai`` plus
+# ``openai_api: chat|responses``) that canonical_provider() folds into the
+# single-axis ``openai-*`` provider names.
+VALID_OPENAI_APIS = {"chat", "responses"}
+# Intake-agent LLM call budget — seeded into the agent config by ``run`` and
+# applied directly by the REPL.
+INTAKE_LLM_TIMEOUT = 20.0
+INTAKE_LLM_PROVIDER_RETRIES = 0
+INTAKE_LLM_RETRY_ATTEMPTS = 2
+INTAKE_LLM_RETRY_BASE_DELAY = 1.0
+INTAKE_LLM_RETRY_MAX_DELAY = 2.0
+# Intake is a planning conversation (read the eval, propose a contract), not a
+# deep-reasoning task — so it overrides the user's reasoning_effort (often
+# "high") with a lighter setting to keep each turn snappy.
+INTAKE_REASONING_EFFORT = "low"
+# Fallback model names. default_model_for_provider() returns
+# DEFAULT_OPENAI_MODEL for the OpenAI family and litellm; DEFAULT_CLAUDE_MODEL
+# is for callers that must persist a concrete string for anthropic/auto.
+DEFAULT_OPENAI_MODEL = "gpt-4o"
+DEFAULT_CLAUDE_MODEL = "claude-sonnet-4-20250514"
+# Read-only WebUI: the browser monitor binds here by default for interactive
+# runs (no flag needed). If the port is taken we walk the next few ports so a
+# second concurrent run doesn't collide.
+DEFAULT_WEBUI_PORT = 8765
+WEBUI_PORT_SCAN = 10
+def canonical_provider(provider: str | None, openai_api: str | None = None) -> str:
+    """Map a provider name or alias onto its canonical single-axis value.
+    Known inputs resolve to ``auto``, ``anthropic``, ``openai-responses``,
+    ``openai-chat`` or ``litellm``. A missing/empty ``provider`` is treated as
+    ``anthropic``. The legacy two-axis form (``openai`` plus
+    ``openai_api: chat|responses``) resolves to the matching ``openai-*``
+    value, so newly written configs only need the single ``provider`` field.
+    Names that are not recognised are returned stripped and lower-cased.
+    """
+    name = (provider or "anthropic").strip().lower()
+    api_axis = (openai_api or "").strip().lower()
+    # Legacy bare provider: the only case that consults the second axis.
+    # An explicit "chat" selects chat completions; anything else, Responses.
+    if name == "openai":
+        return "openai-responses" if api_axis != "chat" else "openai-chat"
+    aliases = {
+        "auto": "auto",
+        "claude": "anthropic",
+        "anthropic": "anthropic",
+        "litellm": "litellm",
+        "openai-chat": "openai-chat",
+        "chat": "openai-chat",
+        "openai_compat": "openai-chat",
+        "openai_chat": "openai-chat",
+        "openai-responses": "openai-responses",
+        "responses": "openai-responses",
+        "openai_responses": "openai-responses",
+        "openai_response": "openai-responses",
+    }
+    # unknown → passthrough; resolve_backend decides or errors later
+    return aliases.get(name, name)
+def default_model_for_provider(provider: str | None) -> str | None:
+    """Return the default model for ``provider``, or ``None`` to defer.
+    The OpenAI family (any canonical name starting with ``openai``) and
+    ``litellm`` get :data:`DEFAULT_OPENAI_MODEL`. Everything else — including
+    ``anthropic`` and ``auto`` — yields ``None`` because Claude supplies its
+    own default; callers that must persist a concrete string substitute
+    :data:`DEFAULT_CLAUDE_MODEL` themselves.
+    """
+    canonical = canonical_provider(provider)
+    needs_explicit_model = canonical == "litellm" or canonical.startswith("openai")
+    return DEFAULT_OPENAI_MODEL if needs_explicit_model else None

arbor/cli/app.py ADDED Viewed

@@ -0,0 +1,100 @@
+"""Top-level Typer app for the arbor CLI."""
+from __future__ import annotations
+import sys
+from difflib import get_close_matches
+import typer
+from .._app import APP_NAME, TAGLINE, TAGLINE_SUB
+from .commands.run import run_command
+from .commands.report_cmd import report_command
+from .commands.config_cmd import config_app
+from .commands.doctor_cmd import doctor_command
+from .commands.setup_cmd import setup_command
+# We don't use a Typer.callback() default because that would shadow flag
+# handling for `arbor --help`. Instead, we detect the no-subcommand case
+# in main() and rewrite argv to insert "run" before delegating.
+app = typer.Typer(
+    name=APP_NAME,
+    help=(
+        f"{APP_NAME} — {TAGLINE}\n\n"
+        f"{TAGLINE_SUB}\n\n"
+        f"Tip: run `{APP_NAME}` (no subcommand) inside your project to start "
+        f"an interactive session — equivalent to `{APP_NAME} run`."
+    ),
+    no_args_is_help=False,
+    add_completion=False,
+)
+# Register the subcommands imported from .commands, plus the nested `config`
+# command group. main() decides whether "run" is implied by comparing the
+# first argument against _KNOWN_COMMANDS, so keep that set in sync with the
+# names registered here.
+app.command("run")(run_command)
+app.command("report")(report_command)
+app.command("doctor")(doctor_command)
+app.command("setup")(setup_command)
+app.add_typer(config_app, name="config")
+@app.command("version")
+def version_command() -> None:
+    """Print the installed version.
+    The wheel is published as ``arbor-agent`` while the import package (and
+    ``APP_NAME``) is ``arbor``, so a metadata lookup by ``APP_NAME`` alone does
+    not find this distribution. The distribution(s) providing the running
+    top-level package are tried first; ``APP_NAME`` stays as the last
+    candidate. Prints ``unknown`` when nothing resolves; never raises.
+    """
+    ver = "unknown"
+    try:
+        from importlib import metadata
+        top_level = (__package__ or APP_NAME).partition(".")[0]
+        try:
+            # packages_distributions() exists on Python 3.10+ only; without it
+            # fall through to the plain APP_NAME lookup.
+            candidates = list(metadata.packages_distributions().get(top_level, ()))
+        except Exception:
+            candidates = []
+        candidates.append(APP_NAME)
+        for dist_name in candidates:
+            try:
+                ver = metadata.version(dist_name)
+            except Exception:
+                continue
+            break
+    except Exception:
+        ver = "unknown"
+    typer.echo(f"{APP_NAME} {ver}")
+# First-argument tokens main() accepts as real subcommands; any other first
+# argument (other than a root flag) gets "run" inserted in front of it.
+_KNOWN_COMMANDS = {"run", "report", "config", "version", "doctor", "setup"}
+# Flags left at the root: main() does not insert "run" before them.
+_ROOT_FLAGS = {"--help", "-h"}
+# Flags main() rewrites to the `version` subcommand.
+_VERSION_FLAGS = {"--version", "-V"}
+def main() -> None:
+    """Console-script entry point.
+    Dispatch rules for the first argument, in order:
+    * ``--version`` / ``-V`` is rewritten to the ``version`` subcommand.
+    * A non-flag word that closely resembles a known subcommand is treated as
+      a typo: a "Did you mean" error goes to stderr and the process exits
+      with status 2.
+    * Anything that is neither a known subcommand nor ``--help`` / ``-h`` —
+      including no arguments at all (e.g. `arbor` or `arbor --cwd .`) — gets
+      ``run`` inserted in front of it.
+    """
+    # Some terminals (notably macOS Terminal.app with "Set locale env vars on
+    # startup" off) hand Python a non-UTF-8 stdout, and any glyph or CJK text
+    # we print then raises UnicodeEncodeError and crashes. Force UTF-8 with
+    # replacement so the worst case is a "?" rather than a dead process.
+    for stream in (sys.stdout, sys.stderr):
+        try:
+            if not hasattr(stream, "reconfigure"):
+                continue
+            encoding = (getattr(stream, "encoding", None) or "").lower()
+            if encoding not in ("utf-8", "utf8"):
+                stream.reconfigure(encoding="utf-8", errors="replace")
+        except Exception:
+            # Best-effort only: a stream that refuses reconfiguration is left alone.
+            pass
+    args = sys.argv[1:]
+    head = args[0] if args else None
+    if head in _VERSION_FLAGS:
+        sys.argv = [sys.argv[0], "version", *args[1:]]
+        app()
+        return
+    recognized = head in _KNOWN_COMMANDS or head in _ROOT_FLAGS
+    if head and not recognized and not head.startswith("-"):
+        suggestions = get_close_matches(head, sorted(_KNOWN_COMMANDS), n=1, cutoff=0.74)
+        if suggestions:
+            typer.secho(
+                f"error: unknown command {head!r}. Did you mean {suggestions[0]!r}?",
+                fg=typer.colors.RED,
+                err=True,
+            )
+            sys.exit(2)
+    if not args or not recognized:
+        # No subcommand given: behave as `run` with the same arguments.
+        sys.argv = [sys.argv[0], "run", *args]
+    app()
+if __name__ == "__main__":
+    main()

arbor/cli/branch_guard.py ADDED Viewed

@@ -0,0 +1,128 @@
+"""Pre-launch git base-branch guard.
+Every research run creates its trunk/experiment branches off the project's base
+branch (``main``/``master``) and, when it finishes, leaves you checked out on the
+working trunk (``coordinator/trunk``). The *next* run in that repo would then hit
+the engine's "refusing to create a trunk from a non-base branch" guard and die
+after the whole dashboard has spun up.
+This module catches that situation up front and, in an interactive terminal,
+offers to switch back to the base branch (the common case) / proceed anyway /
+abort — so the user gets a one-keypress recovery instead of a raw error.
+"""
+from __future__ import annotations
+import subprocess
+from pathlib import Path
+from typing import Any, Literal
+def _git(cwd: Path, *args: str) -> str | None:
+    """Run ``git <args>`` in ``cwd`` and return its stripped stdout.
+    Returns ``None`` when git exits with a non-zero status or the process
+    cannot be started at all (``OSError``). stderr is discarded.
+    """
+    command = ["git", *args]
+    try:
+        completed = subprocess.run(
+            command,
+            cwd=str(cwd),
+            stdout=subprocess.PIPE,
+            stderr=subprocess.DEVNULL,
+            text=True,
+            check=True,
+        )
+    except (subprocess.CalledProcessError, OSError):
+        return None
+    return completed.stdout.strip()
+def current_branch(cwd: Path) -> str | None:
+    """Name of the branch checked out in ``cwd``, or ``None``.
+    ``None`` covers both a failed git call and empty output from
+    ``git branch --show-current``.
+    """
+    shown = _git(cwd, "branch", "--show-current")
+    return shown if shown else None
+def _branch_exists(cwd: Path, name: str) -> bool:
+    """True when ``git rev-parse --verify`` resolves ``refs/heads/<name>`` in ``cwd``."""
+    ref = f"refs/heads/{name}"
+    resolved = _git(cwd, "rev-parse", "--verify", "--quiet", ref)
+    return resolved is not None
+def resolve_base_branch(cwd: Path, configured: str | None) -> str | None:
+    """Pick the repo's base branch.
+    A non-empty ``configured`` value is returned as-is, without checking that
+    the branch exists. Otherwise the first of ``main`` / ``master`` that exists
+    as a local branch wins, and ``None`` means neither could be found (not a
+    git repo, or neither branch present).
+    """
+    if configured:
+        return configured
+    candidates = ("main", "master")
+    return next((branch for branch in candidates if _branch_exists(cwd, branch)), None)
+def on_base_branch(cwd: Path, configured_base: str | None) -> tuple[bool, str | None, str | None]:
+    """Return ``(is_on_base, current, base)`` for the repo at ``cwd``.
+    ``is_on_base`` is also ``True`` whenever the current branch or the base
+    branch cannot be determined (detached HEAD, no base) — the engine's own
+    guard stays the backstop for those odd states.
+    """
+    current = current_branch(cwd)
+    base = resolve_base_branch(cwd, configured_base)
+    undetermined = current is None or base is None
+    return (undetermined or current == base), current, base
+def resolve_start_branch(
+    cwd: Path,
+    config: Any,
+    *,
+    allow_non_base: bool,
+    interactive: bool,
+    console: Any,
+) -> Literal["proceed", "abort"]:
+    """Ensure the run starts from the base branch, or the user knowingly opts out.
+    ``config.base_branch`` selects the base (falling back to main/master).
+    When already on the base branch — or when the current/base branch cannot be
+    determined — nothing happens. Otherwise, in order: ``allow_non_base``
+    proceeds silently; a non-``interactive`` session renders an error panel and
+    aborts; an interactive session prompts on ``console`` for
+    checkout / proceed / abort.
+    Side effects: may ``git checkout`` the base branch, or set
+    ``config.require_base_branch = False`` when the user proceeds on a non-base
+    branch. Returns "proceed" or "abort".
+    """
+    on_base, cur, base = on_base_branch(cwd, config.base_branch)
+    if on_base:
+        return "proceed"
+    # Explicit opt-out (flag) — honor it without prompting.
+    if allow_non_base:
+        config.require_base_branch = False
+        return "proceed"
+    # Non-interactive (piped / --yes / CI): can't ask. Fail clean with the
+    # followable fix instead of crashing mid-run.
+    if not interactive:
+        from .style import render_error_panel
+        render_error_panel(
+            "not on the base branch",
+            f"Currently on '{cur}', but runs start from the base branch '{base}'.\n"
+            f"A previous run likely left you on '{cur}'. Either:\n"
+            f"  • git checkout {base}\n"
+            f"  • or re-run with --allow-non-base-branch to use this branch as-is.",
+        )
+        return "abort"
+    # Interactive: offer the one-keypress recovery.
+    import typer
+    console.print()
+    console.print(
+        f"[yellow]You're on branch [bold]{cur}[/], not the base branch "
+        f"[bold]{base}[/].[/]")
+    console.print(f"[dim]A previous run usually leaves you on '{cur}'.[/]\n")
+    console.print(f"  [bold]m[/]  checkout '{base}' and start fresh [dim](recommended)[/]")
+    console.print(f"  [bold]p[/]  proceed on '{cur}' as-is")
+    console.print("  [bold]a[/]  abort\n")
+    # The answer is lower-cased before matching, so the base name must be
+    # lower-cased too — otherwise typing a base branch that contains uppercase
+    # letters (e.g. "Develop") could never be accepted.
+    checkout_answers = ("m", "main", base.lower())
+    while True:
+        choice = typer.prompt(f"Checkout {base}, proceed, or abort? [m/p/a]",
+                              default="m").strip().lower()
+        if choice in checkout_answers:
+            if _git(cwd, "checkout", base) is None:
+                from .style import render_error_panel
+                render_error_panel(
+                    "checkout failed",
+                    f"Could not checkout '{base}' (uncommitted changes on '{cur}'?). "
+                    f"Resolve it manually, then re-run.",
+                )
+                return "abort"
+            console.print(f"[green]✓[/] now on '{base}'")
+            return "proceed"
+        if choice in ("p", "proceed"):
+            config.require_base_branch = False
+            console.print(f"[dim]proceeding on '{cur}'[/]")
+            return "proceed"
+        if choice in ("a", "abort", "q"):
+            return "abort"
+        # Unrecognised answer: remind the user of the options and ask again.
+        console.print("[yellow]  enter m, p, or a[/]")