PyPI - arbor-agent - Versions diffs - 0.1.0__tar.gz - Mend

arbor-agent 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (122)

arbor_agent-0.1.0/LICENSE +201 -0
arbor_agent-0.1.0/PKG-INFO +458 -0
arbor_agent-0.1.0/README.md +430 -0
arbor_agent-0.1.0/arbor_agent.egg-info/PKG-INFO +458 -0
arbor_agent-0.1.0/arbor_agent.egg-info/SOURCES.txt +120 -0
arbor_agent-0.1.0/arbor_agent.egg-info/dependency_links.txt +1 -0
arbor_agent-0.1.0/arbor_agent.egg-info/entry_points.txt +6 -0
arbor_agent-0.1.0/arbor_agent.egg-info/requires.txt +14 -0
arbor_agent-0.1.0/arbor_agent.egg-info/top_level.txt +1 -0
arbor_agent-0.1.0/pyproject.toml +75 -0
arbor_agent-0.1.0/setup.cfg +4 -0
arbor_agent-0.1.0/src/__init__.py +7 -0
arbor_agent-0.1.0/src/_app.py +30 -0
arbor_agent-0.1.0/src/cli/__init__.py +1 -0
arbor_agent-0.1.0/src/cli/_autodetect.py +101 -0
arbor_agent-0.1.0/src/cli/_constants.py +81 -0
arbor_agent-0.1.0/src/cli/app.py +100 -0
arbor_agent-0.1.0/src/cli/branch_guard.py +128 -0
arbor_agent-0.1.0/src/cli/chart.py +243 -0
arbor_agent-0.1.0/src/cli/commands/__init__.py +1 -0
arbor_agent-0.1.0/src/cli/commands/config_cmd.py +230 -0
arbor_agent-0.1.0/src/cli/commands/doctor_cmd.py +134 -0
arbor_agent-0.1.0/src/cli/commands/report_cmd.py +41 -0
arbor_agent-0.1.0/src/cli/commands/run.py +921 -0
arbor_agent-0.1.0/src/cli/commands/setup_cmd.py +133 -0
arbor_agent-0.1.0/src/cli/companion.py +485 -0
arbor_agent-0.1.0/src/cli/i18n.py +76 -0
arbor_agent-0.1.0/src/cli/intake/__init__.py +16 -0
arbor_agent-0.1.0/src/cli/intake/display.py +206 -0
arbor_agent-0.1.0/src/cli/intake/launch_tool.py +190 -0
arbor_agent-0.1.0/src/cli/intake/repl.py +744 -0
arbor_agent-0.1.0/src/cli/intake/system_prompt.py +332 -0
arbor_agent-0.1.0/src/cli/post_run.py +331 -0
arbor_agent-0.1.0/src/cli/preflight.py +218 -0
arbor_agent-0.1.0/src/cli/resume_picker.py +232 -0
arbor_agent-0.1.0/src/cli/run_dashboard.py +2695 -0
arbor_agent-0.1.0/src/cli/run_state.py +898 -0
arbor_agent-0.1.0/src/cli/style.py +196 -0
arbor_agent-0.1.0/src/cli/user_config.py +50 -0
arbor_agent-0.1.0/src/coordinator/__init__.py +17 -0
arbor_agent-0.1.0/src/coordinator/checkpoint.py +277 -0
arbor_agent-0.1.0/src/coordinator/config.py +516 -0
arbor_agent-0.1.0/src/coordinator/context_prune.py +219 -0
arbor_agent-0.1.0/src/coordinator/convergence.py +362 -0
arbor_agent-0.1.0/src/coordinator/hitl.py +73 -0
arbor_agent-0.1.0/src/coordinator/idea_tree.py +583 -0
arbor_agent-0.1.0/src/coordinator/main.py +255 -0
arbor_agent-0.1.0/src/coordinator/orchestrator.py +1169 -0
arbor_agent-0.1.0/src/coordinator/prompts.py +781 -0
arbor_agent-0.1.0/src/coordinator/tools/__init__.py +140 -0
arbor_agent-0.1.0/src/coordinator/tools/ask_user.py +117 -0
arbor_agent-0.1.0/src/coordinator/tools/executor_run.py +1307 -0
arbor_agent-0.1.0/src/coordinator/tools/git_ops.py +576 -0
arbor_agent-0.1.0/src/coordinator/tools/search_ctx.py +586 -0
arbor_agent-0.1.0/src/coordinator/tools/tree_ops.py +635 -0
arbor_agent-0.1.0/src/core/__init__.py +111 -0
arbor_agent-0.1.0/src/core/agent.py +824 -0
arbor_agent-0.1.0/src/core/config.py +103 -0
arbor_agent-0.1.0/src/core/config_cli.py +161 -0
arbor_agent-0.1.0/src/core/config_resolve.py +309 -0
arbor_agent-0.1.0/src/core/config_schema.py +388 -0
arbor_agent-0.1.0/src/core/context.py +420 -0
arbor_agent-0.1.0/src/core/experiment.py +282 -0
arbor_agent-0.1.0/src/core/git_artifacts.py +63 -0
arbor_agent-0.1.0/src/core/llm/__init__.py +13 -0
arbor_agent-0.1.0/src/core/llm/base.py +203 -0
arbor_agent-0.1.0/src/core/llm/claude.py +391 -0
arbor_agent-0.1.0/src/core/llm/litellm_provider.py +182 -0
arbor_agent-0.1.0/src/core/llm/openai_compat.py +408 -0
arbor_agent-0.1.0/src/core/llm/openai_responses.py +398 -0
arbor_agent-0.1.0/src/core/logging_setup.py +39 -0
arbor_agent-0.1.0/src/core/skill_registry.py +144 -0
arbor_agent-0.1.0/src/core/tools/__init__.py +74 -0
arbor_agent-0.1.0/src/core/tools/base.py +106 -0
arbor_agent-0.1.0/src/core/tools/bash.py +411 -0
arbor_agent-0.1.0/src/core/tools/executor_tool.py +135 -0
arbor_agent-0.1.0/src/core/tools/file_edit.py +201 -0
arbor_agent-0.1.0/src/core/tools/file_read.py +178 -0
arbor_agent-0.1.0/src/core/tools/file_write.py +69 -0
arbor_agent-0.1.0/src/core/tools/glob_tool.py +91 -0
arbor_agent-0.1.0/src/core/tools/grep.py +226 -0
arbor_agent-0.1.0/src/core/tools/path_guard.py +36 -0
arbor_agent-0.1.0/src/core/tools/run_training.py +444 -0
arbor_agent-0.1.0/src/core/tools/skill.py +78 -0
arbor_agent-0.1.0/src/core/tools/web/__init__.py +11 -0
arbor_agent-0.1.0/src/core/tools/web/_coerce.py +72 -0
arbor_agent-0.1.0/src/core/tools/web/prompts.py +20 -0
arbor_agent-0.1.0/src/core/tools/web/search.py +404 -0
arbor_agent-0.1.0/src/core/tools/web/visit.py +237 -0
arbor_agent-0.1.0/src/dashboard.py +781 -0
arbor_agent-0.1.0/src/events/__init__.py +14 -0
arbor_agent-0.1.0/src/events/bus.py +126 -0
arbor_agent-0.1.0/src/events/mock.py +60 -0
arbor_agent-0.1.0/src/events/payloads.py +133 -0
arbor_agent-0.1.0/src/events/subscribers/__init__.py +1 -0
arbor_agent-0.1.0/src/events/subscribers/cli_logger.py +255 -0
arbor_agent-0.1.0/src/events/subscribers/file_logger.py +58 -0
arbor_agent-0.1.0/src/events/subscribers/stats_collector.py +111 -0
arbor_agent-0.1.0/src/events/types.py +64 -0
arbor_agent-0.1.0/src/executor/__init__.py +6 -0
arbor_agent-0.1.0/src/executor/main.py +183 -0
arbor_agent-0.1.0/src/executor/prompts.py +437 -0
arbor_agent-0.1.0/src/plugins/__init__.py +5 -0
arbor_agent-0.1.0/src/plugins/base.py +160 -0
arbor_agent-0.1.0/src/plugins/mle_kaggle.yaml +269 -0
arbor_agent-0.1.0/src/report/__init__.py +5 -0
arbor_agent-0.1.0/src/report/generator.py +250 -0
arbor_agent-0.1.0/src/review.py +325 -0
arbor_agent-0.1.0/src/run.py +733 -0
arbor_agent-0.1.0/src/search_agent/__init__.py +20 -0
arbor_agent-0.1.0/src/search_agent/agent.py +146 -0
arbor_agent-0.1.0/src/search_agent/main.py +118 -0
arbor_agent-0.1.0/src/search_agent/prompts.py +130 -0
arbor_agent-0.1.0/src/skills/first_principles_probe.md +34 -0
arbor_agent-0.1.0/src/skills/idea_drafting.md +244 -0
arbor_agent-0.1.0/src/webui/__init__.py +6 -0
arbor_agent-0.1.0/src/webui/index.html +1036 -0
arbor_agent-0.1.0/src/webui/launcher.py +50 -0
arbor_agent-0.1.0/src/webui/server.py +320 -0
arbor_agent-0.1.0/src/webui/snapshot.py +168 -0
arbor_agent-0.1.0/tests/test_executor_resume.py +99 -0
arbor_agent-0.1.0/tests/test_executor_resume_integration.py +385 -0

arbor_agent-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright 2026 Renmin University of China & Microsoft Research
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

arbor_agent-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,458 @@
+Metadata-Version: 2.4
+Name: arbor-agent
+Version: 0.1.0
+Summary: Arbor — an autonomous research agent that proposes ideas, edits code, runs experiments, and iterates on a hypothesis tree.
+Author-email: Jiajie Jin <jinjiajie@ruc.edu.cn>
+License-Expression: Apache-2.0
+Project-URL: Homepage, https://github.com/RUC-NLPIR/Arbor
+Project-URL: Repository, https://github.com/RUC-NLPIR/Arbor
+Project-URL: Issues, https://github.com/RUC-NLPIR/Arbor/issues
+Keywords: ai,agent,autonomous-research,llm,experimentation
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: anthropic>=0.52.0
+Requires-Dist: openai>=1.30.0
+Requires-Dist: litellm>=1.55.0
+Requires-Dist: tiktoken>=0.7.0
+Requires-Dist: typer>=0.12.0
+Requires-Dist: rich>=13.0
+Requires-Dist: pyyaml>=6.0
+Requires-Dist: prompt-toolkit>=3.0
+Requires-Dist: pydantic>=2.0
+Requires-Dist: pydantic-settings>=2.0
+Provides-Extra: docs
+Requires-Dist: mkdocs-material>=9.5; extra == "docs"
+Requires-Dist: mkdocs-static-i18n>=1.2; extra == "docs"
+Dynamic: license-file
+<p align="center">
+  <img src="assets/hero.svg" alt="Arbor — Optimize anything" width="100%">
+</p>
+# Toward Generalist Autonomous Research via Hypothesis-Tree Refinement
+<p align="center">
+  <a href="https://arxiv.org/pdf/2606.11926"><img src="https://img.shields.io/badge/Paper-arXiv-B31B1B?style=for-the-badge&logo=arxiv&logoColor=white" alt="Paper"></a>
+  <a href="https://github.com/RUC-NLPIR/Arbor"><img src="https://img.shields.io/badge/Code-GitHub-181717?style=for-the-badge&logo=github&logoColor=white" alt="GitHub"></a>
+  <a href="https://RUC-NLPIR.github.io/Arbor/"><img src="https://img.shields.io/badge/Project_Page-Live-0E9B9B?style=for-the-badge&logo=githubpages&logoColor=white" alt="Project Page"></a>
+  <a href="https://RUC-NLPIR.github.io/Arbor/docs/"><img src="https://img.shields.io/badge/Docs-Material-526CFE?style=for-the-badge&logo=materialformkdocs&logoColor=white" alt="Docs"></a>
+  <a href="LICENSE"><img src="https://img.shields.io/badge/License-Apache_2.0-D22128?style=for-the-badge&logo=apache&logoColor=white" alt="License: Apache 2.0"></a>
+</p>
+<p align="center">
+  <b>English</b> | <a href="README.zh-CN.md">简体中文</a>
+</p>
+**Arbor is an autonomous research agent that turns a long-horizon objective into a
+cumulative search.** Give it a benchmark and a goal; it proposes hypotheses, edits
+code, runs real experiments, learns from the results, and keeps the improvements that
+hold up on held-out data. Instead of one-shot attempts that forget what failed, Arbor
+grows a **hypothesis tree**: every idea becomes a branch — pruned if it fails,
+harvested if it works — and insights propagate back so later ideas start smarter.
+For more details, visit our [project page](https://RUC-NLPIR.github.io/Arbor/)
+and read the [paper](https://arxiv.org/pdf/2606.11926). For the full usage manual,
+see our [documentation](https://RUC-NLPIR.github.io/Arbor/docs/). 🧭 You can also
+choose the [CLI or Skill version](#-cli-and-skill-versions) depending on your
+environment and workflow.
+## 💡 Why Arbor
+* **General-purpose optimization** — From model training and harness engineering
+  to data synthesis, Arbor can optimize any task as long as it has a target to
+  improve and a metric to measure progress.
+* **Practical agent runtime** — Arbor is not only a research prototype; it ships
+  with both a native CLI runtime and an Agent Skill Suite for Codex and Claude
+  Code, so you can use the full CLI for the strongest Arbor behavior or load the
+  skill suite inside another coding agent.
+* **Long-horizon structured exploration** — The hypothesis-tree framework lets
+  Arbor keep running as a cumulative search: results, failure modes, and
+  distilled insights persist in the Idea Tree and propagate upward, so later
+  ideas start smarter instead of being lost in a scrollback buffer.
+* **Real experiment discipline** — Executors iterate on a dev split, validate on
+  a held-out test split, and only merge gains that clear a configurable margin,
+  reducing overfitting to the metric being optimized.
+* **Isolated, reversible execution** — Every experiment runs in its own git
+  worktree on a dedicated branch, so your `main` branch is never touched until
+  you choose to merge.
+* **Built for long experiments** — Long-running training is first-class, with
+  generous timeouts, partial-metric recovery on timeout, and optional staged
+  budgets from smoke to pilot to full runs.
+* **Model and workflow flexibility** — Arbor supports Anthropic, OpenAI /
+  Responses API, and OpenAI-compatible backends through LiteLLM, including
+  DeepSeek, Gemini, Qwen, vLLM, Ollama, and local gateways.
+* **Steerable and adaptable** — A live terminal dashboard, read-only WebUI,
+  optional human-in-the-loop review, and one-line domain plugins let you steer
+  experiments without changing Arbor's core code.
+## 🧩 Framework
+<p align="center">
+  <img src="assets/framework.png" alt="Arbor framework" width="100%">
+</p>
+Arbor runs **two cooperating agents**:
+- **Coordinator** — the research director. It maintains the Idea Tree, drives the
+  search via the *arbor cycle*, and dispatches experiments.
+- **Executor** — the research engineer. Given one idea, it faithfully implements the
+  code changes, runs the experiment in an isolated git worktree, and reports evidence.
+Together they repeat a six-step **arbor cycle**:
+1. **Observe** — the Coordinator re-grounds itself in the Idea Tree, reading the
+   active frontier, constraints, ancestor insights, recent evidence, and current
+   best artifact.
+2. **Ideate** — it chooses a parent node and proposes child hypotheses that refine,
+   correct, or extend what the tree has already learned.
+3. **Select** — it chooses the most promising pending leaves to test, balancing
+   the current best direction with unresolved alternatives.
+4. **Dispatch** — selected hypotheses are sent to independent Executors, which
+   implement them in fresh worktrees and evaluate them on the dev signal.
+5. **Backpropagate** — Arbor records each result, score, insight, and branch, then
+   abstracts the lesson upward so ancestor nodes and future ideas inherit it.
+6. **Decide** — the Coordinator chooses whether to merge, prune, continue, leave a
+   node pending, or stop, using held-out validation for merge decisions.
+## 🎬 Demo
+https://github.com/user-attachments/assets/49c1a306-d2e9-49d6-9c83-65e38a62df30
+## 🚀 CLI And Skill Versions
+This repository includes two ways to use Arbor:
+| Version | Location | Best for | Recommendation |
+| --- | --- | --- | --- |
+| Native CLI runtime | Python package and `arbor` command | Real Arbor research runs, long experiments, dashboard, checkpoints, executor tools, merge/test discipline, plugins, reports | Recommended. This path is more complete, more reliable, and gives the best Arbor behavior. |
+| Agent Skill Suite | [`skills/`](skills/README.md) | Codex or Claude Code environments where you want Arbor-style behavior without running the native Arbor runtime | Useful integration layer and fallback, but less complete than the CLI runtime. |
+If you can run the CLI, use the CLI. The native `arbor` runtime contains the full
+implementation: intake, Research Contract, live dashboard, EventBus,
+checkpoint/resume, executor dispatch, protected dev/test evaluation discipline,
+SearchAgent, plugins, and final report generation.
+The repo-root [`skills/`](skills/README.md) directory is a Codex/Claude Code
+skill suite. After installation, invoke `$arbor-research-agent` in Codex or
+`/arbor-research-agent` in Claude Code and describe your research objective as
+you would in Arbor. The skill suite performs Arbor-style clarification first
+when target, metric, data, permissions, budget, or run mode are unclear, then
+loads the orchestrator and phase skills. This is separate from the internal
+runtime skills stored under `src/skills/`.
+---
+## 📦 Install
+**Requirements:** Python ≥ 3.10 and Git. A virtual environment is recommended.
+```bash
+git clone https://github.com/RUC-NLPIR/Arbor.git
+cd Arbor
+python -m venv .venv && source .venv/bin/activate   # recommended
+pip install -e .                                    # or: uv pip install -e .
+arbor doctor                                        # verify PATH, git, API keys
+```
+> Prefer a global command? `pipx install -e .` makes `arbor` available everywhere.
+> To preview the docs site locally, run `pip install -e ".[docs]" && mkdocs serve`, or read
+> the docs online via the **Docs** badge above.
+---
+## ⚡ Getting Started
+```bash
+arbor setup       # one-time: configure provider / model / base_url / API key
+arbor             # start an interactive session in the current directory
+arbor doctor      # diagnose the install
+```
+`arbor setup` writes `~/.arbor/config.yaml`, so day-to-day you can just run `arbor`
+with no flags. The first thing Arbor does is an **intake conversation** that turns your
+goal, target directory, metric, baseline, budget, dev/test discipline, and artifact
+paths into a one-screen **Arbor Research Contract**. Once you confirm it, the live
+dashboard takes over.
+```bash
+# Point at a benchmark directory and a config
+arbor --cwd ./benchmark --config research_config.yaml
+# Give an initial goal up front; intake refines the rest
+arbor "improve validation score without touching the test split" --cwd ./benchmark
+# Small dry run
+arbor --cwd ./benchmark --config research_config.yaml --max-cycles 3
+```
+During a run you can type `/status`, `/tree`, `/evidence`, `/branches`, `/cost`,
+`/pause`, `/resume`, `/report`, or `/abort`.
+### Prepare a benchmark
+Your target directory should have:
+- a runnable evaluation script (e.g. `run_eval.py`),
+- evaluation data (ideally a **dev** split and a held-out **test** split), and
+- a clean git repository (no uncommitted changes).
+A minimal `research_config.yaml`:
+```yaml
+# LLM/API live in `arbor setup`; project config is usually just the task and budget.
+task: >
+  Optimize the agent's accuracy on the benchmark.
+  Do NOT modify the evaluation harness or data files.
+coordinator:
+  max_cycles: 10          # arbor cycles to explore
+  max_depth: 2            # Idea Tree depth
+  merge_threshold: 5.0    # min held-out % gain to merge into trunk
+  ui:
+    interaction_mode: review   # auto | direction | review | collaborative
+executor:
+  max_turns: 100
+```
+A copy-pasteable example with every option lives in
+[`examples/research_config.example.yaml`](examples/research_config.example.yaml).
+---
+## 🧠 How It Works
+### The arbor cycle
+Each cycle runs six steps:
+```
+① OBSERVE   analyze current results and failure modes
+② IDEATE    propose 1–3 new ideas from the analysis and tree insights
+③ SELECT    pick the highest-priority idea to test
+④ DISPATCH  run an Executor on it in an isolated git worktree
+⑤ BACKPROP  record the result; abstract the insight up to ancestor nodes
+⑥ DECIDE    continue / merge into trunk / prune / stop
+```
+### The Idea Tree
+```
+ROOT (baseline: 20%)
+├── 1: Retrieval optimization        [insight: "retrieval quality is the bottleneck"]
+│   ├── 1.1: Constraint decomposition + verification   [40%, merged]
+│   ├── 1.2: Periodic re-read injection                [40%, pruned — no net gain]
+│   └── 1.3: Answer-extraction tuning                  [35%, pruned]
+├── 2: Multi-perspective search      [insight: "search scaffolding hurts here"]
+│   └── 2.1: Breadth-first search                      [25%, pruned]
+└── 3: Code-level intervention       [insight: "code-level > prompt-level"]
+    ├── 3.1: Continuation injection                    [70%, merged]
+    └── 3.2: ANSWER-tag extraction                     [45%, done]
+```
+- **Depth 0 (Root):** the research objective and global insights.
+- **Depth 1:** research directions (paper-title-level ideas).
+- **Depth 2+:** concrete methods, implemented and tested by Executors.
+### Git strategy & evaluation
+Each Executor works in its own worktree on a dedicated branch. Verified improvements merge
+into a per-run `trunk`; you promote `trunk` into `main` only when satisfied
+(`git merge research/run_xxx/trunk`). Executors iterate on a **dev** split, but a change is
+kept only if it clears a margin on the **held-out test** split — guarding against
+overfitting.
+### Human-in-the-loop
+Set `ui.interaction_mode` (or `--interaction-mode`) to choose how much you steer:
+| Mode | Behavior |
+| --- | --- |
+| `auto` | Fully autonomous. |
+| `direction` | Asks you where to go next at ideation. |
+| `review` | Pauses before each node and Executor. |
+| `collaborative` | `direction` + `review`. |
+When paused, your input opens an isolated discussion with a read-only companion — it never
+pollutes the Coordinator's context. See [`docs/`](docs/index.md) for the full method.
+---
+## ⚙️ Configuration
+LLM access is configured once with `arbor setup` (stored in `~/.arbor/config.yaml`) via a
+single `provider` field:
+| `provider` | Use it for |
+| --- | --- |
+| `auto` *(default)* | Let Arbor pick. It probes your endpoint's OpenAI **Responses** API and uses it when available (reasoning chain preserved), otherwise falls back to chat completions; Claude models use the native Anthropic API. The detected backend is frozen into the config. |
+| `openai-responses` | OpenAI / o-series models via the Responses API (encrypted reasoning chain preserved across turns). |
+| `openai-chat` | Any OpenAI-compatible chat-completions endpoint — DeepSeek / Qwen / GLM / vLLM / Ollama / local gateways. |
+| `anthropic` | Claude via the native Anthropic Messages API (signed thinking + prompt caching). |
+Most users just run `arbor setup`, keep `auto`, and fill in `model` + `base_url`. API keys
+come from the environment or the config; per-project task and budget settings live in
+`research_config.yaml`. See the
+[configuration guide](https://RUC-NLPIR.github.io/Arbor/docs/configuration/) and
+[`examples/research_config.example.yaml`](examples/research_config.example.yaml) for every
+option.
+---
+## 🧰 CLI Reference
+Day to day, you only need `arbor`:
+| Command | What it does |
+| --- | --- |
+| `arbor` | Start an interactive research session. |
+| `arbor setup` | Configure provider / model / keys → `~/.arbor/config.yaml`. |
+| `arbor report <session>` | Re-render `REPORT.md` for a past session. |
+| `arbor doctor` | Diagnose install, PATH, git, and API keys. |
+| `arbor version` | Print the installed version. |
+Lower-level entry points (`run-research`, `coordinator`, `executor`, `review-research`)
+remain for debugging — see the [CLI reference](https://RUC-NLPIR.github.io/Arbor/docs/cli/).
+---
+## 🔌 Plugins & Skills
+A single line retargets the agent to a new domain — evaluation protocol, protected
+data directories, required outputs, and timeout presets all come from the plugin:
+```yaml
+plugin: mle_kaggle   # switches to Kaggle/MLE mode
+```
+A plugin is one YAML file (prompt-injection points + config overrides + profiles +
+lifecycle hooks + an eval contract); a Skill is a markdown playbook the agent loads on
+demand at runtime. A copy-pasteable Kaggle config lives in
+[`examples/kaggle_config.example.yaml`](examples/kaggle_config.example.yaml).
+---
+## 💾 Output & Resume
+Each run writes a session directory under `.arbor/sessions/` containing `REPORT.md`,
+`events.jsonl`, `run_stats.json`, the Idea Tree, and per-experiment artifacts. Runs are
+resumable — interrupt with `Ctrl+C` and continue later with `--resume`; Arbor reloads the
+Idea Tree and picks up where it left off.
+```bash
+arbor report .arbor/sessions/<run_name>   # re-render a past report
+arbor --resume --run-name <run_name>      # continue an interrupted run
+```
+---
+## 📊 Results
+Arbor was evaluated as a single controller across model training, harness engineering,
+and data synthesis — only the material, objective, evaluator, and budget change from task
+to task. It achieves the best held-out test result on all six tasks against strong
+single-agent baselines.
+| Task | Direction | Initial | Codex | Claude Code | **Arbor** | Gain |
+| --- | --- | --- | --- | --- | --- | --- |
+| Optimizer Design | steps ↓ | 3325 | 3325 | 3287.5 | **3237.5** | +2.63% |
+| Architecture Design | loss ↓ | 1.098 | 1.083 | 1.033 | **1.028** | +6.38% |
+| Terminal-Bench 2.0 | pass ↑ | 69.81 | 73.59 | 71.70 | **77.36** | +7.55 |
+| BrowseComp | acc ↑ | 45.33 | 50.00 | 53.33 | **67.67** | +22.34 |
+| Search-Agent Data | gap ↑ | 5.00 | 9.00 | 12.00 | **18.00** | +13.0 |
+| Math-Reasoning Data | gap ↑ | 1.04 | 6.25 | 8.33 | **20.83** | +19.79 |
+On **MLE-Bench Lite** with GPT-5.5, Arbor reaches **86.36% Any-Medal** (100% valid
+submissions, 95.45% above median, 77.27% gold). See the [paper](https://arxiv.org/pdf/2606.11926)
+for full protocols and ablations.
+---
+## 🗂️ Project Structure
+The code lives in `src/` and is imported as the `arbor` package.
+```
+src/                 # the `arbor` package
+├── core/            Shared infrastructure: ReAct loop, tools, LLM providers, context mgmt
+├── executor/        Executor agent + `executor` CLI
+├── coordinator/     Coordinator agent, Idea Tree, orchestrator, coordinator tools
+├── cli/             `arbor` CLI: intake, live dashboard, setup, doctor, config
+├── events/          Typed event bus and payloads
+├── report/          Report generation
+├── webui/           Read-only run-monitoring web server
+├── plugins/         Domain plugins (e.g. mle_kaggle.yaml)
+├── skills/          On-demand markdown playbooks
+├── dashboard.py     HTML dashboard generator
+├── run.py           `run-research` CLI
+└── review.py        `review-research` CLI
+```
+---
+## 🙏 Acknowledgements
+Arbor is built on the excellent foundation of
+[claw-code](https://github.com/ultraworkers/claw-code).
+claw-code is an open-source Rust reimplementation of Claude Code. It provided
+the REPL framework, tool-calling infrastructure, and cross-platform compilation
+that made Arbor's CLI possible. Huge thanks to the ultraworkers team for their
+outstanding work.
+🔗 claw-code: https://github.com/ultraworkers/claw-code
+---
+## 📚 Citation
+```bibtex
+@misc{jin2026arbor,
+  title  = {Toward Generalist Autonomous Research via Hypothesis-Tree Refinement},
+  author = {Jiajie Jin and Yuyang Hu and Kai Qiu and Qi Dai and Chong Luo and
+            Guanting Dong and Xiaoxi Li and Tong Zhao and Xiaolong Ma and
+            Gongrui Zhang and Zhirong Wu and Bei Liu and Zhengyuan Yang and
+            Linjie Li and Lijuan Wang and Hongjin Qian and Yutao Zhu and Zhicheng Dou},
+  year   = {2026},
+  eprint = {2606.11926},
+  archivePrefix = {arXiv},
+  url    = {https://arxiv.org/abs/2606.11926}
+}
+```
+---
+## Star History
+<picture>
+  <source
+    media="(prefers-color-scheme: dark)"
+    srcset="https://api.star-history.com/svg?repos=RUC-NLPIR/Arbor&type=Date&theme=dark"
+  />
+  <source
+    media="(prefers-color-scheme: light)"
+    srcset="https://api.star-history.com/svg?repos=RUC-NLPIR/Arbor&type=Date"
+  />
+  <img
+    alt="Star History Chart"
+    src="https://api.star-history.com/svg?repos=RUC-NLPIR/Arbor&type=Date"
+  />
+</picture>
+---
+## 📄 License
+Released under the [Apache License 2.0](LICENSE).
+---
+Built at the Gaoling School of Artificial Intelligence, Renmin University of China, and
+Microsoft Research.