arbor-agent 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. arbor_agent-0.1.0/LICENSE +201 -0
  2. arbor_agent-0.1.0/PKG-INFO +458 -0
  3. arbor_agent-0.1.0/README.md +430 -0
  4. arbor_agent-0.1.0/arbor_agent.egg-info/PKG-INFO +458 -0
  5. arbor_agent-0.1.0/arbor_agent.egg-info/SOURCES.txt +120 -0
  6. arbor_agent-0.1.0/arbor_agent.egg-info/dependency_links.txt +1 -0
  7. arbor_agent-0.1.0/arbor_agent.egg-info/entry_points.txt +6 -0
  8. arbor_agent-0.1.0/arbor_agent.egg-info/requires.txt +14 -0
  9. arbor_agent-0.1.0/arbor_agent.egg-info/top_level.txt +1 -0
  10. arbor_agent-0.1.0/pyproject.toml +75 -0
  11. arbor_agent-0.1.0/setup.cfg +4 -0
  12. arbor_agent-0.1.0/src/__init__.py +7 -0
  13. arbor_agent-0.1.0/src/_app.py +30 -0
  14. arbor_agent-0.1.0/src/cli/__init__.py +1 -0
  15. arbor_agent-0.1.0/src/cli/_autodetect.py +101 -0
  16. arbor_agent-0.1.0/src/cli/_constants.py +81 -0
  17. arbor_agent-0.1.0/src/cli/app.py +100 -0
  18. arbor_agent-0.1.0/src/cli/branch_guard.py +128 -0
  19. arbor_agent-0.1.0/src/cli/chart.py +243 -0
  20. arbor_agent-0.1.0/src/cli/commands/__init__.py +1 -0
  21. arbor_agent-0.1.0/src/cli/commands/config_cmd.py +230 -0
  22. arbor_agent-0.1.0/src/cli/commands/doctor_cmd.py +134 -0
  23. arbor_agent-0.1.0/src/cli/commands/report_cmd.py +41 -0
  24. arbor_agent-0.1.0/src/cli/commands/run.py +921 -0
  25. arbor_agent-0.1.0/src/cli/commands/setup_cmd.py +133 -0
  26. arbor_agent-0.1.0/src/cli/companion.py +485 -0
  27. arbor_agent-0.1.0/src/cli/i18n.py +76 -0
  28. arbor_agent-0.1.0/src/cli/intake/__init__.py +16 -0
  29. arbor_agent-0.1.0/src/cli/intake/display.py +206 -0
  30. arbor_agent-0.1.0/src/cli/intake/launch_tool.py +190 -0
  31. arbor_agent-0.1.0/src/cli/intake/repl.py +744 -0
  32. arbor_agent-0.1.0/src/cli/intake/system_prompt.py +332 -0
  33. arbor_agent-0.1.0/src/cli/post_run.py +331 -0
  34. arbor_agent-0.1.0/src/cli/preflight.py +218 -0
  35. arbor_agent-0.1.0/src/cli/resume_picker.py +232 -0
  36. arbor_agent-0.1.0/src/cli/run_dashboard.py +2695 -0
  37. arbor_agent-0.1.0/src/cli/run_state.py +898 -0
  38. arbor_agent-0.1.0/src/cli/style.py +196 -0
  39. arbor_agent-0.1.0/src/cli/user_config.py +50 -0
  40. arbor_agent-0.1.0/src/coordinator/__init__.py +17 -0
  41. arbor_agent-0.1.0/src/coordinator/checkpoint.py +277 -0
  42. arbor_agent-0.1.0/src/coordinator/config.py +516 -0
  43. arbor_agent-0.1.0/src/coordinator/context_prune.py +219 -0
  44. arbor_agent-0.1.0/src/coordinator/convergence.py +362 -0
  45. arbor_agent-0.1.0/src/coordinator/hitl.py +73 -0
  46. arbor_agent-0.1.0/src/coordinator/idea_tree.py +583 -0
  47. arbor_agent-0.1.0/src/coordinator/main.py +255 -0
  48. arbor_agent-0.1.0/src/coordinator/orchestrator.py +1169 -0
  49. arbor_agent-0.1.0/src/coordinator/prompts.py +781 -0
  50. arbor_agent-0.1.0/src/coordinator/tools/__init__.py +140 -0
  51. arbor_agent-0.1.0/src/coordinator/tools/ask_user.py +117 -0
  52. arbor_agent-0.1.0/src/coordinator/tools/executor_run.py +1307 -0
  53. arbor_agent-0.1.0/src/coordinator/tools/git_ops.py +576 -0
  54. arbor_agent-0.1.0/src/coordinator/tools/search_ctx.py +586 -0
  55. arbor_agent-0.1.0/src/coordinator/tools/tree_ops.py +635 -0
  56. arbor_agent-0.1.0/src/core/__init__.py +111 -0
  57. arbor_agent-0.1.0/src/core/agent.py +824 -0
  58. arbor_agent-0.1.0/src/core/config.py +103 -0
  59. arbor_agent-0.1.0/src/core/config_cli.py +161 -0
  60. arbor_agent-0.1.0/src/core/config_resolve.py +309 -0
  61. arbor_agent-0.1.0/src/core/config_schema.py +388 -0
  62. arbor_agent-0.1.0/src/core/context.py +420 -0
  63. arbor_agent-0.1.0/src/core/experiment.py +282 -0
  64. arbor_agent-0.1.0/src/core/git_artifacts.py +63 -0
  65. arbor_agent-0.1.0/src/core/llm/__init__.py +13 -0
  66. arbor_agent-0.1.0/src/core/llm/base.py +203 -0
  67. arbor_agent-0.1.0/src/core/llm/claude.py +391 -0
  68. arbor_agent-0.1.0/src/core/llm/litellm_provider.py +182 -0
  69. arbor_agent-0.1.0/src/core/llm/openai_compat.py +408 -0
  70. arbor_agent-0.1.0/src/core/llm/openai_responses.py +398 -0
  71. arbor_agent-0.1.0/src/core/logging_setup.py +39 -0
  72. arbor_agent-0.1.0/src/core/skill_registry.py +144 -0
  73. arbor_agent-0.1.0/src/core/tools/__init__.py +74 -0
  74. arbor_agent-0.1.0/src/core/tools/base.py +106 -0
  75. arbor_agent-0.1.0/src/core/tools/bash.py +411 -0
  76. arbor_agent-0.1.0/src/core/tools/executor_tool.py +135 -0
  77. arbor_agent-0.1.0/src/core/tools/file_edit.py +201 -0
  78. arbor_agent-0.1.0/src/core/tools/file_read.py +178 -0
  79. arbor_agent-0.1.0/src/core/tools/file_write.py +69 -0
  80. arbor_agent-0.1.0/src/core/tools/glob_tool.py +91 -0
  81. arbor_agent-0.1.0/src/core/tools/grep.py +226 -0
  82. arbor_agent-0.1.0/src/core/tools/path_guard.py +36 -0
  83. arbor_agent-0.1.0/src/core/tools/run_training.py +444 -0
  84. arbor_agent-0.1.0/src/core/tools/skill.py +78 -0
  85. arbor_agent-0.1.0/src/core/tools/web/__init__.py +11 -0
  86. arbor_agent-0.1.0/src/core/tools/web/_coerce.py +72 -0
  87. arbor_agent-0.1.0/src/core/tools/web/prompts.py +20 -0
  88. arbor_agent-0.1.0/src/core/tools/web/search.py +404 -0
  89. arbor_agent-0.1.0/src/core/tools/web/visit.py +237 -0
  90. arbor_agent-0.1.0/src/dashboard.py +781 -0
  91. arbor_agent-0.1.0/src/events/__init__.py +14 -0
  92. arbor_agent-0.1.0/src/events/bus.py +126 -0
  93. arbor_agent-0.1.0/src/events/mock.py +60 -0
  94. arbor_agent-0.1.0/src/events/payloads.py +133 -0
  95. arbor_agent-0.1.0/src/events/subscribers/__init__.py +1 -0
  96. arbor_agent-0.1.0/src/events/subscribers/cli_logger.py +255 -0
  97. arbor_agent-0.1.0/src/events/subscribers/file_logger.py +58 -0
  98. arbor_agent-0.1.0/src/events/subscribers/stats_collector.py +111 -0
  99. arbor_agent-0.1.0/src/events/types.py +64 -0
  100. arbor_agent-0.1.0/src/executor/__init__.py +6 -0
  101. arbor_agent-0.1.0/src/executor/main.py +183 -0
  102. arbor_agent-0.1.0/src/executor/prompts.py +437 -0
  103. arbor_agent-0.1.0/src/plugins/__init__.py +5 -0
  104. arbor_agent-0.1.0/src/plugins/base.py +160 -0
  105. arbor_agent-0.1.0/src/plugins/mle_kaggle.yaml +269 -0
  106. arbor_agent-0.1.0/src/report/__init__.py +5 -0
  107. arbor_agent-0.1.0/src/report/generator.py +250 -0
  108. arbor_agent-0.1.0/src/review.py +325 -0
  109. arbor_agent-0.1.0/src/run.py +733 -0
  110. arbor_agent-0.1.0/src/search_agent/__init__.py +20 -0
  111. arbor_agent-0.1.0/src/search_agent/agent.py +146 -0
  112. arbor_agent-0.1.0/src/search_agent/main.py +118 -0
  113. arbor_agent-0.1.0/src/search_agent/prompts.py +130 -0
  114. arbor_agent-0.1.0/src/skills/first_principles_probe.md +34 -0
  115. arbor_agent-0.1.0/src/skills/idea_drafting.md +244 -0
  116. arbor_agent-0.1.0/src/webui/__init__.py +6 -0
  117. arbor_agent-0.1.0/src/webui/index.html +1036 -0
  118. arbor_agent-0.1.0/src/webui/launcher.py +50 -0
  119. arbor_agent-0.1.0/src/webui/server.py +320 -0
  120. arbor_agent-0.1.0/src/webui/snapshot.py +168 -0
  121. arbor_agent-0.1.0/tests/test_executor_resume.py +99 -0
  122. arbor_agent-0.1.0/tests/test_executor_resume_integration.py +385 -0
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright 2026 Renmin University of China & Microsoft Research
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
@@ -0,0 +1,458 @@
1
+ Metadata-Version: 2.4
2
+ Name: arbor-agent
3
+ Version: 0.1.0
4
+ Summary: Arbor — an autonomous research agent that proposes ideas, edits code, runs experiments, and iterates on a hypothesis tree.
5
+ Author-email: Jiajie Jin <jinjiajie@ruc.edu.cn>
6
+ License-Expression: Apache-2.0
7
+ Project-URL: Homepage, https://github.com/RUC-NLPIR/Arbor
8
+ Project-URL: Repository, https://github.com/RUC-NLPIR/Arbor
9
+ Project-URL: Issues, https://github.com/RUC-NLPIR/Arbor/issues
10
+ Keywords: ai,agent,autonomous-research,llm,experimentation
11
+ Requires-Python: >=3.10
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: anthropic>=0.52.0
15
+ Requires-Dist: openai>=1.30.0
16
+ Requires-Dist: litellm>=1.55.0
17
+ Requires-Dist: tiktoken>=0.7.0
18
+ Requires-Dist: typer>=0.12.0
19
+ Requires-Dist: rich>=13.0
20
+ Requires-Dist: pyyaml>=6.0
21
+ Requires-Dist: prompt-toolkit>=3.0
22
+ Requires-Dist: pydantic>=2.0
23
+ Requires-Dist: pydantic-settings>=2.0
24
+ Provides-Extra: docs
25
+ Requires-Dist: mkdocs-material>=9.5; extra == "docs"
26
+ Requires-Dist: mkdocs-static-i18n>=1.2; extra == "docs"
27
+ Dynamic: license-file
28
+
29
+ <p align="center">
30
+ <img src="assets/hero.svg" alt="Arbor — Optimize anything" width="100%">
31
+ </p>
32
+
33
+
34
+ # Toward Generalist Autonomous Research via Hypothesis-Tree Refinement
35
+
36
+
37
+ <p align="center">
38
+ <a href="https://arxiv.org/pdf/2606.11926"><img src="https://img.shields.io/badge/Paper-arXiv-B31B1B?style=for-the-badge&logo=arxiv&logoColor=white" alt="Paper"></a>
39
+ <a href="https://github.com/RUC-NLPIR/Arbor"><img src="https://img.shields.io/badge/Code-GitHub-181717?style=for-the-badge&logo=github&logoColor=white" alt="GitHub"></a>
40
+ <a href="https://RUC-NLPIR.github.io/Arbor/"><img src="https://img.shields.io/badge/Project_Page-Live-0E9B9B?style=for-the-badge&logo=githubpages&logoColor=white" alt="Project Page"></a>
41
+ <a href="https://RUC-NLPIR.github.io/Arbor/docs/"><img src="https://img.shields.io/badge/Docs-Material-526CFE?style=for-the-badge&logo=materialformkdocs&logoColor=white" alt="Docs"></a>
42
+ <a href="LICENSE"><img src="https://img.shields.io/badge/License-Apache_2.0-D22128?style=for-the-badge&logo=apache&logoColor=white" alt="License: Apache 2.0"></a>
43
+ </p>
44
+
45
+ <p align="center">
46
+ <b>English</b> | <a href="README.zh-CN.md">简体中文</a>
47
+ </p>
48
+
49
+ **Arbor is an autonomous research agent that turns a long-horizon objective into a
50
+ cumulative search.** Give it a benchmark and a goal; it proposes hypotheses, edits
51
+ code, runs real experiments, learns from the results, and keeps the improvements that
52
+ hold up on held-out data. Instead of one-shot attempts that forget what failed, Arbor
53
+ grows a **hypothesis tree**: every idea becomes a branch — pruned if it fails,
54
+ harvested if it works — and insights propagate back so later ideas start smarter.
55
+
56
+ For more details, visit our [project page](https://RUC-NLPIR.github.io/Arbor/)
57
+ and read the [paper](https://arxiv.org/pdf/2606.11926). For a more detailed usage manual,
58
+ see our [documentation](https://RUC-NLPIR.github.io/Arbor/docs/). 🧭 You can also
59
+ choose the [CLI or Skill version](#-cli-and-skill-versions) depending on your
60
+ environment and workflow.
61
+
62
+ ## 💡 Why Arbor
63
+
64
+ * **General-purpose optimization** — From model training and harness engineering
65
+ to data synthesis, Arbor can optimize any task as long as it has a target to
66
+ improve and a metric to measure progress.
67
+ * **Practical agent runtime** — Arbor is not only a research prototype; it ships
68
+ with both a native CLI runtime and an Agent Skill Suite for Codex and Claude
69
+ Code, so you can use the full CLI for the strongest Arbor behavior or load the
70
+ skill suite inside another coding agent.
71
+ * **Long-horizon structured exploration** — The hypothesis-tree framework lets
72
+ Arbor keep running as a cumulative search: results, failure modes, and
73
+ distilled insights persist in the Idea Tree and propagate upward, so later
74
+ ideas start smarter instead of being lost in a scrollback buffer.
75
+ * **Real experiment discipline** — Executors iterate on a dev split, validate on
76
+ a held-out test split, and only merge gains that clear a configurable margin,
77
+ reducing overfitting to the metric being optimized.
78
+ * **Isolated, reversible execution** — Every experiment runs in its own git
79
+ worktree on a dedicated branch, so your `main` branch is never touched until
80
+ you choose to merge.
81
+ * **Built for long experiments** — Long-running training is first-class, with
82
+ generous timeouts, partial-metric recovery on timeout, and optional staged
83
+ budgets from smoke to pilot to full runs.
84
+ * **Model and workflow flexibility** — Arbor supports Anthropic, OpenAI /
85
+ Responses API, and OpenAI-compatible backends through LiteLLM, including
86
+ DeepSeek, Gemini, Qwen, vLLM, Ollama, and local gateways.
87
+ * **Steerable and adaptable** — A live terminal dashboard, read-only WebUI,
88
+ optional human-in-the-loop review, and one-line domain plugins let you steer
89
+ experiments without changing Arbor's core code.
90
+
91
+ ## 🧩 Framework
92
+
93
+ <p align="center">
94
+ <img src="assets/framework.png" alt="Arbor framework" width="100%">
95
+ </p>
96
+
97
+ Arbor runs **two cooperating agents**:
98
+
99
+ - **Coordinator** — the research director. It maintains the Idea Tree, drives the
100
+ search via the *arbor cycle*, and dispatches experiments.
101
+ - **Executor** — the research engineer. Given one idea, it faithfully implements the
102
+ code changes, runs the experiment in an isolated git worktree, and reports evidence.
103
+
104
+ Together they repeat a six-step **arbor cycle**:
105
+
106
+ 1. **Observe** — the Coordinator re-grounds itself in the Idea Tree, reading the
107
+ active frontier, constraints, ancestor insights, recent evidence, and current
108
+ best artifact.
109
+ 2. **Ideate** — it chooses a parent node and proposes child hypotheses that refine,
110
+ correct, or extend what the tree has already learned.
111
+ 3. **Select** — it chooses the most promising pending leaves to test, balancing
112
+ the current best direction with unresolved alternatives.
113
+ 4. **Dispatch** — selected hypotheses are sent to independent Executors, which
114
+ implement them in fresh worktrees and evaluate them on the dev signal.
115
+ 5. **Backpropagate** — Arbor records each result, score, insight, and branch, then
116
+ abstracts the lesson upward so ancestor nodes and future ideas inherit it.
117
+ 6. **Decide** — the Coordinator chooses whether to merge, prune, continue, leave a
118
+ node pending, or stop, using held-out validation for merge decisions.
119
+
120
+ ## 🎬 Demo
121
+
122
+
123
+ https://github.com/user-attachments/assets/49c1a306-d2e9-49d6-9c83-65e38a62df30
124
+
125
+
126
+
127
+ ## 🚀 CLI And Skill Versions
128
+
129
+ This repository includes two ways to use Arbor:
130
+
131
+ | Version | Location | Best for | Recommendation |
132
+ | --- | --- | --- | --- |
133
+ | Native CLI runtime | Python package and `arbor` command | Real Arbor research runs, long experiments, dashboard, checkpoints, executor tools, merge/test discipline, plugins, reports | Recommended. This path is more complete, more reliable, and gives the best Arbor behavior. |
134
+ | Agent Skill Suite | [`skills/`](skills/README.md) | Codex or Claude Code environments where you want Arbor-style behavior without running the native Arbor runtime | Useful integration layer and fallback, but less complete than the CLI runtime. |
135
+
136
+ If you can run the CLI, use the CLI. The native `arbor` runtime contains the full
137
+ implementation: intake, Research Contract, live dashboard, EventBus,
138
+ checkpoint/resume, executor dispatch, protected dev/test evaluation discipline,
139
+ SearchAgent, plugins, and final report generation.
140
+
141
+ The repo-root [`skills/`](skills/README.md) directory is a Codex/Claude Code
142
+ skill suite. After installation, invoke `$arbor-research-agent` in Codex or
143
+ `/arbor-research-agent` in Claude Code and describe your research objective as
144
+ you would in Arbor. The skill suite performs Arbor-style clarification first
145
+ when target, metric, data, permissions, budget, or run mode are unclear, then
146
+ loads the orchestrator and phase skills. This is separate from the internal
147
+ runtime skills stored under `src/skills/`.
148
+
149
+ ---
150
+
151
+ ## 📦 Install
152
+
153
+ **Requirements:** Python ≥ 3.10 and Git. A virtual environment is recommended.
154
+
155
+ ```bash
156
+ git clone https://github.com/RUC-NLPIR/Arbor.git
157
+ cd Arbor
158
+ python -m venv .venv && source .venv/bin/activate # recommended
159
+ pip install -e . # or: uv pip install -e .
160
+ arbor doctor # verify PATH, git, API keys
161
+ ```
162
+
163
+ > Prefer a global command? `pipx install -e .` makes `arbor` available everywhere.
164
+ > For the docs site, `pip install -e ".[docs]" && mkdocs serve`, or read them online
165
+ > via the **Docs** badge above.
166
+
167
+ ---
168
+
169
+ ## ⚡ Getting Started
170
+
171
+ ```bash
172
+ arbor setup # one-time: configure provider / model / base_url / API key
173
+ arbor # start an interactive session in the current directory
174
+ arbor doctor # diagnose the install
175
+ ```
176
+
177
+ `arbor setup` writes `~/.arbor/config.yaml`, so day to day you can just run `arbor`
178
+ with no flags. Arbor begins every session with an **intake conversation** that turns your
179
+ goal, target directory, metric, baseline, budget, dev/test discipline, and artifact
180
+ paths into a one-screen **Arbor Research Contract**. Once you confirm it, the live
181
+ dashboard takes over.
182
+
183
+ ```bash
184
+ # Point at a benchmark directory and a config
185
+ arbor --cwd ./benchmark --config research_config.yaml
186
+
187
+ # Give an initial goal up front; intake refines the rest
188
+ arbor "improve validation score without touching the test split" --cwd ./benchmark
189
+
190
+ # Small dry run
191
+ arbor --cwd ./benchmark --config research_config.yaml --max-cycles 3
192
+ ```
193
+
194
+ During a run you can type `/status`, `/tree`, `/evidence`, `/branches`, `/cost`,
195
+ `/pause`, `/resume`, `/report`, or `/abort`.
196
+
197
+ ### Prepare a benchmark
198
+
199
+ Your target directory should have:
200
+
201
+ - a runnable evaluation script (e.g. `run_eval.py`),
202
+ - evaluation data (ideally a **dev** split and a held-out **test** split), and
203
+ - a clean git repository (no uncommitted changes).
204
+
205
+ A minimal `research_config.yaml`:
206
+
207
+ ```yaml
208
+ # LLM/API live in `arbor setup`; project config is usually just the task and budget.
209
+ task: >
210
+ Optimize the agent's accuracy on the benchmark.
211
+ Do NOT modify the evaluation harness or data files.
212
+
213
+ coordinator:
214
+ max_cycles: 10 # arbor cycles to explore
215
+ max_depth: 2 # Idea Tree depth
216
+ merge_threshold: 5.0 # min held-out % gain to merge into trunk
217
+ ui:
218
+ interaction_mode: review # auto | direction | review | collaborative
219
+
220
+ executor:
221
+ max_turns: 100
222
+ ```
223
+
224
+ A copy-pasteable example with every option lives in
225
+ [`examples/research_config.example.yaml`](examples/research_config.example.yaml).
226
+
227
+ ---
228
+
229
+ ## 🧠 How It Works
230
+
231
+ ### The arbor cycle
232
+
233
+ Each cycle runs six steps:
234
+
235
+ ```
236
+ ① OBSERVE analyze current results and failure modes
237
+ ② IDEATE propose 1–3 new ideas from the analysis and tree insights
238
+ ③ SELECT pick the highest-priority idea to test
239
+ ④ DISPATCH run an Executor on it in an isolated git worktree
240
+ ⑤ BACKPROP record the result; abstract the insight up to ancestor nodes
241
+ ⑥ DECIDE continue / merge into trunk / prune / stop
242
+ ```
243
+
244
+
245
+ ### The Idea Tree
246
+
247
+ ```
248
+ ROOT (baseline: 20%)
249
+ ├── 1: Retrieval optimization [insight: "retrieval quality is the bottleneck"]
250
+ │ ├── 1.1: Constraint decomposition + verification [40%, merged]
251
+ │ ├── 1.2: Periodic re-read injection [40%, pruned — no net gain]
252
+ │ └── 1.3: Answer-extraction tuning [35%, pruned]
253
+ ├── 2: Multi-perspective search [insight: "search scaffolding hurts here"]
254
+ │ └── 2.1: Breadth-first search [25%, pruned]
255
+ └── 3: Code-level intervention [insight: "code-level > prompt-level"]
256
+ ├── 3.1: Continuation injection [70%, merged]
257
+ └── 3.2: ANSWER-tag extraction [45%, done]
258
+ ```
259
+
260
+ - **Depth 0 (Root):** the research objective and global insights.
261
+ - **Depth 1:** research directions (paper-title-level ideas).
262
+ - **Depth 2+:** concrete methods, implemented and tested by Executors.
263
+
264
+ ### Git strategy & evaluation
265
+
266
+ Each Executor works in its own worktree on a dedicated branch. Verified improvements merge
267
+ into a per-run `trunk`; you promote `trunk` into `main` only when satisfied
268
+ (`git merge research/run_xxx/trunk`). Executors iterate on a **dev** split, but a change is
269
+ kept only if it clears the configured `coordinator.merge_threshold` margin on the **held-out test** split — guarding against
270
+ overfitting.
271
+
272
+ ### Human-in-the-loop
273
+
274
+ Set `ui.interaction_mode` (or `--interaction-mode`) to choose how much you steer:
275
+
276
+ | Mode | Behavior |
277
+ | --- | --- |
278
+ | `auto` | Fully autonomous. |
279
+ | `direction` | Asks you where to go next at ideation. |
280
+ | `review` | Pauses for your review before each node and each Executor run. |
281
+ | `collaborative` | `direction` + `review`. |
282
+
283
+ When paused, your input opens an isolated discussion with a read-only companion — it never
284
+ pollutes the Coordinator's context. See [`docs/`](docs/index.md) for the full method.
285
+
286
+ ---
287
+
288
+ ## ⚙️ Configuration
289
+
290
+ LLM access is configured once with `arbor setup` (stored in `~/.arbor/config.yaml`) via a
291
+ single `provider` field:
292
+
293
+ | `provider` | Use it for |
294
+ | --- | --- |
295
+ | `auto` *(default)* | Let Arbor pick. It probes your endpoint's OpenAI **Responses** API and uses it when available (reasoning chain preserved), otherwise falls back to chat completions; Claude models use the native Anthropic API. The detected backend is frozen into the config. |
296
+ | `openai-responses` | OpenAI / o-series models via the Responses API (encrypted reasoning chain preserved across turns). |
297
+ | `openai-chat` | Any OpenAI-compatible chat-completions endpoint — DeepSeek / Qwen / GLM / vLLM / Ollama / local gateways. |
298
+ | `anthropic` | Claude via the native Anthropic Messages API (signed thinking + prompt caching). |
299
+
300
+ Most users just run `arbor setup`, keep `auto`, and fill in `model` + `base_url`. API keys come
301
+ from environment variables or the config; per-project task and budget settings live in
302
+ `research_config.yaml`. See the
303
+ [configuration guide](https://RUC-NLPIR.github.io/Arbor/docs/configuration/) and
304
+ [`examples/research_config.example.yaml`](examples/research_config.example.yaml) for every
305
+ option.
306
+
307
+ ---
308
+
309
+ ## 🧰 CLI Reference
310
+
311
+ Day to day you only need `arbor`:
312
+
313
+ | Command | What it does |
314
+ | --- | --- |
315
+ | `arbor` | Start an interactive research session. |
316
+ | `arbor setup` | Configure provider / model / keys → `~/.arbor/config.yaml`. |
317
+ | `arbor report <session>` | Re-render `REPORT.md` for a past session. |
318
+ | `arbor doctor` | Diagnose install, PATH, git, and API keys. |
319
+ | `arbor version` | Print the installed version. |
320
+
321
+ Lower-level entry points (`run-research`, `coordinator`, `executor`, `review-research`)
322
+ remain for debugging — see the [CLI reference](https://RUC-NLPIR.github.io/Arbor/docs/cli/).
323
+
324
+ ---
325
+
326
+ ## 🔌 Plugins & Skills
327
+
328
+ A single line retargets the agent to a new domain — evaluation protocol, protected
329
+ data directories, required outputs, and timeout presets all come from the plugin:
330
+
331
+ ```yaml
332
+ plugin: mle_kaggle # switches to Kaggle/MLE mode
333
+ ```
334
+
335
+ A plugin is one YAML file (prompt-injection points + config overrides + profiles +
336
+ lifecycle hooks + an eval contract); a Skill is a markdown playbook the agent loads on
337
+ demand at runtime. A copy-pasteable Kaggle config lives in
338
+ [`examples/kaggle_config.example.yaml`](examples/kaggle_config.example.yaml).
339
+
340
+ ---
341
+
342
+ ## 💾 Output & Resume
343
+
344
+ Each run writes a session directory under `.arbor/sessions/` containing `REPORT.md`,
345
+ `events.jsonl`, `run_stats.json`, the Idea Tree, and per-experiment artifacts. Runs are resumable —
346
+ interrupt with `Ctrl+C` and continue later with `--resume`; Arbor reloads the Idea Tree and
347
+ picks up where it left off.
348
+
349
+ ```bash
350
+ arbor report .arbor/sessions/<run_name> # re-render a past report
351
+ arbor --resume --run-name <run_name> # continue an interrupted run
352
+ ```
353
+
354
+ ---
355
+
356
+ ## 📊 Results
357
+
358
+ Arbor was evaluated as a single controller across model training, harness engineering,
359
+ and data synthesis — only the material, objective, evaluator, and budget change. It
360
+ achieves the best held-out test result on all six tasks against strong single-agent baselines.
361
+
362
+ | Task | Direction | Initial | Codex | Claude Code | **Arbor** | Gain |
363
+ | --- | --- | --- | --- | --- | --- | --- |
364
+ | Optimizer Design | steps ↓ | 3325 | 3325 | 3287.5 | **3237.5** | +2.63% |
365
+ | Architecture Design | loss ↓ | 1.098 | 1.083 | 1.033 | **1.028** | +6.38% |
366
+ | Terminal-Bench 2.0 | pass ↑ | 69.81 | 73.59 | 71.70 | **77.36** | +7.55 |
367
+ | BrowseComp | acc ↑ | 45.33 | 50.00 | 53.33 | **67.67** | +22.34 |
368
+ | Search-Agent Data | gap ↑ | 5.00 | 9.00 | 12.00 | **18.00** | +13.0 |
369
+ | Math-Reasoning Data | gap ↑ | 1.04 | 6.25 | 8.33 | **20.83** | +19.79 |
370
+
371
+ On **MLE-Bench Lite** with GPT-5.5, Arbor reaches **86.36% Any-Medal** (100% valid
372
+ submissions, 95.45% above median, 77.27% gold). See the [paper](https://arxiv.org/pdf/2606.11926)
373
+ for full protocols and ablations.
374
+
375
+
376
+ ---
377
+
378
+ ## 🗂️ Project Structure
379
+
380
+ The code lives in `src/` and is imported as the `arbor` package.
381
+
382
+ ```
383
+ src/ # the `arbor` package
384
+ ├── core/ Shared infrastructure: ReAct loop, tools, LLM providers, context mgmt
385
+ ├── executor/ Executor agent + `executor` CLI
386
+ ├── coordinator/ Coordinator agent, Idea Tree, orchestrator, coordinator tools
387
+ ├── cli/ `arbor` CLI: intake, live dashboard, setup, doctor, config
388
+ ├── events/ Typed event bus and payloads
389
+ ├── report/ Report generation
390
+ ├── webui/ Read-only run-monitoring web server
391
+ ├── plugins/ Domain plugins (e.g. mle_kaggle.yaml)
392
+ ├── skills/ On-demand markdown playbooks
393
+ ├── dashboard.py HTML dashboard generator
394
+ ├── run.py `run-research` CLI
395
+ └── review.py `review-research` CLI
396
+ ```
397
+
398
+ ---
399
+
400
+ ## 🙏 Acknowledgements
401
+
402
+ Arbor is built on the excellent foundation of
403
+ [claw-code](https://github.com/ultraworkers/claw-code).
404
+
405
+ claw-code is an open-source Rust reimplementation of Claude Code. It provided
406
+ the REPL framework, tool-calling infrastructure, and cross-platform compilation
407
+ that made Arbor's CLI possible. Huge thanks to the ultraworkers team for their
408
+ outstanding work.
409
+
410
+ 🔗 claw-code: https://github.com/ultraworkers/claw-code
411
+
412
+ ---
413
+
414
+ ## 📚 Citation
415
+
416
+ ```bibtex
417
+ @misc{jin2026arbor,
418
+ title = {Toward Generalist Autonomous Research via Hypothesis-Tree Refinement},
419
+ author = {Jiajie Jin and Yuyang Hu and Kai Qiu and Qi Dai and Chong Luo and
420
+ Guanting Dong and Xiaoxi Li and Tong Zhao and Xiaolong Ma and
421
+ Gongrui Zhang and Zhirong Wu and Bei Liu and Zhengyuan Yang and
422
+ Linjie Li and Lijuan Wang and Hongjin Qian and Yutao Zhu and Zhicheng Dou},
423
+ year = {2026},
424
+ eprint = {2606.11926},
425
+ archivePrefix = {arXiv},
426
+ url = {https://arxiv.org/abs/2606.11926}
427
+ }
428
+ ```
429
+
430
+ ---
431
+
432
+ ## Star History
433
+
434
+ <picture>
435
+ <source
436
+ media="(prefers-color-scheme: dark)"
437
+ srcset="https://api.star-history.com/svg?repos=RUC-NLPIR/Arbor&type=Date&theme=dark"
438
+ />
439
+ <source
440
+ media="(prefers-color-scheme: light)"
441
+ srcset="https://api.star-history.com/svg?repos=RUC-NLPIR/Arbor&type=Date"
442
+ />
443
+ <img
444
+ alt="Star History Chart"
445
+ src="https://api.star-history.com/svg?repos=RUC-NLPIR/Arbor&type=Date"
446
+ />
447
+ </picture>
448
+
449
+ ---
450
+
451
+ ## 📄 License
452
+
453
+ Released under the [Apache License 2.0](LICENSE).
454
+
455
+ ---
456
+
457
+ Built at the Gaoling School of Artificial Intelligence, Renmin University of China, and
458
+ Microsoft Research.