academic-army 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/.editorconfig +9 -0
  2. package/.github/workflows/publish.yml +44 -0
  3. package/.prettierrc.json +3 -0
  4. package/LICENSE +21 -0
  5. package/README.md +172 -0
  6. package/README.zh-CN.md +172 -0
  7. package/agent-forge.yaml +83 -0
  8. package/eslint.config.js +28 -0
  9. package/install_mcp.py +85 -0
  10. package/mcp-server/__main__.py +33 -0
  11. package/mcp-server/deepresearch/__init__.py +3 -0
  12. package/mcp-server/deepresearch/tools.py +33 -0
  13. package/mcp-server/requirements.txt +4 -0
  14. package/metaskills/README.md +131 -0
  15. package/metaskills/README.zh-CN.md +131 -0
  16. package/metaskills/academic-army-architect/METASKILL.md +91 -0
  17. package/metaskills/academic-army-architect/envolve.sh +9 -0
  18. package/metaskills/academic-army-coding-plan/ENVOLVETASK.md +1 -0
  19. package/metaskills/academic-army-coding-plan/METASKILL.md +118 -0
  20. package/metaskills/academic-army-coding-plan/envolve.sh +9 -0
  21. package/metaskills/academic-army-coding-style/METASKILL.md +292 -0
  22. package/metaskills/academic-army-experiment-plan/ENVOLVETASK.md +1 -0
  23. package/metaskills/academic-army-experiment-plan/METASKILL.md +82 -0
  24. package/metaskills/academic-army-experiment-plan/envolve.sh +9 -0
  25. package/metaskills/academic-army-repo-scaffold/ENVOLVETASK.md +1 -0
  26. package/metaskills/academic-army-repo-scaffold/METASKILL.md +223 -0
  27. package/metaskills/academic-army-repo-scaffold/envolve.sh +9 -0
  28. package/package.json +35 -0
  29. package/runs/develop-skill.sh +17 -0
  30. package/runs/develop.sh +16 -0
  31. package/skills/academic-army-architect/SKILL.md +336 -0
  32. package/skills/academic-army-architect/agents/openai.yaml +11 -0
  33. package/skills/academic-army-architect/references/blueprint-schema.md +345 -0
  34. package/skills/academic-army-coding-plan/SKILL.md +491 -0
  35. package/skills/academic-army-coding-plan/agents/openai.yaml +11 -0
  36. package/skills/academic-army-coding-style/SKILL.md +915 -0
  37. package/skills/academic-army-coding-style/agents/openai.yaml +11 -0
  38. package/skills/academic-army-experiment-plan/SKILL.md +517 -0
  39. package/skills/academic-army-experiment-plan/agents/openai.yaml +11 -0
  40. package/skills/academic-army-repo-scaffold/SKILL.md +756 -0
  41. package/skills/academic-army-repo-scaffold/agents/openai.yaml +10 -0
  42. package/src/README.md +79 -0
  43. package/src/README.zh-CN.md +79 -0
  44. package/src/cli.ts +55 -0
  45. package/src/developing/README.md +146 -0
  46. package/src/developing/README.zh-CN.md +146 -0
  47. package/src/developing/agents/developer.ts +40 -0
  48. package/src/developing/agents/factory.ts +11 -0
  49. package/src/developing/agents/index.ts +8 -0
  50. package/src/developing/agents/manager.ts +74 -0
  51. package/src/developing/agents/prompts.ts +12 -0
  52. package/src/developing/agents/reviewer.ts +44 -0
  53. package/src/developing/agents/trajectory-optimizer.ts +70 -0
  54. package/src/developing/agents/types.ts +41 -0
  55. package/src/developing/index.ts +2 -0
  56. package/src/developing/pipeline.ts +306 -0
  57. package/src/developing/pipelineskill.ts +169 -0
  58. package/src/evolve-skill/README.md +116 -0
  59. package/src/evolve-skill/README.zh-CN.md +116 -0
  60. package/src/evolve-skill/agents/evaluator.ts +28 -0
  61. package/src/evolve-skill/agents/factory.ts +11 -0
  62. package/src/evolve-skill/agents/index.ts +4 -0
  63. package/src/evolve-skill/agents/modifier.ts +27 -0
  64. package/src/evolve-skill/agents/runner.ts +19 -0
  65. package/src/evolve-skill/index.ts +1 -0
  66. package/src/evolve-skill/pipeline.ts +140 -0
  67. package/src/pipeline.ts +65 -0
  68. package/tsconfig.json +22 -0
package/.editorconfig ADDED
@@ -0,0 +1,9 @@
1
+ root = true
2
+
3
+ [*]
4
+ charset = utf-8
5
+ end_of_line = lf
6
+ indent_style = space
7
+ indent_size = 2
8
+ insert_final_newline = true
9
+ trim_trailing_whitespace = true
package/.github/workflows/publish.yml ADDED
@@ -0,0 +1,44 @@
1
+ name: Publish
2
+
3
+ on:
4
+ push:
5
+ paths:
6
+ - package.json
7
+ workflow_dispatch:
8
+
9
+ permissions:
10
+ contents: read
11
+ id-token: write
12
+
13
+ jobs:
14
+ publish:
15
+ name: Publish to npm
16
+ runs-on: ubuntu-latest
17
+
18
+ steps:
19
+ - name: Checkout
20
+ uses: actions/checkout@v6
21
+
22
+ - name: Setup Node.js
23
+ uses: actions/setup-node@v6
24
+ with:
25
+ node-version: "20.x"
26
+ package-manager-cache: false
27
+
28
+ - name: Update npm
29
+ run: npm install -g npm@latest
30
+
31
+ - name: Install dependencies
32
+ run: npm ci
33
+
34
+ - name: Lint
35
+ run: npm run lint
36
+
37
+ - name: Type check
38
+ run: npm run check
39
+
40
+ - name: Build
41
+ run: npm run build
42
+
43
+ - name: Publish to npm
44
+ run: npm publish --provenance
package/.prettierrc.json ADDED
@@ -0,0 +1,3 @@
1
+ {
2
+ "printWidth": 100
3
+ }
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Howard Yin
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,172 @@
1
+ # AcademicArmy
2
+
3
+ AcademicArmy is a Codex-based workflow for turning research ideas into structured paper-planning artifacts and an implementation codebase. Its current core is a sequence of planning skills, a repository scaffold skill, and TypeScript pipelines that run development and skill-evolution agents from those artifacts.
4
+
5
+ > Status: experimental workflow infrastructure. The generated project lives under `output/`, which is ignored by git.
6
+
7
+ [中文说明](README.zh-CN.md)
8
+
9
+ ## Why It Exists
10
+
11
+ The central principle of AcademicArmy is: build according to the planning artifacts.
12
+
13
+ `paper_blueprint.md`, `experiment_plan.md`, and `coding_plan.md` should be specific enough that downstream agents can implement the project without redesigning its research direction, evidence strategy, or code contract during development.
14
+
15
+ Parts that require fine-grained research should mainly be handled by skills that know how to call Deep Research through APIs. This avoids saving large amounts of local data only for retrieval, keeping the project lighter and making research updates easier to refresh.
16
+
17
+ ## How The Workflow Fits Together
18
+
19
+ First use three planning skills to produce three AI-facing Markdown artifacts:
20
+
21
+ | Step | Artifact | Role |
22
+ |---|---|---|
23
+ | `academic-army-architect` | `paper_blueprint.md` | The strategic paper blueprint that fixes the paper identity, target venue posture, claims, contribution boundary, candidate method space, evidence needs, and downstream constraints. |
24
+ | `academic-army-experiment-plan` | `experiment_plan.md` | The experiment strategy that maps paper claims to evidence, datasets or workloads, metrics, baselines, ablations, robustness checks, and reviewer-facing validation needs. |
25
+ | `academic-army-coding-plan` | `coding_plan.md` | The implementation contract that turns the blueprint and experiment plan into logical module boundaries, interface and entrypoint semantics, harnesses, testing categories, raw-result artifact schemas, and method-freeze rules. |
26
+
27
+ Each planning skill also writes a Chinese `*.explain.md` companion for human review, but the development runner consumes the three English Markdown files above.
28
+
29
+ Planning skills use a fixed language split. AI-facing artifacts such as `paper_blueprint.md`, `experiment_plan.md`, and `coding_plan.md` are written in English and contain only the plan or specification. Their companion explanation files, such as `paper_blueprint.explain.md`, `experiment_plan.explain.md`, and `coding_plan.explain.md`, are written in Chinese so the user can review the reasoning, trade-offs, and confirmation state.
30
+
31
+ ## Quick Start
32
+
33
+ ### 1. Install local dependencies
34
+
35
+ Install package dependencies once:
36
+
37
+ ```bash
38
+ npm install
39
+ ```
40
+
41
+ Install MCP server dependencies from [`mcp-server/requirements.txt`](mcp-server/requirements.txt) if needed:
42
+
43
+ ```bash
44
+ python -m pip install -r ./mcp-server/requirements.txt
45
+ ```
46
+
47
+ ### 2. Configure DeepResearch MCP
48
+
49
+ Create `.env` in the repository root:
50
+
51
+ ```env
52
+ OPENAI_API_KEY=your_api_key_here
53
+ ```
54
+
55
+ For project pipeline runs, use the `academic_army_mcp_tools` server through [`agent-forge.yaml`](agent-forge.yaml). That config launches the server as `python -m mcp-server` with `PYTHONPATH=.` and `cwd=.` from the repository root, so the evolve/developing runners do not need a separate Codex MCP installation step.
56
+
57
+ When running AcademicArmy skills directly in Codex, use [`install_mcp.py`](install_mcp.py) to install the same MCP server into Codex so the skill can call `academic_army_mcp_tools.deepresearch` outside the project pipeline:
58
+
59
+ ```bash
60
+ python install_mcp.py
61
+ ```
62
+
63
+ ### 3. Generate the planning artifacts
64
+
65
+ Start with an idea. The idea can be rough or detailed; it does not need to be a complete research plan.
66
+
67
+ Use `academic-army-architect` to turn the idea into `paper_blueprint.md`. Because an early idea is usually underspecified, this step may involve multiple rounds of clarification and revision before the blueprint is specific enough to guide downstream work.
68
+
69
+ Once you are satisfied with the paper blueprint, continue with the next planning skills to derive `experiment_plan.md` and `coding_plan.md`. Those three artifacts become the project starting point for repository scaffolding and iterative code development.
70
+
71
+ ### 4. Initialize the codebase scaffold
72
+
73
+ After the three planning artifacts are ready, use `academic-army-repo-scaffold` to initialize a real starter repository for the codebase. It uses DeepResearch to choose a template, official initializer, or high-quality template repository, generates the starter repository, then adds the fixed experiment directories `data/`, `output/`, `results/`, and `harness/`. It writes dependency declarations and repo-local installation instructions, records installable dependencies and reference-only sources in `REFERENCES.md` and `REFERENCES.zh-CN.md`, preserves the template's test layout, and keeps README text focused on the current repository structure and usage.
74
+
75
+ The repo scaffold skill does not implement paper methods, harness logic, tests, metrics, loaders, exporters, or experiment runners. Those belong to later implementation work.
76
+
77
+ ### 5. Run the development loop
78
+
79
+ After the planning artifacts are ready, run:
80
+
81
+ ```bash
82
+ bash runs/develop.sh
83
+ ```
84
+
85
+ [`runs/develop.sh`](runs/develop.sh) calls the TypeScript `developing` pipeline, which reads the three planning artifacts and iteratively writes code under `output/codebase`. See [`src/README.md`](src/README.md) for the TypeScript entry points and [`src/developing/README.md`](src/developing/README.md) for the development loop implementation.
86
+
87
+ ## Common Tasks
88
+
89
+ ### Improve a planning skill output
90
+
91
+ If the direct output from `academic-army-architect`, `academic-army-experiment-plan`, or `academic-army-coding-plan` is not satisfactory, do not only patch the generated artifact by hand. Add the concrete dissatisfaction, preferred behavior, and failure pattern to the matching metaskill under [`metaskills/`](metaskills/), then run the corresponding `envolve.sh` script for several rounds.
92
+
93
+ Running those scripts calls the TypeScript [`evolve-skill`](src/evolve-skill/README.md) pipeline. Unlike directly running a skill once, `evolve-skill` is a small multi-agent loop: fresh runner agents test the skill on fixed tasks, an evaluator agent judges the produced artifacts against the metaskill, and a modifier agent revises the skill itself from that review. A few rounds usually make the next direct skill output much closer to the desired shape.
94
+
95
+ ### Run TypeScript pipelines directly
96
+
97
+ The shell scripts are convenience wrappers around these TypeScript pipelines; to bypass them, run the pipelines directly:
98
+
99
+ ```bash
100
+ npm run developing
101
+ npm run developing-skill
102
+ npm run evolve-skill
103
+ ```
104
+
105
+ For the shared CLI and pipeline structure, see [`src/README.md`](src/README.md).
106
+
107
+ ### Call DeepResearch
108
+
109
+ AcademicArmy includes a local stdio MCP implementation in the [`mcp-server`](mcp-server) directory. It exposes one tool:
110
+
111
+ - `deepresearch(prompt: str)`: runs the prompt with OpenAI Responses using `gpt-5.5`, high reasoning, web search, background mode, and source inclusion.
112
+
113
+ Agents should call the `deepresearch` tool with a single self-contained prompt. For example:
114
+
115
+ ```text
116
+ Use deepresearch with prompt:
117
+ Find the closest papers to this research idea, compare their methods, and return a cited structured report.
118
+ ```
119
+
120
+ ## Project Structure
121
+
122
+ | Path | Purpose |
123
+ |---|---|
124
+ | `agent-forge.yaml` | Agent and team wiring. |
125
+ | `install_mcp.py` | Installs the project MCP server into Codex for direct skill runs. |
126
+ | `mcp-server/` | Local stdio MCP implementation that exposes `deepresearch`. |
127
+ | `skills/` | Prepared AcademicArmy skills. |
128
+ | `metaskills/` | Matching metaskill design/evolution files. |
129
+ | `runs/` | Convenience wrappers around TypeScript pipelines. |
130
+ | `src/` | TypeScript pipeline structure and implementation notes. |
131
+ | `output/` | Generated planning artifacts, codebase output, and archives. |
132
+
133
+ Agent and team wiring lives in [`agent-forge.yaml`](agent-forge.yaml). The current TypeScript agents are implemented under [`src/developing/agents`](src/developing/agents) and [`src/evolve-skill/agents`](src/evolve-skill/agents).
134
+
135
+ Prepared AcademicArmy skills live under [`skills/`](skills/), and their matching metaskill design/evolution files live under [`metaskills/`](metaskills/).
136
+
137
+ ## Configuration Reference
138
+
139
+ | File or variable | Required for | Notes |
140
+ |---|---|---|
141
+ | `.env` / `OPENAI_API_KEY` | DeepResearch MCP | Read by the MCP server and by `install_mcp.py`. |
142
+ | `agent-forge.yaml` | Project pipelines | Launches `academic_army_mcp_tools` as `python -m mcp-server` with `PYTHONPATH=.` and `cwd=.`. |
143
+ | `secret.yaml` | Prepared shell scripts | Local ignored config overlay used by the prepared wrappers. It may contain passwords, API keys, runtime credentials, or other private values that must not be committed or uploaded to GitHub. |
144
+
145
+ To override or add environment variables directly when installing MCP into Codex, repeat `-e/--env NAME=VALUE`:
146
+
147
+ ```bash
148
+ python install_mcp.py -e OPENAI_API_KEY=your_api_key_here
149
+ ```
150
+
151
+ Running the installer refreshes the Codex `academic_army_mcp_tools` entry, registers the current Python executable with `-m mcp-server`, sets the repository root as the MCP working directory, reads `.env`, and forwards those values to the MCP server.
152
+
153
+ ## Troubleshooting
154
+
155
+ | Problem | Likely cause | Fix |
156
+ |---|---|---|
157
+ | `OPENAI_API_KEY` is missing | `.env` is not present or was not forwarded to Codex MCP. | Create `.env`; when running skills directly in Codex, rerun `python install_mcp.py`. |
158
+ | A wrapper cannot find `secret.yaml` | The prepared shell scripts pass a local config overlay for private values such as passwords, API keys, and runtime credentials. | Create local `secret.yaml` or adjust the script to use your config files. Do not commit or upload this file to GitHub. |
159
+ | Development output is drifting | The planning artifacts are not specific enough. | Revise `paper_blueprint.md`, `experiment_plan.md`, and `coding_plan.md` before continuing development. |
160
+
161
+ ## Development
162
+
163
+ Use the normal TypeScript checks before changing runner code:
164
+
165
+ ```bash
166
+ npm run check
167
+ npm run lint
168
+ ```
169
+
170
+ ## License
171
+
172
+ MIT. See [LICENSE](LICENSE).
package/README.zh-CN.md ADDED
@@ -0,0 +1,172 @@
1
+ # AcademicArmy
2
+
3
+ AcademicArmy 是一个基于 Codex 的研究工作流,用来把研究想法转成结构化论文规划产物和可持续开发的实现代码库。当前核心由一组规划类 skills、一个仓库 scaffold skill,以及驱动开发和 skill evolution agents 的 TypeScript pipelines 组成。
4
+
5
+ > 当前状态:实验性工作流基础设施。生成的项目和运行产物位于 `output/`,该目录被 git 忽略。
6
+
7
+ [English README](README.md)
8
+
9
+ ## 为什么需要它
10
+
11
+ AcademicArmy 的主体核心可以概括为一句话:按图施工。
12
+
13
+ 这里的“施工图”就是 `paper_blueprint.md`、`experiment_plan.md` 和 `coding_plan.md`。它们应该足够具体,让下游 agents 在实现阶段不需要重新设计研究方向、证据策略或代码契约。
14
+
15
+ 需要精细调研的部分,主要通过会使用 API 的 skill 调用 Deep Research 来完成。这样可以避免为了检索而在本地保存大量数据,让项目更轻量,也方便后续刷新调研结果。
16
+
17
+ ## 工作流如何衔接
18
+
19
+ 先使用三个规划类 skill 交互生成三份面向 AI 执行的 Markdown 产物:
20
+
21
+ | 步骤 | Artifact | 作用 |
22
+ |---|---|---|
23
+ | `academic-army-architect` | `paper_blueprint.md` | 论文战略蓝图,用来固定论文身份、目标 venue 姿态、核心 claims、贡献边界、候选方法空间、证据需求和下游约束。 |
24
+ | `academic-army-experiment-plan` | `experiment_plan.md` | 实验策略,把论文 claims 映射到证据链、数据集或 workload、指标、baselines、消融、鲁棒性检查和审稿人关心的验证点。 |
25
+ | `academic-army-coding-plan` | `coding_plan.md` | 代码实现契约,把论文蓝图和实验方案转成逻辑模块边界、接口与 entrypoint 语义、实验 harness、测试类别、raw result artifact schema 和 method freeze 规则。 |
26
+
27
+ 每个规划类 skill 还会同时生成一份中文 `*.explain.md` 解释文件,方便用户审阅;但后续开发 runner 读取的是上面三份英文 Markdown。
28
+
29
+ 规划类 skills 使用固定语言分工。面向后续 AI 执行的产物,例如 `paper_blueprint.md`、`experiment_plan.md` 和 `coding_plan.md`,统一使用英文,并且只放方案或规范本身。配套解释文件,例如 `paper_blueprint.explain.md`、`experiment_plan.explain.md` 和 `coding_plan.explain.md`,统一使用中文,用来帮助用户确认推导逻辑、关键取舍和当前确认状态。
30
+
31
+ ## 快速开始
32
+
33
+ ### 1. 安装本地依赖
34
+
35
+ 首次使用先安装依赖:
36
+
37
+ ```bash
38
+ npm install
39
+ ```
40
+
41
+ 如有需要,从 [`mcp-server/requirements.txt`](mcp-server/requirements.txt) 安装 MCP server 依赖:
42
+
43
+ ```bash
44
+ python -m pip install -r ./mcp-server/requirements.txt
45
+ ```
46
+
47
+ ### 2. 配置 DeepResearch MCP
48
+
49
+ 先在仓库根目录创建 `.env`:
50
+
51
+ ```env
52
+ OPENAI_API_KEY=your_api_key_here
53
+ ```
54
+
55
+ 运行项目 pipeline 时,通过 [`agent-forge.yaml`](agent-forge.yaml) 使用 `academic_army_mcp_tools`。该配置会在仓库根目录以 `PYTHONPATH=.` 和 `cwd=.` 运行 `python -m mcp-server`,因此 evolve/developing runner 不需要额外执行 Codex MCP 安装步骤。
56
+
57
+ 如果直接在 Codex 中运行 AcademicArmy skills,需要用 [`install_mcp.py`](install_mcp.py) 把同一个 MCP server 安装到 Codex 里,这样 skill 才能在项目 pipeline 之外调用 `academic_army_mcp_tools.deepresearch`:
58
+
59
+ ```bash
60
+ python install_mcp.py
61
+ ```
62
+
63
+ ### 3. 生成规划产物
64
+
65
+ 从一个想法开始。这个想法可以很粗略,也可以比较详细,不需要一开始就是完整的研究方案。
66
+
67
+ 使用 `academic-army-architect` 把这个想法整理成 `paper_blueprint.md`,也就是后续执行用的核心“施工图”。由于最初的想法通常还不够收敛,这一步可以通过多轮澄清和修改,把论文蓝图逐步调整到足够支撑下游工作的状态。
68
+
69
+ 当你对论文蓝图满意后,继续使用后续规划类 skills 生成 `experiment_plan.md` 和 `coding_plan.md`。这三份规划产物共同构成 AcademicArmy 的施工图,成为仓库初始化和迭代代码开发的项目起点。
70
+
71
+ ### 4. 初始化代码库 Scaffold
72
+
73
+ 三份规划产物准备好后,使用 `academic-army-repo-scaffold` 为代码库初始化一个真实 starter repository。它会使用 DeepResearch 选择合适的 template、官方 initializer 或高质量 template repository,生成 starter repository,然后叠加固定实验目录 `data/`、`output/`、`results/` 和 `harness/`。它会写入依赖声明和 repo-local 安装说明,在 `REFERENCES.md` 和 `REFERENCES.zh-CN.md` 中记录可安装依赖和仅作参考的外部来源,保留模板决定的测试结构,并让 README 聚焦当前仓库结构和用法。
74
+
75
+ repo scaffold skill 不实现论文方法、harness 逻辑、测试、metric、loader、exporter 或实验 runner;这些属于后续实现工作。
76
+
77
+ ### 5. 运行开发循环
78
+
79
+ 三份规划产物准备好后,运行:
80
+
81
+ ```bash
82
+ bash runs/develop.sh
83
+ ```
84
+
85
+ [`runs/develop.sh`](runs/develop.sh) 会调用 TypeScript 的 `developing` pipeline,读取三份规划产物,并在 `output/codebase` 下迭代写代码。TypeScript 入口和目录结构见 [`src/README.zh-CN.md`](src/README.zh-CN.md),开发循环实现见 [`src/developing/README.zh-CN.md`](src/developing/README.zh-CN.md)。
86
+
87
+ ## 常见任务
88
+
89
+ ### 改进规划 Skill 输出
90
+
91
+ 如果 `academic-army-architect`、`academic-army-experiment-plan` 或 `academic-army-coding-plan` 直接生成的产物不满意,不建议只手工修产物本身。更好的做法是打开 [`metaskills/`](metaskills/) 下对应的 metaskill,把不满意的地方、希望偏向的写法和失败模式写进去,然后运行对应的 `envolve.sh` 脚本多迭代几轮。
92
+
93
+ 运行这些脚本时,会调用 TypeScript 的 [`evolve-skill`](src/evolve-skill/README.zh-CN.md) pipeline。`evolve-skill` 不同于直接运行一次 skill,它是一个简单的 multi-agent loop:新的 runner agent 用固定任务测试 skill,evaluator agent 按 metaskill 评价产物,modifier agent 再根据评价修改 skill 本身。通常多跑几轮后,再直接运行该 skill,就能得到更接近预期的结果。
94
+
95
+ ### 直接运行 TypeScript Pipelines
96
+
97
+ shell scripts 只是这些 TypeScript pipeline 的便捷包装;如果想绕过它们,可以直接运行 pipeline:
98
+
99
+ ```bash
100
+ npm run developing
101
+ npm run developing-skill
102
+ npm run evolve-skill
103
+ ```
104
+
105
+ TypeScript pipeline 的目录结构和实现说明见 [`src/README.zh-CN.md`](src/README.zh-CN.md)。
106
+
107
+ ### 调用 DeepResearch
108
+
109
+ AcademicArmy 在 [`mcp-server`](mcp-server) 目录下提供了本地 stdio MCP 实现。它只暴露一个工具:
110
+
111
+ - `deepresearch(prompt: str)`:把 prompt 交给 OpenAI Responses,以 `gpt-5.5`、high reasoning、web search、background mode 和 source inclusion 的固定配置运行。
112
+
113
+ 使用时只需要让 agent 给 `deepresearch` 传入一个自包含 prompt,例如:
114
+
115
+ ```text
116
+ Use deepresearch with prompt:
117
+ Find the closest papers to this research idea, compare their methods, and return a cited structured report.
118
+ ```
119
+
120
+ ## 项目结构
121
+
122
+ | 路径 | 用途 |
123
+ |---|---|
124
+ | `agent-forge.yaml` | Agent 和团队 wiring。 |
125
+ | `install_mcp.py` | 把项目 MCP server 安装到 Codex,供直接运行 skill 时使用。 |
126
+ | `mcp-server/` | 本地 stdio MCP 实现,暴露 `deepresearch`。 |
127
+ | `skills/` | 已准备的 AcademicArmy skills。 |
128
+ | `metaskills/` | 对应的 metaskill 设计与 evolution 文件。 |
129
+ | `runs/` | TypeScript pipelines 的便捷 wrappers。 |
130
+ | `src/` | TypeScript pipeline 的目录结构和实现说明。 |
131
+ | `output/` | 生成的规划产物、代码库输出和归档。 |
132
+
133
+ Agent 和团队 wiring 位于 [`agent-forge.yaml`](agent-forge.yaml)。当前 TypeScript agents 分别实现于 [`src/developing/agents`](src/developing/agents) 和 [`src/evolve-skill/agents`](src/evolve-skill/agents)。
134
+
135
+ 已准备的 AcademicArmy skills 位于 [`skills/`](skills/),对应的 metaskill 设计与 evolution 文件位于 [`metaskills/`](metaskills/)。
136
+
137
+ ## 配置参考
138
+
139
+ | 文件或变量 | 用于 | 说明 |
140
+ |---|---|---|
141
+ | `.env` / `OPENAI_API_KEY` | DeepResearch MCP | MCP server 和 `install_mcp.py` 会读取。 |
142
+ | `agent-forge.yaml` | 项目 pipelines | 以 `PYTHONPATH=.` 和 `cwd=.` 运行 `python -m mcp-server`。 |
143
+ | `secret.yaml` | 预设 shell scripts | 预设 wrappers 使用的本地忽略 config overlay。它可以包含密码、API key、runtime 凭据等不能提交或上传到 GitHub 的隐私内容。 |
144
+
145
+ 如果需要覆盖或补充环境变量,可以重复使用 `-e/--env NAME=VALUE`:
146
+
147
+ ```bash
148
+ python install_mcp.py -e OPENAI_API_KEY=your_api_key_here
149
+ ```
150
+
151
+ 运行安装脚本时,会刷新 Codex 中的 `academic_army_mcp_tools` 配置项,注册当前 Python 可执行文件和 `-m mcp-server`,把仓库根目录设置为 MCP 工作目录,读取 `.env`,并把这些环境变量传给 MCP server。
152
+
153
+ ## 常见问题
154
+
155
+ | 问题 | 常见原因 | 解决办法 |
156
+ |---|---|---|
157
+ | 缺少 `OPENAI_API_KEY` | 没有 `.env`,或没有把变量转发给 Codex MCP。 | 创建 `.env`;如果直接在 Codex 中跑 skill,再执行 `python install_mcp.py`。 |
158
+ | Wrapper 找不到 `secret.yaml` | 预设脚本传入了本地 config overlay,用来放密码、API key、runtime 凭据等隐私内容。 | 创建本地 `secret.yaml`,或调整脚本使用你的 config 文件。不要把这个文件提交或上传到 GitHub。 |
159
+ | 开发输出偏离规划 | 三份规划产物还不够具体。 | 先修订 `paper_blueprint.md`、`experiment_plan.md` 和 `coding_plan.md`,再继续开发。 |
160
+
161
+ ## 开发
162
+
163
+ 修改 runner 代码前,使用常规 TypeScript 检查:
164
+
165
+ ```bash
166
+ npm run check
167
+ npm run lint
168
+ ```
169
+
170
+ ## License
171
+
172
+ MIT. See [LICENSE](LICENSE).
package/agent-forge.yaml ADDED
@@ -0,0 +1,83 @@
1
+ runtimes:
2
+ codex:
3
+ kind: codex
4
+ options:
5
+ config:
6
+ mcp_servers:
7
+ academic_army_mcp_tools:
8
+ command: python
9
+ args:
10
+ - -m
11
+ - mcp-server
12
+ cwd: .
13
+ env:
14
+ PYTHONPATH: .
15
+ tool_timeout_sec: 3600
16
+ qwen:
17
+ kind: qwen
18
+ options:
19
+ authType: openai
20
+ model: deepseek-reasoner
21
+ env:
22
+ OPENAI_BASE_URL: https://api.deepseek.com
23
+ OPENAI_MODEL: deepseek-v4-pro
24
+ OPENAI_REASONING_EFFORT: high
25
+
26
+ threads:
27
+ codex-5.5-full-access:
28
+ runtime: codex
29
+ options:
30
+ model: gpt-5.5
31
+ sandboxMode: danger-full-access
32
+ workingDirectory: .
33
+ codex-5.5-read-only:
34
+ runtime: codex
35
+ options:
36
+ model: gpt-5.5
37
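+ sandboxMode: danger-full-access # NOTE(review): thread is named read-only but runs with full access; presumably meant to be read-only - confirm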
38
+ workingDirectory: .
39
+ codex-develop-5.5-full-access:
40
+ runtime: codex
41
+ options:
42
+ model: gpt-5.5
43
+ sandboxMode: danger-full-access
44
+ workingDirectory: &developerWorkspacePath output/codebase
45
+ codex-develop-5.5-read-only:
46
+ runtime: codex
47
+ options:
48
+ model: gpt-5.5
49
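+ sandboxMode: danger-full-access # NOTE(review): thread is named read-only but runs with full access; presumably meant to be read-only - confirm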
50
+ workingDirectory: *developerWorkspacePath
51
+ codex-develop-5.3-full-access:
52
+ runtime: codex
53
+ options:
54
+ model: gpt-5.3-codex-spark
55
+ workingDirectory: *developerWorkspacePath
56
+ sandboxMode: danger-full-access
57
+ qwen-develop-full-access:
58
+ runtime: qwen
59
+ options:
60
+ cwd: *developerWorkspacePath
61
+ permissionMode: yolo
62
+
63
+ agents:
64
+ skill-runner:
65
+ thread: codex-5.5-full-access
66
+ skill-evaluator:
67
+ thread: codex-5.5-read-only
68
+ skill-modifier:
69
+ thread: codex-5.5-full-access
70
+ coding-manager:
71
+ thread: codex-develop-5.5-full-access
72
+ constants:
73
+ workspacePath: *developerWorkspacePath
74
+ developer:
75
+ thread: codex-develop-5.3-full-access
76
+ constants:
77
+ workspacePath: *developerWorkspacePath
78
+ code-reviewer:
79
+ thread: codex-develop-5.5-read-only
80
+ constants:
81
+ workspacePath: *developerWorkspacePath
82
+ trajectory-optimizer:
83
+ thread: codex-5.5-full-access
package/eslint.config.js ADDED
@@ -0,0 +1,28 @@
1
+ import { dirname } from "node:path";
2
+ import { fileURLToPath } from "node:url";
3
+ import js from "@eslint/js";
4
+ import { defineConfig, globalIgnores } from "eslint/config";
5
+ import tseslint from "typescript-eslint";
6
+
7
// Directory that contains this config file; handed to the TypeScript parser
// as `tsconfigRootDir` below.
const tsconfigRootDir = dirname(fileURLToPath(import.meta.url));

// Paths that are excluded from linting altogether.
const ignoredPaths = ["dist/**", "node_modules/**", "coverage/**", "output/**"];

// Presets applied to the TypeScript sources, in order.
const typescriptPresets = [
  js.configs.recommended,
  ...tseslint.configs.strictTypeChecked,
  ...tseslint.configs.stylisticTypeChecked,
];

// Lint settings for everything under src/.
const typescriptSourceConfig = {
  files: ["src/**/*.ts"],
  extends: typescriptPresets,
  languageOptions: {
    parserOptions: {
      projectService: true,
      tsconfigRootDir,
    },
  },
  rules: {
    // Switched off explicitly; every other rule comes from the presets.
    "@typescript-eslint/consistent-type-definitions": "off",
  },
};

export default defineConfig(globalIgnores(ignoredPaths), typescriptSourceConfig);
package/install_mcp.py ADDED
@@ -0,0 +1,85 @@
1
+ import argparse
2
+ import json
3
+ import shutil
4
+ import subprocess
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ import tomlkit
9
+ from dotenv import dotenv_values
10
+
11
+
12
# Name under which the MCP server is registered in Codex.
SERVER_NAME = "academic_army_mcp_tools"


# --- Command line -----------------------------------------------------------
parser = argparse.ArgumentParser(
    description="Install the AcademicArmy MCP server into Codex."
)
parser.add_argument("-e", "--env", action="append", default=[], metavar="NAME=VALUE")
parser.add_argument(
    "--timeout",
    type=int,
    default=3600,
    metavar="SECONDS",
    help="MCP tool timeout in seconds.",
)
args = parser.parse_args()

# Validate and split every -e/--env item in a single pass. A later duplicate
# NAME overrides an earlier one.
cli_env = {}
for item in args.env:
    name, separator, value = item.partition("=")
    if not separator or not name:
        parser.error("-e/--env must be NAME=VALUE")
    cli_env[name] = value

if args.timeout <= 0:
    parser.error("--timeout must be greater than 0")

# --- Environment forwarded to the server ------------------------------------
repo = Path(__file__).resolve().parent

# Values from the repository's .env come first; entries without a value are
# skipped and a UTF-8 BOM glued to a key is removed.
env_items = {}
for name, value in dotenv_values(repo / ".env").items():
    name = name.lstrip("\ufeff")
    if name and value is not None:
        env_items[name] = value

# Command-line values win over .env values.
env_items.update(cli_env)

# Only fill in PYTHONPATH when neither .env nor -e provided one.
env_items.setdefault("PYTHONPATH", str(repo))

# --- Registration with Codex ------------------------------------------------
codex = shutil.which("codex")
if not codex:
    raise SystemExit("Could not find the codex command line tool in PATH.")

# Keep the interpreter path absolute but do NOT resolve symlinks: a POSIX
# virtualenv's bin/python is a symlink to the base interpreter, and launching
# the resolved target would start the server outside the virtualenv.
python_executable = str(Path(sys.executable).absolute())

# Best effort: drop a previous registration so `mcp add` starts clean. The
# result is ignored on purpose because the entry may not exist yet.
subprocess.run(
    [codex, "mcp", "remove", SERVER_NAME],
    stdout=subprocess.DEVNULL,
    stderr=subprocess.DEVNULL,
)

command = [codex, "mcp", "add"]
for name, value in env_items.items():
    command += ["--env", f"{name}={value}"]
command += [SERVER_NAME, "--", python_executable, "-m", "mcp-server"]

subprocess.run(command, check=True)

# --- Post-registration config patch -----------------------------------------
# The config file location is read from `codex doctor --json`; cwd and
# tool_timeout_sec are then written into that file directly.
doctor = subprocess.run(
    [codex, "doctor", "--json"],
    check=True,
    capture_output=True,
    text=True,
)
try:
    codex_config = Path(
        json.loads(doctor.stdout)["checks"]["config.load"]["details"]["config.toml"]
    )
except (json.JSONDecodeError, KeyError, TypeError) as error:
    # The server is already registered at this point, so say exactly what is
    # still missing instead of surfacing a bare traceback.
    raise SystemExit(
        f"{SERVER_NAME} was registered, but the Codex config path could not be "
        "read from `codex doctor --json`, so cwd and tool_timeout_sec were not "
        f"set: {error!r}"
    ) from error

config = tomlkit.parse(codex_config.read_text(encoding="utf-8"))
config["mcp_servers"][SERVER_NAME]["cwd"] = str(repo)
config["mcp_servers"][SERVER_NAME]["tool_timeout_sec"] = args.timeout
codex_config.write_text(tomlkit.dumps(config), encoding="utf-8")

# --- Summary ----------------------------------------------------------------
print(f"Installed {SERVER_NAME} with {codex}")
print(f"Python: {python_executable}")
print(f"Working directory: {repo}")
# Report the value that was actually registered, which may come from .env/-e.
print(f"PYTHONPATH: {env_items['PYTHONPATH']}")
print(f"Codex config: {codex_config}")
print(f"Environment variables registered with codex --env: {len(env_items)}")
print(f"tool_timeout_sec: {args.timeout}")
package/mcp-server/__main__.py ADDED
@@ -0,0 +1,33 @@
1
+ import argparse
2
+ import os
3
+
4
+ from dotenv import load_dotenv
5
+ from mcp.server.fastmcp import FastMCP
6
+
7
+ from .deepresearch import register_deepresearch
8
+
9
+
10
# Text handed to FastMCP as the server's `instructions`.
_SERVER_INSTRUCTIONS = (
    "AcademicArmy MCP server. It exposes project-level tools for research, "
    "blueprint orchestration, and future AcademicArmy workflow functions."
)

mcp = FastMCP("academic-army", instructions=_SERVER_INSTRUCTIONS)


if __name__ == "__main__":
    # Load variables from a .env file in the current working directory.
    load_dotenv(".env")

    cli = argparse.ArgumentParser()
    cli.add_argument("-e", "--env", action="append", default=[], metavar="NAME=VALUE")
    options = cli.parse_args()

    # Apply each NAME=VALUE override to the process environment in order;
    # the first malformed item aborts via the parser's error exit.
    for assignment in options.env:
        if "=" not in assignment or assignment.startswith("="):
            cli.error("-e/--env must be NAME=VALUE")
        key, val = assignment.split("=", 1)
        os.environ[key] = val

    # Register the deepresearch tool, then serve over stdio.
    register_deepresearch(mcp)
    mcp.run(transport="stdio")
package/mcp-server/deepresearch/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .tools import deepresearch, register_deepresearch
2
+
3
+ __all__ = ["deepresearch", "register_deepresearch"]