academic-army 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/.editorconfig +9 -0
  2. package/.github/workflows/publish.yml +44 -0
  3. package/.prettierrc.json +3 -0
  4. package/LICENSE +21 -0
  5. package/README.md +172 -0
  6. package/README.zh-CN.md +172 -0
  7. package/agent-forge.yaml +83 -0
  8. package/eslint.config.js +28 -0
  9. package/install_mcp.py +85 -0
  10. package/mcp-server/__main__.py +33 -0
  11. package/mcp-server/deepresearch/__init__.py +3 -0
  12. package/mcp-server/deepresearch/tools.py +33 -0
  13. package/mcp-server/requirements.txt +4 -0
  14. package/metaskills/README.md +131 -0
  15. package/metaskills/README.zh-CN.md +131 -0
  16. package/metaskills/academic-army-architect/METASKILL.md +91 -0
  17. package/metaskills/academic-army-architect/envolve.sh +9 -0
  18. package/metaskills/academic-army-coding-plan/ENVOLVETASK.md +1 -0
  19. package/metaskills/academic-army-coding-plan/METASKILL.md +118 -0
  20. package/metaskills/academic-army-coding-plan/envolve.sh +9 -0
  21. package/metaskills/academic-army-coding-style/METASKILL.md +292 -0
  22. package/metaskills/academic-army-experiment-plan/ENVOLVETASK.md +1 -0
  23. package/metaskills/academic-army-experiment-plan/METASKILL.md +82 -0
  24. package/metaskills/academic-army-experiment-plan/envolve.sh +9 -0
  25. package/metaskills/academic-army-repo-scaffold/ENVOLVETASK.md +1 -0
  26. package/metaskills/academic-army-repo-scaffold/METASKILL.md +223 -0
  27. package/metaskills/academic-army-repo-scaffold/envolve.sh +9 -0
  28. package/package.json +35 -0
  29. package/runs/develop-skill.sh +17 -0
  30. package/runs/develop.sh +16 -0
  31. package/skills/academic-army-architect/SKILL.md +336 -0
  32. package/skills/academic-army-architect/agents/openai.yaml +11 -0
  33. package/skills/academic-army-architect/references/blueprint-schema.md +345 -0
  34. package/skills/academic-army-coding-plan/SKILL.md +491 -0
  35. package/skills/academic-army-coding-plan/agents/openai.yaml +11 -0
  36. package/skills/academic-army-coding-style/SKILL.md +915 -0
  37. package/skills/academic-army-coding-style/agents/openai.yaml +11 -0
  38. package/skills/academic-army-experiment-plan/SKILL.md +517 -0
  39. package/skills/academic-army-experiment-plan/agents/openai.yaml +11 -0
  40. package/skills/academic-army-repo-scaffold/SKILL.md +756 -0
  41. package/skills/academic-army-repo-scaffold/agents/openai.yaml +10 -0
  42. package/src/README.md +79 -0
  43. package/src/README.zh-CN.md +79 -0
  44. package/src/cli.ts +55 -0
  45. package/src/developing/README.md +146 -0
  46. package/src/developing/README.zh-CN.md +146 -0
  47. package/src/developing/agents/developer.ts +40 -0
  48. package/src/developing/agents/factory.ts +11 -0
  49. package/src/developing/agents/index.ts +8 -0
  50. package/src/developing/agents/manager.ts +74 -0
  51. package/src/developing/agents/prompts.ts +12 -0
  52. package/src/developing/agents/reviewer.ts +44 -0
  53. package/src/developing/agents/trajectory-optimizer.ts +70 -0
  54. package/src/developing/agents/types.ts +41 -0
  55. package/src/developing/index.ts +2 -0
  56. package/src/developing/pipeline.ts +306 -0
  57. package/src/developing/pipelineskill.ts +169 -0
  58. package/src/evolve-skill/README.md +116 -0
  59. package/src/evolve-skill/README.zh-CN.md +116 -0
  60. package/src/evolve-skill/agents/evaluator.ts +28 -0
  61. package/src/evolve-skill/agents/factory.ts +11 -0
  62. package/src/evolve-skill/agents/index.ts +4 -0
  63. package/src/evolve-skill/agents/modifier.ts +27 -0
  64. package/src/evolve-skill/agents/runner.ts +19 -0
  65. package/src/evolve-skill/index.ts +1 -0
  66. package/src/evolve-skill/pipeline.ts +140 -0
  67. package/src/pipeline.ts +65 -0
  68. package/tsconfig.json +22 -0
@@ -0,0 +1,10 @@
1
+ interface:
2
+ display_name: "Academic Army Repo Scaffold"
3
+ short_description: "Create template-first Academic Army research repo scaffolds"
4
+ default_prompt: "Use $academic-army-repo-scaffold to initialize a template-first research repository scaffold from my paper_blueprint, experiment plan, coding plan, and target repo path."
5
+
6
+ dependencies:
7
+ tools:
8
+ - type: "mcp"
9
+ value: "academic_army_mcp_tools"
10
+ description: "DeepResearch for repository template, harness, and ecosystem research."
package/src/README.md ADDED
@@ -0,0 +1,79 @@
1
+ # TypeScript Pipelines
2
+
3
+ `src` contains the TypeScript runners that turn AcademicArmy planning artifacts into repeatable agent workflows.
4
+
5
+ [中文说明](README.zh-CN.md)
6
+
7
+ ## What This Code Owns
8
+
9
+ The CLI entry point is [`cli.ts`](cli.ts). It exposes three pipelines through [`package.json`](../package.json) scripts:
10
+
11
+ | Pipeline | Package script | What it does |
12
+ |---|---|---|
13
+ | `developing` | `npm run developing` | Runs the code development loop implemented in `developing/`. |
14
+ | `developing-skill` | `npm run developing-skill` | Runs the same development loop with a trajectory optimizer that can revise the coding-style skill from concrete development feedback. |
15
+ | `evolve-skill` | `npm run evolve-skill` | Runs the skill self-evolution loop implemented in `evolve-skill/`. |
16
+
17
+ [`pipeline.ts`](pipeline.ts) provides the shared wrapper used by these commands. It parses pipeline-specific arguments, loads one or more YAML config files with `coding-agent-forge`, builds an `AgentTeam` from the configured factories, runs the selected pipeline, and closes the team afterward.
18
+
19
+ ## Quick Start
20
+
21
+ Install dependencies once from the repository root:
22
+
23
+ ```bash
24
+ npm install
25
+ ```
26
+
27
+ The shared CLI shape is:
28
+
29
+ ```bash
30
+ npm run cli -- <pipeline> [...args]
31
+ ```
32
+
33
+ For most project workflows, use the prepared shell scripts:
34
+
35
+ ```bash
36
+ bash runs/develop.sh
37
+ bash metaskills/academic-army-architect/envolve.sh
38
+ ```
39
+
40
+ ## Directory Guide
41
+
42
+ | Path | Purpose |
43
+ |---|---|
44
+ | [`cli.ts`](cli.ts) | Selects a pipeline by name and forwards the remaining CLI arguments. |
45
+ | [`pipeline.ts`](pipeline.ts) | Shared pipeline definition, config loading, agent-team construction, and cleanup. |
46
+ | [`developing/`](developing/) | Reads `paper_blueprint.md`, `experiment_plan.md`, and `coding_plan.md`, then iteratively implements the target codebase. See [`developing/README.md`](developing/README.md). |
47
+ | [`developing/pipelineskill.ts`](developing/pipelineskill.ts) | Wraps the development loop with `trajectory-optimizer` hooks for improving the coding-style skill while development runs. |
48
+ | [`evolve-skill/`](evolve-skill/) | Runs a skill on a fixed task, evaluates the artifact against a metaskill, and asks a modifier agent to revise the skill. See [`evolve-skill/README.md`](evolve-skill/README.md). |
49
+
50
+ ## How The Shared Wrapper Works
51
+
52
+ Each pipeline provides pipeline-specific arguments and configured factories. [`pipeline.ts`](pipeline.ts) loads one or more YAML config files with `coding-agent-forge`, builds an `AgentTeam` from the configured factories, runs the selected pipeline, and closes the team afterward.
53
+
54
+ This keeps the config loading, agent-team construction, and cleanup shared across the TypeScript runners.
55
+
56
+ ## Relationship To Shell Scripts
57
+
58
+ Shell scripts under [`runs/`](../runs/) and the metaskill scripts described in [`metaskills/README.md`](../metaskills/README.md) are convenience wrappers around these TypeScript pipelines.
59
+
60
+ | Script | Calls |
61
+ |---|---|
62
+ | [`runs/develop.sh`](../runs/develop.sh) | `npm run developing` |
63
+ | [`runs/develop-skill.sh`](../runs/develop-skill.sh) | `npm run developing-skill` |
64
+ | `metaskills/*/envolve.sh` | `npm run evolve-skill` |
65
+
66
+ ## Development Checks
67
+
68
+ Run these before changing runner code:
69
+
70
+ ```bash
71
+ npm run check
72
+ npm run lint
73
+ ```
74
+
75
+ ## Where To Go Next
76
+
77
+ - Development loop details: [`developing/README.md`](developing/README.md)
78
+ - Skill evolution loop details: [`evolve-skill/README.md`](evolve-skill/README.md)
79
+ - User-facing skill evolution workflow: [`../metaskills/README.md`](../metaskills/README.md)
@@ -0,0 +1,79 @@
1
+ # TypeScript Pipelines
2
+
3
+ `src` 存放 AcademicArmy 的 TypeScript runner,用来把规划产物转成可重复运行的 agent workflow。
4
+
5
+ [English README](README.md)
6
+
7
+ ## 这层代码负责什么
8
+
9
+ CLI 入口是 [`cli.ts`](cli.ts)。它通过 [`package.json`](../package.json) scripts 暴露三个 pipeline:
10
+
11
+ | Pipeline | Package script | 作用 |
12
+ |---|---|---|
13
+ | `developing` | `npm run developing` | 运行 `developing/` 中实现的代码开发循环。 |
14
+ | `developing-skill` | `npm run developing-skill` | 运行带 `trajectory-optimizer` hook 的开发循环,可根据具体开发反馈优化 coding-style skill。 |
15
+ | `evolve-skill` | `npm run evolve-skill` | 运行 `evolve-skill/` 中实现的 skill self-evolution 循环。 |
16
+
17
+ [`pipeline.ts`](pipeline.ts) 是这些命令共用的封装层。它解析各 pipeline 自己的参数,使用 `coding-agent-forge` 加载一个或多个 YAML 配置文件,根据配置好的 factories 创建 `AgentTeam`,运行选中的 pipeline,并在结束后关闭 team。
18
+
19
+ ## 快速开始
20
+
21
+ 在仓库根目录先安装依赖:
22
+
23
+ ```bash
24
+ npm install
25
+ ```
26
+
27
+ 共享 CLI 形态是:
28
+
29
+ ```bash
30
+ npm run cli -- <pipeline> [...args]
31
+ ```
32
+
33
+ 多数项目 workflow 直接使用预设 shell scripts:
34
+
35
+ ```bash
36
+ bash runs/develop.sh
37
+ bash metaskills/academic-army-architect/envolve.sh
38
+ ```
39
+
40
+ ## 目录说明
41
+
42
+ | 路径 | 作用 |
43
+ |---|---|
44
+ | [`cli.ts`](cli.ts) | 根据名称选择 pipeline,并把剩余 CLI 参数传给对应 pipeline。 |
45
+ | [`pipeline.ts`](pipeline.ts) | 共享的 pipeline 定义、配置加载、agent team 构建和清理逻辑。 |
46
+ | [`developing/`](developing/) | 读取 `paper_blueprint.md`、`experiment_plan.md` 和 `coding_plan.md`,然后迭代实现目标代码库。详见 [`developing/README.zh-CN.md`](developing/README.zh-CN.md)。 |
47
+ | [`developing/pipelineskill.ts`](developing/pipelineskill.ts) | 给开发循环叠加 `trajectory-optimizer` hooks,用于在开发过程中优化 coding-style skill。 |
48
+ | [`evolve-skill/`](evolve-skill/) | 在固定任务上运行某个 skill,根据 metaskill 评价产物,并让 modifier agent 修改 skill。详见 [`evolve-skill/README.zh-CN.md`](evolve-skill/README.zh-CN.md)。 |
49
+
50
+ ## 共享 Wrapper 如何工作
51
+
52
+ 每个 pipeline 提供自己的参数和配置好的 factories。[`pipeline.ts`](pipeline.ts) 使用 `coding-agent-forge` 加载一个或多个 YAML 配置文件,根据配置好的 factories 创建 `AgentTeam`,运行选中的 pipeline,并在结束后关闭 team。
53
+
54
+ 这样 TypeScript runners 可以共享配置加载、agent team 构建和清理逻辑。
55
+
56
+ ## 和 Shell 脚本的关系
57
+
58
+ [`runs/`](../runs/) 和 [`metaskills/README.zh-CN.md`](../metaskills/README.zh-CN.md) 中说明的 metaskill scripts 是这些 TypeScript pipeline 的便捷包装。
59
+
60
+ | Script | 调用 |
61
+ |---|---|
62
+ | [`runs/develop.sh`](../runs/develop.sh) | `npm run developing` |
63
+ | [`runs/develop-skill.sh`](../runs/develop-skill.sh) | `npm run developing-skill` |
64
+ | `metaskills/*/envolve.sh` | `npm run evolve-skill` |
65
+
66
+ ## 开发检查
67
+
68
+ 修改 runner code 前运行:
69
+
70
+ ```bash
71
+ npm run check
72
+ npm run lint
73
+ ```
74
+
75
+ ## 下一步阅读
76
+
77
+ - 开发循环细节:[`developing/README.zh-CN.md`](developing/README.zh-CN.md)
78
+ - Skill evolution loop 细节:[`evolve-skill/README.zh-CN.md`](evolve-skill/README.zh-CN.md)
79
+ - 面向用户的 skill evolution workflow:[`../metaskills/README.zh-CN.md`](../metaskills/README.zh-CN.md)
package/src/cli.ts ADDED
@@ -0,0 +1,55 @@
1
+ import { type AgentVariablesByName } from "coding-agent-forge";
2
+ import { type PipelineDefinition, runPipelineCli } from "./pipeline.js";
3
+ import { developingPipeline, developingSkillPipeline } from "./developing/index.js";
4
+ import { evolveSkillPipeline } from "./evolve-skill/index.js";
5
+
6
/**
 * Wraps a pipeline definition as a named CLI entry.
 *
 * @param entry - The pipeline's CLI name, its help description, and the pipeline definition to run.
 * @returns An object carrying the same `name` and `description`, plus a `run(args)` function
 *   that passes the definition and the given arguments to `runPipelineCli`.
 */
+ function defineCli<VariablesByName extends AgentVariablesByName, Options>(entry: {
7
+ name: string;
8
+ description: string;
9
+ definition: PipelineDefinition<VariablesByName, Options>;
10
+ }) {
11
+ return {
12
+ name: entry.name,
13
+ description: entry.description,
14
+ run: (args: readonly string[]) => runPipelineCli(entry.definition, args),
15
+ };
16
+ }
17
+
18
// Pipelines selectable by name on the command line. The same list is used to
// render the help text and to look up the pipeline requested in process.argv.
+ const cliDefinitions = [
19
+ defineCli({
20
+ name: "developing",
21
+ description: "Run the code development loop.",
22
+ definition: developingPipeline,
23
+ }),
24
+ defineCli({
25
+ name: "developing-skill",
26
+ description: "Run the code development loop and evolve its skill.",
27
+ definition: developingSkillPipeline,
28
+ }),
29
+ defineCli({
30
+ name: "evolve-skill",
31
+ description: "Run the skill evolution loop.",
32
+ definition: evolveSkillPipeline,
33
+ }),
34
+ ] as const;
35
+
36
/**
 * Builds the usage text shown when no known pipeline name is given.
 *
 * @returns The usage line followed by one line per entry in `cliDefinitions`,
 *   each showing the pipeline name and its description.
 */
+ function buildHelp(): string {
37
+ const pipelineList = cliDefinitions
// Names are padded to 16 characters, the length of the longest current name
// ("developing-skill"), so the descriptions line up in one column.
38
+ .map((pipeline) => ` ${pipeline.name.padEnd(16)} ${pipeline.description}`)
39
+ .join("\n");
40
+
41
+ return `Usage: npm run cli -- <pipeline> [...args]
42
+
43
+ Available pipelines:
44
+ ${pipelineList}`;
45
+ }
46
+
47
// Entry point: the first CLI argument selects the pipeline; the remaining
// arguments are forwarded to that pipeline unchanged.
+ const [pipelineName, ...pipelineArgs] = process.argv.slice(2);
48
+ const pipelineDefinition = cliDefinitions.find((pipeline) => pipeline.name === pipelineName);
49
+
// A missing or unrecognized pipeline name prints the help text and marks the
// process as failed through process.exitCode instead of throwing.
50
+ if (pipelineDefinition === undefined) {
51
+ console.log(buildHelp());
52
+ process.exitCode = 1;
53
+ } else {
54
+ await pipelineDefinition.run(pipelineArgs);
55
+ }
@@ -0,0 +1,146 @@
1
+ # Developing Pipeline
2
+
3
+ [`src/developing`](.) implements the code-writing loop used after the three planning artifacts are ready.
4
+
5
+ [中文说明](README.zh-CN.md)
6
+
7
+ ## What This Pipeline Does
8
+
9
+ The usual entry point is [`runs/develop.sh`](../../runs/develop.sh), which calls `npm run developing` with paths for:
10
+
11
+ - `paper_blueprint.md`
12
+ - `experiment_plan.md`
13
+ - `coding_plan.md`
14
+ - `skills/academic-army-coding-style/SKILL.md`
15
+ - the artifact directory containing `TODO.md`
16
+ - the target codebase directory
17
+ - the development archive directory, passed through the current CLI option name `--achive-dir`
18
+
19
+ The pipeline works in the configured `--target-path`, maintains `TODO.md` under the configured `--artifact-path`, and archives per-iteration task/review artifacts under the configured archive directory.
20
+
21
+ For the overall TypeScript pipeline usage and entry points, see [`src/README.md`](../README.md).
22
+
23
+ ## Core Idea: Developing And Coding Style
24
+
25
+ `src/developing` turns the three planning artifacts into a repeatable code-writing trajectory. `coding-manager` reads the current repository and `TODO.md`, chooses one concrete developer task, `developer` edits the target repository, and `code-reviewer` either returns `ACCEPT` or sends revision feedback back into the same task.
26
+
27
+ The coding-style skill is [`skills/academic-army-coding-style/SKILL.md`](../../skills/academic-army-coding-style/SKILL.md). Its job is to control the code-writing agent's code structure and style. The upstream user task decides what to implement; this skill decides how to keep the implementation readable, local, low-coupling, and consistent with the current framework.
28
+
29
+ Every developer run loads the configured coding-style skill through `--coding-style-skill-path`. [`agents/developer.ts`](agents/developer.ts) prepends the instruction from [`agents/prompts.ts`](agents/prompts.ts): load and follow that skill before reading the blueprint, experiment plan, coding plan, repository files, and current task. That makes the writing agent use the same code-structure and style preferences across features, refactors, harness/test work, methods, baselines, metrics, result exports, and framework docs.
30
+
31
+ `academic-army-coding-style` is generic for any code-writing task. It does not decide the research method, experiment content, task priority, or repository template initialization. It only keeps code concise, readable, low-friction, easy to modify, and aligned with the existing repository structure.
32
+
33
+ Put durable code-structure and style preferences in [`metaskills/academic-army-coding-style/METASKILL.md`](../../metaskills/academic-army-coding-style/METASKILL.md), then run [`runs/develop-skill.sh`](../../runs/develop-skill.sh) from the repository root to update the skill.
34
+
35
+ ## Quick Start
36
+
37
+ From the repository root, run the prepared wrapper:
38
+
39
+ ```bash
40
+ bash runs/develop.sh
41
+ ```
42
+
43
+ The wrapper uses the conventional project paths listed above and writes code under `output/codebase`.
44
+
45
+ ## Direct Command
46
+
47
+ `runs/develop.sh` calls:
48
+
49
+ ```bash
50
+ npm run developing -- \
51
+ --config "agent-forge.yaml" \
52
+ --config "secret.yaml" \
53
+ --target-path "output/codebase" \
54
+ --achive-dir "output/developing-archives" \
55
+ --artifact-path "output/developing" \
56
+ --coding-style-skill-path "skills/academic-army-coding-style" \
57
+ --paper-blueprint-path "output/paper_blueprint.md" \
58
+ --experiment-plan-path "output/experiment_plan.md" \
59
+ --coding-plan-path "output/coding_plan.md" \
60
+ --max-iterations "100" \
61
+ --max-revision-iterations "10"
62
+ ```
63
+
64
+ The current CLI option name is `--achive-dir`.
65
+
66
+ ## Options Reference
67
+
68
+ | Option | Description |
69
+ |---|---|
70
+ | `--config` | One or more YAML config files loaded with `coding-agent-forge`. |
71
+ | `--target-path` | Target codebase directory. |
72
+ | `--achive-dir` | Development archive directory. |
73
+ | `--artifact-path` | Artifact directory containing `TODO.md`. |
74
+ | `--coding-style-skill-path` | Path to the coding-style skill that `developer` loads; `runs/develop.sh` passes `skills/academic-army-coding-style`. |
75
+ | `--paper-blueprint-path` | `paper_blueprint.md`. |
76
+ | `--experiment-plan-path` | `experiment_plan.md`. |
77
+ | `--coding-plan-path` | `coding_plan.md`. |
78
+ | `--max-iterations` | Maximum number of outer-loop iterations; the loop stops at this limit even if `coding-manager` has not returned `FINISHED`. |
79
+ | `--max-revision-iterations` | Limits the inner developer/reviewer repair loop. |
80
+
81
+ ## Main Flow
82
+
83
+ [`pipeline.ts`](pipeline.ts) parses CLI options and repeats an outer coding-manager task loop with an inner developer/reviewer repair loop.
84
+
85
+ Each iteration does the following:
86
+
87
+ 1. `coding-manager` scans the current repository and `TODO.md` in the artifact directory, then chooses one developer task.
88
+ 2. `developer` loads the configured coding-style skill, edits the repository, and reports what changed for review.
89
+ 3. `code-reviewer` reads the code and developer report, then returns exactly `ACCEPT` or revision feedback.
90
+ 4. If the reviewer returns feedback, `developer` fixes the same task and `code-reviewer` reviews again.
91
+ 5. After the review loop ends, the pipeline archives the task and reports, then asks `coding-manager` to update the TODO file.
92
+ 6. The pipeline stops when `coding-manager` returns `FINISHED` or `--max-iterations` is reached.
93
+
94
+ ## Developing-Skill And Trajectory Feedback
95
+
96
+ [`runs/develop-skill.sh`](../../runs/develop-skill.sh) calls the related `developing-skill` pipeline in [`pipelineskill.ts`](pipelineskill.ts). It runs the same development loop, adds `--metaskill-path`, and invokes `trajectory-optimizer` before the revision loop and after TODO updates so the coding-style skill can be improved from concrete development feedback.
97
+
98
+ The first `trajectory-optimizer` call runs in `scan` mode before the developer starts. It reads the target repository, the current coding-style skill, the blueprint, the experiment plan, and the coding plan so the optimizer has the same project context as the code-writing loop.
99
+
100
+ The second `trajectory-optimizer` call runs in `optimize` mode after the TODO update report is produced. It reads the metaskill, target repository, plans, current task, revision report, and TODO update report; evaluates whether the skill produced a good modification trajectory; then edits the coding-style skill directly. The prompt focuses the optimizer on missing, misleading, or redundant guidance that affected task selection, coding, review, or TODO update.
101
+
102
+ The intended loop is:
103
+
104
+ 1. Add code-style preferences, failure modes, and review tips to [`metaskills/academic-army-coding-style/METASKILL.md`](../../metaskills/academic-army-coding-style/METASKILL.md).
105
+ 2. Run `bash runs/develop-skill.sh`.
106
+ 3. Let `developer`, `code-reviewer`, `coding-manager`, and `trajectory-optimizer` expose where the current skill helped or failed.
107
+ 4. Inspect the updated [`skills/academic-army-coding-style/SKILL.md`](../../skills/academic-army-coding-style/SKILL.md), keep the useful changes, and repeat when new code-style preferences appear.
108
+
109
+ This is the coding-style version of skill self-improvement: the metaskill states what "good style guidance" means, the trajectory records how the agent actually modified code, and `developing-skill` uses that evidence to make the reusable skill more precise over time.
110
+
111
+ Related work points in the same direction, though `developing-skill` is a local AcademicArmy implementation rather than a direct implementation of these papers:
112
+
113
+ - [Reflexion](https://arxiv.org/abs/2303.11366) shows language agents improving across trials by turning task feedback into verbal reflection instead of updating model weights.
114
+ - [Agent Trajectory Explorer](https://research.ibm.com/publications/agent-trajectory-explorer-visualizing-and-providing-feedback-on-agent-trajectories) argues that raw agent trajectories need navigable formats so developers can inspect behavior and provide feedback for future improvement.
115
+ - [Agent-as-a-Judge](https://openreview.net/forum?id=Nn9POI9Ekt) evaluates agentic code-generation systems with an agentic evaluator that can consider the step-by-step task-solving process, not only the final output.
116
+ - [When Agents go Astray](https://arxiv.org/abs/2509.02360) studies trajectory-level errors in software-engineering agents and uses process feedback to detect and course-correct inefficient trajectories during execution.
117
+
118
+ ## Output Artifacts
119
+
120
+ The pipeline maintains:
121
+
122
+ | Artifact | Where it lives |
123
+ |---|---|
124
+ | `TODO.md` | Under the configured artifact directory; the coding-manager-maintained task list. |
125
+ | Timestamped archive folders | Under the configured archive directory; contains each selected task, per-revision reports, and TODO update reports. |
126
+
127
+ ## Important Files
128
+
129
+ | Path | Purpose |
130
+ |---|---|
131
+ | [`pipeline.ts`](pipeline.ts) | Argument parsing, loop orchestration, archive creation, and per-agent handoff. |
132
+ | [`pipelineskill.ts`](pipelineskill.ts) | `developing-skill` wrapper that adds trajectory optimization hooks around the base loop. |
133
+ | [`agents/factory.ts`](agents/factory.ts) | Registers the developing coding manager, developer, and reviewer agents. |
134
+ | [`agents/types.ts`](agents/types.ts) | Shared workspace-aware base class and variables. |
135
+ | [`agents/manager.ts`](agents/manager.ts) | Maintains the TODO file and selects outer-loop tasks. |
136
+ | [`agents/developer.ts`](agents/developer.ts) | Edits the target repository using the shared coding-style skill. |
137
+ | [`agents/reviewer.ts`](agents/reviewer.ts) | Performs the read-only code review gate. |
138
+ | [`agents/trajectory-optimizer.ts`](agents/trajectory-optimizer.ts) | Scans the project context, then evaluates the development trajectory and edits the coding-style skill directly for `developing-skill`. |
139
+
140
+ ## Troubleshooting
141
+
142
+ | Problem | Likely cause | Fix |
143
+ |---|---|---|
144
+ | The loop stops with `FINISHED` | `coding-manager` decided no further developer task is needed. | Inspect `TODO.md` in the artifact directory and the latest archive. |
145
+ | A task keeps returning revision feedback | The inner developer/reviewer repair loop has not reached `ACCEPT`. | Read the per-revision reports in the timestamped archive folder. |
146
+ | The archive option looks misspelled | The current CLI option name is `--achive-dir`. | Use the current option name until the CLI changes. |
@@ -0,0 +1,146 @@
1
+ # Developing Pipeline
2
+
3
+ [`src/developing`](.) 实现三份规划产物准备好之后使用的代码编写循环。
4
+
5
+ [English README](README.md)
6
+
7
+ ## 这个 Pipeline 做什么
8
+
9
+ 常用入口是 [`runs/develop.sh`](../../runs/develop.sh),它会调用 `npm run developing`,并传入以下路径:
10
+
11
+ - `paper_blueprint.md`
12
+ - `experiment_plan.md`
13
+ - `coding_plan.md`
14
+ - `skills/academic-army-coding-style/SKILL.md`
15
+ - 包含 `TODO.md` 的 artifact 目录
16
+ - 目标代码库目录
17
+ - development archive 目录;当前 CLI 参数名仍是 `--achive-dir`
18
+
19
+ pipeline 会在配置的 `--target-path` 中继续写代码,维护配置的 `--artifact-path` 下的 `TODO.md`,并把每轮 task/review 产物归档到 archive 目录。
20
+
21
+ TypeScript pipeline 的整体用法和入口见 [`src/README.zh-CN.md`](../README.zh-CN.md)。
22
+
23
+ ## 核心思想:Developing 和 Coding Style
24
+
25
+ `src/developing` 会把三份规划产物变成一条可重复执行的代码编写 trajectory。`coding-manager` 读取当前 repo 和 `TODO.md`,选择一个具体 developer task;`developer` 修改目标 repo;`code-reviewer` 返回严格的 `ACCEPT`,或者把 revision feedback 送回同一个 task。
26
+
27
+ coding-style skill 是 [`skills/academic-army-coding-style/SKILL.md`](../../skills/academic-army-coding-style/SKILL.md)。它的功能是控制写代码 agent 的代码结构和代码风格。上游用户任务决定要实现什么;这个 skill 决定如何让实现保持 readable、local、low-coupling,并和当前 framework 保持一致。
28
+
29
+ 每次 developer run 都会通过 `--coding-style-skill-path` 加载配置的 coding-style skill。[`agents/developer.ts`](agents/developer.ts) 会把 [`agents/prompts.ts`](agents/prompts.ts) 里的说明放到 developer prompt 前面:先 load and follow 这个 skill,再读取 blueprint、experiment plan、coding plan、repo files 和 current task。这样负责写代码的 agent 在 feature、refactor、harness/test work、methods、baselines、metrics、result exports 和 framework docs 等各种任务里,都会用同一套代码结构和风格偏好,保证输出的代码结构和风格统一。
30
+
31
+ `academic-army-coding-style` 对各种代码编写任务都是通用的。它不决定 research method、experiment content、task priority 或 repository template initialization;它只关心代码结构和风格,让代码 concise、readable、low-friction、easy to modify,并贴合现有 repo 结构。
32
+
33
+ 任何对代码结构和代码风格的长期偏好,都放进 [`metaskills/academic-army-coding-style/METASKILL.md`](../../metaskills/academic-army-coding-style/METASKILL.md),然后在仓库根目录运行 [`runs/develop-skill.sh`](../../runs/develop-skill.sh) 来更新这个 skill。
34
+
35
+ ## 快速开始
36
+
37
+ 在仓库根目录运行预设 wrapper:
38
+
39
+ ```bash
40
+ bash runs/develop.sh
41
+ ```
42
+
43
+ 使用这个 wrapper 按上面列出的项目约定路径在 `output/codebase` 下写代码。
44
+
45
+ ## 直接命令
46
+
47
+ `runs/develop.sh` 会调用:
48
+
49
+ ```bash
50
+ npm run developing -- \
51
+ --config "agent-forge.yaml" \
52
+ --config "secret.yaml" \
53
+ --target-path "output/codebase" \
54
+ --achive-dir "output/developing-archives" \
55
+ --artifact-path "output/developing" \
56
+ --coding-style-skill-path "skills/academic-army-coding-style" \
57
+ --paper-blueprint-path "output/paper_blueprint.md" \
58
+ --experiment-plan-path "output/experiment_plan.md" \
59
+ --coding-plan-path "output/coding_plan.md" \
60
+ --max-iterations "100" \
61
+ --max-revision-iterations "10"
62
+ ```
63
+
64
+ 当前 CLI 参数名是 `--achive-dir`。
65
+
66
+ ## 参数参考
67
+
68
+ | 参数 | 说明 |
69
+ |---|---|
70
+ | `--config` | 用 `coding-agent-forge` 加载的一个或多个 YAML config 文件。 |
71
+ | `--target-path` | 目标代码库目录。 |
72
+ | `--achive-dir` | Development archive 目录。 |
73
+ | `--artifact-path` | 包含 `TODO.md` 的 artifact 目录。 |
74
+ | `--coding-style-skill-path` | 配置的 coding-style skill。 |
75
+ | `--paper-blueprint-path` | `paper_blueprint.md`。 |
76
+ | `--experiment-plan-path` | `experiment_plan.md`。 |
77
+ | `--coding-plan-path` | `coding_plan.md`。 |
78
+ | `--max-iterations` | 外层循环的最大迭代次数;即使 `coding-manager` 尚未返回 `FINISHED`,达到该次数后也会停止。 |
79
+ | `--max-revision-iterations` | 限制内层 developer/reviewer 修复循环。 |
80
+
81
+ ## 主流程
82
+
83
+ [`pipeline.ts`](pipeline.ts) 负责解析 CLI 参数,并重复运行外层 coding-manager 选任务循环和内层 developer/reviewer 修复循环。
84
+
85
+ 每轮迭代执行以下步骤:
86
+
87
+ 1. `coding-manager` 扫描当前 repo 和 artifact 目录中的 `TODO.md`,然后选择一个 developer task。
88
+ 2. `developer` 加载配置的 coding-style skill,修改 repo,并报告自己改了哪些内容给 reviewer。
89
+ 3. `code-reviewer` 阅读代码和 developer report,返回严格的 `ACCEPT` 或 revision feedback。
90
+ 4. 如果 reviewer 返回 feedback,`developer` 继续修同一个任务,然后 `code-reviewer` 再审。
91
+ 5. review 循环结束后,pipeline 归档 task 和 reports,然后让 `coding-manager` 更新 TODO 文件。
92
+ 6. 当 `coding-manager` 返回 `FINISHED` 或达到 `--max-iterations` 时停止。
93
+
94
+ ## developing-skill 和 Trajectory Feedback
95
+
96
+ [`runs/develop-skill.sh`](../../runs/develop-skill.sh) 会调用 [`pipelineskill.ts`](pipelineskill.ts) 中的 `developing-skill` pipeline。它复用同一套开发循环,额外传入 `--metaskill-path`,并在 revision loop 前和 TODO 更新后调用 `trajectory-optimizer`,让 coding-style skill 能根据具体开发反馈继续优化。
97
+
98
+ 第一次 `trajectory-optimizer` 调用发生在 developer 开始前,使用 `scan` 模式。它会读取目标 repo、当前 coding-style skill、blueprint、experiment plan 和 coding plan,让 optimizer 拿到和代码编写循环相同的项目上下文。
99
+
100
+ 第二次 `trajectory-optimizer` 调用发生在 TODO update report 生成后,使用 `optimize` 模式。它会读取 metaskill、target repo、plans、current task、revision report 和 TODO update report;根据 [`metaskills/academic-army-coding-style/METASKILL.md`](../../metaskills/academic-army-coding-style/METASKILL.md) 中写的偏好评估这次修改 trajectory 的质量;然后直接修改 coding-style skill。这个 prompt 会重点检查哪些 guidance 缺失、误导或冗余,并看这些问题是否影响 task selection、coding、review 或 TODO update。
101
+
102
+ 推荐的使用循环是:
103
+
104
+ 1. 把代码风格偏好、failure modes 和 review tips 写进 [`metaskills/academic-army-coding-style/METASKILL.md`](../../metaskills/academic-army-coding-style/METASKILL.md)。
105
+ 2. 运行 `bash runs/develop-skill.sh`。
106
+ 3. 让 `developer`、`code-reviewer`、`coding-manager` 和 `trajectory-optimizer` 暴露当前 skill 在真实开发轨迹里哪里有效、哪里失效。
107
+ 4. 检查更新后的 [`skills/academic-army-coding-style/SKILL.md`](../../skills/academic-army-coding-style/SKILL.md),保留有用修改;当出现新的代码偏好时继续重复。
108
+
109
+ 这就是 coding-style 版本的 skill self-improvement:metaskill 说明什么是“好的代码风格 guidance”,trajectory 记录 agent 实际如何修改代码,`developing-skill` 根据这些证据修改可复用的 skill,让这个 skill 越用越强。
110
+
111
+ 相关研究也在支持类似方向;这里的 `developing-skill` 是 AcademicArmy 的本地实现,不是对下面论文的直接复现:
112
+
113
+ - [Reflexion](https://arxiv.org/abs/2303.11366) 展示了 language agents 可以把任务反馈转成 verbal reflection,在不更新模型权重的情况下跨 trial 改善表现。
114
+ - [Agent Trajectory Explorer](https://research.ibm.com/publications/agent-trajectory-explorer-visualizing-and-providing-feedback-on-agent-trajectories) 讨论了 raw agent trajectory 不适合直接做人工分析,需要更容易浏览的格式来检查行为并提供 future improvement feedback。
115
+ - [Agent-as-a-Judge](https://openreview.net/forum?id=Nn9POI9Ekt) 使用 agentic evaluator 评价 agentic code-generation systems,强调评价时不只看 final output,也看 step-by-step task-solving process。
116
+ - [When Agents go Astray](https://arxiv.org/abs/2509.02360) 研究 software-engineering agents 的 trajectory-level errors,并用 process feedback 在执行中检测和纠正低效 trajectory。
117
+
118
+ ## 输出产物
119
+
120
+ pipeline 会维护:
121
+
122
+ | Artifact | 位置 |
123
+ |---|---|
124
+ | `TODO.md` | 配置的 artifact 目录下,由 coding-manager 维护的任务列表。 |
125
+ | 按时间戳归档的文件夹 | 配置的 archive 目录下,保存 selected task、每次 revision 的 reports 和 TODO update reports。 |
126
+
127
+ ## 重要文件
128
+
129
+ | 路径 | 作用 |
130
+ |---|---|
131
+ | [`pipeline.ts`](pipeline.ts) | 参数解析、循环编排、archive 创建和各 agent 之间的交接。 |
132
+ | [`pipelineskill.ts`](pipelineskill.ts) | 给基础开发循环增加 trajectory optimization hooks 的 `developing-skill` 包装。 |
133
+ | [`agents/factory.ts`](agents/factory.ts) | 注册 developing coding manager、developer 和 reviewer agents。 |
134
+ | [`agents/types.ts`](agents/types.ts) | 共享的 workspace-aware base class 和变量定义。 |
135
+ | [`agents/manager.ts`](agents/manager.ts) | 维护 TODO 文件并选择外层任务。 |
136
+ | [`agents/developer.ts`](agents/developer.ts) | 使用共享 coding-style skill 修改目标 repo。 |
137
+ | [`agents/reviewer.ts`](agents/reviewer.ts) | 执行只读代码审阅 gate。 |
138
+ | [`agents/trajectory-optimizer.ts`](agents/trajectory-optimizer.ts) | 扫描项目上下文,评估开发轨迹,并为 `developing-skill` 直接修改 coding-style skill。 |
139
+
140
+ ## 常见问题
141
+
142
+ | 问题 | 常见原因 | 解决办法 |
143
+ |---|---|---|
144
+ | Loop 以 `FINISHED` 停止 | `coding-manager` 判断不需要继续选择 developer task。 | 检查 artifact 目录中的 `TODO.md` 和最新 archive。 |
145
+ | 某个任务持续返回 revision feedback | 内层 developer/reviewer 修复循环尚未达到 `ACCEPT`。 | 阅读按时间戳归档的 per-revision reports。 |
146
+ | Archive 参数看起来拼错 | 当前 CLI 参数名就是 `--achive-dir`。 | 在 CLI 改名前继续使用当前参数名。 |
@@ -0,0 +1,40 @@
1
+ import { codingStyleSkillInstruction } from "./prompts.js";
2
+ import { DevelopingAgent, type DevelopingAgentVariables } from "./types.js";
3
+
4
+ export type DeveloperVariables = DevelopingAgentVariables & { // Prompt inputs for DeveloperAgent: the shared DevelopingAgentVariables plus the two fields below.
5
+ currentTask: string; // Task text; interpolated under "Current developer task:" in the developer prompt.
6
+ reviewerReport?: string; // Optional reviewer feedback; the prompt prints "(none)" when this is null or undefined.
7
+ };
8
+
9
+ export class DeveloperAgent extends DevelopingAgent<DeveloperVariables> { // Agent whose prompt asks for code changes in the target repository for a single task.
10
+ protected buildPrompt(variables: Readonly<DeveloperVariables>): string { // Returns the complete prompt string assembled from `variables`; the argument is only read.
11
+ const codingStyleSkillPath = this.workspaceRelativePath(variables.codingStyleSkillPath); // workspaceRelativePath is not defined in this class; presumably it maps a path to a workspace-relative one — confirm in types.ts.
12
+ const targetPath = this.workspaceRelativePath(variables.targetPath); // Repository the prompt restricts the agent to.
13
+ const paperBlueprintPath = this.workspaceRelativePath(variables.paperBlueprintPath); // Listed under "Read:" in the prompt.
14
+ const experimentPlanPath = this.workspaceRelativePath(variables.experimentPlanPath); // Listed under "Read:" in the prompt.
15
+ const codingPlanPath = this.workspaceRelativePath(variables.codingPlanPath); // Listed under "Read:" in the prompt.
16
+ const reviewerReport = variables.reviewerReport ?? "(none)"; // Keeps the "Reviewer report:" section present even when no report was supplied.
17
+ const codingStyleSkillInstructionText = codingStyleSkillInstruction(codingStyleSkillPath); // Helper from prompts.js (not shown); its output is the first section of the prompt.
18
+ // Everything between the backticks below is prompt text sent as-is; do not add comments inside it.
19
+ return `
20
+ ${codingStyleSkillInstructionText}
21
+
22
+ Work only in the target repository at ${targetPath}/.
23
+
24
+ Read:
25
+ - paper blueprint: ${paperBlueprintPath}
26
+ - experiment plan: ${experimentPlanPath}
27
+ - coding plan: ${codingPlanPath}
28
+
29
+ Current developer task:
30
+ ${variables.currentTask}
31
+
32
+ Reviewer report:
33
+ ${reviewerReport}
34
+
35
+ Modify the target repository code for the current task. If a reviewer report is present, update the code according to that report.
36
+
37
+ Output a concise developer report with the main changes.
38
+ `;
39
+ }
40
+ }
@@ -0,0 +1,11 @@
1
+ import type { AgentFactoryMap } from "coding-agent-forge";
2
+
3
+ import { DeveloperAgent } from "./developer.js";
4
+ import { CodingManagerAgent } from "./manager.js";
5
+ import { CodeReviewerAgent } from "./reviewer.js";
6
+
7
+ export const agentFactories: AgentFactoryMap = { // Maps each agent name to a factory that forwards (thread, constants) to the matching agent class constructor.
8
+ "coding-manager": (thread, constants) => new CodingManagerAgent(thread, constants), // Agent class imported from ./manager.js.
9
+ developer: (thread, constants) => new DeveloperAgent(thread, constants), // Agent class imported from ./developer.js.
10
+ "code-reviewer": (thread, constants) => new CodeReviewerAgent(thread, constants), // Agent class imported from ./reviewer.js.
11
+ };
@@ -0,0 +1,8 @@
1
+ export { CodingManagerAgent, type CodingManagerVariables } from "./manager.js";
2
+ export { DeveloperAgent, type DeveloperVariables } from "./developer.js";
3
+ export { CodeReviewerAgent, type CodeReviewerVariables } from "./reviewer.js";
4
+ export {
5
+ TrajectoryOptimizerAgent,
6
+ type TrajectoryOptimizerVariables,
7
+ } from "./trajectory-optimizer.js";
8
+ export { agentFactories } from "./factory.js";
@@ -0,0 +1,74 @@
1
+ import { codingStyleSkillInstruction, goalInstruction } from "./prompts.js";
2
+ import { DevelopingAgent, type DevelopingAgentVariables } from "./types.js";
3
+
4
+ type SelectCodingManagerVariables = DevelopingAgentVariables & { // Inputs for the "select" phase, in which the manager prompt asks for the next developer task.
5
+ todoPath: string; // TODO file the select prompt tells the agent to scan.
6
+ finishMark: string; // Exact text the select prompt asks the agent to return when no further task is needed.
7
+ phase: "select"; // Literal discriminant of the CodingManagerVariables union.
8
+ };
9
+
10
+ type UpdateCodingManagerVariables = DevelopingAgentVariables & { // Inputs for the "update" phase, in which the manager prompt asks for a TODO file update after a developer task.
11
+ todoPath: string; // TODO file the update prompt restricts edits to.
12
+ finishMark: string; // Declared for parity with the select variant; the update prompt in this file does not interpolate it.
13
+ phase: "update"; // Literal discriminant; CodingManagerAgent.buildPrompt branches on this value.
14
+ currentTask: string; // Interpolated under "Current developer task:" in the update prompt.
15
+ revisionReport: string; // Interpolated under "Revision report:" in the update prompt.
16
+ };
17
+
18
+ export type CodingManagerVariables = SelectCodingManagerVariables | UpdateCodingManagerVariables; // Union discriminated by `phase` ("select" | "update").
19
+
20
+ export class CodingManagerAgent extends DevelopingAgent<CodingManagerVariables> { // Agent with two prompts: one to update the TODO file, one to select the next developer task.
21
+ protected buildPrompt(variables: Readonly<CodingManagerVariables>): string { // Returns the update prompt when variables.phase is "update", otherwise the select prompt.
22
+ const codingStyleSkillPath = this.workspaceRelativePath(variables.codingStyleSkillPath); // workspaceRelativePath is not defined in this class; presumably it maps a path to a workspace-relative one — confirm in types.ts.
23
+ const targetPath = this.workspaceRelativePath(variables.targetPath); // Repository both prompts tell the agent to scan.
24
+ const paperBlueprintPath = this.workspaceRelativePath(variables.paperBlueprintPath); // Listed under "Read:" in both prompts.
25
+ const experimentPlanPath = this.workspaceRelativePath(variables.experimentPlanPath); // Listed under "Read:" in both prompts.
26
+ const codingPlanPath = this.workspaceRelativePath(variables.codingPlanPath); // Listed under "Read:" in both prompts.
27
+ const todoPath = this.workspaceRelativePath(variables.todoPath); // TODO file: edited in the update phase, scanned in the select phase.
28
+ const codingStyleSkillInstructionText = codingStyleSkillInstruction(codingStyleSkillPath); // Helper from prompts.js (not shown); its output opens both prompts.
29
+ const goalInstructionText = goalInstruction(variables.goal); // Helper from prompts.js (not shown); its output is placed directly after the "Read:" list in both prompts.
30
+ // Update phase: the comparison below also narrows `variables`, making currentTask and revisionReport available.
31
+ if (variables.phase === "update") {
32
+ return `
33
+ ${codingStyleSkillInstructionText}
34
+
35
+ Update the TODO file after a developer task.
36
+ Work only in the TODO file at ${todoPath}. Scan the target repository at ${targetPath}/ before editing it.
37
+
38
+ Read:
39
+ - paper blueprint: ${paperBlueprintPath}
40
+ - experiment plan: ${experimentPlanPath}
41
+ - coding plan: ${codingPlanPath}
42
+ ${goalInstructionText}
43
+
44
+ Current developer task:
45
+ ${variables.currentTask}
46
+
47
+ Revision report:
48
+ ${variables.revisionReport}
49
+
50
+ The revision report lists each Developer report and Reviewer report from the review loop, ending with whether the Reviewer accepted the changes or the loop reached the max revision iterations.
51
+
52
+ Update the TODO so completed work and future developer tasks match the current repository. If you find a better future plan, update it too.
53
+ `;
54
+ }
55
+ // Select phase: the prompt asks for exactly one new task, or exactly variables.finishMark when nothing is left.
56
+ return `
57
+ ${codingStyleSkillInstructionText}
58
+
59
+ Select the next developer task for the target repository.
60
+ Scan the target repository at ${targetPath}/ and the TODO file at ${todoPath}.
61
+
62
+ Read:
63
+ - paper blueprint: ${paperBlueprintPath}
64
+ - experiment plan: ${experimentPlanPath}
65
+ - coding plan: ${codingPlanPath}
66
+ ${goalInstructionText}
67
+
68
+ Choose exactly one new bounded task for the Developer.
69
+
70
+ When no further developer task is needed, return exactly:
71
+ ${variables.finishMark}
72
+ `;
73
+ }
74
+ }