academic-army 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.editorconfig +9 -0
- package/.github/workflows/publish.yml +44 -0
- package/.prettierrc.json +3 -0
- package/LICENSE +21 -0
- package/README.md +172 -0
- package/README.zh-CN.md +172 -0
- package/agent-forge.yaml +83 -0
- package/eslint.config.js +28 -0
- package/install_mcp.py +85 -0
- package/mcp-server/__main__.py +33 -0
- package/mcp-server/deepresearch/__init__.py +3 -0
- package/mcp-server/deepresearch/tools.py +33 -0
- package/mcp-server/requirements.txt +4 -0
- package/metaskills/README.md +131 -0
- package/metaskills/README.zh-CN.md +131 -0
- package/metaskills/academic-army-architect/METASKILL.md +91 -0
- package/metaskills/academic-army-architect/envolve.sh +9 -0
- package/metaskills/academic-army-coding-plan/ENVOLVETASK.md +1 -0
- package/metaskills/academic-army-coding-plan/METASKILL.md +118 -0
- package/metaskills/academic-army-coding-plan/envolve.sh +9 -0
- package/metaskills/academic-army-coding-style/METASKILL.md +292 -0
- package/metaskills/academic-army-experiment-plan/ENVOLVETASK.md +1 -0
- package/metaskills/academic-army-experiment-plan/METASKILL.md +82 -0
- package/metaskills/academic-army-experiment-plan/envolve.sh +9 -0
- package/metaskills/academic-army-repo-scaffold/ENVOLVETASK.md +1 -0
- package/metaskills/academic-army-repo-scaffold/METASKILL.md +223 -0
- package/metaskills/academic-army-repo-scaffold/envolve.sh +9 -0
- package/package.json +35 -0
- package/runs/develop-skill.sh +17 -0
- package/runs/develop.sh +16 -0
- package/skills/academic-army-architect/SKILL.md +336 -0
- package/skills/academic-army-architect/agents/openai.yaml +11 -0
- package/skills/academic-army-architect/references/blueprint-schema.md +345 -0
- package/skills/academic-army-coding-plan/SKILL.md +491 -0
- package/skills/academic-army-coding-plan/agents/openai.yaml +11 -0
- package/skills/academic-army-coding-style/SKILL.md +915 -0
- package/skills/academic-army-coding-style/agents/openai.yaml +11 -0
- package/skills/academic-army-experiment-plan/SKILL.md +517 -0
- package/skills/academic-army-experiment-plan/agents/openai.yaml +11 -0
- package/skills/academic-army-repo-scaffold/SKILL.md +756 -0
- package/skills/academic-army-repo-scaffold/agents/openai.yaml +10 -0
- package/src/README.md +79 -0
- package/src/README.zh-CN.md +79 -0
- package/src/cli.ts +55 -0
- package/src/developing/README.md +146 -0
- package/src/developing/README.zh-CN.md +146 -0
- package/src/developing/agents/developer.ts +40 -0
- package/src/developing/agents/factory.ts +11 -0
- package/src/developing/agents/index.ts +8 -0
- package/src/developing/agents/manager.ts +74 -0
- package/src/developing/agents/prompts.ts +12 -0
- package/src/developing/agents/reviewer.ts +44 -0
- package/src/developing/agents/trajectory-optimizer.ts +70 -0
- package/src/developing/agents/types.ts +41 -0
- package/src/developing/index.ts +2 -0
- package/src/developing/pipeline.ts +306 -0
- package/src/developing/pipelineskill.ts +169 -0
- package/src/evolve-skill/README.md +116 -0
- package/src/evolve-skill/README.zh-CN.md +116 -0
- package/src/evolve-skill/agents/evaluator.ts +28 -0
- package/src/evolve-skill/agents/factory.ts +11 -0
- package/src/evolve-skill/agents/index.ts +4 -0
- package/src/evolve-skill/agents/modifier.ts +27 -0
- package/src/evolve-skill/agents/runner.ts +19 -0
- package/src/evolve-skill/index.ts +1 -0
- package/src/evolve-skill/pipeline.ts +140 -0
- package/src/pipeline.ts +65 -0
- package/tsconfig.json +22 -0
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# Evolve Skill Pipeline
|
|
2
|
+
|
|
3
|
+
[`src/evolve-skill`](.) implements the self-evolution loop used by the metaskill evolution scripts described in [`../../metaskills/README.md`](../../metaskills/README.md). It is for improving an existing skill by repeatedly testing it on a fixed task, evaluating the produced artifact, and applying targeted revisions.
|
|
4
|
+
|
|
5
|
+
[中文说明](README.zh-CN.md)
|
|
6
|
+
|
|
7
|
+
For the overall TypeScript pipeline usage and entry points, see [`src/README.md`](../README.md).
|
|
8
|
+
|
|
9
|
+
For the user-facing optimization workflow, see [`../../metaskills/README.md`](../../metaskills/README.md).
|
|
10
|
+
|
|
11
|
+
## Quick Start
|
|
12
|
+
|
|
13
|
+
When a skill output is weak, add concrete tips to the matching metaskill file, then run that skill's evolution script from the repository root. See [`../../metaskills/README.md`](../../metaskills/README.md) for the prepared scripts and path mapping.
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
bash metaskills/academic-army-architect/envolve.sh
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Direct Command
|
|
20
|
+
|
|
21
|
+
Run the pipeline directly from the repository root:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
npm run evolve-skill -- \
|
|
25
|
+
--config agent-forge.yaml \
|
|
26
|
+
--skill-path skills/academic-army-architect \
|
|
27
|
+
--artifact-path output/evolve-academic-army-architect \
|
|
28
|
+
--metaskill-path metaskills/academic-army-architect/METASKILL.md \
|
|
29
|
+
--task-path metaskills/academic-army-architect/ENVOLVETASK.md
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Options Reference
|
|
33
|
+
|
|
34
|
+
Required arguments:
|
|
35
|
+
|
|
36
|
+
| Option | Description |
|
|
37
|
+
|---|---|
|
|
38
|
+
| `--skill-path` | The skill directory or file to revise. |
|
|
39
|
+
| `--artifact-path` | The output folder. It is cleared and recreated at the start of every round, then shared by all runner tasks in that round. |
|
|
40
|
+
| `--metaskill-path` | The metaskill design document used by evaluator and modifier. |
|
|
41
|
+
| `--task-path` | The fixed task used by the runner to test the skill. Repeat this option to run multiple fixed tasks per round. |
|
|
42
|
+
|
|
43
|
+
Optional arguments:
|
|
44
|
+
|
|
45
|
+
```text
|
|
46
|
+
--rounds 5
|
|
47
|
+
Number of self-evolve rounds to run.
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
`--rounds` defaults to `3`.
|
|
51
|
+
|
|
52
|
+
## Main Flow
|
|
53
|
+
|
|
54
|
+
[`pipeline.ts`](pipeline.ts) parses:
|
|
55
|
+
|
|
56
|
+
- `--skill-path`: the skill directory or file to revise.
|
|
57
|
+
- `--artifact-path`: the output directory cleared and reused each round.
|
|
58
|
+
- `--metaskill-path`: the design document and tips used to judge and revise the skill.
|
|
59
|
+
- `--task-path`: one or more fixed tasks used to test the skill.
|
|
60
|
+
- `--rounds`: the number of self-evolve rounds, defaulting to `3`.
|
|
61
|
+
|
|
62
|
+
Each round does the following:
|
|
63
|
+
|
|
64
|
+
1. Clear and recreate `--artifact-path`.
|
|
65
|
+
2. For each configured `--task-path`, create a fresh `skill-runner` agent to run the target skill on that fixed task and write artifacts.
|
|
66
|
+
3. Use `skill-evaluator` to evaluate the artifact against the current metaskill guidance file.
|
|
67
|
+
4. Pass the evaluator review to `skill-modifier`, which revises the target skill using the same metaskill guidance.
|
|
68
|
+
|
|
69
|
+
The fresh runner keeps each artifact independent from earlier runner context. The evaluator and modifier are invoked through the shared `AgentTeam` so their configured agent behavior is centralized in the pipeline config.
|
|
70
|
+
|
|
71
|
+
## Loop Behavior
|
|
72
|
+
|
|
73
|
+
The loop keeps skill development grounded in concrete output. Instead of rewriting a skill from vague impressions, it tests the skill on a stable task, evaluates the resulting artifact against the metaskill, and then asks Codex to revise the skill based on concrete feedback.
|
|
74
|
+
|
|
75
|
+
The pipeline keeps two long-lived Codex threads through the shared team:
|
|
76
|
+
|
|
77
|
+
1. `skill-evaluator`: reviews artifacts across rounds.
|
|
78
|
+
2. `skill-modifier`: edits the target skill across rounds.
|
|
79
|
+
|
|
80
|
+
Each round also creates a fresh one-time `skill-runner` thread for each configured task. The runner has no memory from previous rounds or tasks, so earlier artifacts do not pollute the next run.
|
|
81
|
+
|
|
82
|
+
The loop deliberately stays simple:
|
|
83
|
+
|
|
84
|
+
1. A fresh runner thread runs the target skill for each configured task and writes artifacts to an output folder.
|
|
85
|
+
2. A long-lived evaluator thread reviews the artifact using the metaskill.
|
|
86
|
+
3. A long-lived modifier thread edits the skill according to the evaluator feedback.
|
|
87
|
+
4. The next round starts with a fresh runner thread.
|
|
88
|
+
|
|
89
|
+
This avoids LangGraph, state machines, registries, and defensive wrapper code. The important state lives in the long-lived evaluator/modifier Codex sessions, the current artifact folder, and the files being revised.
|
|
90
|
+
|
|
91
|
+
## Inputs And Outputs
|
|
92
|
+
|
|
93
|
+
| Item | Path source |
|
|
94
|
+
|---|---|
|
|
95
|
+
| Target skill | `--skill-path` |
|
|
96
|
+
| Metaskill | `--metaskill-path` |
|
|
97
|
+
| Fixed task | `--task-path` |
|
|
98
|
+
| Generated artifact folder | `--artifact-path`, cleared and reused each round |
|
|
99
|
+
|
|
100
|
+
## Important Files
|
|
101
|
+
|
|
102
|
+
| Path | Purpose |
|
|
103
|
+
|---|---|
|
|
104
|
+
| [`pipeline.ts`](pipeline.ts) | Argument parsing and round orchestration. |
|
|
105
|
+
| [`agents/factory.ts`](agents/factory.ts) | Registers `skill-runner`, `skill-evaluator`, and `skill-modifier`. |
|
|
106
|
+
| [`agents/runner.ts`](agents/runner.ts) | Reads each fixed task file configured by `--task-path` and asks the target skill to write artifacts. |
|
|
107
|
+
| [`agents/evaluator.ts`](agents/evaluator.ts) | Reads the metaskill file configured by `--metaskill-path` and critiques the produced artifact. |
|
|
108
|
+
| [`agents/modifier.ts`](agents/modifier.ts) | Reads the metaskill file and the evaluator review, then revises the target skill. |
|
|
109
|
+
|
|
110
|
+
## Troubleshooting
|
|
111
|
+
|
|
112
|
+
| Problem | Likely cause | Fix |
|
|
113
|
+
|---|---|---|
|
|
114
|
+
| Artifacts disappear between rounds | `--artifact-path` is cleared and recreated at the start of every round. | Use a dedicated `output/evolve-*` folder, and copy out any artifacts you want to keep before the next round starts. |
|
|
115
|
+
| The output still feels weak | The loop needs concrete metaskill guidance. | Add concrete tips to the matching metaskill file and run the script again. |
|
|
116
|
+
| Runner context seems to influence results | The runner should be fresh each round. | Check the pipeline config and archive the generated artifacts for comparison. |
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# Evolve Skill Pipeline
|
|
2
|
+
|
|
3
|
+
[`src/evolve-skill`](.) 实现 [`../../metaskills/README.zh-CN.md`](../../metaskills/README.zh-CN.md) 中说明的 metaskill evolution scripts 所使用的 self-evolution loop。它用于优化已有 skill:反复在固定任务上测试该 skill,评价产出的 artifact,并根据评价进行有针对性的修改。
|
|
4
|
+
|
|
5
|
+
[English README](README.md)
|
|
6
|
+
|
|
7
|
+
TypeScript pipeline 的整体用法和入口见 [`src/README.zh-CN.md`](../README.zh-CN.md)。
|
|
8
|
+
|
|
9
|
+
面向用户的优化流程见 [`../../metaskills/README.zh-CN.md`](../../metaskills/README.zh-CN.md)。
|
|
10
|
+
|
|
11
|
+
## 快速开始
|
|
12
|
+
|
|
13
|
+
当某个 skill 的输出不理想时,先把具体 tips 加到对应的 metaskill 文件,然后在仓库根目录运行该 skill 的 evolution 脚本。预设脚本和路径对应关系见 [`../../metaskills/README.zh-CN.md`](../../metaskills/README.zh-CN.md)。
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
bash metaskills/academic-army-architect/envolve.sh
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## 直接命令
|
|
20
|
+
|
|
21
|
+
可以在仓库根目录直接运行 pipeline:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
npm run evolve-skill -- \
|
|
25
|
+
--config agent-forge.yaml \
|
|
26
|
+
--skill-path skills/academic-army-architect \
|
|
27
|
+
--artifact-path output/evolve-academic-army-architect \
|
|
28
|
+
--metaskill-path metaskills/academic-army-architect/METASKILL.md \
|
|
29
|
+
--task-path metaskills/academic-army-architect/ENVOLVETASK.md
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## 参数参考
|
|
33
|
+
|
|
34
|
+
必填参数:
|
|
35
|
+
|
|
36
|
+
| 参数 | 说明 |
|
|
37
|
+
|---|---|
|
|
38
|
+
| `--skill-path` | 要修改的 skill 目录或文件。 |
|
|
39
|
+
| `--artifact-path` | 每轮 runner 清空并复用的输出文件夹。 |
|
|
40
|
+
| `--metaskill-path` | evaluator 和 modifier 使用的 metaskill 设计文档。 |
|
|
41
|
+
| `--task-path` | runner 用来测试 skill 的固定任务文件;可重复传入多个固定任务。 |
|
|
42
|
+
|
|
43
|
+
可选参数:
|
|
44
|
+
|
|
45
|
+
```text
|
|
46
|
+
--rounds 5
|
|
47
|
+
要运行的 self-evolve 轮数。
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
`--rounds` 默认是 `3`。
|
|
51
|
+
|
|
52
|
+
## 主流程
|
|
53
|
+
|
|
54
|
+
[`pipeline.ts`](pipeline.ts) 解析以下参数:
|
|
55
|
+
|
|
56
|
+
- `--skill-path`:要修改的 skill 目录或文件。
|
|
57
|
+
- `--artifact-path`:每轮都会清空并复用的输出目录。
|
|
58
|
+
- `--metaskill-path`:用于评价和修改 skill 的设计文档与 tips。
|
|
59
|
+
- `--task-path`:用于测试 skill 的一个或多个固定任务。
|
|
60
|
+
- `--rounds`:self-evolve 轮数,默认是 `3`。
|
|
61
|
+
|
|
62
|
+
每一轮执行以下步骤:
|
|
63
|
+
|
|
64
|
+
1. 清空并重新创建 `--artifact-path`。
|
|
65
|
+
2. 对每个配置的 `--task-path` 创建新的 `skill-runner` agent,让目标 skill 在该固定任务上运行并写出 artifacts。
|
|
66
|
+
3. 使用 `skill-evaluator` 根据当前 metaskill 指导文件评价 artifact。
|
|
67
|
+
4. 把 evaluator review 交给 `skill-modifier`,让它基于同一份 metaskill 指导修改目标 skill。
|
|
68
|
+
|
|
69
|
+
新建 runner 可以避免前一轮 runner 上下文污染下一轮 artifact。evaluator 和 modifier 通过共享的 `AgentTeam` 调用,因此它们的 agent 配置集中在 pipeline config 中。
|
|
70
|
+
|
|
71
|
+
## Loop 行为
|
|
72
|
+
|
|
73
|
+
这个 loop 让 skill 的迭代基于具体产物,而不是凭模糊感觉重写 skill。它会让 skill 在一个固定任务上产出 artifact,再根据 metaskill 检查这个 artifact,最后让 Codex 根据具体反馈修改 skill。
|
|
74
|
+
|
|
75
|
+
pipeline 通过共享 team 保留两个长生命周期 Codex thread:
|
|
76
|
+
|
|
77
|
+
1. `skill-evaluator`:跨轮次评价 artifact。
|
|
78
|
+
2. `skill-modifier`:跨轮次修改目标 skill。
|
|
79
|
+
|
|
80
|
+
每一轮还会为每个配置的 task 新建一个一次性的 `skill-runner` thread。runner 不保留上一轮或其他 task 的上下文,避免旧 artifact 或旧对话污染下一轮输出。
|
|
81
|
+
|
|
82
|
+
整个 loop 保持简单:
|
|
83
|
+
|
|
84
|
+
1. 针对每个配置的 task 新建一次性的 runner thread,运行目标 skill,并把产物写入输出文件夹。
|
|
85
|
+
2. 长生命周期的 evaluator thread 根据 metaskill 评价 artifact。
|
|
86
|
+
3. 长生命周期的 modifier thread 根据评价修改 skill。
|
|
87
|
+
4. 下一轮重新新建 runner thread。
|
|
88
|
+
|
|
89
|
+
这里刻意不引入 LangGraph、状态机、registry 或复杂的 defensive wrapper。关键状态只保留在 evaluator/modifier 两个长期 Codex session、当前 artifact 文件夹,以及被修改的文件里。
|
|
90
|
+
|
|
91
|
+
## 输入和输出
|
|
92
|
+
|
|
93
|
+
| 项目 | 路径来源 |
|
|
94
|
+
|---|---|
|
|
95
|
+
| 目标 skill | `--skill-path` |
|
|
96
|
+
| Metaskill | `--metaskill-path` |
|
|
97
|
+
| 固定任务 | `--task-path` |
|
|
98
|
+
| 生成 artifact 文件夹 | `--artifact-path`,每轮清空并复用 |
|
|
99
|
+
|
|
100
|
+
## 重要文件
|
|
101
|
+
|
|
102
|
+
| 路径 | 作用 |
|
|
103
|
+
|---|---|
|
|
104
|
+
| [`pipeline.ts`](pipeline.ts) | 参数解析和轮次编排。 |
|
|
105
|
+
| [`agents/factory.ts`](agents/factory.ts) | 注册 `skill-runner`、`skill-evaluator` 和 `skill-modifier`。 |
|
|
106
|
+
| [`agents/runner.ts`](agents/runner.ts) | 读取每个 `--task-path` 配置的固定任务文件,并要求目标 skill 写出 artifacts。 |
|
|
107
|
+
| [`agents/evaluator.ts`](agents/evaluator.ts) | 读取 `--metaskill-path` 配置的 metaskill 文件,并评价产出的 artifact。 |
|
|
108
|
+
| [`agents/modifier.ts`](agents/modifier.ts) | 读取 metaskill 文件和 evaluator review,然后修改目标 skill。 |
|
|
109
|
+
|
|
110
|
+
## 常见问题
|
|
111
|
+
|
|
112
|
+
| 问题 | 常见原因 | 解决办法 |
|
|
113
|
+
|---|---|---|
|
|
114
|
+
| Artifacts 在轮次之间消失 | `--artifact-path` 每轮都会清空并复用。 | 使用专门的 `output/evolve-*` 文件夹。 |
|
|
115
|
+
| 输出仍然不理想 | Loop 需要具体 metaskill guidance。 | 把具体 tips 加到对应 metaskill 文件,然后再次运行脚本。 |
|
|
116
|
+
| Runner 上下文似乎影响结果 | Runner 应该每轮新建。 | 检查 pipeline config,并归档生成 artifacts 方便比较。 |
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { Agent } from "coding-agent-forge/agent";
|
|
2
|
+
import { readFileSync } from "node:fs";
|
|
3
|
+
|
|
4
|
+
/** Per-run values interpolated into the `skill-evaluator` prompt. */
export type SkillEvaluatorVariables = {
  /** Location of the artifact to evaluate; inserted verbatim into the prompt. */
  artifactPath: string;
  /** Path of the metaskill file whose text is read and embedded in the prompt. */
  metaskillPath: string;
  /** Task description text embedded in the prompt as-is. */
  taskDescriptions: string;
};
|
|
9
|
+
|
|
10
|
+
/**
 * Agent whose prompt asks for a critique of a skill-produced artifact,
 * judged against the goals and tips written in a metaskill file.
 */
export class SkillEvaluatorAgent extends Agent<SkillEvaluatorVariables> {
  /**
   * Assembles the evaluation prompt from its four sections.
   *
   * The metaskill file is read synchronously on every call, so the prompt
   * reflects the file's content at the time the prompt is built.
   *
   * @param variables - Artifact location, metaskill path and task descriptions.
   * @returns The prompt text, starting and ending with a newline.
   * @throws Whatever `readFileSync` throws when the metaskill file cannot be read.
   */
  protected buildPrompt(variables: Readonly<SkillEvaluatorVariables>): string {
    const metaskillText = readFileSync(variables.metaskillPath, "utf8");

    const subject = `\nEvaluate the artifact at ${variables.artifactPath}. It was produced by a skill.\n`;
    const taskSection = `\nThe artifacts were created based on the following task descriptions:\n\n${variables.taskDescriptions}\n`;
    const metaskillSection = `\nThe metaskill below contains the design goals and tips of this skill:\n\n${metaskillText}\n`;
    const questions =
      "\nBased on these goals and tips, are there any problems in the artifact produced by this skill? Are there any redundant parts?" +
      "\nCarefully inspect both the language and the content, and use that analysis to explain how this skill can be optimized.\n";

    return subject + taskSection + metaskillSection + questions;
  }
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { AgentFactoryMap } from "coding-agent-forge";
|
|
2
|
+
|
|
3
|
+
import { SkillEvaluatorAgent } from "./evaluator.js";
|
|
4
|
+
import { SkillModifierAgent } from "./modifier.js";
|
|
5
|
+
import { SkillRunnerAgent } from "./runner.js";
|
|
6
|
+
|
|
7
|
+
/**
 * Factory table for the evolve-skill pipeline, keyed by agent name.
 * Each factory forwards the received thread and constants to the matching agent class.
 */
export const agentFactories: AgentFactoryMap = {
  "skill-runner": (thread, constants) => {
    return new SkillRunnerAgent(thread, constants);
  },
  "skill-evaluator": (thread, constants) => {
    return new SkillEvaluatorAgent(thread, constants);
  },
  "skill-modifier": (thread, constants) => {
    return new SkillModifierAgent(thread, constants);
  },
};
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
export { agentFactories } from "./factory.js";
|
|
2
|
+
export { SkillEvaluatorAgent, type SkillEvaluatorVariables } from "./evaluator.js";
|
|
3
|
+
export { SkillModifierAgent, type SkillModifierVariables } from "./modifier.js";
|
|
4
|
+
export { SkillRunnerAgent, type SkillRunnerVariables } from "./runner.js";
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { Agent } from "coding-agent-forge/agent";
|
|
2
|
+
import { readFileSync } from "node:fs";
|
|
3
|
+
|
|
4
|
+
/** Per-run values interpolated into the `skill-modifier` prompt. */
export type SkillModifierVariables = {
  /** Location of the skill to revise; inserted verbatim into the prompt. */
  skillPath: string;
  /** Path of the metaskill file whose text is read and embedded in the prompt. */
  metaskillPath: string;
  /** Feedback text embedded at the end of the prompt. */
  review: string;
};
|
|
9
|
+
|
|
10
|
+
/**
 * Agent whose prompt asks for a revision of a skill, guided by a metaskill
 * file and by feedback on an artifact the skill produced.
 */
export class SkillModifierAgent extends Agent<SkillModifierVariables> {
  /**
   * Assembles the revision prompt: instruction, metaskill text, then feedback.
   *
   * The metaskill file is read synchronously on every call.
   *
   * @param variables - Skill location, metaskill path and feedback text.
   * @returns The prompt text, starting and ending with a newline.
   * @throws Whatever `readFileSync` throws when the metaskill file cannot be read.
   */
  protected buildPrompt(variables: Readonly<SkillModifierVariables>): string {
    const metaskillText = readFileSync(variables.metaskillPath, "utf8");

    const instruction = `\nRevise the skill at ${variables.skillPath} using the feedback below. The feedback is based on an artifact produced by this skill.\n`;
    const guidance = `\nThe metaskill below contains the design goals and tips of this skill:\n\n${metaskillText}\n\nConsider these design goals and tips when revising.\n`;
    const feedback = `\nFeedback:\n\n${variables.review}\n`;

    return instruction + guidance + feedback;
  }
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { Agent } from "coding-agent-forge/agent";
|
|
2
|
+
import { readFileSync } from "node:fs";
|
|
3
|
+
|
|
4
|
+
/** Per-run values interpolated into the `skill-runner` prompt. */
export type SkillRunnerVariables = {
  /** Location of the skill to use; inserted verbatim into the prompt. */
  skillPath: string;
  /** Location where the prompt tells the agent to save its output files. */
  artifactPath: string;
  /** Path of the task file whose text is read and embedded in the prompt. */
  taskPath: string;
};
|
|
9
|
+
|
|
10
|
+
/**
 * Agent whose prompt asks for a task to be completed with a given skill,
 * with the output files saved under a given location.
 */
export class SkillRunnerAgent extends Agent<SkillRunnerVariables> {
  /**
   * Assembles the run prompt: one instruction line followed by the task text.
   *
   * The task file is read synchronously on every call.
   *
   * @param variables - Skill location, artifact location and task file path.
   * @returns The prompt text, starting and ending with a newline.
   * @throws Whatever `readFileSync` throws when the task file cannot be read.
   */
  protected buildPrompt(variables: Readonly<SkillRunnerVariables>): string {
    const taskText = readFileSync(variables.taskPath, "utf8");

    const instruction = `\nUse the skill at ${variables.skillPath} to complete the task below. Save all relevant output files in ${variables.artifactPath}.\n`;
    const taskSection = `\n${taskText}\n`;

    return instruction + taskSection;
  }
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from "./pipeline.js";
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
import { AgentTeam, type RecordCallback } from "coding-agent-forge";
|
|
2
|
+
import { mkdir, readFile, rm } from "node:fs/promises";
|
|
3
|
+
import { parseArgs } from "node:util";
|
|
4
|
+
import { definePipeline } from "../pipeline.js";
|
|
5
|
+
import type { ParsedPipelineArgs } from "../pipeline.js";
|
|
6
|
+
import { agentFactories } from "./agents/index.js";
|
|
7
|
+
import type {
|
|
8
|
+
SkillEvaluatorVariables,
|
|
9
|
+
SkillModifierVariables,
|
|
10
|
+
SkillRunnerVariables,
|
|
11
|
+
} from "./agents/index.js";
|
|
12
|
+
|
|
13
|
+
/** Maps each agent name used by the evolve-skill pipeline to the variables its prompt takes. */
export type EvolveSkillAgentVariables = {
  "skill-runner": SkillRunnerVariables;
  "skill-evaluator": SkillEvaluatorVariables;
  "skill-modifier": SkillModifierVariables;
};
|
|
18
|
+
|
|
19
|
+
/** Run options for the evolve-skill loop, as produced from the command-line arguments. */
export type EvolveSkillOptions = {
  /** Value of `--skill-path`: the skill handed to the runner and the modifier. */
  skillPath: string;
  /** Value of `--artifact-path`: the folder removed and recreated at the start of every round. */
  artifactPath: string;
  /** Value of `--metaskill-path`: the file handed to the evaluator and the modifier. */
  metaskillPath: string;
  /** Every `--task-path` value, in command-line order; the runner is run once per entry each round. */
  taskPaths: readonly string[];
  /** Number of rounds the loop executes. */
  rounds: number;
};
|
|
26
|
+
|
|
27
|
+
/** Usage text used as the error message when arguments are missing or invalid. */
const USAGE =
  "Usage: npm run evolve-skill -- --config <path> --skill-path <path> --artifact-path <folder> --metaskill-path <path> --task-path <path> [--task-path <path> ...] [--rounds <positive-integer>]";

/**
 * Parses the evolve-skill command-line arguments.
 *
 * `--config` and `--task-path` may be repeated; `--skill-path`,
 * `--artifact-path` and `--metaskill-path` are single values. All five are
 * required. `--rounds` is optional and defaults to `3`.
 *
 * @param args - Raw command-line arguments (without the node/script prefix).
 * @returns The config paths plus the options for the evolve-skill loop.
 * @throws Error carrying the usage text when a required option is missing, or
 *   when `--rounds` is not a positive integer. Errors raised by
 *   `node:util` `parseArgs` itself (e.g. for unknown options) propagate unchanged.
 */
export function parseEvolveSkillArgs(
  args: readonly string[],
): ParsedPipelineArgs<EvolveSkillOptions> {
  const {
    values: {
      config,
      "skill-path": skillPath,
      "artifact-path": artifactPath,
      "metaskill-path": metaskillPath,
      "task-path": taskPath,
      rounds,
    },
  } = parseArgs({
    args: [...args],
    options: {
      config: { type: "string", multiple: true },
      "skill-path": { type: "string" },
      "artifact-path": { type: "string" },
      "metaskill-path": { type: "string" },
      "task-path": { type: "string", multiple: true },
      rounds: { type: "string" },
    },
  });

  if (
    config === undefined ||
    skillPath === undefined ||
    artifactPath === undefined ||
    metaskillPath === undefined ||
    taskPath === undefined
  ) {
    throw new Error(USAGE);
  }

  // `--rounds` arrives as a string. Without validation, a value such as "abc"
  // becomes NaN and "0" stays 0, and the round loop would silently do nothing.
  let roundCount = 3;
  if (rounds !== undefined) {
    roundCount = /^\d+$/.test(rounds) ? Number(rounds) : Number.NaN;
    if (!Number.isSafeInteger(roundCount) || roundCount < 1) {
      throw new Error(`--rounds must be a positive integer (received "${rounds}").\n${USAGE}`);
    }
  }

  return {
    configPaths: config,
    runningOptions: {
      skillPath,
      artifactPath,
      metaskillPath,
      taskPaths: taskPath,
      rounds: roundCount,
    },
  };
}
|
|
75
|
+
|
|
76
|
+
/**
 * Runs the evolve-skill loop for `options.rounds` rounds.
 *
 * Every round: the artifact folder is removed and recreated, a newly created
 * `skill-runner` agent is run once per task path, then `skill-evaluator` is
 * run through the team and its trimmed output is passed as the review to
 * `skill-modifier`. Streamed records, the review and the edit output are
 * printed to the console.
 *
 * @param team - Agent team providing `createAgent` and `runStreamed`.
 * @param options - Paths and round count for the loop.
 * @throws Propagates file-system errors and any error raised by the agents.
 */
export async function evolveSkill(
  team: AgentTeam<EvolveSkillAgentVariables>,
  options: EvolveSkillOptions,
): Promise<void> {
  const printRecord: RecordCallback = (thread, record) => {
    console.log(thread.recordToPrettyString(record));
  };

  // Task files are read once, before the first round; the same numbered text
  // is handed to the evaluator in every round.
  const numberedTasks = await Promise.all(
    options.taskPaths.map(async (path, position) => {
      const contents = await readFile(path, "utf8");
      return `Task ${String(position + 1)}: ${contents}`;
    }),
  );
  const taskDescriptions = numberedTasks.join("\n\n");

  let round = 1;
  while (round <= options.rounds) {
    // Start the round from an empty artifact folder.
    await rm(options.artifactPath, { recursive: true, force: true });
    await mkdir(options.artifactPath, { recursive: true });

    // One newly created runner agent per task; all tasks write into the same folder.
    for (const taskPath of options.taskPaths) {
      const runner = await team.createAgent("skill-runner");
      await runner.runStreamed(
        { skillPath: options.skillPath, artifactPath: options.artifactPath, taskPath },
        printRecord,
      );
    }

    const evaluatorOutput = await team.runStreamed(
      "skill-evaluator",
      {
        artifactPath: options.artifactPath,
        metaskillPath: options.metaskillPath,
        taskDescriptions,
      },
      printRecord,
    );
    const review = evaluatorOutput.trim();
    console.log(`\n# Review\n${review}\n`);

    const edit = await team.runStreamed(
      "skill-modifier",
      { skillPath: options.skillPath, metaskillPath: options.metaskillPath, review },
      printRecord,
    );
    console.log(`# Edit\n${edit}\n`);

    round++;
  }
}
|
|
135
|
+
|
|
136
|
+
/**
 * Pipeline definition for the evolve-skill loop: bundles the agent factories,
 * the argument parser and the run function into one object.
 */
export const evolveSkillPipeline = definePipeline({
  agentFactories,
  parseArgs: parseEvolveSkillArgs,
  run: evolveSkill,
});
|
package/src/pipeline.ts
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import {
|
|
2
|
+
AgentTeam,
|
|
3
|
+
isPlainObject,
|
|
4
|
+
loadYamls,
|
|
5
|
+
mergeConfig,
|
|
6
|
+
type AgentFactoryMap,
|
|
7
|
+
type AgentVariablesByName,
|
|
8
|
+
type PlainObject,
|
|
9
|
+
} from "coding-agent-forge";
|
|
10
|
+
|
|
11
|
+
/** Result of a pipeline's argument parser. */
export type ParsedPipelineArgs<Options> = {
  /** Config file paths; `runPipelineCli` spreads them into `loadYamls`. */
  configPaths: readonly string[];
  /** Pipeline-specific options, passed unchanged to the pipeline's `run` function. */
  runningOptions: Options;
};
|
|
15
|
+
|
|
16
|
+
/** The three parts that make up a runnable pipeline. */
export type PipelineDefinition<VariablesByName extends AgentVariablesByName, Options> = {
  /** Agent factories; their keys decide which agent names the team is built with. */
  agentFactories: AgentFactoryMap;
  /** Turns raw command-line arguments into config paths and run options. */
  parseArgs: (args: readonly string[]) => ParsedPipelineArgs<Options>;
  /** Executes the pipeline with the constructed team and the parsed options. */
  run: (team: AgentTeam<VariablesByName>, options: Options) => Promise<void>;
};
|
|
21
|
+
|
|
22
|
+
/**
 * Returns the given pipeline definition unchanged.
 *
 * NOTE(review): presumably exists so call sites get their object literal
 * checked against `PipelineDefinition` with the generics inferred — confirm.
 *
 * @param definition - The pipeline definition.
 * @returns The same `definition` object.
 */
export function definePipeline<VariablesByName extends AgentVariablesByName, Options>(
  definition: PipelineDefinition<VariablesByName, Options>,
): PipelineDefinition<VariablesByName, Options> {
  return definition;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Builds the `AgentTeam` for a pipeline from the loaded configuration.
 *
 * Entries under `rawConfig.agents` whose name has no factory in the
 * definition are dropped. The remaining config is merged, via `mergeConfig`,
 * with an `agents` object holding `{ kind: <name> }` for every factory name.
 * NOTE(review): which side wins on conflicting keys depends on `mergeConfig`,
 * which is not visible here — confirm.
 *
 * @param rawConfig - Loaded configuration; must contain a plain-object `agents` entry.
 * @param definition - Pipeline definition supplying the agent factories.
 * @returns A team constructed from the merged config and the factories.
 * @throws Error when `rawConfig.agents` is not a plain object.
 */
function buildPipelineAgentTeam<VariablesByName extends AgentVariablesByName, Options>(
  rawConfig: PlainObject,
  definition: PipelineDefinition<VariablesByName, Options>,
): AgentTeam<VariablesByName> {
  const declaredAgents = rawConfig.agents;
  if (!isPlainObject(declaredAgents)) {
    throw new Error("Config must define an agents object");
  }

  const { agentFactories } = definition;

  // Keep only the configured agents this pipeline has a factory for.
  const knownEntries = Object.entries(declaredAgents).filter(([agentName]) =>
    Object.hasOwn(agentFactories, agentName),
  );
  const configuredAgents = Object.fromEntries(knownEntries);

  // One `{ kind: <name> }` entry per factory name.
  const kindEntries = Object.keys(agentFactories).map(
    (agentName): [string, { kind: string }] => [agentName, { kind: agentName }],
  );
  const agents = Object.fromEntries(kindEntries);

  const teamConfig = mergeConfig({ ...rawConfig, agents: configuredAgents }, { agents });
  return new AgentTeam<VariablesByName>(teamConfig, agentFactories);
}
|
|
51
|
+
|
|
52
|
+
/**
 * Command-line entry point shared by pipelines: parses the arguments, loads
 * the YAML config files, builds the agent team and runs the pipeline.
 *
 * The team is closed once it has been built, whether the run succeeds or throws.
 *
 * @param definition - The pipeline to run.
 * @param args - Raw command-line arguments handed to the pipeline's parser.
 * @throws Propagates errors from argument parsing, config loading, team
 *   construction and the pipeline run.
 */
export async function runPipelineCli<VariablesByName extends AgentVariablesByName, Options>(
  definition: PipelineDefinition<VariablesByName, Options>,
  args: readonly string[],
): Promise<void> {
  const parsed = definition.parseArgs(args);
  const loadedConfig = await loadYamls(...parsed.configPaths);
  const team = buildPipelineAgentTeam(loadedConfig, definition);

  try {
    await definition.run(team, parsed.runningOptions);
  } finally {
    // Runs on success and on failure of the pipeline run.
    await team.close();
  }
}
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "ES2022",
|
|
4
|
+
"module": "NodeNext",
|
|
5
|
+
"moduleResolution": "NodeNext",
|
|
6
|
+
"strict": true,
|
|
7
|
+
"declaration": true,
|
|
8
|
+
"declarationMap": true,
|
|
9
|
+
"sourceMap": true,
|
|
10
|
+
"outDir": "dist",
|
|
11
|
+
"rootDir": "src",
|
|
12
|
+
"exactOptionalPropertyTypes": true,
|
|
13
|
+
"noFallthroughCasesInSwitch": true,
|
|
14
|
+
"noImplicitReturns": true,
|
|
15
|
+
"noUncheckedIndexedAccess": true,
|
|
16
|
+
"noUnusedLocals": true,
|
|
17
|
+
"noUnusedParameters": true,
|
|
18
|
+
"skipLibCheck": true,
|
|
19
|
+
"types": ["node"]
|
|
20
|
+
},
|
|
21
|
+
"include": ["src/**/*.ts"]
|
|
22
|
+
}
|