scientify 1.4.1 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +8 -0
- package/dist/index.js.map +1 -1
- package/dist/src/hooks/inject-skill.d.ts +11 -0
- package/dist/src/hooks/inject-skill.d.ts.map +1 -0
- package/dist/src/hooks/inject-skill.js +70 -0
- package/dist/src/hooks/inject-skill.js.map +1 -0
- package/package.json +4 -4
- package/skills/idea-generation/SKILL.md +1 -1
- package/skills/install-scientify/SKILL.md +1 -1
- package/skills/literature-survey/SKILL.md +77 -12
- package/skills/research-experiment/SKILL.md +71 -5
- package/skills/research-implement/SKILL.md +1 -1
- package/skills/research-pipeline/SKILL.md +101 -59
- package/skills/research-plan/SKILL.md +21 -7
- package/skills/research-review/SKILL.md +86 -21
- package/skills/research-survey/SKILL.md +26 -8
- package/skills/write-review-paper/SKILL.md +1 -1
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,UAAU,CAAC;AAkBlD,MAAM,CAAC,OAAO,UAAU,QAAQ,CAAC,GAAG,EAAE,iBAAiB,QAiFtD"}
|
package/dist/index.js
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
|
+
import path from "node:path";
|
|
1
2
|
import { handleResearchStatus, handlePapers, handleIdeas, handleProjects, handleProjectSwitch, handleProjectDelete, } from "./src/commands.js";
|
|
2
3
|
import { createArxivSearchTool } from "./src/tools/arxiv-search.js";
|
|
3
4
|
import { createArxivDownloadTool } from "./src/tools/arxiv-download.js";
|
|
4
5
|
import { createGithubSearchTool } from "./src/tools/github-search-tool.js";
|
|
5
6
|
import { createAutoUpdaterService } from "./src/services/auto-updater.js";
|
|
7
|
+
import { createSkillInjectionHook } from "./src/hooks/inject-skill.js";
|
|
6
8
|
// Default: check every hour
|
|
7
9
|
const UPDATE_CHECK_INTERVAL_MS = 60 * 60 * 1000;
|
|
8
10
|
export default function register(api) {
|
|
@@ -67,6 +69,12 @@ export default function register(api) {
|
|
|
67
69
|
requireAuth: true, // Require auth for destructive operation
|
|
68
70
|
handler: handleProjectDelete,
|
|
69
71
|
});
|
|
72
|
+
// Inject SKILL.md content into sessions_spawn tasks.
|
|
73
|
+
// Sub-agents run in "minimal" prompt mode and don't see <available_skills>,
|
|
74
|
+
// so this hook reads the matching SKILL.md and embeds it in the task body.
|
|
75
|
+
// api.source = entry file path (e.g. dist/index.js); findPluginRoot() walks
|
|
76
|
+
// up to locate openclaw.plugin.json, which is always at the plugin root.
|
|
77
|
+
api.on("before_tool_call", createSkillInjectionHook(path.dirname(api.source)));
|
|
70
78
|
api.logger.info("Scientify plugin loaded successfully");
|
|
71
79
|
}
|
|
72
80
|
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,WAAW,CAAC;AAE7B,OAAO,EACL,oBAAoB,EACpB,YAAY,EACZ,WAAW,EACX,cAAc,EACd,mBAAmB,EACnB,mBAAmB,GACpB,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EAAE,qBAAqB,EAAE,MAAM,6BAA6B,CAAC;AACpE,OAAO,EAAE,uBAAuB,EAAE,MAAM,+BAA+B,CAAC;AACxE,OAAO,EAAE,sBAAsB,EAAE,MAAM,mCAAmC,CAAC;AAC3E,OAAO,EAAE,wBAAwB,EAAE,MAAM,gCAAgC,CAAC;AAC1E,OAAO,EAAE,wBAAwB,EAAE,MAAM,6BAA6B,CAAC;AAEvE,4BAA4B;AAC5B,MAAM,wBAAwB,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC;AAEhD,MAAM,CAAC,OAAO,UAAU,QAAQ,CAAC,GAAsB;IACrD,iBAAiB;IACjB,GAAG,CAAC,YAAY,CAAC,qBAAqB,EAAE,CAAC,CAAC;IAC1C,GAAG,CAAC,YAAY,CAAC,uBAAuB,EAAE,CAAC,CAAC;IAC5C,GAAG,CAAC,YAAY,CAAC,sBAAsB,EAAE,CAAC,CAAC;IAE3C,iDAAiD;IACjD,MAAM,YAAY,GAAG,GAAG,CAAC,YAAoD,CAAC;IAC9E,MAAM,iBAAiB,GAAG,YAAY,EAAE,UAAU,KAAK,KAAK,CAAC,CAAC,qBAAqB;IAEnF,IAAI,iBAAiB,EAAE,CAAC;QACtB,GAAG,CAAC,eAAe,CACjB,wBAAwB,CAAC;YACvB,WAAW,EAAE,WAAW;YACxB,eAAe,EAAE,wBAAwB;YACzC,MAAM,EAAE;gBACN,IAAI,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC;gBACnC,IAAI,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC;gBACnC,KAAK,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,GAAG,CAAC;aACxC;SACF,CAAC,CACH,CAAC;IACJ,CAAC;IAED,sCAAsC;IACtC,GAAG,CAAC,eAAe,CAAC;QAClB,IAAI,EAAE,iBAAiB;QACvB,WAAW,EAAE,gEAAgE;QAC7E,WAAW,EAAE,KAAK;QAClB,WAAW,EAAE,KAAK;QAClB,OAAO,EAAE,oBAAoB;KAC9B,CAAC,CAAC;IAEH,GAAG,CAAC,eAAe,CAAC;QAClB,IAAI,EAAE,QAAQ;QACd,WAAW,EAAE,0CAA0C;QACvD,WAAW,EAAE,IAAI;QACjB,WAAW,EAAE,KAAK;QAClB,OAAO,EAAE,YAAY;KACtB,CAAC,CAAC;IAEH,GAAG,CAAC,eAAe,CAAC;QAClB,IAAI,EAAE,OAAO;QACb,WAAW,EAAE,wCAAwC;QACrD,WAAW,EAAE,IAAI;QACjB,WAAW,EAAE,KAAK;QAClB,OAAO,EAAE,WAAW;KACrB,CAAC,CAAC;IAEH,GAAG,CAAC,eAAe,CAAC;QAClB,IAAI,EAAE,UAAU;QAChB,WAAW,EAAE,4BAA4B;QACzC,WAAW,EAAE,KAAK;QAClB,WAAW,EAAE,KAAK;QAClB,OAAO,EAAE,cAAc;KACxB,CAAC,CAAC;IAEH,GAAG,CAAC,eAAe,CAAC;QAClB,IAAI,EAAE,gBAAgB;QACtB,WAAW,EAAE,wCAAwC;QACrD,WAAW,EAAE,IAAI;QACjB,WAAW,EAAE,KAAK;QAClB,OAAO,EAAE,mBAAmB;KAC7B,
CAAC,CAAC;IAEH,GAAG,CAAC,eAAe,CAAC;QAClB,IAAI,EAAE,gBAAgB;QACtB,WAAW,EAAE,oDAAoD;QACjE,WAAW,EAAE,IAAI;QACjB,WAAW,EAAE,IAAI,EAAE,yCAAyC;QAC5D,OAAO,EAAE,mBAAmB;KAC7B,CAAC,CAAC;IAEH,qDAAqD;IACrD,4EAA4E;IAC5E,2EAA2E;IAC3E,4EAA4E;IAC5E,yEAAyE;IACzE,GAAG,CAAC,EAAE,CAAC,kBAAkB,EAAE,wBAAwB,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IAE/E,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,sCAAsC,CAAC,CAAC;AAC1D,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * Builds the hook handler that the plugin entry file registers for the
 * "before_tool_call" event.
 *
 * @param entryFileDir - Directory of the plugin entry file; the implementation
 *   walks up from here to find the plugin root and its `skills/` directory.
 * @returns An async handler taking the tool-call event (`toolName`, `params`)
 *   and a context object. It resolves to `{ params }` when the call's
 *   parameters should be replaced, or to nothing when the call is left as is.
 */
export declare function createSkillInjectionHook(entryFileDir: string): (event: {
|
|
2
|
+
toolName: string;
|
|
3
|
+
params: Record<string, unknown>;
|
|
4
|
+
}, _ctx: {
|
|
5
|
+
agentId?: string;
|
|
6
|
+
sessionKey?: string;
|
|
7
|
+
toolName: string;
|
|
8
|
+
}) => Promise<{
|
|
9
|
+
params: Record<string, unknown>;
|
|
10
|
+
} | void>;
|
|
11
|
+
//# sourceMappingURL=inject-skill.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"inject-skill.d.ts","sourceRoot":"","sources":["../../../src/hooks/inject-skill.ts"],"names":[],"mappings":"AAgCA,wBAAgB,wBAAwB,CAAC,YAAY,EAAE,MAAM,IAKzD,OAAO;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CAAE,EAC5D,MAAM;IAAE,OAAO,CAAC,EAAE,MAAM,CAAC;IAAC,UAAU,CAAC,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAA;CAAE,KAChE,OAAO,CAAC;IAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CAAE,GAAG,IAAI,CAAC,CAqCvD"}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import fsSync from "node:fs";
|
|
3
|
+
import path from "node:path";
|
|
4
|
+
/**
|
|
5
|
+
* Build a before_tool_call hook that injects SKILL.md content into
|
|
6
|
+
* sessions_spawn task parameters.
|
|
7
|
+
*
|
|
8
|
+
* When the orchestrator spawns a sub-agent with task="/research-survey\n...",
|
|
9
|
+
* this hook reads the matching SKILL.md and appends its body to the task,
|
|
10
|
+
* so the sub-agent receives full workflow instructions even though its
|
|
11
|
+
* system prompt runs in "minimal" mode (no <available_skills> section).
|
|
12
|
+
*/
|
|
13
|
+
/**
 * Locate the plugin root directory.
 *
 * Starting at `startDir`, each directory and then its parent is checked for
 * the manifest file; the first directory that contains it is returned.
 *
 * The start directory is resolved to an absolute path before walking:
 * `path.dirname()` of a relative path bottoms out at "." and would otherwise
 * never look above the current working directory.
 *
 * @param {string} startDir - Directory to start searching from.
 * @param {string} [manifestName="openclaw.plugin.json"] - File name that marks
 *   the plugin root (defaults to the canonical plugin manifest).
 * @returns {string} Absolute path of the directory holding the manifest, or
 *   `startDir` unchanged when no ancestor contains it.
 */
function findPluginRoot(startDir, manifestName = "openclaw.plugin.json") {
    let dir = path.resolve(startDir);
    for (;;) {
        if (fsSync.existsSync(path.join(dir, manifestName))) {
            return dir;
        }
        const parent = path.dirname(dir);
        if (parent === dir) {
            // Reached the filesystem root without finding the manifest:
            // fall back to the caller's starting directory.
            return startDir;
        }
        dir = parent;
    }
}
|
|
31
|
+
/**
 * Remove a leading YAML frontmatter block from a SKILL.md document.
 *
 * Frontmatter is only recognised when the opening and closing `---` each sit
 * on a line of their own, so a `---` that appears inside a frontmatter value
 * (e.g. in a description string) no longer ends the block early. A leading
 * UTF-8 BOM is dropped first so it cannot hide the opening delimiter.
 *
 * @param {string} content - Raw file content.
 * @returns {string} The document body with surrounding whitespace trimmed.
 */
function stripFrontmatter(content) {
    return content
        .replace(/^\uFEFF/, "")
        .replace(/^---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---[ \t]*(?:\r?\n|$)/, "")
        .trim();
}
/**
 * Build a before_tool_call hook that injects SKILL.md content into
 * sessions_spawn task parameters.
 *
 * When a task starts with "/skill-name", the hook reads
 * `<pluginRoot>/skills/<skill-name>/SKILL.md`, strips its frontmatter and
 * inserts the remaining body right after the first line of the task.
 *
 * @param {string} entryFileDir - Directory of the plugin entry file; the
 *   plugin root is located by walking up from here.
 * @returns {(event: {toolName: string, params: Record<string, unknown>}, _ctx: object) => Promise<{params: Record<string, unknown>} | void>}
 *   Async handler. Resolves to `{ params }` with the rewritten `task` when a
 *   skill body was injected, and to `undefined` when the call is left as is.
 */
export function createSkillInjectionHook(entryFileDir) {
    const pluginRoot = findPluginRoot(entryFileDir);
    const skillsDir = path.join(pluginRoot, "skills");
    return async (event, _ctx) => {
        if (event.toolName !== "sessions_spawn") {
            return;
        }
        const task = event.params?.task;
        if (typeof task !== "string") {
            return;
        }
        // Extract /skill-name from the first line. The character class keeps
        // the name free of path separators, so it cannot escape skillsDir.
        const match = task.match(/^\/([a-z][\w-]*)/);
        if (!match) {
            return;
        }
        const skillName = match[1];
        const skillMdPath = path.join(skillsDir, skillName, "SKILL.md");
        let content;
        try {
            content = await fs.readFile(skillMdPath, "utf-8");
        }
        catch {
            // No readable SKILL.md for this name: not one of our skills, so
            // the tool call is deliberately passed through unchanged.
            return;
        }
        const body = stripFrontmatter(content);
        if (!body) {
            return;
        }
        // Inject after the first line (/skill-name), before the rest of the context.
        const firstNewline = task.indexOf("\n");
        const newTask = firstNewline === -1
            ? task + "\n\n" + body
            : task.slice(0, firstNewline) +
                "\n\n" +
                body +
                "\n\n" +
                task.slice(firstNewline + 1);
        return { params: { ...event.params, task: newTask } };
    };
}
|
|
70
|
+
//# sourceMappingURL=inject-skill.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"inject-skill.js","sourceRoot":"","sources":["../../../src/hooks/inject-skill.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,MAAM,MAAM,SAAS,CAAC;AAC7B,OAAO,IAAI,MAAM,WAAW,CAAC;AAE7B;;;;;;;;GAQG;AAEH;;;;GAIG;AACH,SAAS,cAAc,CAAC,QAAgB;IACtC,IAAI,GAAG,GAAG,QAAQ,CAAC;IACnB,SAAS,CAAC;QACR,IAAI,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,sBAAsB,CAAC,CAAC,EAAE,CAAC;YAC9D,OAAO,GAAG,CAAC;QACb,CAAC;QACD,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACjC,IAAI,MAAM,KAAK,GAAG;YAAE,MAAM,CAAC,kBAAkB;QAC7C,GAAG,GAAG,MAAM,CAAC;IACf,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,wBAAwB,CAAC,YAAoB;IAC3D,MAAM,UAAU,GAAG,cAAc,CAAC,YAAY,CAAC,CAAC;IAChD,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;IAElD,OAAO,KAAK,EACV,KAA4D,EAC5D,IAAiE,EACZ,EAAE;QACvD,IAAI,KAAK,CAAC,QAAQ,KAAK,gBAAgB;YAAE,OAAO;QAEhD,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,EAAE,IAAI,CAAC;QAChC,IAAI,OAAO,IAAI,KAAK,QAAQ;YAAE,OAAO;QAErC,0CAA0C;QAC1C,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;QAC7C,IAAI,CAAC,KAAK;YAAE,OAAO;QACnB,MAAM,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QAE3B,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,SAAS,EAAE,UAAU,CAAC,CAAC;QAChE,IAAI,OAAe,CAAC;QACpB,IAAI,CAAC;YACH,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;QACpD,CAAC;QAAC,MAAM,CAAC;YACP,qDAAqD;YACrD,OAAO;QACT,CAAC;QAED,yCAAyC;QACzC,MAAM,IAAI,GAAG,OAAO,CAAC,OAAO,CAAC,oBAAoB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;QAC9D,IAAI,CAAC,IAAI;YAAE,OAAO;QAElB,6EAA6E;QAC7E,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QACxC,MAAM,OAAO,GACX,YAAY,KAAK,CAAC,CAAC;YACjB,CAAC,CAAC,IAAI,GAAG,MAAM,GAAG,IAAI;YACtB,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,YAAY,CAAC;gBAC3B,MAAM;gBACN,IAAI;gBACJ,MAAM;gBACN,IAAI,CAAC,KAAK,CAAC,YAAY,GAAG,CAAC,CAAC,CAAC;QAEnC,OAAO,EAAE,MAAM,EAAE,EAAE,GAAG,KAAK,CAAC,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,EAAE,CAAC;IACxD,CAAC,CAAC;AACJ,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "scientify",
|
|
3
|
-
"version": "1.4.1",
|
|
3
|
+
"version": "1.6.0",
|
|
4
4
|
"description": "Scientify - AI-powered research workflow automation for OpenClaw. Includes idea generation, literature review, research pipeline skills, and arxiv tool.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -23,11 +23,11 @@
|
|
|
23
23
|
"license": "MIT",
|
|
24
24
|
"repository": {
|
|
25
25
|
"type": "git",
|
|
26
|
-
"url": "https://github.com/tsingyuai/
|
|
26
|
+
"url": "https://github.com/tsingyuai/scientify.git"
|
|
27
27
|
},
|
|
28
|
-
"homepage": "https://github.com/tsingyuai/
|
|
28
|
+
"homepage": "https://github.com/tsingyuai/scientify#readme",
|
|
29
29
|
"bugs": {
|
|
30
|
-
"url": "https://github.com/tsingyuai/
|
|
30
|
+
"url": "https://github.com/tsingyuai/scientify/issues"
|
|
31
31
|
},
|
|
32
32
|
"scripts": {
|
|
33
33
|
"build": "tsc",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: idea-generation
|
|
3
|
-
description: "
|
|
3
|
+
description: "Use this when the user wants research ideas, innovation points, or to find research gaps (找研究方向, 生成创新点). Generates 5 ideas from collected papers, identifies gaps, proposes novel methods with citations. Requires papers in workspace."
|
|
4
4
|
metadata:
|
|
5
5
|
{
|
|
6
6
|
"openclaw":
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: install-scientify
|
|
3
|
-
description: "
|
|
3
|
+
description: "Use this when the user wants to install or set up the Scientify research plugin. Adds research-pipeline, literature-survey, idea-generation, arxiv tools, and workspace management."
|
|
4
4
|
metadata:
|
|
5
5
|
{
|
|
6
6
|
"openclaw":
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: literature-survey
|
|
3
|
-
description: "
|
|
3
|
+
description: "Use this when the user wants to find, download, or collect academic papers on a topic. Searches arXiv, filters by relevance, downloads PDFs and sources, clusters by research direction."
|
|
4
4
|
metadata:
|
|
5
5
|
{
|
|
6
6
|
"openclaw":
|
|
@@ -21,11 +21,15 @@ metadata:
|
|
|
21
21
|
├── survey/
|
|
22
22
|
│ ├── search_terms.json # 检索词列表
|
|
23
23
|
│ └── report.md # 最终报告
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
24
|
+
├── papers/
|
|
25
|
+
│ ├── _downloads/ # 原始下载
|
|
26
|
+
│ ├── _meta/ # 每篇论文的元数据
|
|
27
|
+
│ │ └── {arxiv_id}.json
|
|
28
|
+
│ └── {direction}/ # 整理后的分类
|
|
29
|
+
├── repos/ # 参考代码仓库(Phase 3)
|
|
30
|
+
│ ├── {repo_name_1}/
|
|
31
|
+
│ └── {repo_name_2}/
|
|
32
|
+
└── prepare_res.md # 仓库选择报告(Phase 3)
|
|
29
33
|
```
|
|
30
34
|
|
|
31
35
|
---
|
|
@@ -95,11 +99,71 @@ arxiv_download({
|
|
|
95
99
|
|
|
96
100
|
---
|
|
97
101
|
|
|
98
|
-
### Phase 3:
|
|
102
|
+
### Phase 3: GitHub 代码搜索与参考仓库选择
|
|
99
103
|
|
|
100
|
-
|
|
104
|
+
**目标**:为下游 skill(research-survey、research-plan、research-implement)提供可参考的开源实现。
|
|
101
105
|
|
|
102
|
-
#### 3.1
|
|
106
|
+
#### 3.1 选择高分论文
|
|
107
|
+
|
|
108
|
+
读取 `papers/_meta/` 下得分 ≥4 的论文,选出 **Top 5** 最相关论文。
|
|
109
|
+
|
|
110
|
+
#### 3.2 搜索参考仓库
|
|
111
|
+
|
|
112
|
+
对每篇选中论文,用以下关键词组合搜索 GitHub 仓库:
|
|
113
|
+
- 论文标题 + "code" / "implementation"
|
|
114
|
+
- 核心方法名 + 作者名
|
|
115
|
+
- 论文中提到的数据集名 + 任务名
|
|
116
|
+
|
|
117
|
+
使用 `web_search` 工具搜索,或通过 `exec` 调用 GitHub API:
|
|
118
|
+
```bash
|
|
119
|
+
# 示例:
|
|
120
|
+
web_search({ query: "{paper_title} implementation github" })
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
#### 3.3 筛选与 clone
|
|
124
|
+
|
|
125
|
+
对搜索到的仓库,评估:
|
|
126
|
+
- Star 数(建议 >100)
|
|
127
|
+
- 代码质量(有 README、有 requirements.txt、代码结构清晰)
|
|
128
|
+
- 与论文的匹配度(README 中引用了论文 / 实现了论文中的方法)
|
|
129
|
+
|
|
130
|
+
选择 **3-5 个**最相关的仓库,clone 到 `repos/`:
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
mkdir -p "$PROJECT_DIR/repos"
|
|
134
|
+
cd "$PROJECT_DIR/repos"
|
|
135
|
+
git clone --depth 1 <repo_url>
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
#### 3.4 写入选择报告
|
|
139
|
+
|
|
140
|
+
创建 `$PROJECT_DIR/prepare_res.md`:
|
|
141
|
+
|
|
142
|
+
```markdown
|
|
143
|
+
# 参考仓库选择
|
|
144
|
+
|
|
145
|
+
| 仓库 | 对应论文 | Stars | 选择理由 |
|
|
146
|
+
|------|----------|-------|----------|
|
|
147
|
+
| repos/{repo_name} | {paper_title} (arxiv:{id}) | {N} | {理由} |
|
|
148
|
+
|
|
149
|
+
## 各仓库关键文件
|
|
150
|
+
|
|
151
|
+
### {repo_name}
|
|
152
|
+
- **模型实现**: `model/` 或 `models/`
|
|
153
|
+
- **训练脚本**: `train.py` 或 `main.py`
|
|
154
|
+
- **数据加载**: `data/` 或 `dataset.py`
|
|
155
|
+
- **核心文件**: `{关键文件路径}` — {描述}
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
**如果搜不到相关仓库**,在 `prepare_res.md` 中注明"无可用参考仓库",后续 skill 将不依赖代码映射。
|
|
159
|
+
|
|
160
|
+
---
|
|
161
|
+
|
|
162
|
+
### Phase 4: 分类整理
|
|
163
|
+
|
|
164
|
+
所有检索词和代码搜索完毕后:
|
|
165
|
+
|
|
166
|
+
#### 4.1 读取所有元数据
|
|
103
167
|
|
|
104
168
|
```bash
|
|
105
169
|
ls $PROJECT_DIR/papers/_meta/
|
|
@@ -107,11 +171,11 @@ ls $PROJECT_DIR/papers/_meta/
|
|
|
107
171
|
|
|
108
172
|
读取所有 `.json` 文件,汇总论文列表。
|
|
109
173
|
|
|
110
|
-
####
|
|
174
|
+
#### 4.2 聚类分析
|
|
111
175
|
|
|
112
176
|
根据论文的标题、摘要、来源检索词,识别 3-6 个研究方向。
|
|
113
177
|
|
|
114
|
-
####
|
|
178
|
+
#### 4.3 创建文件夹并移动
|
|
115
179
|
|
|
116
180
|
```bash
|
|
117
181
|
mkdir -p "$PROJECT_DIR/papers/data-driven"
|
|
@@ -120,12 +184,13 @@ mv "$PROJECT_DIR/papers/_downloads/2401.12345" "$PROJECT_DIR/papers/data-driven/
|
|
|
120
184
|
|
|
121
185
|
---
|
|
122
186
|
|
|
123
|
-
### Phase
|
|
187
|
+
### Phase 5: 生成报告
|
|
124
188
|
|
|
125
189
|
创建 `survey/report.md`:
|
|
126
190
|
- 调研概要(检索词数、论文数、方向数)
|
|
127
191
|
- 各研究方向概述
|
|
128
192
|
- Top 10 论文
|
|
193
|
+
- **参考仓库摘要**(引用 prepare_res.md)
|
|
129
194
|
- 建议阅读顺序
|
|
130
195
|
|
|
131
196
|
---
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: research-experiment
|
|
3
|
-
description: "
|
|
3
|
+
description: "[Read when prompt contains /research-experiment]"
|
|
4
4
|
metadata:
|
|
5
5
|
{
|
|
6
6
|
"openclaw":
|
|
@@ -31,7 +31,8 @@ metadata:
|
|
|
31
31
|
|
|
32
32
|
| File | Content |
|
|
33
33
|
|------|---------|
|
|
34
|
-
| `$W/experiment_res.md` |
|
|
34
|
+
| `$W/experiment_res.md` | 完整实验报告(含 full training + 消融 + 补充实验) |
|
|
35
|
+
| `$W/experiment_analysis/analysis_{N}.md` | 每轮实验分析报告(迭代过程中产生) |
|
|
35
36
|
|
|
36
37
|
---
|
|
37
38
|
|
|
@@ -69,9 +70,55 @@ python run.py # full epochs
|
|
|
69
70
|
python run.py --epochs 2 --ablation no_attention
|
|
70
71
|
```
|
|
71
72
|
|
|
72
|
-
### Step 4:
|
|
73
|
+
### Step 4: 实验分析→补充实验迭代(2 轮)
|
|
73
74
|
|
|
74
|
-
|
|
75
|
+
**⚠️ 这是 Novix Exp Analyzer 机制 — 对已有结果进行分析,提出补充实验,执行后再分析。**
|
|
76
|
+
|
|
77
|
+
循环 **2 次**:
|
|
78
|
+
|
|
79
|
+
#### 4.1 分析当前结果
|
|
80
|
+
|
|
81
|
+
读取当前所有实验结果(full training + 消融),写入分析报告 `$W/experiment_analysis/analysis_{N}.md`:
|
|
82
|
+
|
|
83
|
+
```markdown
|
|
84
|
+
# Experiment Analysis Round {N}
|
|
85
|
+
|
|
86
|
+
## 当前结果摘要
|
|
87
|
+
- Full training: {metrics}
|
|
88
|
+
- 消融实验: {key findings}
|
|
89
|
+
|
|
90
|
+
## 发现的问题或机会
|
|
91
|
+
1. {observation} → 建议: {experiment}
|
|
92
|
+
2. ...
|
|
93
|
+
|
|
94
|
+
## 补充实验计划
|
|
95
|
+
| 实验名称 | 目的 | 修改内容 | 预期结果 |
|
|
96
|
+
|----------|------|----------|----------|
|
|
97
|
+
| {exp_name} | {why} | {what to change} | {expected} |
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
补充实验类型参考(**Novix Exp Analyzer** 的典型输出):
|
|
101
|
+
- **敏感性分析**:关键超参数(lr、hidden_dim、dropout)的影响
|
|
102
|
+
- **可视化**:attention map、embedding 可视化、训练曲线对比图
|
|
103
|
+
- **对比实验**:与 baseline 方法的性能对比
|
|
104
|
+
- **鲁棒性测试**:不同数据规模/噪声水平下的表现
|
|
105
|
+
|
|
106
|
+
#### 4.2 执行补充实验
|
|
107
|
+
|
|
108
|
+
根据分析报告中的计划,修改代码并执行补充实验。**只改实验相关参数/配置,不改核心算法逻辑。**
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
cd $W/project && source .venv/bin/activate
|
|
112
|
+
python run.py --experiment {exp_name}
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
记录结果后,回到 4.1 进行下一轮分析(共 2 轮)。
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
### Step 5: 写入最终实验报告
|
|
120
|
+
|
|
121
|
+
汇总所有实验结果(full training + 消融 + 2 轮补充实验),写入 `$W/experiment_res.md`:
|
|
75
122
|
|
|
76
123
|
```markdown
|
|
77
124
|
# Experiment Report
|
|
@@ -97,8 +144,25 @@ python run.py --epochs 2 --ablation no_attention
|
|
|
97
144
|
| No {component} | 去掉 {X} | {value} | {-/+}% |
|
|
98
145
|
| ... | ... | ... | ... |
|
|
99
146
|
|
|
147
|
+
## Supplementary Experiments
|
|
148
|
+
|
|
149
|
+
### Sensitivity Analysis
|
|
150
|
+
| 超参数 | 值 | val_metric | 备注 |
|
|
151
|
+
|--------|-----|-----------|------|
|
|
152
|
+
| ... | ... | ... | ... |
|
|
153
|
+
|
|
154
|
+
### Comparison with Baselines
|
|
155
|
+
| 方法 | val_metric | 备注 |
|
|
156
|
+
|------|-----------|------|
|
|
157
|
+
| Ours | {value} | — |
|
|
158
|
+
| {Baseline} | {value} | ... |
|
|
159
|
+
|
|
160
|
+
### Visualizations
|
|
161
|
+
- 训练曲线: `$W/project/figures/training_curve.png`
|
|
162
|
+
- {其他可视化}: `$W/project/figures/{name}.png`
|
|
163
|
+
|
|
100
164
|
## Conclusions
|
|
101
|
-
- {key findings}
|
|
165
|
+
- {key findings from all experiments}
|
|
102
166
|
|
|
103
167
|
## Limitations
|
|
104
168
|
- {limitations and future work}
|
|
@@ -112,3 +176,5 @@ python run.py --epochs 2 --ablation no_attention
|
|
|
112
176
|
2. 所有数值必须来自真实执行输出
|
|
113
177
|
3. 消融实验至少做 2 个
|
|
114
178
|
4. 如果 full training 失败(OOM 等),调整 batch_size 后重试,不要跳过
|
|
179
|
+
5. **补充实验迭代必须做 2 轮(Novix Exp Analyzer 机制)** — 第 1 轮针对初始结果,第 2 轮针对补充实验结果
|
|
180
|
+
6. 补充实验不改核心算法,只改实验配置/参数/可视化代码
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: research-pipeline
|
|
3
|
-
description: "
|
|
3
|
+
description: "Use this skill when the user wants to research a topic, analyze papers, build ML models, or run experiments. Orchestrates the full pipeline: paper search → analysis → planning → implementation → review → experiments."
|
|
4
4
|
metadata:
|
|
5
5
|
{
|
|
6
6
|
"openclaw":
|
|
@@ -15,14 +15,86 @@ metadata:
|
|
|
15
15
|
|
|
16
16
|
**Don't ask permission. Just do it.**
|
|
17
17
|
|
|
18
|
-
|
|
19
|
-
1. 检查 workspace 文件状态
|
|
20
|
-
2. 为下一步构造任务描述
|
|
21
|
-
3. 用 `sessions_spawn` 派发给子 agent
|
|
22
|
-
4. 等待完成后验证产出
|
|
23
|
-
5. 重复直到流程结束
|
|
18
|
+
## Critical Identity Rule
|
|
24
19
|
|
|
25
|
-
|
|
20
|
+
**你是编排器(Orchestrator),不是研究员。**
|
|
21
|
+
|
|
22
|
+
- 你**不**分析论文
|
|
23
|
+
- 你**不**写代码
|
|
24
|
+
- 你**不**设计模型
|
|
25
|
+
- 你**不**生成研究内容
|
|
26
|
+
|
|
27
|
+
你**只做**以下事情:
|
|
28
|
+
1. 检查文件是否存在
|
|
29
|
+
2. 读取产出文件的摘要
|
|
30
|
+
3. 调用 `sessions_spawn` 工具把任务派发给子 agent
|
|
31
|
+
4. 验证子 agent 的产出
|
|
32
|
+
|
|
33
|
+
如果你发现自己在写任何研究内容,**立刻停下**,改用 `sessions_spawn` 派发。
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
## ⛔ 严格顺序执行规则
|
|
38
|
+
|
|
39
|
+
**这是最重要的规则,违反此规则会导致整个流程失败。**
|
|
40
|
+
|
|
41
|
+
### 禁止并行派发
|
|
42
|
+
|
|
43
|
+
- **每次响应中只能调用一次 `sessions_spawn`**
|
|
44
|
+
- **绝对禁止**在同一次响应中调用多个 `sessions_spawn`
|
|
45
|
+
- 如果你想同时启动 Phase 2 和 Phase 3 —— **不行,停下来**
|
|
46
|
+
- 必须等前一个子 agent 完成、产出文件通过验证后,才能启动下一个
|
|
47
|
+
|
|
48
|
+
### 单步调度流程
|
|
49
|
+
|
|
50
|
+
每次你只能做以下其中之一:
|
|
51
|
+
1. **检查 + 派发**:检查当前阶段的产出文件 → 如果缺失 → 调用**一次** `sessions_spawn` → **立刻停止,等待子 agent 完成**
|
|
52
|
+
2. **验证 + 推进**:收到子 agent 完成通知后 → 验证产出文件 → 如果通过 → 检查下一阶段 → 派发或报告完成
|
|
53
|
+
|
|
54
|
+
### 派发后的行为
|
|
55
|
+
|
|
56
|
+
调用 `sessions_spawn` 后,你必须:
|
|
57
|
+
1. 告诉用户当前进度(例:"Phase 2 Deep Survey 已启动,等待子 agent 完成...")
|
|
58
|
+
2. **停止响应** —— 不要继续检查后续阶段,不要再调用任何 `sessions_spawn`
|
|
59
|
+
3. 等待系统发送子 agent 完成通知
|
|
60
|
+
|
|
61
|
+
### 收到子 agent 完成通知后
|
|
62
|
+
|
|
63
|
+
当你收到类似 "A background task ... just completed" 的消息时:
|
|
64
|
+
1. **不要仅仅总结给用户** —— 你是编排器,你需要继续推进流程
|
|
65
|
+
2. 验证该阶段的产出文件(用 `exec` 或 `read` 检查文件是否存在、内容是否正确)
|
|
66
|
+
3. 如果验证通过:简要告知用户,然后检查下一阶段、准备下一次派发
|
|
67
|
+
4. 如果验证失败:报告问题,决定是重试还是报告用户
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## sessions_spawn 工具
|
|
72
|
+
|
|
73
|
+
`sessions_spawn` 是一个 **tool call**(不是代码块,不是伪代码)。直接作为工具调用。
|
|
74
|
+
|
|
75
|
+
**参数:**
|
|
76
|
+
|
|
77
|
+
| 参数 | 类型 | 必填 | 说明 |
|
|
78
|
+
|------|------|------|------|
|
|
79
|
+
| `task` | string | 是 | 子 agent 的完整任务描述 |
|
|
80
|
+
| `label` | string | 否 | 显示标签(如 "Deep Survey") |
|
|
81
|
+
| `model` | string | 否 | 模型覆盖(如 `tsingyu/gemini-3-flash-preview`) |
|
|
82
|
+
| `runTimeoutSeconds` | number | 否 | 超时秒数 |
|
|
83
|
+
|
|
84
|
+
**task 字段格式**(子 agent 是独立 session,看不到当前上下文):
|
|
85
|
+
|
|
86
|
+
task 必须以 `/skill-name` 开头(触发 slash command 解析),后续行提供上下文:
|
|
87
|
+
|
|
88
|
+
1. **第一行**:`/research-survey`(slash command,必须在最前面)
|
|
89
|
+
2. **工作目录的绝对路径**(如 `工作目录: /Users/xxx/.openclaw/workspace/projects/battery-soh`)
|
|
90
|
+
3. **上下文摘要**:从上一步产出文件中提取的 2-5 行关键信息
|
|
91
|
+
4. **预期产出**:明确说明要写哪个文件
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
## Workspace
|
|
96
|
+
|
|
97
|
+
See `../_shared/workspace-spec.md`. Set `$W` to the active project directory.
|
|
26
98
|
|
|
27
99
|
---
|
|
28
100
|
|
|
@@ -43,20 +115,15 @@ ACTIVE=$(cat ~/.openclaw/workspace/projects/.active 2>/dev/null)
|
|
|
43
115
|
|
|
44
116
|
## 调度循环
|
|
45
117
|
|
|
46
|
-
|
|
118
|
+
按顺序检查每个阶段。**每次只执行一个阶段,每次响应只派发一个任务。**
|
|
47
119
|
|
|
48
120
|
### Phase 1: Literature Survey
|
|
49
121
|
|
|
50
122
|
**检查:** `$W/papers/_meta/` 目录存在且有 `.json` 文件?
|
|
51
123
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
sessions_spawn({
|
|
56
|
-
task: "工作目录: $W\n执行 /literature-survey 技能\n\n研究主题: {从 task.json 提取}\n请搜索、筛选、下载相关论文到 $W/papers/",
|
|
57
|
-
label: "Literature Survey"
|
|
58
|
-
})
|
|
59
|
-
```
|
|
124
|
+
**如果缺失,调用 sessions_spawn 工具(然后停止,等待完成通知):**
|
|
125
|
+
- task: `"/literature-survey\n工作目录: {$W绝对路径}\n研究主题: {从task.json提取}\n请搜索、筛选、下载论文到工作目录的 papers/ 下。"`
|
|
126
|
+
- label: `"Literature Survey"`
|
|
60
127
|
|
|
61
128
|
**验证:** `ls $W/papers/_meta/*.json` 至少有 3 个文件
|
|
62
129
|
|
|
@@ -66,14 +133,9 @@ sessions_spawn({
|
|
|
66
133
|
|
|
67
134
|
**检查:** `$W/survey_res.md` 存在?
|
|
68
135
|
|
|
69
|
-
**如果缺失,先读取 Phase 1
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
sessions_spawn({
|
|
73
|
-
task: "工作目录: $W\n执行 /research-survey 技能\n\n上下文: 已下载 {N} 篇论文,方向包括 {directions}\n请深度分析论文,提取公式,写入 survey_res.md",
|
|
74
|
-
label: "Deep Survey"
|
|
75
|
-
})
|
|
76
|
-
```
|
|
136
|
+
**如果缺失,先读取 Phase 1 摘要(论文数量、方向),然后调用 sessions_spawn 工具(然后停止,等待完成通知):**
|
|
137
|
+
- task: `"/research-survey\n工作目录: {$W绝对路径}\n上下文: 已下载 {N} 篇论文,方向包括 {directions}。\n重点论文: {top 3 arxiv_id 和标题}\n请深度分析论文、提取公式,写入 survey_res.md。"`
|
|
138
|
+
- label: `"Deep Survey"`
|
|
77
139
|
|
|
78
140
|
**验证:** `$W/survey_res.md` 存在且包含"核心方法对比"表格
|
|
79
141
|
|
|
@@ -83,14 +145,9 @@ sessions_spawn({
|
|
|
83
145
|
|
|
84
146
|
**检查:** `$W/plan_res.md` 存在?
|
|
85
147
|
|
|
86
|
-
**如果缺失,读取 survey_res.md
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
sessions_spawn({
|
|
90
|
-
task: "工作目录: $W\n执行 /research-plan 技能\n\n上下文: 调研发现核心方法是 {method},推荐技术路线 {route}\n请制定完整实现计划到 plan_res.md",
|
|
91
|
-
label: "Research Plan"
|
|
92
|
-
})
|
|
93
|
-
```
|
|
148
|
+
**如果缺失,读取 survey_res.md 摘要,然后调用 sessions_spawn 工具(然后停止,等待完成通知):**
|
|
149
|
+
- task: `"/research-plan\n工作目录: {$W绝对路径}\n上下文: 调研发现核心方法是 {method},推荐技术路线 {route}。\n关键公式: {1-2个公式}\n请制定实现计划到 plan_res.md。"`
|
|
150
|
+
- label: `"Research Plan"`
|
|
94
151
|
|
|
95
152
|
**验证:** `$W/plan_res.md` 存在且包含 4 个 section(Dataset/Model/Training/Testing)
|
|
96
153
|
|
|
@@ -100,14 +157,9 @@ sessions_spawn({
|
|
|
100
157
|
|
|
101
158
|
**检查:** `$W/ml_res.md` 存在?
|
|
102
159
|
|
|
103
|
-
**如果缺失,读取 plan_res.md
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
sessions_spawn({
|
|
107
|
-
task: "工作目录: $W\n执行 /research-implement 技能\n\n上下文:\n- 计划包含 {N} 个组件: {list}\n- 数据集: {dataset}\n- 框架: PyTorch\n请实现代码到 $W/project/,运行 2 epoch 验证,写入 ml_res.md",
|
|
108
|
-
label: "Research Implement"
|
|
109
|
-
})
|
|
110
|
-
```
|
|
160
|
+
**如果缺失,读取 plan_res.md 要点,然后调用 sessions_spawn 工具(然后停止,等待完成通知):**
|
|
161
|
+
- task: `"/research-implement\n工作目录: {$W绝对路径}\n上下文:\n- 计划包含 {N} 个组件: {list}\n- 数据集: {dataset}\n- 框架: PyTorch\n请实现代码到 project/,运行 2 epoch 验证,写入 ml_res.md。"`
|
|
162
|
+
- label: `"Research Implement"`
|
|
111
163
|
|
|
112
164
|
**验证:**
|
|
113
165
|
- `$W/project/run.py` 存在
|
|
@@ -120,14 +172,9 @@ sessions_spawn({
|
|
|
120
172
|
|
|
121
173
|
**检查:** `$W/iterations/` 下最新 `judge_v*.md` 的 verdict 是否为 PASS?
|
|
122
174
|
|
|
123
|
-
**如果没有 PASS
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
sessions_spawn({
|
|
127
|
-
task: "工作目录: $W\n执行 /research-review 技能\n\n上下文:\n- 实现报告: ml_res.md 显示 train_loss={value}\n- 计划在 plan_res.md\n请审查代码,如需修改则迭代修复(最多 3 轮)",
|
|
128
|
-
label: "Research Review"
|
|
129
|
-
})
|
|
130
|
-
```
|
|
175
|
+
**如果没有 PASS,调用 sessions_spawn 工具(然后停止,等待完成通知):**
|
|
176
|
+
- task: `"/research-review\n工作目录: {$W绝对路径}\n上下文:\n- ml_res.md 显示 train_loss={value}\n- 计划在 plan_res.md\n请审查代码,如需修改则迭代修复(最多 3 轮)。"`
|
|
177
|
+
- label: `"Research Review"`
|
|
131
178
|
|
|
132
179
|
**验证:** 最新 `judge_v*.md` 中 `verdict: PASS` 或 `verdict: BLOCKED`
|
|
133
180
|
|
|
@@ -139,14 +186,9 @@ sessions_spawn({
|
|
|
139
186
|
|
|
140
187
|
**检查:** `$W/experiment_res.md` 存在?
|
|
141
188
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
sessions_spawn({
|
|
146
|
-
task: "工作目录: $W\n执行 /research-experiment 技能\n\n上下文:\n- Review PASS,代码已验证\n- plan_res.md 中指定 full epochs\n请执行完整训练 + 消融实验,写入 experiment_res.md",
|
|
147
|
-
label: "Research Experiment"
|
|
148
|
-
})
|
|
149
|
-
```
|
|
189
|
+
**如果缺失,调用 sessions_spawn 工具(然后停止,等待完成通知):**
|
|
190
|
+
- task: `"/research-experiment\n工作目录: {$W绝对路径}\n上下文:\n- Review PASS,代码已验证\n- plan_res.md 中指定 full epochs\n请执行完整训练 + 消融实验,写入 experiment_res.md。"`
|
|
191
|
+
- label: `"Research Experiment"`
|
|
150
192
|
|
|
151
193
|
**验证:** `$W/experiment_res.md` 包含 `[RESULT]` 行和消融表格
|
|
152
194
|
|
|
@@ -168,10 +210,10 @@ sessions_spawn({
|
|
|
168
210
|
|
|
169
211
|
## 上下文桥接规则
|
|
170
212
|
|
|
171
|
-
|
|
213
|
+
每次调用 sessions_spawn 前,编排器必须:
|
|
172
214
|
1. **读取**上一步的产出文件
|
|
173
215
|
2. **摘要** 2-5 行关键信息(不要复制全文)
|
|
174
|
-
3. **写入**
|
|
216
|
+
3. **写入** sessions_spawn task 参数的上下文部分
|
|
175
217
|
|
|
176
218
|
这确保子 agent 拿到足够信息启动,同时不会被前序步骤的完整输出污染。
|
|
177
219
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: research-plan
|
|
3
|
-
description: "
|
|
3
|
+
description: "[Read when prompt contains /research-plan]"
|
|
4
4
|
metadata:
|
|
5
5
|
{
|
|
6
6
|
"openclaw":
|
|
@@ -23,7 +23,8 @@ metadata:
|
|
|
23
23
|
| `$W/task.json` | /research-pipeline or user |
|
|
24
24
|
| `$W/survey_res.md` | /research-survey |
|
|
25
25
|
| `$W/notes/paper_*.md` | /research-survey |
|
|
26
|
-
| `$W/repos/`
|
|
26
|
+
| `$W/repos/` | /literature-survey Phase 3 |
|
|
27
|
+
| `$W/prepare_res.md` | /literature-survey Phase 3 |
|
|
27
28
|
|
|
28
29
|
**If `survey_res.md` is missing, STOP:** "需要先运行 /research-survey 完成深度分析"
|
|
29
30
|
|
|
@@ -41,10 +42,22 @@ metadata:
|
|
|
41
42
|
|
|
42
43
|
读取以下文件,理解研究目标和技术方案:
|
|
43
44
|
- `$W/task.json` — 研究目标
|
|
44
|
-
- `$W/survey_res.md` —
|
|
45
|
-
-
|
|
45
|
+
- `$W/survey_res.md` — 技术路线建议、核心公式、**公式→代码映射表**、参考代码架构摘要
|
|
46
|
+
- `$W/prepare_res.md` — 参考仓库列表及关键文件说明
|
|
46
47
|
|
|
47
|
-
### Step 2:
|
|
48
|
+
### Step 2: 参考代码深度分析
|
|
49
|
+
|
|
50
|
+
**⚠️ 强制性步骤(Novix Plan Agent 机制)** — 读参考仓库的实现细节,确保 plan 有具体可行的依据。
|
|
51
|
+
|
|
52
|
+
对 `prepare_res.md` 中的重点仓库:
|
|
53
|
+
1. 读取目录结构和 README
|
|
54
|
+
2. 读取核心模型代码,理解架构实现方式
|
|
55
|
+
3. 读取训练脚本,理解超参数选择和训练技巧
|
|
56
|
+
4. 读取数据加载代码,理解预处理流程
|
|
57
|
+
|
|
58
|
+
为每个组件记录:**参考文件路径 + 关键实现细节**。这些信息将直接填入 plan 的"参考代码"列。
|
|
59
|
+
|
|
60
|
+
### Step 3: 制定四部分计划
|
|
48
61
|
|
|
49
62
|
写入 `$W/plan_res.md`:
|
|
50
63
|
|
|
@@ -100,11 +113,12 @@ metadata:
|
|
|
100
113
|
2. {ablation 2}
|
|
101
114
|
```
|
|
102
115
|
|
|
103
|
-
### Step
|
|
116
|
+
### Step 4: 自检
|
|
104
117
|
|
|
105
118
|
验证计划的完整性:
|
|
106
119
|
- [ ] 每个模型组件都有对应公式
|
|
107
|
-
- [ ]
|
|
120
|
+
- [ ] **每个组件的"参考代码"列已填写**(当 repos/ 存在时)
|
|
121
|
+
- [ ] 数据集有具体获取方式(URL 或下载命令)
|
|
108
122
|
- [ ] Loss 函数有数学定义
|
|
109
123
|
- [ ] 评估指标有明确定义
|
|
110
124
|
- [ ] 训练参数合理(不要 lr=0.1 for Adam)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: research-review
|
|
3
|
-
description: "
|
|
3
|
+
description: "[Read when prompt contains /research-review]"
|
|
4
4
|
metadata:
|
|
5
5
|
{
|
|
6
6
|
"openclaw":
|
|
@@ -48,19 +48,55 @@ metadata:
|
|
|
48
48
|
- `$W/project/` — 实际代码
|
|
49
49
|
- `$W/ml_res.md` — 执行结果
|
|
50
50
|
|
|
51
|
-
### Step 2:
|
|
51
|
+
### Step 2: 提取原子性概念清单
|
|
52
|
+
|
|
53
|
+
**⚠️ 这是 Novix Judge Agent 的核心机制 — 逐一核对每个原子性学术概念。**
|
|
54
|
+
|
|
55
|
+
从 `$W/survey_res.md` 的"关键公式汇总"和"核心方法对比"中,提取所有需要在代码中实现的**原子性学术概念**(每个公式、每个核心组件都是一个概念)。
|
|
56
|
+
|
|
57
|
+
为每个概念记录:
|
|
58
|
+
- 概念名称(如 "Multi-Head Attention", "Contrastive Loss", "Batch Normalization")
|
|
59
|
+
- 对应公式(LaTeX 格式)
|
|
60
|
+
- 预期代码位置(根据 plan_res.md 推断)
|
|
61
|
+
|
|
62
|
+
示例清单:
|
|
63
|
+
```
|
|
64
|
+
原子性概念清单(从 survey_res.md 提取):
|
|
65
|
+
1. Scaled Dot-Product Attention(Multi-Head Attention 的核心) — $\mathrm{Attention}(Q,K,V) = \mathrm{softmax}\left(\frac{QK^T}{\sqrt{d_k}}\right)V$ — 预期在 model/attention.py
|
|
66
|
+
2. Layer Normalization — $\mathrm{LN}(x) = \gamma \frac{x - \mu}{\sqrt{\sigma^2 + \epsilon}} + \beta$ — 预期在 model/layers.py
|
|
67
|
+
3. Residual Connection — $y = F(x) + x$ — 预期贯穿所有模型组件
|
|
68
|
+
...
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
### Step 3: 逐项检查
|
|
72
|
+
|
|
73
|
+
#### A. 数据集真实性审查
|
|
74
|
+
|
|
75
|
+
| 检查项 | 方法 |
|
|
76
|
+
|--------|------|
|
|
77
|
+
| 数据集是否真实拉取 | 检查 `data/` 目录下是否有实际数据文件(非空文件),检查下载脚本/代码是否真正执行了网络请求或本地读取 |
|
|
78
|
+
| 数据加载代码正确性 | 实际执行数据加载代码,验证 shape、dtype、样本数是否与 plan 一致(将 `DatasetClass(...)` 替换为项目中实际的数据集构造调用):`python -c "from data.dataset import *; ds = DatasetClass(...); print(len(ds), ds[0])"` |
|
|
79
|
+
| Mock 数据标注 | 搜索 `# MOCK DATA` 注释;如果使用 mock 数据但未声明,标记为 NEEDS_REVISION |
|
|
80
|
+
|
|
81
|
+
#### B. 算法实现审查
|
|
52
82
|
|
|
53
83
|
| 检查项 | 方法 |
|
|
54
84
|
|--------|------|
|
|
55
|
-
|
|
|
56
|
-
|
|
|
57
|
-
|
|
|
58
|
-
|
|
|
59
|
-
| [RESULT] 行存在 | 检查 ml_res.md 中的数值来源 |
|
|
60
|
-
| Loss 合理 | 非 NaN/Inf,有下降趋势 |
|
|
61
|
-
| 无 mock 数据(除非已声明) | 搜索 `# MOCK DATA` 注释 |
|
|
85
|
+
| **原子性概念逐一核对** | **对照 Step 2 的概念清单,逐个检查**:该概念是否在代码中有对应实现?公式翻译是否正确?维度/参数是否一致?每个概念标注 ✓ 或 ✗ 并记录代码位置 |
|
|
86
|
+
| Loss 函数正确 | 对比 plan Training Plan vs `training/loss.py`,验证数学公式是否正确翻译为代码 |
|
|
87
|
+
| 评估指标正确 | 对比 plan Testing Plan vs `testing/`,确认指标计算逻辑无误 |
|
|
88
|
+
| 关键算法未被简化 | 检查 plan 中的核心创新点是否被完整实现,而非用简化/占位逻辑替代 |
|
|
62
89
|
|
|
63
|
-
|
|
90
|
+
#### C. 算力与执行合理性审查
|
|
91
|
+
|
|
92
|
+
| 检查项 | 方法 |
|
|
93
|
+
|--------|------|
|
|
94
|
+
| 执行用时合理 | 读取 ml_res.md 中 `[RESULT] elapsed=` 值,根据数据集规模 + 模型参数量 + 设备(CPU/GPU)判断用时是否合理。过短(如万级数据集 <1s)可能说明数据未真正加载或训练未真正执行 |
|
|
95
|
+
| [RESULT] 行存在 | 检查 ml_res.md 中的数值来源,确认非编造 |
|
|
96
|
+
| Loss 合理 | 非 NaN/Inf,有下降趋势(epoch 1 loss > epoch 2 loss) |
|
|
97
|
+
| 数据管道匹配 plan | 对比 plan Dataset Plan vs `data/` 实现,batch size、预处理步骤一致 |
|
|
98
|
+
|
|
99
|
+
### Step 4: 写入审查报告
|
|
64
100
|
|
|
65
101
|
写入 `$W/iterations/judge_v1.md`:
|
|
66
102
|
|
|
@@ -70,11 +106,29 @@ metadata:
|
|
|
70
106
|
## Verdict: PASS / NEEDS_REVISION / BLOCKED (BLOCKED only when still failing after 3 rounds)
|
|
71
107
|
|
|
72
108
|
## Checklist
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
- [x/✗]
|
|
76
|
-
- [x/✗]
|
|
109
|
+
|
|
110
|
+
### 数据集
|
|
111
|
+
- [x/✗] Dataset actually downloaded/loaded (not empty or placeholder)
|
|
112
|
+
- [x/✗] Data loading code produces correct shape/dtype/count
|
|
113
|
+
- [x/✗] No undeclared mock data
|
|
114
|
+
|
|
115
|
+
### 算法实现 - 原子性概念核对
|
|
116
|
+
|
|
117
|
+
**逐一核对 Step 2 提取的每个学术概念:**
|
|
118
|
+
|
|
119
|
+
| 概念 | 公式 | 代码位置 | 结果 | 备注 |
|
|
120
|
+
|------|------|----------|------|------|
|
|
121
|
+
| {概念名} | $...$ | `model/xxx.py:L42` | ✓/✗ | {正确实现/公式错误/缺失/简化为占位符} |
|
|
122
|
+
| ... | ... | ... | ... | ... |
|
|
123
|
+
|
|
124
|
+
### 算法实现 - 整体检查
|
|
125
|
+
- [x/✗] Loss function correctly implements the math
|
|
126
|
+
- [x/✗] Key algorithm components fully implemented (no simplified placeholders)
|
|
77
127
|
- [x/✗] Evaluation metrics correct
|
|
128
|
+
|
|
129
|
+
### 算力与执行
|
|
130
|
+
- [x/✗] Execution time reasonable for data scale + model size + device
|
|
131
|
+
- [x/✗] Training loop proper (loss decreasing)
|
|
78
132
|
- [x/✗] Results are from real execution (not fabricated)
|
|
79
133
|
|
|
80
134
|
## Issues (if NEEDS_REVISION)
|
|
@@ -82,21 +136,27 @@ metadata:
|
|
|
82
136
|
2. ...
|
|
83
137
|
```
|
|
84
138
|
|
|
85
|
-
### Step
|
|
139
|
+
### Step 5: 迭代(如果 NEEDS_REVISION)
|
|
140
|
+
|
|
141
|
+
**⚠️ 防偏移机制:每轮迭代都重新读取原始设计文档,确保修改方向正确。**
|
|
86
142
|
|
|
87
143
|
循环最多 3 次:
|
|
88
144
|
|
|
89
145
|
1. 读取 `judge_v{N}.md` 的修改建议
|
|
90
|
-
2.
|
|
91
|
-
|
|
146
|
+
2. **防偏移检查:重新读取** `$W/survey_res.md` 和 `$W/plan_res.md`
|
|
147
|
+
- 对照原始学术设计目标
|
|
148
|
+
- 确保修改不是为了"绕过审查"而偏离学术严谨性
|
|
149
|
+
- 确认修改符合 survey 中的公式定义和 plan 中的设计意图
|
|
150
|
+
3. 修改 `$W/project/` 中的代码
|
|
151
|
+
4. 重新执行:
|
|
92
152
|
```bash
|
|
93
153
|
cd $W/project && source .venv/bin/activate && python run.py --epochs 2
|
|
94
154
|
```
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
155
|
+
5. 读取执行输出,验证修复
|
|
156
|
+
6. **重新执行 Step 2-4**(提取概念清单 → 逐项检查 → 写报告),写入 `judge_v{N+1}.md`
|
|
157
|
+
7. 如果 PASS → 停止;否则继续
|
|
98
158
|
|
|
99
|
-
### Step
|
|
159
|
+
### Step 6: 最终判定
|
|
100
160
|
|
|
101
161
|
3 轮后仍 NEEDS_REVISION → 在最后一份 judge 中列出剩余问题,标记 `verdict: BLOCKED`,等待用户介入。
|
|
102
162
|
|
|
@@ -108,3 +168,8 @@ metadata:
|
|
|
108
168
|
2. 每个 issue 必须给出具体的修复指令(不是"请改进")
|
|
109
169
|
3. 验证修复后必须重新执行代码并检查输出
|
|
110
170
|
4. PASS 的前提:所有 checklist 项通过 + [RESULT] 数值合理
|
|
171
|
+
5. **数据集必须验证真实性** —— 实际执行数据加载代码,确认有真实数据(哪怕是小规模);纯随机 tensor 不算
|
|
172
|
+
6. **执行时间必须与算力匹配** —— 2 epoch 训练时间过短(数据量 >1000 却 <2s)说明数据未加载或训练是空循环
|
|
173
|
+
7. **算法实现必须完整** —— plan 中标注的核心创新点必须逐一检查,不能被简化为 `nn.Linear` 占位
|
|
174
|
+
8. **原子性概念逐一核对(Novix Judge 机制)** —— Step 2 提取的每个概念都必须在 judge 报告的表格中有对应行,标注 ✓ 或 ✗
|
|
175
|
+
9. **防偏移(每轮迭代必须重新对齐)** —— Step 5 每轮修改前必须重新读取 survey_res.md 和 plan_res.md,确保不偏离原始设计目标
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: research-survey
|
|
3
|
-
description: "
|
|
3
|
+
description: "[Read when prompt contains /research-survey]"
|
|
4
4
|
metadata:
|
|
5
5
|
{
|
|
6
6
|
"openclaw":
|
|
@@ -24,9 +24,12 @@ Read and verify these files exist before starting:
|
|
|
24
24
|
|------|--------|
|
|
25
25
|
| `$W/papers/_meta/*.json` | /literature-survey |
|
|
26
26
|
| `$W/papers/_downloads/` or `$W/papers/{direction}/` | /literature-survey |
|
|
27
|
-
| `$W/repos/`
|
|
27
|
+
| `$W/repos/` | /literature-survey Phase 3 |
|
|
28
|
+
| `$W/prepare_res.md` | /literature-survey Phase 3 |
|
|
28
29
|
|
|
29
|
-
**If
|
|
30
|
+
**If papers are missing, STOP:** "需要先运行 /literature-survey 完成论文下载"
|
|
31
|
+
|
|
32
|
+
**Note:** 如果 `prepare_res.md` 中注明"无可用参考仓库",代码映射步骤可跳过,但需在 survey_res.md 中标注。
|
|
30
33
|
|
|
31
34
|
## Output
|
|
32
35
|
|
|
@@ -67,11 +70,14 @@ ls $W/papers/_meta/
|
|
|
67
70
|
- **数学公式**:至少 1 个关键公式(保留 LaTeX 格式)
|
|
68
71
|
- **创新点**:与同领域其他方法的区别
|
|
69
72
|
|
|
70
|
-
#### 2.3
|
|
73
|
+
#### 2.3 映射到参考代码
|
|
74
|
+
|
|
75
|
+
**⚠️ 强制性步骤(当 repos/ 存在时)** — 代码映射是下游 plan 和 implement 的关键输入。
|
|
71
76
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
77
|
+
读取 `$W/prepare_res.md` 中的仓库列表,对每个公式/核心概念:
|
|
78
|
+
1. 在对应仓库中搜索实现代码(用 grep 关键类名/函数名)
|
|
79
|
+
2. 记录**文件路径、行号、代码片段**
|
|
80
|
+
3. 如果多个仓库有不同实现,记录差异
|
|
75
81
|
|
|
76
82
|
#### 2.4 写入笔记
|
|
77
83
|
|
|
@@ -127,7 +133,19 @@ $$
|
|
|
127
133
|
|
|
128
134
|
## 关键公式汇总
|
|
129
135
|
|
|
130
|
-
|
|
136
|
+
**每个公式附带代码映射,供下游 plan 和 implement 参考。**
|
|
137
|
+
|
|
138
|
+
| 公式名称 | LaTeX | 参考代码 |
|
|
139
|
+
|----------|-------|----------|
|
|
140
|
+
| {name} | $...$ | `repos/{repo}/path.py:L42` |
|
|
141
|
+
| ... | ... | ... |
|
|
142
|
+
|
|
143
|
+
## 参考代码架构摘要
|
|
144
|
+
|
|
145
|
+
基于 repos/ 中的参考实现,推荐的代码结构:
|
|
146
|
+
- 数据加载: 参考 `repos/{repo}/data/`
|
|
147
|
+
- 模型实现: 参考 `repos/{repo}/model/`
|
|
148
|
+
- 训练循环: 参考 `repos/{repo}/train.py`
|
|
131
149
|
```
|
|
132
150
|
|
|
133
151
|
---
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: write-review-paper
|
|
3
|
-
description: "
|
|
3
|
+
description: "Use this when the user wants to write a literature review, survey paper, or thesis chapter from existing papers. Guides reading strategy, note-taking, synthesis, and academic writing. NOT for searching new papers (/literature-survey) or generating ideas (/idea-generation)."
|
|
4
4
|
metadata:
|
|
5
5
|
{
|
|
6
6
|
"openclaw":
|