scientify 1.5.0 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/src/tools/paper-browser.d.ts +24 -0
- package/dist/src/tools/paper-browser.d.ts.map +1 -0
- package/dist/src/tools/paper-browser.js +121 -0
- package/dist/src/tools/paper-browser.js.map +1 -0
- package/package.json +1 -1
- package/skills/literature-survey/SKILL.md +89 -11
- package/skills/research-experiment/SKILL.md +70 -4
- package/skills/research-plan/SKILL.md +20 -6
- package/skills/research-review/SKILL.md +48 -12
- package/skills/research-survey/SKILL.md +34 -7
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,UAAU,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,UAAU,CAAC;AAmBlD,MAAM,CAAC,OAAO,UAAU,QAAQ,CAAC,GAAG,EAAE,iBAAiB,QAkFtD"}
|
package/dist/index.js
CHANGED
|
@@ -3,6 +3,7 @@ import { handleResearchStatus, handlePapers, handleIdeas, handleProjects, handle
|
|
|
3
3
|
import { createArxivSearchTool } from "./src/tools/arxiv-search.js";
|
|
4
4
|
import { createArxivDownloadTool } from "./src/tools/arxiv-download.js";
|
|
5
5
|
import { createGithubSearchTool } from "./src/tools/github-search-tool.js";
|
|
6
|
+
import { createPaperBrowserTool } from "./src/tools/paper-browser.js";
|
|
6
7
|
import { createAutoUpdaterService } from "./src/services/auto-updater.js";
|
|
7
8
|
import { createSkillInjectionHook } from "./src/hooks/inject-skill.js";
|
|
8
9
|
// Default: check every hour
|
|
@@ -12,6 +13,7 @@ export default function register(api) {
|
|
|
12
13
|
api.registerTool(createArxivSearchTool());
|
|
13
14
|
api.registerTool(createArxivDownloadTool());
|
|
14
15
|
api.registerTool(createGithubSearchTool());
|
|
16
|
+
api.registerTool(createPaperBrowserTool());
|
|
15
17
|
// Register auto-updater service (silent updates)
|
|
16
18
|
const pluginConfig = api.pluginConfig;
|
|
17
19
|
const autoUpdateEnabled = pluginConfig?.autoUpdate !== false; // enabled by default
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,WAAW,CAAC;AAE7B,OAAO,EACL,oBAAoB,EACpB,YAAY,EACZ,WAAW,EACX,cAAc,EACd,mBAAmB,EACnB,mBAAmB,GACpB,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EAAE,qBAAqB,EAAE,MAAM,6BAA6B,CAAC;AACpE,OAAO,EAAE,uBAAuB,EAAE,MAAM,+BAA+B,CAAC;AACxE,OAAO,EAAE,sBAAsB,EAAE,MAAM,mCAAmC,CAAC;AAC3E,OAAO,EAAE,wBAAwB,EAAE,MAAM,gCAAgC,CAAC;AAC1E,OAAO,EAAE,wBAAwB,EAAE,MAAM,6BAA6B,CAAC;AAEvE,4BAA4B;AAC5B,MAAM,wBAAwB,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC;AAEhD,MAAM,CAAC,OAAO,UAAU,QAAQ,CAAC,GAAsB;IACrD,iBAAiB;IACjB,GAAG,CAAC,YAAY,CAAC,qBAAqB,EAAE,CAAC,CAAC;IAC1C,GAAG,CAAC,YAAY,CAAC,uBAAuB,EAAE,CAAC,CAAC;IAC5C,GAAG,CAAC,YAAY,CAAC,sBAAsB,EAAE,CAAC,CAAC;IAE3C,iDAAiD;IACjD,MAAM,YAAY,GAAG,GAAG,CAAC,YAAoD,CAAC;IAC9E,MAAM,iBAAiB,GAAG,YAAY,EAAE,UAAU,KAAK,KAAK,CAAC,CAAC,qBAAqB;IAEnF,IAAI,iBAAiB,EAAE,CAAC;QACtB,GAAG,CAAC,eAAe,CACjB,wBAAwB,CAAC;YACvB,WAAW,EAAE,WAAW;YACxB,eAAe,EAAE,wBAAwB;YACzC,MAAM,EAAE;gBACN,IAAI,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC;gBACnC,IAAI,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC;gBACnC,KAAK,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,GAAG,CAAC;aACxC;SACF,CAAC,CACH,CAAC;IACJ,CAAC;IAED,sCAAsC;IACtC,GAAG,CAAC,eAAe,CAAC;QAClB,IAAI,EAAE,iBAAiB;QACvB,WAAW,EAAE,gEAAgE;QAC7E,WAAW,EAAE,KAAK;QAClB,WAAW,EAAE,KAAK;QAClB,OAAO,EAAE,oBAAoB;KAC9B,CAAC,CAAC;IAEH,GAAG,CAAC,eAAe,CAAC;QAClB,IAAI,EAAE,QAAQ;QACd,WAAW,EAAE,0CAA0C;QACvD,WAAW,EAAE,IAAI;QACjB,WAAW,EAAE,KAAK;QAClB,OAAO,EAAE,YAAY;KACtB,CAAC,CAAC;IAEH,GAAG,CAAC,eAAe,CAAC;QAClB,IAAI,EAAE,OAAO;QACb,WAAW,EAAE,wCAAwC;QACrD,WAAW,EAAE,IAAI;QACjB,WAAW,EAAE,KAAK;QAClB,OAAO,EAAE,WAAW;KACrB,CAAC,CAAC;IAEH,GAAG,CAAC,eAAe,CAAC;QAClB,IAAI,EAAE,UAAU;QAChB,WAAW,EAAE,4BAA4B;QACzC,WAAW,EAAE,KAAK;QAClB,WAAW,EAAE,KAAK;QAClB,OAAO,EAAE,cAAc;KACxB,CAAC,CAAC;IAEH,GAAG,CAAC,eAAe,CAAC;QAClB,IAAI,EAAE,gBAAgB;QACtB,WAAW,EAAE,wCAAwC;QACrD,WAAW,EAAE,IAAI;QACjB,WAAW,EAAE,KAAK;QAClB,OAAO,EAAE,mBAAmB;KAC7B,
CAAC,CAAC;IAEH,GAAG,CAAC,eAAe,CAAC;QAClB,IAAI,EAAE,gBAAgB;QACtB,WAAW,EAAE,oDAAoD;QACjE,WAAW,EAAE,IAAI;QACjB,WAAW,EAAE,IAAI,EAAE,yCAAyC;QAC5D,OAAO,EAAE,mBAAmB;KAC7B,CAAC,CAAC;IAEH,qDAAqD;IACrD,4EAA4E;IAC5E,2EAA2E;IAC3E,4EAA4E;IAC5E,yEAAyE;IACzE,GAAG,CAAC,EAAE,CAAC,kBAAkB,EAAE,wBAAwB,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IAE/E,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,sCAAsC,CAAC,CAAC;AAC1D,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,WAAW,CAAC;AAE7B,OAAO,EACL,oBAAoB,EACpB,YAAY,EACZ,WAAW,EACX,cAAc,EACd,mBAAmB,EACnB,mBAAmB,GACpB,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EAAE,qBAAqB,EAAE,MAAM,6BAA6B,CAAC;AACpE,OAAO,EAAE,uBAAuB,EAAE,MAAM,+BAA+B,CAAC;AACxE,OAAO,EAAE,sBAAsB,EAAE,MAAM,mCAAmC,CAAC;AAC3E,OAAO,EAAE,sBAAsB,EAAE,MAAM,8BAA8B,CAAC;AACtE,OAAO,EAAE,wBAAwB,EAAE,MAAM,gCAAgC,CAAC;AAC1E,OAAO,EAAE,wBAAwB,EAAE,MAAM,6BAA6B,CAAC;AAEvE,4BAA4B;AAC5B,MAAM,wBAAwB,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC;AAEhD,MAAM,CAAC,OAAO,UAAU,QAAQ,CAAC,GAAsB;IACrD,iBAAiB;IACjB,GAAG,CAAC,YAAY,CAAC,qBAAqB,EAAE,CAAC,CAAC;IAC1C,GAAG,CAAC,YAAY,CAAC,uBAAuB,EAAE,CAAC,CAAC;IAC5C,GAAG,CAAC,YAAY,CAAC,sBAAsB,EAAE,CAAC,CAAC;IAC3C,GAAG,CAAC,YAAY,CAAC,sBAAsB,EAAE,CAAC,CAAC;IAE3C,iDAAiD;IACjD,MAAM,YAAY,GAAG,GAAG,CAAC,YAAoD,CAAC;IAC9E,MAAM,iBAAiB,GAAG,YAAY,EAAE,UAAU,KAAK,KAAK,CAAC,CAAC,qBAAqB;IAEnF,IAAI,iBAAiB,EAAE,CAAC;QACtB,GAAG,CAAC,eAAe,CACjB,wBAAwB,CAAC;YACvB,WAAW,EAAE,WAAW;YACxB,eAAe,EAAE,wBAAwB;YACzC,MAAM,EAAE;gBACN,IAAI,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC;gBACnC,IAAI,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC;gBACnC,KAAK,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,GAAG,CAAC;aACxC;SACF,CAAC,CACH,CAAC;IACJ,CAAC;IAED,sCAAsC;IACtC,GAAG,CAAC,eAAe,CAAC;QAClB,IAAI,EAAE,iBAAiB;QACvB,WAAW,EAAE,gEAAgE;QAC7E,WAAW,EAAE,KAAK;QAClB,WAAW,EAAE,KAAK;QAClB,OAAO,EAAE,oBAAoB;KAC9B,CAAC,CAAC;IAEH,GAAG,CAAC,eAAe,CAAC;QAClB,IAAI,EAAE,QAAQ;QACd,WAAW,EAAE,0CAA0C;QACvD,WAAW,EAAE,IAAI;QACjB,WAAW,EAAE,KAAK;QAClB,OAAO,EAAE,YAAY;KACtB,CAAC,CAAC;IAEH,GAAG,CAAC,eAAe,CAAC;QAClB,IAAI,EAAE,OAAO;QACb,WAAW,EAAE,wCAAwC;QACrD,WAAW,EAAE,IAAI;QACjB,WAAW,EAAE,KAAK;QAClB,OAAO,EAAE,WAAW;KACrB,CAAC,CAAC;IAEH,GAAG,CAAC,eAAe,CAAC;QAClB,IAAI,EAAE,UAAU;QAChB,WAAW,EAAE,4BAA4B;QACzC,WAAW,EAAE,KAAK;QAClB,WAAW,EAAE,KAAK;QAClB,OAAO,EAAE,cAAc;KACxB,CAAC,CAAC;IAEH,GAAG,CAAC,eAAe,CAAC;QAClB,IAAI,EAAE,gBAAgB;Q
ACtB,WAAW,EAAE,wCAAwC;QACrD,WAAW,EAAE,IAAI;QACjB,WAAW,EAAE,KAAK;QAClB,OAAO,EAAE,mBAAmB;KAC7B,CAAC,CAAC;IAEH,GAAG,CAAC,eAAe,CAAC;QAClB,IAAI,EAAE,gBAAgB;QACtB,WAAW,EAAE,oDAAoD;QACjE,WAAW,EAAE,IAAI;QACjB,WAAW,EAAE,IAAI,EAAE,yCAAyC;QAC5D,OAAO,EAAE,mBAAmB;KAC7B,CAAC,CAAC;IAEH,qDAAqD;IACrD,4EAA4E;IAC5E,2EAA2E;IAC3E,4EAA4E;IAC5E,yEAAyE;IACzE,GAAG,CAAC,EAAE,CAAC,kBAAkB,EAAE,wBAAwB,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IAE/E,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,sCAAsC,CAAC,CAAC;AAC1D,CAAC"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/** TypeBox schema for the arguments accepted by the `paper_browser` tool. */
export declare const PaperBrowserToolSchema: import("@sinclair/typebox").TObject<{
    /** Path of the file to page through. */
    file_path: import("@sinclair/typebox").TString;
    /** Optional 1-indexed first line of the viewport. */
    start_line: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
    /** Optional number of lines in the viewport. */
    num_lines: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
}>;
/**
 * Build the `paper_browser` tool definition.
 *
 * The returned object carries the display label, the tool name, a
 * description, the parameter schema and the async `execute` handler.
 */
export declare function createPaperBrowserTool(): {
    label: string;
    name: string;
    description: string;
    /** Same schema object type as the exported `PaperBrowserToolSchema`. */
    parameters: typeof PaperBrowserToolSchema;
    execute: (_toolCallId: string, rawArgs: unknown) => Promise<{
        type: "tool_result";
        content: Array<{
            type: "text";
            text: string;
        }>;
        isError?: boolean;
    }>;
};
|
|
24
|
+
//# sourceMappingURL=paper-browser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"paper-browser.d.ts","sourceRoot":"","sources":["../../../src/tools/paper-browser.ts"],"names":[],"mappings":"AAOA,eAAO,MAAM,sBAAsB;;;;EAiBjC,CAAC;AAqBH,wBAAgB,sBAAsB;;;;;;;;;2BAOL,MAAM,WAAW,OAAO;;;;;;;;EAuFxD"}
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import { Type } from "@sinclair/typebox";
|
|
2
|
+
import { readFileSync, existsSync, statSync } from "node:fs";
|
|
3
|
+
import { Result } from "./result.js";
|
|
4
|
+
// Viewport sizing, in lines: the size used when the caller gives none, and the hard cap.
const DEFAULT_VIEWPORT_SIZE = 100;
const MAX_VIEWPORT_SIZE = 500;

// Each field schema is named on its own so the object schema below reads as a summary.
const filePathField = Type.String({
    description: "Path to the paper file (.tex, .md, or any text file).",
});

const startLineField = Type.Optional(Type.Number({
    description: "Starting line number (1-indexed). Default: 1.",
    minimum: 1,
}));

const numLinesField = Type.Optional(Type.Number({
    description: `Number of lines to display (default: ${DEFAULT_VIEWPORT_SIZE}, max: ${MAX_VIEWPORT_SIZE}).`,
    minimum: 1,
    maximum: MAX_VIEWPORT_SIZE,
}));

/**
 * Parameter schema for the `paper_browser` tool: a required `file_path`
 * plus optional `start_line` and `num_lines` paging controls.
 */
export const PaperBrowserToolSchema = Type.Object({
    file_path: filePathField,
    start_line: startLineField,
    num_lines: numLinesField,
});
|
|
20
|
+
/**
 * Read a string parameter from a raw tool-argument bag.
 *
 * @param {Record<string, unknown> | null | undefined} params - Raw arguments of the tool call.
 * @param {string} key - Name of the parameter to read.
 * @param {{ required?: boolean }} [opts] - Pass `required: true` to throw when the value is absent.
 * @returns {string | undefined} The value coerced with `String()`, or `undefined`
 *   when it is absent (`undefined`/`null`) and not required.
 * @throws {Error} `Missing required parameter: <key>` when the value is absent
 *   and `opts.required` is truthy.
 */
function readStringParam(params, key, opts) {
    // A missing argument bag is treated like a missing key, so the caller gets
    // the descriptive error below instead of an opaque TypeError.
    const value = params?.[key];
    if (value === undefined || value === null) {
        if (opts?.required) {
            throw new Error(`Missing required parameter: ${key}`);
        }
        return undefined;
    }
    return String(value);
}
|
|
30
|
+
/**
 * Read a numeric parameter from a raw tool-argument bag.
 *
 * @param {Record<string, unknown> | null | undefined} params - Raw arguments of the tool call.
 * @param {string} key - Name of the parameter to read.
 * @param {{ integer?: boolean }} [opts] - Pass `integer: true` to round down with `Math.floor`.
 * @returns {number | undefined} The finite numeric value, or `undefined` when the
 *   value is absent, blank, or does not convert to a finite number.
 */
function readNumberParam(params, key, opts) {
    // A missing argument bag behaves like a missing key.
    const value = params?.[key];
    if (value === undefined || value === null) {
        return undefined;
    }
    // Number("") and Number("  ") are 0; a blank string is "not supplied", not zero.
    if (typeof value === "string" && value.trim() === "") {
        return undefined;
    }
    const num = Number(value);
    // Rejects NaN and +/-Infinity, neither of which is a usable line number or count.
    if (!Number.isFinite(num)) {
        return undefined;
    }
    return opts?.integer ? Math.floor(num) : num;
}
|
|
39
|
+
/**
 * Build the `paper_browser` tool: a paginated reader for large text files.
 *
 * The tool's `execute` handler reads `file_path`, `start_line` and `num_lines`
 * from the raw arguments, validates the path, and returns a numbered viewport
 * of lines plus a hint describing how to request the next/previous page.
 *
 * @returns {object} Tool definition with `label`, `name`, `description`,
 *   `parameters` (the TypeBox schema) and the async `execute` handler.
 */
export function createPaperBrowserTool() {
    return {
        label: "Paper Browser",
        name: "paper_browser",
        description: "Read large paper files (.tex, .md) in paginated chunks. Use this to avoid loading entire multi-thousand-line files into context at once. Returns a viewport of lines with navigation information.",
        parameters: PaperBrowserToolSchema,
        /**
         * @param {string} _toolCallId - Unused call identifier.
         * @param {unknown} rawArgs - Raw tool arguments (`file_path`, `start_line`, `num_lines`).
         * @returns {Promise<object>} Whatever `Result.ok` / `Result.err` produce.
         * @throws {Error} When `file_path` is missing (raised by `readStringParam`).
         */
        execute: async (_toolCallId, rawArgs) => {
            const params = rawArgs;
            const filePath = readStringParam(params, "file_path", { required: true });
            const startLine = Math.max(1, readNumberParam(params, "start_line", { integer: true }) ?? 1);
            // Clamp to [1, MAX_VIEWPORT_SIZE]. Without the lower bound a zero count yields an
            // empty page whose "next page" hint points at the same start line, and a negative
            // count turns the slice end index negative, returning far more than the cap.
            const requestedLines = readNumberParam(params, "num_lines", { integer: true }) ?? DEFAULT_VIEWPORT_SIZE;
            const numLines = Math.min(Math.max(1, requestedLines), MAX_VIEWPORT_SIZE);
            // Validate file exists
            if (!existsSync(filePath)) {
                return Result.err("file_not_found", `File does not exist: ${filePath}`);
            }
            // Check if it's a file (not directory)
            let stats;
            try {
                stats = statSync(filePath);
            }
            catch (error) {
                return Result.err("file_error", `Cannot access file: ${error instanceof Error ? error.message : String(error)}`);
            }
            if (!stats.isFile()) {
                return Result.err("not_a_file", `Path is not a file: ${filePath}`);
            }
            // Read file content
            let content;
            try {
                content = readFileSync(filePath, "utf-8");
            }
            catch (error) {
                return Result.err("read_error", `Failed to read file: ${error instanceof Error ? error.message : String(error)}`);
            }
            // Split into lines, accepting both LF and CRLF so no stray "\r" ends up in the output.
            const lines = content.split(/\r?\n/);
            // A file ending in a newline produces one trailing empty element that is not a
            // real line; drop it so total_lines matches what `cat -n` would number.
            // The length guard keeps an empty file as a single (empty) line.
            if (lines.length > 1 && lines[lines.length - 1] === "") {
                lines.pop();
            }
            const totalLines = lines.length;
            // Validate start line
            if (startLine > totalLines) {
                return Result.err("invalid_range", `Start line ${startLine} exceeds total lines ${totalLines}`);
            }
            // Extract viewport (endLine is 1-indexed and inclusive)
            const endLine = Math.min(startLine + numLines - 1, totalLines);
            const viewportLines = lines.slice(startLine - 1, endLine);
            // Add line numbers (matching cat -n format for consistency with Read tool)
            const numberedLines = viewportLines
                .map((line, idx) => {
                const lineNum = startLine + idx;
                return `${lineNum.toString().padStart(6, " ")}\t${line}`;
            })
                .join("\n");
            // Navigation hints
            const hasMore = endLine < totalLines;
            const hasPrev = startLine > 1;
            let navigationHint = "";
            if (hasMore && hasPrev) {
                navigationHint = `\n\nNavigate: paper_browser({ file_path: "${filePath}", start_line: ${endLine + 1} }) for next page, or start_line: ${Math.max(1, startLine - numLines)} for previous page.`;
            }
            else if (hasMore) {
                navigationHint = `\n\nMore content below. Use: paper_browser({ file_path: "${filePath}", start_line: ${endLine + 1} })`;
            }
            else if (hasPrev) {
                navigationHint = `\n\nEnd of file. Use: paper_browser({ file_path: "${filePath}", start_line: ${Math.max(1, startLine - numLines)} }) for previous page.`;
            }
            else {
                navigationHint = "\n\n[End of file]";
            }
            return Result.ok({
                file_path: filePath,
                total_lines: totalLines,
                viewport: {
                    start_line: startLine,
                    end_line: endLine,
                    num_lines: viewportLines.length,
                },
                content: numberedLines + navigationHint,
                has_more: hasMore,
                has_prev: hasPrev,
            });
        },
    };
}
|
|
121
|
+
//# sourceMappingURL=paper-browser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"paper-browser.js","sourceRoot":"","sources":["../../../src/tools/paper-browser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,mBAAmB,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AAC7D,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAClC,MAAM,iBAAiB,GAAG,GAAG,CAAC;AAE9B,MAAM,CAAC,MAAM,sBAAsB,GAAG,IAAI,CAAC,MAAM,CAAC;IAChD,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC;QACrB,WAAW,EAAE,uDAAuD;KACrE,CAAC;IACF,UAAU,EAAE,IAAI,CAAC,QAAQ,CACvB,IAAI,CAAC,MAAM,CAAC;QACV,WAAW,EAAE,+CAA+C;QAC5D,OAAO,EAAE,CAAC;KACX,CAAC,CACH;IACD,SAAS,EAAE,IAAI,CAAC,QAAQ,CACtB,IAAI,CAAC,MAAM,CAAC;QACV,WAAW,EAAE,wCAAwC,qBAAqB,UAAU,iBAAiB,IAAI;QACzG,OAAO,EAAE,CAAC;QACV,OAAO,EAAE,iBAAiB;KAC3B,CAAC,CACH;CACF,CAAC,CAAC;AAEH,SAAS,eAAe,CAAC,MAA+B,EAAE,GAAW,EAAE,IAA6B;IAClG,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC;IAC1B,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;QAC1C,IAAI,IAAI,EAAE,QAAQ,EAAE,CAAC;YACnB,MAAM,IAAI,KAAK,CAAC,+BAA+B,GAAG,EAAE,CAAC,CAAC;QACxD,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC;AACvB,CAAC;AAED,SAAS,eAAe,CAAC,MAA+B,EAAE,GAAW,EAAE,IAA4B;IACjG,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC;IAC1B,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,IAAI;QAAE,OAAO,SAAS,CAAC;IAC5D,MAAM,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC;IAC1B,IAAI,KAAK,CAAC,GAAG,CAAC;QAAE,OAAO,SAAS,CAAC;IACjC,OAAO,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;AAC/C,CAAC;AAED,MAAM,UAAU,sBAAsB;IACpC,OAAO;QACL,KAAK,EAAE,eAAe;QACtB,IAAI,EAAE,eAAe;QACrB,WAAW,EACT,mMAAmM;QACrM,UAAU,EAAE,sBAAsB;QAClC,OAAO,EAAE,KAAK,EAAE,WAAmB,EAAE,OAAgB,EAAE,EAAE;YACvD,MAAM,MAAM,GAAG,OAAkC,CAAC;YAClD,MAAM,QAAQ,GAAG,eAAe,CAAC,MAAM,EAAE,WAAW,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAE,CAAC;YAC3E,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,eAAe,CAAC,MAAM,EAAE,YAAY,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC;YAC7F,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CACvB,eAAe,CAAC,MAAM,EAAE,WAAW,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,IAAI,qBAAqB,EAChF,iBAAiB,CAClB,CAAC;
YAEF,uBAAuB;YACvB,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC1B,OAAO,MAAM,CAAC,GAAG,CAAC,gBAAgB,EAAE,wBAAwB,QAAQ,EAAE,CAAC,CAAC;YAC1E,CAAC;YAED,uCAAuC;YACvC,IAAI,KAAK,CAAC;YACV,IAAI,CAAC;gBACH,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;YAC7B,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,MAAM,CAAC,GAAG,CAAC,YAAY,EAAE,uBAAuB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;YACnH,CAAC;YAED,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,EAAE,CAAC;gBACpB,OAAO,MAAM,CAAC,GAAG,CAAC,YAAY,EAAE,uBAAuB,QAAQ,EAAE,CAAC,CAAC;YACrE,CAAC;YAED,oBAAoB;YACpB,IAAI,OAAe,CAAC;YACpB,IAAI,CAAC;gBACH,OAAO,GAAG,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;YAC5C,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,MAAM,CAAC,GAAG,CAAC,YAAY,EAAE,wBAAwB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;YACpH,CAAC;YAED,mBAAmB;YACnB,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAClC,MAAM,UAAU,GAAG,KAAK,CAAC,MAAM,CAAC;YAEhC,sBAAsB;YACtB,IAAI,SAAS,GAAG,UAAU,EAAE,CAAC;gBAC3B,OAAO,MAAM,CAAC,GAAG,CACf,eAAe,EACf,cAAc,SAAS,wBAAwB,UAAU,EAAE,CAC5D,CAAC;YACJ,CAAC;YAED,mBAAmB;YACnB,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,GAAG,QAAQ,GAAG,CAAC,EAAE,UAAU,CAAC,CAAC;YAC/D,MAAM,aAAa,GAAG,KAAK,CAAC,KAAK,CAAC,SAAS,GAAG,CAAC,EAAE,OAAO,CAAC,CAAC;YAE1D,2EAA2E;YAC3E,MAAM,aAAa,GAAG,aAAa;iBAChC,GAAG,CAAC,CAAC,IAAI,EAAE,GAAG,EAAE,EAAE;gBACjB,MAAM,OAAO,GAAG,SAAS,GAAG,GAAG,CAAC;gBAChC,OAAO,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,KAAK,IAAI,EAAE,CAAC;YAC3D,CAAC,CAAC;iBACD,IAAI,CAAC,IAAI,CAAC,CAAC;YAEd,mBAAmB;YACnB,MAAM,OAAO,GAAG,OAAO,GAAG,UAAU,CAAC;YACrC,MAAM,OAAO,GAAG,SAAS,GAAG,CAAC,CAAC;YAE9B,IAAI,cAAc,GAAG,EAAE,CAAC;YACxB,IAAI,OAAO,IAAI,OAAO,EAAE,CAAC;gBACvB,cAAc,GAAG,6CAA6C,QAAQ,kBAAkB,OAAO,GAAG,CAAC,qCAAqC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,GAAG,QAAQ,CAAC,qBAAqB,CAAC;YACjM,CAAC;iBAAM,IAAI,OAAO,EAAE,CAAC;gBACnB,cAAc,GAAG,4DAA4D,QAAQ,kBAAkB,OAAO,GAAG,CAAC,KAAK,CAAC;YAC1H,CAAC;iBAAM,IAAI,OAAO,EAAE,CAAC;gBACnB,cAAc,GAAG,qDAAqD,QAAQ,kBAAkB,IAAI,CAAC,GAAG,CAAC,CAAC,
EAAE,SAAS,GAAG,QAAQ,CAAC,wBAAwB,CAAC;YAC5J,CAAC;iBAAM,CAAC;gBACN,cAAc,GAAG,mBAAmB,CAAC;YACvC,CAAC;YAED,OAAO,MAAM,CAAC,EAAE,CAAC;gBACf,SAAS,EAAE,QAAQ;gBACnB,WAAW,EAAE,UAAU;gBACvB,QAAQ,EAAE;oBACR,UAAU,EAAE,SAAS;oBACrB,QAAQ,EAAE,OAAO;oBACjB,SAAS,EAAE,aAAa,CAAC,MAAM;iBAChC;gBACD,OAAO,EAAE,aAAa,GAAG,cAAc;gBACvC,QAAQ,EAAE,OAAO;gBACjB,QAAQ,EAAE,OAAO;aAClB,CAAC,CAAC;QACL,CAAC;KACF,CAAC;AACJ,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "scientify",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.6.1",
|
|
4
4
|
"description": "Scientify - AI-powered research workflow automation for OpenClaw. Includes idea generation, literature review, research pipeline skills, and arxiv tool.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -21,11 +21,15 @@ metadata:
|
|
|
21
21
|
├── survey/
|
|
22
22
|
│ ├── search_terms.json # 检索词列表
|
|
23
23
|
│ └── report.md # 最终报告
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
24
|
+
├── papers/
|
|
25
|
+
│ ├── _downloads/ # 原始下载
|
|
26
|
+
│ ├── _meta/ # 每篇论文的元数据
|
|
27
|
+
│ │ └── {arxiv_id}.json
|
|
28
|
+
│ └── {direction}/ # 整理后的分类
|
|
29
|
+
├── repos/ # 参考代码仓库(Phase 3)
|
|
30
|
+
│ ├── {repo_name_1}/
|
|
31
|
+
│ └── {repo_name_2}/
|
|
32
|
+
└── prepare_res.md # 仓库选择报告(Phase 3)
|
|
29
33
|
```
|
|
30
34
|
|
|
31
35
|
---
|
|
@@ -95,11 +99,84 @@ arxiv_download({
|
|
|
95
99
|
|
|
96
100
|
---
|
|
97
101
|
|
|
98
|
-
### Phase 3:
|
|
102
|
+
### Phase 3: GitHub 代码搜索与参考仓库选择
|
|
99
103
|
|
|
100
|
-
|
|
104
|
+
**目标**:为下游 skill(research-survey、research-plan、research-implement)提供可参考的开源实现。
|
|
101
105
|
|
|
102
|
-
#### 3.1
|
|
106
|
+
#### 3.1 选择高分论文
|
|
107
|
+
|
|
108
|
+
读取 `papers/_meta/` 下得分 ≥4 的论文,选出 **Top 5** 最相关论文。
|
|
109
|
+
|
|
110
|
+
#### 3.2 搜索参考仓库
|
|
111
|
+
|
|
112
|
+
对每篇选中论文,用以下关键词组合搜索 GitHub 仓库:
|
|
113
|
+
- 论文标题 + "code" / "implementation"
|
|
114
|
+
- 核心方法名 + 作者名
|
|
115
|
+
- 论文中提到的数据集名 + 任务名
|
|
116
|
+
|
|
117
|
+
使用 `github_search` 工具:
|
|
118
|
+
```javascript
|
|
119
|
+
// 示例:
|
|
120
|
+
github_search({
|
|
121
|
+
query: "{paper_title} implementation",
|
|
122
|
+
max_results: 10,
|
|
123
|
+
sort: "stars",
|
|
124
|
+
language: "python" // 可选:根据论文领域选择语言
|
|
125
|
+
})
|
|
126
|
+
|
|
127
|
+
// 如果有具体方法名:
|
|
128
|
+
github_search({
|
|
129
|
+
query: "{method_name} {author_last_name}",
|
|
130
|
+
max_results: 5
|
|
131
|
+
})
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
**提示**:如果需要更高的 GitHub API 速率限制(rate limit),请设置环境变量 `GITHUB_TOKEN`。
|
|
135
|
+
|
|
136
|
+
#### 3.3 筛选与 clone
|
|
137
|
+
|
|
138
|
+
对搜索到的仓库,评估:
|
|
139
|
+
- Star 数(建议 >100)
|
|
140
|
+
- 代码质量(有 README、有 requirements.txt、代码结构清晰)
|
|
141
|
+
- 与论文的匹配度(README 中引用了论文 / 实现了论文中的方法)
|
|
142
|
+
|
|
143
|
+
选择 **3-5 个**最相关的仓库,clone 到 `repos/`:
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
mkdir -p "$PROJECT_DIR/repos"
|
|
147
|
+
cd "$PROJECT_DIR/repos"
|
|
148
|
+
git clone --depth 1 <repo_url>
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
#### 3.4 写入选择报告
|
|
152
|
+
|
|
153
|
+
创建 `$PROJECT_DIR/prepare_res.md`:
|
|
154
|
+
|
|
155
|
+
```markdown
|
|
156
|
+
# 参考仓库选择
|
|
157
|
+
|
|
158
|
+
| 仓库 | 对应论文 | Stars | 选择理由 |
|
|
159
|
+
|------|----------|-------|----------|
|
|
160
|
+
| repos/{repo_name} | {paper_title} (arxiv:{id}) | {N} | {理由} |
|
|
161
|
+
|
|
162
|
+
## 各仓库关键文件
|
|
163
|
+
|
|
164
|
+
### {repo_name}
|
|
165
|
+
- **模型实现**: `model/` 或 `models/`
|
|
166
|
+
- **训练脚本**: `train.py` 或 `main.py`
|
|
167
|
+
- **数据加载**: `data/` 或 `dataset.py`
|
|
168
|
+
- **核心文件**: `{关键文件路径}` — {描述}
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
**如果搜不到相关仓库**,在 `prepare_res.md` 中注明"无可用参考仓库",后续 skill 将不依赖代码映射。
|
|
172
|
+
|
|
173
|
+
---
|
|
174
|
+
|
|
175
|
+
### Phase 4: 分类整理
|
|
176
|
+
|
|
177
|
+
所有检索词和代码搜索完毕后:
|
|
178
|
+
|
|
179
|
+
#### 4.1 读取所有元数据
|
|
103
180
|
|
|
104
181
|
```bash
|
|
105
182
|
ls $PROJECT_DIR/papers/_meta/
|
|
@@ -107,11 +184,11 @@ ls $PROJECT_DIR/papers/_meta/
|
|
|
107
184
|
|
|
108
185
|
读取所有 `.json` 文件,汇总论文列表。
|
|
109
186
|
|
|
110
|
-
####
|
|
187
|
+
#### 4.2 聚类分析
|
|
111
188
|
|
|
112
189
|
根据论文的标题、摘要、来源检索词,识别 3-6 个研究方向。
|
|
113
190
|
|
|
114
|
-
####
|
|
191
|
+
#### 4.3 创建文件夹并移动
|
|
115
192
|
|
|
116
193
|
```bash
|
|
117
194
|
mkdir -p "$PROJECT_DIR/papers/data-driven"
|
|
@@ -120,12 +197,13 @@ mv "$PROJECT_DIR/papers/_downloads/2401.12345" "$PROJECT_DIR/papers/data-driven/
|
|
|
120
197
|
|
|
121
198
|
---
|
|
122
199
|
|
|
123
|
-
### Phase
|
|
200
|
+
### Phase 5: 生成报告
|
|
124
201
|
|
|
125
202
|
创建 `survey/report.md`:
|
|
126
203
|
- 调研概要(检索词数、论文数、方向数)
|
|
127
204
|
- 各研究方向概述
|
|
128
205
|
- Top 10 论文
|
|
206
|
+
- **参考仓库摘要**(引用 prepare_res.md)
|
|
129
207
|
- 建议阅读顺序
|
|
130
208
|
|
|
131
209
|
---
|
|
@@ -31,7 +31,8 @@ metadata:
|
|
|
31
31
|
|
|
32
32
|
| File | Content |
|
|
33
33
|
|------|---------|
|
|
34
|
-
| `$W/experiment_res.md` |
|
|
34
|
+
| `$W/experiment_res.md` | 完整实验报告(含 full training + 消融 + 补充实验) |
|
|
35
|
+
| `$W/experiment_analysis/analysis_{N}.md` | 每轮实验分析报告(迭代过程中产生) |
|
|
35
36
|
|
|
36
37
|
---
|
|
37
38
|
|
|
@@ -69,9 +70,55 @@ python run.py # full epochs
|
|
|
69
70
|
python run.py --epochs 2 --ablation no_attention
|
|
70
71
|
```
|
|
71
72
|
|
|
72
|
-
### Step 4:
|
|
73
|
+
### Step 4: 实验分析→补充实验迭代(2 轮)
|
|
73
74
|
|
|
74
|
-
|
|
75
|
+
**⚠️ 这是 Novix Exp Analyzer 机制 — 对已有结果进行分析,提出补充实验,执行后再分析。**
|
|
76
|
+
|
|
77
|
+
循环 **2 次**:
|
|
78
|
+
|
|
79
|
+
#### 4.1 分析当前结果
|
|
80
|
+
|
|
81
|
+
读取当前所有实验结果(full training + 消融),写入分析报告 `$W/experiment_analysis/analysis_{N}.md`:
|
|
82
|
+
|
|
83
|
+
```markdown
|
|
84
|
+
# Experiment Analysis Round {N}
|
|
85
|
+
|
|
86
|
+
## 当前结果摘要
|
|
87
|
+
- Full training: {metrics}
|
|
88
|
+
- 消融实验: {key findings}
|
|
89
|
+
|
|
90
|
+
## 发现的问题或机会
|
|
91
|
+
1. {observation} → 建议: {experiment}
|
|
92
|
+
2. ...
|
|
93
|
+
|
|
94
|
+
## 补充实验计划
|
|
95
|
+
| 实验名称 | 目的 | 修改内容 | 预期结果 |
|
|
96
|
+
|----------|------|----------|----------|
|
|
97
|
+
| {exp_name} | {why} | {what to change} | {expected} |
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
补充实验类型参考(**Novix Exp Analyzer** 的典型输出):
|
|
101
|
+
- **敏感性分析**:关键超参数(lr、hidden_dim、dropout)的影响
|
|
102
|
+
- **可视化**:attention map、embedding 可视化、训练曲线对比图
|
|
103
|
+
- **对比实验**:与 baseline 方法的性能对比
|
|
104
|
+
- **鲁棒性测试**:不同数据规模/噪声水平下的表现
|
|
105
|
+
|
|
106
|
+
#### 4.2 执行补充实验
|
|
107
|
+
|
|
108
|
+
根据分析报告中的计划,修改代码并执行补充实验。**只改实验相关参数/配置,不改核心算法逻辑。**
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
cd $W/project && source .venv/bin/activate
|
|
112
|
+
python run.py --experiment {exp_name}
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
记录结果后,回到 4.1 进行下一轮分析(共 2 轮)。
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
### Step 5: 写入最终实验报告
|
|
120
|
+
|
|
121
|
+
汇总所有实验结果(full training + 消融 + 2 轮补充实验),写入 `$W/experiment_res.md`:
|
|
75
122
|
|
|
76
123
|
```markdown
|
|
77
124
|
# Experiment Report
|
|
@@ -97,8 +144,25 @@ python run.py --epochs 2 --ablation no_attention
|
|
|
97
144
|
| No {component} | 去掉 {X} | {value} | {-/+}% |
|
|
98
145
|
| ... | ... | ... | ... |
|
|
99
146
|
|
|
147
|
+
## Supplementary Experiments
|
|
148
|
+
|
|
149
|
+
### Sensitivity Analysis
|
|
150
|
+
| 超参数 | 值 | val_metric | 备注 |
|
|
151
|
+
|--------|-----|-----------|------|
|
|
152
|
+
| ... | ... | ... | ... |
|
|
153
|
+
|
|
154
|
+
### Comparison with Baselines
|
|
155
|
+
| 方法 | val_metric | 备注 |
|
|
156
|
+
|------|-----------|------|
|
|
157
|
+
| Ours | {value} | — |
|
|
158
|
+
| {Baseline} | {value} | ... |
|
|
159
|
+
|
|
160
|
+
### Visualizations
|
|
161
|
+
- 训练曲线: `$W/project/figures/training_curve.png`
|
|
162
|
+
- {其他可视化}: `$W/project/figures/{name}.png`
|
|
163
|
+
|
|
100
164
|
## Conclusions
|
|
101
|
-
- {key findings}
|
|
165
|
+
- {key findings from all experiments}
|
|
102
166
|
|
|
103
167
|
## Limitations
|
|
104
168
|
- {limitations and future work}
|
|
@@ -112,3 +176,5 @@ python run.py --epochs 2 --ablation no_attention
|
|
|
112
176
|
2. 所有数值必须来自真实执行输出
|
|
113
177
|
3. 消融实验至少做 2 个
|
|
114
178
|
4. 如果 full training 失败(OOM 等),调整 batch_size 后重试,不要跳过
|
|
179
|
+
5. **补充实验迭代必须做 2 轮(Novix Exp Analyzer 机制)** — 第 1 轮针对初始结果,第 2 轮针对补充实验结果
|
|
180
|
+
6. 补充实验不改核心算法,只改实验配置/参数/可视化代码
|
|
@@ -23,7 +23,8 @@ metadata:
|
|
|
23
23
|
| `$W/task.json` | /research-pipeline or user |
|
|
24
24
|
| `$W/survey_res.md` | /research-survey |
|
|
25
25
|
| `$W/notes/paper_*.md` | /research-survey |
|
|
26
|
-
| `$W/repos/`
|
|
26
|
+
| `$W/repos/` | /literature-survey Phase 3 |
|
|
27
|
+
| `$W/prepare_res.md` | /literature-survey Phase 3 |
|
|
27
28
|
|
|
28
29
|
**If `survey_res.md` is missing, STOP:** "需要先运行 /research-survey 完成深度分析"
|
|
29
30
|
|
|
@@ -41,10 +42,22 @@ metadata:
|
|
|
41
42
|
|
|
42
43
|
读取以下文件,理解研究目标和技术方案:
|
|
43
44
|
- `$W/task.json` — 研究目标
|
|
44
|
-
- `$W/survey_res.md` —
|
|
45
|
-
-
|
|
45
|
+
- `$W/survey_res.md` — 技术路线建议、核心公式、**公式→代码映射表**、参考代码架构摘要
|
|
46
|
+
- `$W/prepare_res.md` — 参考仓库列表及关键文件说明
|
|
46
47
|
|
|
47
|
-
### Step 2:
|
|
48
|
+
### Step 2: 参考代码深度分析
|
|
49
|
+
|
|
50
|
+
**⚠️ 强制性步骤(Novix Plan Agent 机制)** — 读参考仓库的实现细节,确保 plan 有具体可行的依据。
|
|
51
|
+
|
|
52
|
+
对 `prepare_res.md` 中的重点仓库:
|
|
53
|
+
1. 读取目录结构和 README
|
|
54
|
+
2. 读取核心模型代码,理解架构实现方式
|
|
55
|
+
3. 读取训练脚本,理解超参数选择和训练技巧
|
|
56
|
+
4. 读取数据加载代码,理解预处理流程
|
|
57
|
+
|
|
58
|
+
为每个组件记录:**参考文件路径 + 关键实现细节**。这些信息将直接填入 plan 的"参考代码"列。
|
|
59
|
+
|
|
60
|
+
### Step 3: 制定四部分计划
|
|
48
61
|
|
|
49
62
|
写入 `$W/plan_res.md`:
|
|
50
63
|
|
|
@@ -100,11 +113,12 @@ metadata:
|
|
|
100
113
|
2. {ablation 2}
|
|
101
114
|
```
|
|
102
115
|
|
|
103
|
-
### Step
|
|
116
|
+
### Step 4: 自检
|
|
104
117
|
|
|
105
118
|
验证计划的完整性:
|
|
106
119
|
- [ ] 每个模型组件都有对应公式
|
|
107
|
-
- [ ]
|
|
120
|
+
- [ ] **每个组件的"参考代码"列已填写**(当 repos/ 存在时)
|
|
121
|
+
- [ ] 数据集有具体获取方式(URL 或下载命令)
|
|
108
122
|
- [ ] Loss 函数有数学定义
|
|
109
123
|
- [ ] 评估指标有明确定义
|
|
110
124
|
- [ ] 训练参数合理(不要 lr=0.1 for Adam)
|
|
@@ -48,7 +48,27 @@ metadata:
|
|
|
48
48
|
- `$W/project/` — 实际代码
|
|
49
49
|
- `$W/ml_res.md` — 执行结果
|
|
50
50
|
|
|
51
|
-
### Step 2:
|
|
51
|
+
### Step 2: 提取原子性概念清单
|
|
52
|
+
|
|
53
|
+
**⚠️ 这是 Novix Judge Agent 的核心机制 — 逐一核对每个原子性学术概念。**
|
|
54
|
+
|
|
55
|
+
从 `$W/survey_res.md` 的"关键公式汇总"和"核心方法对比"中,提取所有需要在代码中实现的**原子性学术概念**(每个公式、每个核心组件都是一个概念)。
|
|
56
|
+
|
|
57
|
+
为每个概念记录:
|
|
58
|
+
- 概念名称(如 "Multi-Head Attention", "Contrastive Loss", "Batch Normalization")
|
|
59
|
+
- 对应公式(LaTeX 格式)
|
|
60
|
+
- 预期代码位置(根据 plan_res.md 推断)
|
|
61
|
+
|
|
62
|
+
示例清单:
|
|
63
|
+
```
|
|
64
|
+
原子性概念清单(从 survey_res.md 提取):
|
|
65
|
+
1. Multi-Head Attention — $Attention(Q,K,V) = softmax(\frac{QK^T}{\sqrt{d_k}})V$ — 预期在 model/attention.py
|
|
66
|
+
2. Layer Normalization — $LN(x) = \gamma \frac{x - \mu}{\sigma} + \beta$ — 预期在 model/layers.py
|
|
67
|
+
3. Residual Connection — $y = F(x) + x$ — 预期贯穿所有模型组件
|
|
68
|
+
...
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
### Step 3: 逐项检查
|
|
52
72
|
|
|
53
73
|
#### A. 数据集真实性审查
|
|
54
74
|
|
|
@@ -62,7 +82,7 @@ metadata:
|
|
|
62
82
|
|
|
63
83
|
| 检查项 | 方法 |
|
|
64
84
|
|--------|------|
|
|
65
|
-
|
|
|
85
|
+
| **原子性概念逐一核对** | **对照 Step 2 的概念清单,逐个检查**:该概念是否在代码中有对应实现?公式翻译是否正确?维度/参数是否一致?每个概念标注 ✓ 或 ✗ 并记录代码位置 |
|
|
66
86
|
| Loss 函数正确 | 对比 plan Training Plan vs `training/loss.py`,验证数学公式是否正确翻译为代码 |
|
|
67
87
|
| 评估指标正确 | 对比 plan Testing Plan vs `testing/`,确认指标计算逻辑无误 |
|
|
68
88
|
| 关键算法未被简化 | 检查 plan 中的核心创新点是否被完整实现,而非用简化/占位逻辑替代 |
|
|
@@ -76,7 +96,7 @@ metadata:
|
|
|
76
96
|
| Loss 合理 | 非 NaN/Inf,有下降趋势(epoch 1 loss > epoch 2 loss) |
|
|
77
97
|
| 数据管道匹配 plan | 对比 plan Dataset Plan vs `data/` 实现,batch size、预处理步骤一致 |
|
|
78
98
|
|
|
79
|
-
### Step
|
|
99
|
+
### Step 4: 写入审查报告
|
|
80
100
|
|
|
81
101
|
写入 `$W/iterations/judge_v1.md`:
|
|
82
102
|
|
|
@@ -92,8 +112,16 @@ metadata:
|
|
|
92
112
|
- [x/✗] Data loading code produces correct shape/dtype/count
|
|
93
113
|
- [x/✗] No undeclared mock data
|
|
94
114
|
|
|
95
|
-
### 算法实现
|
|
96
|
-
|
|
115
|
+
### 算法实现 - 原子性概念核对
|
|
116
|
+
|
|
117
|
+
**逐一核对 Step 2 提取的每个学术概念:**
|
|
118
|
+
|
|
119
|
+
| 概念 | 公式 | 代码位置 | 结果 | 备注 |
|
|
120
|
+
|------|------|----------|------|------|
|
|
121
|
+
| {概念名} | $...$ | `model/xxx.py:L42` | ✓/✗ | {正确实现/公式错误/缺失/简化为占位符} |
|
|
122
|
+
| ... | ... | ... | ... | ... |
|
|
123
|
+
|
|
124
|
+
### 算法实现 - 整体检查
|
|
97
125
|
- [x/✗] Loss function correctly implements the math
|
|
98
126
|
- [x/✗] Key algorithm components fully implemented (no simplified placeholders)
|
|
99
127
|
- [x/✗] Evaluation metrics correct
|
|
@@ -108,21 +136,27 @@ metadata:
|
|
|
108
136
|
2. ...
|
|
109
137
|
```
|
|
110
138
|
|
|
111
|
-
### Step
|
|
139
|
+
### Step 5: 迭代(如果 NEEDS_REVISION)
|
|
140
|
+
|
|
141
|
+
**⚠️ 防偏移机制:每轮迭代都重新读取原始设计文档,确保修改方向正确。**
|
|
112
142
|
|
|
113
143
|
循环最多 3 次:
|
|
114
144
|
|
|
115
145
|
1. 读取 `judge_v{N}.md` 的修改建议
|
|
116
|
-
2.
|
|
117
|
-
|
|
146
|
+
2. **防偏移检查:重新读取** `$W/survey_res.md` 和 `$W/plan_res.md`
|
|
147
|
+
- 对照原始学术设计目标
|
|
148
|
+
- 确保修改不是为了"绕过审查"而偏离学术严谨性
|
|
149
|
+
- 确认修改符合 survey 中的公式定义和 plan 中的设计意图
|
|
150
|
+
3. 修改 `$W/project/` 中的代码
|
|
151
|
+
4. 重新执行:
|
|
118
152
|
```bash
|
|
119
153
|
cd $W/project && source .venv/bin/activate && python run.py --epochs 2
|
|
120
154
|
```
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
155
|
+
5. 读取执行输出,验证修复
|
|
156
|
+
6. **重新执行 Step 2-4**(提取概念清单 → 逐项检查 → 写报告),写入 `judge_v{N+1}.md`
|
|
157
|
+
7. 如果 PASS → 停止;否则继续
|
|
124
158
|
|
|
125
|
-
### Step
|
|
159
|
+
### Step 6: 最终判定
|
|
126
160
|
|
|
127
161
|
3 轮后仍 NEEDS_REVISION → 在最后一份 judge 中列出剩余问题,标记 `verdict: BLOCKED`,等待用户介入。
|
|
128
162
|
|
|
@@ -137,3 +171,5 @@ metadata:
|
|
|
137
171
|
5. **数据集必须验证真实性** —— 实际执行数据加载代码,确认有真实数据(哪怕是小规模);纯随机 tensor 不算
|
|
138
172
|
6. **执行时间必须与算力匹配** —— 2 epoch 训练时间过短(数据量 >1000 却 <2s)说明数据未加载或训练是空循环
|
|
139
173
|
7. **算法实现必须完整** —— plan 中标注的核心创新点必须逐一检查,不能被简化为 `nn.Linear` 占位
|
|
174
|
+
8. **原子性概念逐一核对(Novix Judge 机制)** —— Step 2 提取的每个概念都必须在 judge 报告的表格中有对应行,标注 ✓ 或 ✗
|
|
175
|
+
9. **防偏移(每轮迭代必须重新对齐)** —— Step 5 每轮修改前必须重新读取 survey_res.md 和 plan_res.md,确保不偏离原始设计目标
|
|
@@ -24,9 +24,12 @@ Read and verify these files exist before starting:
|
|
|
24
24
|
|------|--------|
|
|
25
25
|
| `$W/papers/_meta/*.json` | /literature-survey |
|
|
26
26
|
| `$W/papers/_downloads/` or `$W/papers/{direction}/` | /literature-survey |
|
|
27
|
-
| `$W/repos/`
|
|
27
|
+
| `$W/repos/` | /literature-survey Phase 3 |
|
|
28
|
+
| `$W/prepare_res.md` | /literature-survey Phase 3 |
|
|
28
29
|
|
|
29
|
-
**If
|
|
30
|
+
**If papers are missing, STOP:** "需要先运行 /literature-survey 完成论文下载"
|
|
31
|
+
|
|
32
|
+
**Note:** 如果 `prepare_res.md` 中注明"无可用参考仓库",代码映射步骤可跳过,但需在 survey_res.md 中标注。
|
|
30
33
|
|
|
31
34
|
## Output
|
|
32
35
|
|
|
@@ -58,6 +61,15 @@ ls $W/papers/_meta/
|
|
|
58
61
|
- **Model Architecture** section
|
|
59
62
|
- 数学公式定义
|
|
60
63
|
|
|
64
|
+
**对于大型论文**(>2000 行),使用 `paper_browser` 分页阅读:
|
|
65
|
+
```javascript
|
|
66
|
+
// 先读前 100 行找到 section 位置
|
|
67
|
+
paper_browser({ file_path: "$W/papers/{arxiv_id}/{file}.tex", start_line: 1, num_lines: 100 })
|
|
68
|
+
|
|
69
|
+
// 找到 Method section 后,跳转到该位置
|
|
70
|
+
paper_browser({ file_path: "$W/papers/{arxiv_id}/{file}.tex", start_line: 450, num_lines: 150 })
|
|
71
|
+
```
|
|
72
|
+
|
|
61
73
|
如果没有 .tex(只有 PDF),基于 abstract 分析。
|
|
62
74
|
|
|
63
75
|
#### 2.2 提取核心内容
|
|
@@ -67,11 +79,14 @@ ls $W/papers/_meta/
|
|
|
67
79
|
- **数学公式**:至少 1 个关键公式(保留 LaTeX 格式)
|
|
68
80
|
- **创新点**:与同领域其他方法的区别
|
|
69
81
|
|
|
70
|
-
#### 2.3
|
|
82
|
+
#### 2.3 映射到参考代码
|
|
71
83
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
84
|
+
**⚠️ 强制性步骤(当 repos/ 存在时)** — 代码映射是下游 plan 和 implement 的关键输入。
|
|
85
|
+
|
|
86
|
+
读取 `$W/prepare_res.md` 中的仓库列表,对每个公式/核心概念:
|
|
87
|
+
1. 在对应仓库中搜索实现代码(用 grep 关键类名/函数名)
|
|
88
|
+
2. 记录**文件路径、行号、代码片段**
|
|
89
|
+
3. 如果多个仓库有不同实现,记录差异
|
|
75
90
|
|
|
76
91
|
#### 2.4 写入笔记
|
|
77
92
|
|
|
@@ -127,7 +142,19 @@ $$
|
|
|
127
142
|
|
|
128
143
|
## 关键公式汇总
|
|
129
144
|
|
|
130
|
-
|
|
145
|
+
**每个公式附带代码映射,供下游 plan 和 implement 参考。**
|
|
146
|
+
|
|
147
|
+
| 公式名称 | LaTeX | 参考代码 |
|
|
148
|
+
|----------|-------|----------|
|
|
149
|
+
| {name} | $...$ | `repos/{repo}/path.py:L42` |
|
|
150
|
+
| ... | ... | ... |
|
|
151
|
+
|
|
152
|
+
## 参考代码架构摘要
|
|
153
|
+
|
|
154
|
+
基于 repos/ 中的参考实现,推荐的代码结构:
|
|
155
|
+
- 数据加载: 参考 `repos/{repo}/data/`
|
|
156
|
+
- 模型实现: 参考 `repos/{repo}/model/`
|
|
157
|
+
- 训练循环: 参考 `repos/{repo}/train.py`
|
|
131
158
|
```
|
|
132
159
|
|
|
133
160
|
---
|