scientify 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -1
- package/dist/index.js.map +1 -1
- package/dist/src/tools/github-search-tool.d.ts +22 -0
- package/dist/src/tools/github-search-tool.d.ts.map +1 -0
- package/dist/src/tools/github-search-tool.js +134 -0
- package/dist/src/tools/github-search-tool.js.map +1 -0
- package/package.json +1 -1
- package/skills/idea-generation/references/idea-template.md +217 -0
- package/skills/install-scientify/SKILL.md +94 -0
- package/skills/literature-review/references/note-template.md +134 -0
- package/skills/research-pipeline/SKILL.md +1 -1
- package/skills/research-pipeline/references/prompts/implement.md +135 -0
- package/skills/research-pipeline/references/prompts/plan.md +142 -0
- package/skills/research-pipeline/references/prompts/review.md +118 -0
- package/skills/research-pipeline/references/prompts/survey.md +105 -0
- package/skills/research-pipeline/references/workspace-spec.md +81 -0
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,UAAU,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,UAAU,CAAC;AAYlD,MAAM,CAAC,OAAO,UAAU,QAAQ,CAAC,GAAG,EAAE,iBAAiB,QAuDtD"}
|
package/dist/index.js
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import { handleResearchStatus, handlePapers, handleIdeas, handleProjects, handleProjectSwitch, handleProjectDelete, } from "./src/commands.js";
|
|
2
2
|
import { createArxivTool } from "./src/tools/arxiv-tool.js";
|
|
3
|
+
import { createGithubSearchTool } from "./src/tools/github-search-tool.js";
|
|
3
4
|
export default function register(api) {
|
|
4
|
-
// Register
|
|
5
|
+
// Register tools
|
|
5
6
|
api.registerTool(createArxivTool());
|
|
7
|
+
api.registerTool(createGithubSearchTool());
|
|
6
8
|
// Register chat commands (bypass LLM)
|
|
7
9
|
api.registerCommand({
|
|
8
10
|
name: "research-status",
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,oBAAoB,EACpB,YAAY,EACZ,WAAW,EACX,cAAc,EACd,mBAAmB,EACnB,mBAAmB,GACpB,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EAAE,eAAe,EAAE,MAAM,2BAA2B,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,oBAAoB,EACpB,YAAY,EACZ,WAAW,EACX,cAAc,EACd,mBAAmB,EACnB,mBAAmB,GACpB,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EAAE,eAAe,EAAE,MAAM,2BAA2B,CAAC;AAC5D,OAAO,EAAE,sBAAsB,EAAE,MAAM,mCAAmC,CAAC;AAE3E,MAAM,CAAC,OAAO,UAAU,QAAQ,CAAC,GAAsB;IACrD,iBAAiB;IACjB,GAAG,CAAC,YAAY,CAAC,eAAe,EAAE,CAAC,CAAC;IACpC,GAAG,CAAC,YAAY,CAAC,sBAAsB,EAAE,CAAC,CAAC;IAE3C,sCAAsC;IACtC,GAAG,CAAC,eAAe,CAAC;QAClB,IAAI,EAAE,iBAAiB;QACvB,WAAW,EAAE,gEAAgE;QAC7E,WAAW,EAAE,KAAK;QAClB,WAAW,EAAE,KAAK;QAClB,OAAO,EAAE,oBAAoB;KAC9B,CAAC,CAAC;IAEH,GAAG,CAAC,eAAe,CAAC;QAClB,IAAI,EAAE,QAAQ;QACd,WAAW,EAAE,0CAA0C;QACvD,WAAW,EAAE,IAAI;QACjB,WAAW,EAAE,KAAK;QAClB,OAAO,EAAE,YAAY;KACtB,CAAC,CAAC;IAEH,GAAG,CAAC,eAAe,CAAC;QAClB,IAAI,EAAE,OAAO;QACb,WAAW,EAAE,wCAAwC;QACrD,WAAW,EAAE,IAAI;QACjB,WAAW,EAAE,KAAK;QAClB,OAAO,EAAE,WAAW;KACrB,CAAC,CAAC;IAEH,GAAG,CAAC,eAAe,CAAC;QAClB,IAAI,EAAE,UAAU;QAChB,WAAW,EAAE,4BAA4B;QACzC,WAAW,EAAE,KAAK;QAClB,WAAW,EAAE,KAAK;QAClB,OAAO,EAAE,cAAc;KACxB,CAAC,CAAC;IAEH,GAAG,CAAC,eAAe,CAAC;QAClB,IAAI,EAAE,gBAAgB;QACtB,WAAW,EAAE,wCAAwC;QACrD,WAAW,EAAE,IAAI;QACjB,WAAW,EAAE,KAAK;QAClB,OAAO,EAAE,mBAAmB;KAC7B,CAAC,CAAC;IAEH,GAAG,CAAC,eAAe,CAAC;QAClB,IAAI,EAAE,gBAAgB;QACtB,WAAW,EAAE,oDAAoD;QACjE,WAAW,EAAE,IAAI;QACjB,WAAW,EAAE,IAAI,EAAE,yCAAyC;QAC5D,OAAO,EAAE,mBAAmB;KAC7B,CAAC,CAAC;IAEH,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,sCAAsC,CAAC,CAAC;AAC1D,CAAC"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
export declare const GithubSearchToolSchema: import("@sinclair/typebox").TObject<{
|
|
2
|
+
query: import("@sinclair/typebox").TString;
|
|
3
|
+
max_results: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
4
|
+
language: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString>;
|
|
5
|
+
sort: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString>;
|
|
6
|
+
}>;
|
|
7
|
+
export declare function createGithubSearchTool(): {
|
|
8
|
+
label: string;
|
|
9
|
+
name: string;
|
|
10
|
+
description: string;
|
|
11
|
+
parameters: import("@sinclair/typebox").TObject<{
|
|
12
|
+
query: import("@sinclair/typebox").TString;
|
|
13
|
+
max_results: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
14
|
+
language: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString>;
|
|
15
|
+
sort: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString>;
|
|
16
|
+
}>;
|
|
17
|
+
execute: (_toolCallId: string, rawArgs: unknown) => Promise<{
|
|
18
|
+
type: "tool_result";
|
|
19
|
+
content: string;
|
|
20
|
+
}>;
|
|
21
|
+
};
|
|
22
|
+
//# sourceMappingURL=github-search-tool.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"github-search-tool.d.ts","sourceRoot":"","sources":["../../../src/tools/github-search-tool.ts"],"names":[],"mappings":"AAMA,eAAO,MAAM,sBAAsB;;;;;EAqBjC,CAAC;AAyCH,wBAAgB,sBAAsB;;;;;;;;;;2BAOL,MAAM,WAAW,OAAO;;;;EA4FxD"}
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
import { Type } from "@sinclair/typebox";
|
|
2
|
+
const GITHUB_SEARCH_API = "https://api.github.com/search/repositories";
|
|
3
|
+
const DEFAULT_MAX_RESULTS = 10;
|
|
4
|
+
const MAX_RESULTS_LIMIT = 30;
|
|
5
|
+
export const GithubSearchToolSchema = Type.Object({
|
|
6
|
+
query: Type.String({
|
|
7
|
+
description: "Search query for GitHub repositories (e.g. 'graph neural network recommendation').",
|
|
8
|
+
}),
|
|
9
|
+
max_results: Type.Optional(Type.Number({
|
|
10
|
+
description: "Maximum number of results (1-30). Default: 10.",
|
|
11
|
+
minimum: 1,
|
|
12
|
+
maximum: MAX_RESULTS_LIMIT,
|
|
13
|
+
})),
|
|
14
|
+
language: Type.Optional(Type.String({
|
|
15
|
+
description: "Filter by programming language (e.g. 'python', 'typescript').",
|
|
16
|
+
})),
|
|
17
|
+
sort: Type.Optional(Type.String({
|
|
18
|
+
description: 'Sort by: "stars" (default), "updated", or "best-match".',
|
|
19
|
+
})),
|
|
20
|
+
});
|
|
21
|
+
function readStringParam(params, key, opts) {
|
|
22
|
+
const value = params[key];
|
|
23
|
+
if (value === undefined || value === null) {
|
|
24
|
+
if (opts?.required) {
|
|
25
|
+
throw new Error(`Missing required parameter: ${key}`);
|
|
26
|
+
}
|
|
27
|
+
return undefined;
|
|
28
|
+
}
|
|
29
|
+
return String(value);
|
|
30
|
+
}
|
|
31
|
+
function readNumberParam(params, key, opts) {
|
|
32
|
+
const value = params[key];
|
|
33
|
+
if (value === undefined || value === null)
|
|
34
|
+
return undefined;
|
|
35
|
+
const num = Number(value);
|
|
36
|
+
if (isNaN(num))
|
|
37
|
+
return undefined;
|
|
38
|
+
return opts?.integer ? Math.floor(num) : num;
|
|
39
|
+
}
|
|
40
|
+
function resolveGithubToken() {
|
|
41
|
+
return ((process.env.GITHUB_TOKEN ?? "").trim() ||
|
|
42
|
+
(process.env.GH_TOKEN ?? "").trim() ||
|
|
43
|
+
(process.env.GITHUB_AI_TOKEN ?? "").trim() ||
|
|
44
|
+
undefined);
|
|
45
|
+
}
|
|
46
|
+
export function createGithubSearchTool() {
|
|
47
|
+
return {
|
|
48
|
+
label: "GitHub Search",
|
|
49
|
+
name: "github_search",
|
|
50
|
+
description: "Search GitHub repositories by keyword. Returns repo names, descriptions, star counts, and URLs. Reads GITHUB_TOKEN from environment for authentication.",
|
|
51
|
+
parameters: GithubSearchToolSchema,
|
|
52
|
+
execute: async (_toolCallId, rawArgs) => {
|
|
53
|
+
const params = rawArgs;
|
|
54
|
+
const query = readStringParam(params, "query", { required: true });
|
|
55
|
+
const maxResults = Math.min(readNumberParam(params, "max_results", { integer: true }) ?? DEFAULT_MAX_RESULTS, MAX_RESULTS_LIMIT);
|
|
56
|
+
const language = readStringParam(params, "language");
|
|
57
|
+
const rawSort = readStringParam(params, "sort") ?? "stars";
|
|
58
|
+
// Build GitHub search query
|
|
59
|
+
let searchQuery = query;
|
|
60
|
+
if (language) {
|
|
61
|
+
searchQuery += ` language:${language}`;
|
|
62
|
+
}
|
|
63
|
+
const sort = rawSort === "best-match" ? undefined : rawSort;
|
|
64
|
+
const urlParams = new URLSearchParams({
|
|
65
|
+
q: searchQuery,
|
|
66
|
+
per_page: String(maxResults),
|
|
67
|
+
order: "desc",
|
|
68
|
+
});
|
|
69
|
+
if (sort) {
|
|
70
|
+
urlParams.set("sort", sort);
|
|
71
|
+
}
|
|
72
|
+
const url = `${GITHUB_SEARCH_API}?${urlParams.toString()}`;
|
|
73
|
+
const token = resolveGithubToken();
|
|
74
|
+
const headers = {
|
|
75
|
+
Accept: "application/vnd.github+json",
|
|
76
|
+
"User-Agent": "scientify-agent",
|
|
77
|
+
};
|
|
78
|
+
if (token) {
|
|
79
|
+
headers.Authorization = `Bearer ${token}`;
|
|
80
|
+
}
|
|
81
|
+
let response;
|
|
82
|
+
try {
|
|
83
|
+
response = await fetch(url, { headers });
|
|
84
|
+
}
|
|
85
|
+
catch (error) {
|
|
86
|
+
return {
|
|
87
|
+
type: "tool_result",
|
|
88
|
+
content: JSON.stringify({
|
|
89
|
+
error: "network_error",
|
|
90
|
+
message: `Failed to reach GitHub API: ${error instanceof Error ? error.message : String(error)}`,
|
|
91
|
+
}),
|
|
92
|
+
};
|
|
93
|
+
}
|
|
94
|
+
if (response.status === 403) {
|
|
95
|
+
return {
|
|
96
|
+
type: "tool_result",
|
|
97
|
+
content: JSON.stringify({
|
|
98
|
+
error: "rate_limited",
|
|
99
|
+
message: "GitHub API rate limit exceeded. Set GITHUB_TOKEN environment variable for higher limits.",
|
|
100
|
+
}),
|
|
101
|
+
};
|
|
102
|
+
}
|
|
103
|
+
if (!response.ok) {
|
|
104
|
+
return {
|
|
105
|
+
type: "tool_result",
|
|
106
|
+
content: JSON.stringify({
|
|
107
|
+
error: "api_error",
|
|
108
|
+
message: `GitHub API returned ${response.status}: ${response.statusText}`,
|
|
109
|
+
}),
|
|
110
|
+
};
|
|
111
|
+
}
|
|
112
|
+
const data = (await response.json());
|
|
113
|
+
const repos = (data.items ?? []).map((repo) => ({
|
|
114
|
+
name: repo.full_name,
|
|
115
|
+
url: repo.html_url,
|
|
116
|
+
description: repo.description ?? "",
|
|
117
|
+
stars: repo.stargazers_count,
|
|
118
|
+
language: repo.language ?? "unknown",
|
|
119
|
+
updated: repo.updated_at,
|
|
120
|
+
topics: repo.topics ?? [],
|
|
121
|
+
}));
|
|
122
|
+
return {
|
|
123
|
+
type: "tool_result",
|
|
124
|
+
content: JSON.stringify({
|
|
125
|
+
query,
|
|
126
|
+
total_count: data.total_count ?? 0,
|
|
127
|
+
returned: repos.length,
|
|
128
|
+
repos,
|
|
129
|
+
}),
|
|
130
|
+
};
|
|
131
|
+
},
|
|
132
|
+
};
|
|
133
|
+
}
|
|
134
|
+
//# sourceMappingURL=github-search-tool.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"github-search-tool.js","sourceRoot":"","sources":["../../../src/tools/github-search-tool.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,mBAAmB,CAAC;AAEzC,MAAM,iBAAiB,GAAG,4CAA4C,CAAC;AACvE,MAAM,mBAAmB,GAAG,EAAE,CAAC;AAC/B,MAAM,iBAAiB,GAAG,EAAE,CAAC;AAE7B,MAAM,CAAC,MAAM,sBAAsB,GAAG,IAAI,CAAC,MAAM,CAAC;IAChD,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC;QACjB,WAAW,EAAE,oFAAoF;KAClG,CAAC;IACF,WAAW,EAAE,IAAI,CAAC,QAAQ,CACxB,IAAI,CAAC,MAAM,CAAC;QACV,WAAW,EAAE,gDAAgD;QAC7D,OAAO,EAAE,CAAC;QACV,OAAO,EAAE,iBAAiB;KAC3B,CAAC,CACH;IACD,QAAQ,EAAE,IAAI,CAAC,QAAQ,CACrB,IAAI,CAAC,MAAM,CAAC;QACV,WAAW,EAAE,+DAA+D;KAC7E,CAAC,CACH;IACD,IAAI,EAAE,IAAI,CAAC,QAAQ,CACjB,IAAI,CAAC,MAAM,CAAC;QACV,WAAW,EAAE,yDAAyD;KACvE,CAAC,CACH;CACF,CAAC,CAAC;AAaH,SAAS,eAAe,CAAC,MAA+B,EAAE,GAAW,EAAE,IAA6B;IAClG,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC;IAC1B,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;QAC1C,IAAI,IAAI,EAAE,QAAQ,EAAE,CAAC;YACnB,MAAM,IAAI,KAAK,CAAC,+BAA+B,GAAG,EAAE,CAAC,CAAC;QACxD,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC;AACvB,CAAC;AAED,SAAS,eAAe,CAAC,MAA+B,EAAE,GAAW,EAAE,IAA4B;IACjG,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC;IAC1B,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,IAAI;QAAE,OAAO,SAAS,CAAC;IAC5D,MAAM,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC;IAC1B,IAAI,KAAK,CAAC,GAAG,CAAC;QAAE,OAAO,SAAS,CAAC;IACjC,OAAO,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;AAC/C,CAAC;AAED,SAAS,kBAAkB;IACzB,OAAO,CACL,CAAC,OAAO,CAAC,GAAG,CAAC,YAAY,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE;QACvC,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE;QACnC,CAAC,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE;QAC1C,SAAS,CACV,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,sBAAsB;IACpC,OAAO;QACL,KAAK,EAAE,eAAe;QACtB,IAAI,EAAE,eAAe;QACrB,WAAW,EACT,yJAAyJ;QAC3J,UAAU,EAAE,sBAAsB;QAClC,OAAO,EAAE,KAAK,EAAE,WAAmB,EAAE,OAAgB,EAAE,EAAE;YACvD,MAAM,MAAM,GAAG,OAAkC,CAAC;YAClD,MAAM,KAAK,GAAG,eAAe,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAE,CAAC;YACpE,MAAM,UAAU,GAAG
,IAAI,CAAC,GAAG,CACzB,eAAe,CAAC,MAAM,EAAE,aAAa,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,IAAI,mBAAmB,EAChF,iBAAiB,CAClB,CAAC;YACF,MAAM,QAAQ,GAAG,eAAe,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;YACrD,MAAM,OAAO,GAAG,eAAe,CAAC,MAAM,EAAE,MAAM,CAAC,IAAI,OAAO,CAAC;YAE3D,4BAA4B;YAC5B,IAAI,WAAW,GAAG,KAAK,CAAC;YACxB,IAAI,QAAQ,EAAE,CAAC;gBACb,WAAW,IAAI,aAAa,QAAQ,EAAE,CAAC;YACzC,CAAC;YAED,MAAM,IAAI,GAAG,OAAO,KAAK,YAAY,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC;YAC5D,MAAM,SAAS,GAAG,IAAI,eAAe,CAAC;gBACpC,CAAC,EAAE,WAAW;gBACd,QAAQ,EAAE,MAAM,CAAC,UAAU,CAAC;gBAC5B,KAAK,EAAE,MAAM;aACd,CAAC,CAAC;YACH,IAAI,IAAI,EAAE,CAAC;gBACT,SAAS,CAAC,GAAG,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;YAC9B,CAAC;YAED,MAAM,GAAG,GAAG,GAAG,iBAAiB,IAAI,SAAS,CAAC,QAAQ,EAAE,EAAE,CAAC;YAC3D,MAAM,KAAK,GAAG,kBAAkB,EAAE,CAAC;YACnC,MAAM,OAAO,GAA2B;gBACtC,MAAM,EAAE,6BAA6B;gBACrC,YAAY,EAAE,iBAAiB;aAChC,CAAC;YACF,IAAI,KAAK,EAAE,CAAC;gBACV,OAAO,CAAC,aAAa,GAAG,UAAU,KAAK,EAAE,CAAC;YAC5C,CAAC;YAED,IAAI,QAAkB,CAAC;YACvB,IAAI,CAAC;gBACH,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC;YAC3C,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO;oBACL,IAAI,EAAE,aAAsB;oBAC5B,OAAO,EAAE,IAAI,CAAC,SAAS,CAAC;wBACtB,KAAK,EAAE,eAAe;wBACtB,OAAO,EAAE,+BAA+B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE;qBACjG,CAAC;iBACH,CAAC;YACJ,CAAC;YAED,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;gBAC5B,OAAO;oBACL,IAAI,EAAE,aAAsB;oBAC5B,OAAO,EAAE,IAAI,CAAC,SAAS,CAAC;wBACtB,KAAK,EAAE,cAAc;wBACrB,OAAO,EACL,0FAA0F;qBAC7F,CAAC;iBACH,CAAC;YACJ,CAAC;YAED,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACjB,OAAO;oBACL,IAAI,EAAE,aAAsB;oBAC5B,OAAO,EAAE,IAAI,CAAC,SAAS,CAAC;wBACtB,KAAK,EAAE,WAAW;wBAClB,OAAO,EAAE,uBAAuB,QAAQ,CAAC,MAAM,KAAK,QAAQ,CAAC,UAAU,EAAE;qBAC1E,CAAC;iBACH,CAAC;YACJ,CAAC;YAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAmD,CAAC;YACvF,MAAM,KAAK,GAAG,CAAC,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;gBAC9C,IAAI,EAAE,IAAI,CAAC,SAAS;gBACpB,GAAG,EAAE,IAAI,CAAC,QAAQ;gBAClB,WAAW,EAAE,IAAI,CAAC,WAAW,IAAI,EAAE;gBACnC,KAAK,
EAAE,IAAI,CAAC,gBAAgB;gBAC5B,QAAQ,EAAE,IAAI,CAAC,QAAQ,IAAI,SAAS;gBACpC,OAAO,EAAE,IAAI,CAAC,UAAU;gBACxB,MAAM,EAAE,IAAI,CAAC,MAAM,IAAI,EAAE;aAC1B,CAAC,CAAC,CAAC;YAEJ,OAAO;gBACL,IAAI,EAAE,aAAsB;gBAC5B,OAAO,EAAE,IAAI,CAAC,SAAS,CAAC;oBACtB,KAAK;oBACL,WAAW,EAAE,IAAI,CAAC,WAAW,IAAI,CAAC;oBAClC,QAAQ,EAAE,KAAK,CAAC,MAAM;oBACtB,KAAK;iBACN,CAAC;aACH,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "scientify",
|
|
3
|
-
"version": "1.0.1",
|
|
3
|
+
"version": "1.1.0",
|
|
4
4
|
"description": "Scientify - AI-powered research workflow automation for OpenClaw. Includes idea generation, literature review, research pipeline skills, and arxiv tool.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
# Idea Template
|
|
2
|
+
|
|
3
|
+
Use this template for each idea in `~/.openclaw/workspace/ideas/`.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Idea N: [Short Descriptive Title]
|
|
8
|
+
|
|
9
|
+
## One-Line Summary
|
|
10
|
+
|
|
11
|
+
[A single sentence that captures the core innovation. Should be understandable without context.]
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## Challenges Addressed
|
|
16
|
+
|
|
17
|
+
What problems in the current research landscape does this idea solve?
|
|
18
|
+
|
|
19
|
+
- **[Challenge 1]**: [Brief description of the technical limitation]
|
|
20
|
+
- **[Challenge 2]**: [Brief description of unsolved problem]
|
|
21
|
+
- **[Challenge 3]**: [Brief description of key bottleneck]
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Existing Methods & Their Limitations
|
|
26
|
+
|
|
27
|
+
| Method | Paper/Source | Strength | Weakness This Idea Addresses |
|
|
28
|
+
|--------|--------------|----------|------------------------------|
|
|
29
|
+
| [Method A] | [Citation] | [What it does well] | [Limitation] |
|
|
30
|
+
| [Method B] | [Citation] | [What it does well] | [Limitation] |
|
|
31
|
+
| [Method C] | [Citation] | [What it does well] | [Limitation] |
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## Motivation
|
|
36
|
+
|
|
37
|
+
### Why is this problem important?
|
|
38
|
+
|
|
39
|
+
[Explain the significance of solving this problem. Who benefits? What applications are enabled?]
|
|
40
|
+
|
|
41
|
+
### What gap does this fill?
|
|
42
|
+
|
|
43
|
+
[Describe the specific research gap this idea addresses. Reference the limitations above.]
|
|
44
|
+
|
|
45
|
+
### Potential impact
|
|
46
|
+
|
|
47
|
+
[Quantify if possible: "Could improve X metric by Y%" or "Enables new application Z"]
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
## Proposed Method
|
|
52
|
+
|
|
53
|
+
### Core Insight
|
|
54
|
+
|
|
55
|
+
[2-3 sentences describing the key innovation. What is the "aha" moment?]
|
|
56
|
+
|
|
57
|
+
### Technical Approach
|
|
58
|
+
|
|
59
|
+
**Overview:**
|
|
60
|
+
|
|
61
|
+
[1 paragraph high-level description]
|
|
62
|
+
|
|
63
|
+
**Step-by-step methodology:**
|
|
64
|
+
|
|
65
|
+
1. **[Step 1 Name]**: [Description]
|
|
66
|
+
- Input: [what this step takes]
|
|
67
|
+
- Output: [what this step produces]
|
|
68
|
+
- Key operation: [main computation]
|
|
69
|
+
|
|
70
|
+
2. **[Step 2 Name]**: [Description]
|
|
71
|
+
- Input: ...
|
|
72
|
+
- Output: ...
|
|
73
|
+
- Key operation: ...
|
|
74
|
+
|
|
75
|
+
3. **[Step 3 Name]**: [Description]
|
|
76
|
+
- ...
|
|
77
|
+
|
|
78
|
+
### Mathematical Formulation
|
|
79
|
+
|
|
80
|
+
**Problem Setup:**
|
|
81
|
+
|
|
82
|
+
Let $X \in \mathbb{R}^{n \times d}$ denote [description]...
|
|
83
|
+
|
|
84
|
+
**Core Equations:**
|
|
85
|
+
|
|
86
|
+
```latex
|
|
87
|
+
% Main loss function
|
|
88
|
+
\mathcal{L} = \mathcal{L}_{task} + \lambda \mathcal{L}_{reg}
|
|
89
|
+
|
|
90
|
+
% Where task loss is:
|
|
91
|
+
\mathcal{L}_{task} = ...
|
|
92
|
+
|
|
93
|
+
% And regularization term is:
|
|
94
|
+
\mathcal{L}_{reg} = ...
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
**Key derivations (if applicable):**
|
|
98
|
+
|
|
99
|
+
[Show important mathematical steps that justify the approach]
|
|
100
|
+
|
|
101
|
+
### Architecture / Algorithm
|
|
102
|
+
|
|
103
|
+
```
|
|
104
|
+
Algorithm: [Name]
|
|
105
|
+
Input: [inputs]
|
|
106
|
+
Output: [outputs]
|
|
107
|
+
|
|
108
|
+
1. Initialize [parameters]
|
|
109
|
+
2. For each [iteration]:
|
|
110
|
+
a. Compute [something]
|
|
111
|
+
b. Update [something]
|
|
112
|
+
3. Return [result]
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Or for neural architectures:
|
|
116
|
+
|
|
117
|
+
```
|
|
118
|
+
[Input] → [Layer 1] → [Layer 2] → ... → [Output]
|
|
119
|
+
(dim: ...) (dim: ...) (dim: ...)
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
## Expected Advantages
|
|
125
|
+
|
|
126
|
+
Why should this approach work better than existing methods?
|
|
127
|
+
|
|
128
|
+
- **[Advantage 1]**: [Explanation with reasoning]
|
|
129
|
+
- **[Advantage 2]**: [Explanation with reasoning]
|
|
130
|
+
- **[Advantage 3]**: [Explanation with reasoning]
|
|
131
|
+
|
|
132
|
+
**Theoretical justification (if applicable):**
|
|
133
|
+
|
|
134
|
+
[Brief argument for why this should work]
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
## Potential Challenges
|
|
139
|
+
|
|
140
|
+
What could go wrong? How to mitigate?
|
|
141
|
+
|
|
142
|
+
| Challenge | Risk Level | Mitigation Strategy |
|
|
143
|
+
|-----------|------------|---------------------|
|
|
144
|
+
| [Challenge 1] | High/Med/Low | [How to address] |
|
|
145
|
+
| [Challenge 2] | High/Med/Low | [How to address] |
|
|
146
|
+
| [Challenge 3] | High/Med/Low | [How to address] |
|
|
147
|
+
|
|
148
|
+
---
|
|
149
|
+
|
|
150
|
+
## Evaluation Plan
|
|
151
|
+
|
|
152
|
+
### Datasets
|
|
153
|
+
|
|
154
|
+
| Dataset | Task | Size | Why Chosen |
|
|
155
|
+
|---------|------|------|------------|
|
|
156
|
+
| [Dataset 1] | [Task] | [Size] | [Reason] |
|
|
157
|
+
| [Dataset 2] | [Task] | [Size] | [Reason] |
|
|
158
|
+
|
|
159
|
+
### Baselines
|
|
160
|
+
|
|
161
|
+
| Method | Paper | Why Compare |
|
|
162
|
+
|--------|-------|-------------|
|
|
163
|
+
| [Baseline 1] | [Citation] | [Reason] |
|
|
164
|
+
| [Baseline 2] | [Citation] | [Reason] |
|
|
165
|
+
|
|
166
|
+
### Metrics
|
|
167
|
+
|
|
168
|
+
| Metric | Description | Expected Improvement |
|
|
169
|
+
|--------|-------------|---------------------|
|
|
170
|
+
| [Metric 1] | [What it measures] | [X% over baseline] |
|
|
171
|
+
| [Metric 2] | [What it measures] | [Y% over baseline] |
|
|
172
|
+
|
|
173
|
+
### Ablation Studies
|
|
174
|
+
|
|
175
|
+
What components to ablate to understand contribution?
|
|
176
|
+
|
|
177
|
+
1. [Component 1]: Remove/replace to test [hypothesis]
|
|
178
|
+
2. [Component 2]: Remove/replace to test [hypothesis]
|
|
179
|
+
|
|
180
|
+
---
|
|
181
|
+
|
|
182
|
+
## Scores
|
|
183
|
+
|
|
184
|
+
| Criterion | Score (1-5) | Justification |
|
|
185
|
+
|-----------|-------------|---------------|
|
|
186
|
+
| **Novelty** | [X] | [Why this score] |
|
|
187
|
+
| **Feasibility** | [X] | [Why this score] |
|
|
188
|
+
| **Impact** | [X] | [Why this score] |
|
|
189
|
+
| **Total** | [Sum] | |
|
|
190
|
+
|
|
191
|
+
---
|
|
192
|
+
|
|
193
|
+
## Implementation Notes
|
|
194
|
+
|
|
195
|
+
### Recommended Libraries
|
|
196
|
+
|
|
197
|
+
- [Library 1]: For [purpose]
|
|
198
|
+
- [Library 2]: For [purpose]
|
|
199
|
+
|
|
200
|
+
### Reference Code
|
|
201
|
+
|
|
202
|
+
- [Repo 1](URL): [What to reference]
|
|
203
|
+
- [Repo 2](URL): [What to reference]
|
|
204
|
+
|
|
205
|
+
### Estimated Effort
|
|
206
|
+
|
|
207
|
+
- Model implementation: [X days]
|
|
208
|
+
- Data pipeline: [X days]
|
|
209
|
+
- Training & evaluation: [X days]
|
|
210
|
+
- Total: [X days]
|
|
211
|
+
|
|
212
|
+
---
|
|
213
|
+
|
|
214
|
+
## Related Ideas
|
|
215
|
+
|
|
216
|
+
- **Idea [M]**: [How it relates - could be combined? alternative approach?]
|
|
217
|
+
- **Future extension**: [What could come next after this idea]
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: install-scientify
|
|
3
|
+
description: "Install Scientify - AI-powered research workflow automation plugin. Adds skills for idea-generation, literature-review, research-pipeline, arxiv search, and workspace management commands."
|
|
4
|
+
metadata:
|
|
5
|
+
{
|
|
6
|
+
"openclaw":
|
|
7
|
+
{
|
|
8
|
+
"emoji": "🔬",
|
|
9
|
+
"install":
|
|
10
|
+
[
|
|
11
|
+
{
|
|
12
|
+
"id": "scientify",
|
|
13
|
+
"kind": "node",
|
|
14
|
+
"package": "scientify",
|
|
15
|
+
"label": "Install Scientify plugin (npm)",
|
|
16
|
+
},
|
|
17
|
+
],
|
|
18
|
+
},
|
|
19
|
+
}
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
# Install Scientify
|
|
23
|
+
|
|
24
|
+
**Scientify** is an AI-powered research workflow automation plugin for OpenClaw.
|
|
25
|
+
|
|
26
|
+
## What You Get
|
|
27
|
+
|
|
28
|
+
### Skills (LLM-powered)
|
|
29
|
+
|
|
30
|
+
| Skill | Description |
|
|
31
|
+
|-------|-------------|
|
|
32
|
+
| **idea-generation** | Generate innovative research ideas. Searches arXiv/GitHub, downloads papers, analyzes literature, outputs 5 ideas with citations. |
|
|
33
|
+
| **research-pipeline** | End-to-end ML research workflow: idea → literature → survey → plan → implement → review → iterate. |
|
|
34
|
+
| **literature-review** | Generate structured notes and synthesis from collected papers. |
|
|
35
|
+
| **arxiv** | Search arXiv.org for papers and download .tex sources. |
|
|
36
|
+
|
|
37
|
+
### Commands (Direct, no LLM)
|
|
38
|
+
|
|
39
|
+
| Command | Description |
|
|
40
|
+
|---------|-------------|
|
|
41
|
+
| `/research-status` | Show workspace status |
|
|
42
|
+
| `/papers` | List downloaded papers |
|
|
43
|
+
| `/ideas` | List generated ideas |
|
|
44
|
+
| `/projects` | List all projects |
|
|
45
|
+
| `/project-switch <id>` | Switch project |
|
|
46
|
+
| `/project-delete <id>` | Delete project |
|
|
47
|
+
|
|
48
|
+
### Tools
|
|
49
|
+
|
|
50
|
+
- **arxiv** - Search arXiv.org API with keyword search, date filtering, automatic .tex download
- **github_search** - Search GitHub repositories by keyword; returns repo names, descriptions, star counts, and URLs (reads `GITHUB_TOKEN` from the environment for higher rate limits)
|
|
51
|
+
|
|
52
|
+
## Installation
|
|
53
|
+
|
|
54
|
+
Run:
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
npm install -g scientify
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Or let OpenClaw install it automatically when you use this skill.
|
|
61
|
+
|
|
62
|
+
Then add to your OpenClaw config:
|
|
63
|
+
|
|
64
|
+
```json
|
|
65
|
+
{
|
|
66
|
+
"plugins": ["scientify"]
|
|
67
|
+
}
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Usage Examples
|
|
71
|
+
|
|
72
|
+
### Generate Research Ideas
|
|
73
|
+
|
|
74
|
+
```
|
|
75
|
+
帮我调研 "长文档摘要" 领域,生成一些创新的研究想法
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### Daily Literature Tracking
|
|
79
|
+
|
|
80
|
+
```
|
|
81
|
+
帮我设置一个定时任务,每天检查 arXiv 上关于 "transformer efficiency" 的新论文,发到飞书
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### Check Workspace
|
|
85
|
+
|
|
86
|
+
```
|
|
87
|
+
/research-status
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Links
|
|
91
|
+
|
|
92
|
+
- npm: https://www.npmjs.com/package/scientify
|
|
93
|
+
- GitHub: https://github.com/tsingyuai/scientific
|
|
94
|
+
- Author: tsingyuai
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
# Paper Note Template
|
|
2
|
+
|
|
3
|
+
Use this template for each paper in `~/.openclaw/workspace/literature/notes/`.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# [Paper Title]
|
|
8
|
+
|
|
9
|
+
**ArXiv/DOI**: [id]
|
|
10
|
+
**Authors**: [First Author et al.]
|
|
11
|
+
**Year**: [YYYY]
|
|
12
|
+
**Venue**: [Conference/Journal, or "arXiv preprint"]
|
|
13
|
+
**PDF**: [local path or URL]
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## TL;DR
|
|
18
|
+
|
|
19
|
+
[1-2 sentences: What is the main contribution? What problem does it solve?]
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## Problem Statement
|
|
24
|
+
|
|
25
|
+
[What gap or challenge does this paper address? Why is it important?]
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
## Method
|
|
30
|
+
|
|
31
|
+
### Core Idea
|
|
32
|
+
|
|
33
|
+
[The key insight or innovation in 2-3 sentences]
|
|
34
|
+
|
|
35
|
+
### Approach
|
|
36
|
+
|
|
37
|
+
[Step-by-step description of the method]
|
|
38
|
+
|
|
39
|
+
1. [Step 1]
|
|
40
|
+
2. [Step 2]
|
|
41
|
+
3. [Step 3]
|
|
42
|
+
|
|
43
|
+
### Key Equations
|
|
44
|
+
|
|
45
|
+
```latex
|
|
46
|
+
% Main loss function or formula
|
|
47
|
+
L = ...
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Architecture / Algorithm
|
|
51
|
+
|
|
52
|
+
[Describe the model architecture or algorithm structure]
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## Experiments
|
|
57
|
+
|
|
58
|
+
### Setup
|
|
59
|
+
|
|
60
|
+
- **Datasets**: [list with brief description]
|
|
61
|
+
- **Baselines**: [methods compared against]
|
|
62
|
+
- **Metrics**: [evaluation metrics used]
|
|
63
|
+
|
|
64
|
+
### Main Results
|
|
65
|
+
|
|
66
|
+
| Method | Dataset | Metric | Score |
|
|
67
|
+
|--------|---------|--------|-------|
|
|
68
|
+
| Proposed | ... | ... | ... |
|
|
69
|
+
| Baseline 1 | ... | ... | ... |
|
|
70
|
+
|
|
71
|
+
### Ablation Studies
|
|
72
|
+
|
|
73
|
+
[Key findings from ablation experiments]
|
|
74
|
+
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
## Analysis
|
|
78
|
+
|
|
79
|
+
### Strengths
|
|
80
|
+
|
|
81
|
+
- ✅ [Strength 1]
|
|
82
|
+
- ✅ [Strength 2]
|
|
83
|
+
- ✅ [Strength 3]
|
|
84
|
+
|
|
85
|
+
### Weaknesses / Limitations
|
|
86
|
+
|
|
87
|
+
- ⚠️ [Weakness 1]
|
|
88
|
+
- ⚠️ [Weakness 2]
|
|
89
|
+
|
|
90
|
+
### Questions / Unclear Points
|
|
91
|
+
|
|
92
|
+
- ❓ [Question about the paper]
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
## Relevance
|
|
97
|
+
|
|
98
|
+
### To My Research
|
|
99
|
+
|
|
100
|
+
[How does this paper relate to what I'm working on?]
|
|
101
|
+
|
|
102
|
+
### Key Takeaways
|
|
103
|
+
|
|
104
|
+
1. [Actionable insight 1]
|
|
105
|
+
2. [Actionable insight 2]
|
|
106
|
+
|
|
107
|
+
### Ideas to Explore
|
|
108
|
+
|
|
109
|
+
- [ ] [Idea inspired by this paper]
|
|
110
|
+
- [ ] [Potential extension or combination]
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
|
|
114
|
+
## Key Quotes
|
|
115
|
+
|
|
116
|
+
> "[Important quote 1]" — Section X
|
|
117
|
+
|
|
118
|
+
> "[Important quote 2]" — Section Y
|
|
119
|
+
|
|
120
|
+
---
|
|
121
|
+
|
|
122
|
+
## References to Follow
|
|
123
|
+
|
|
124
|
+
- [ ] **[Author et al., Year]**: [Title] — [Why interesting]
|
|
125
|
+
- [ ] **[Author et al., Year]**: [Title] — [Why interesting]
|
|
126
|
+
|
|
127
|
+
---
|
|
128
|
+
|
|
129
|
+
## Meta
|
|
130
|
+
|
|
131
|
+
- **Read on**: [date]
|
|
132
|
+
- **Time spent**: [estimate]
|
|
133
|
+
- **Rating**: [⭐⭐⭐⭐⭐ or 1-5]
|
|
134
|
+
- **Would cite**: [Yes/No/Maybe]
|
|
@@ -193,7 +193,7 @@ Execute:
|
|
|
193
193
|
cd $WORKSPACE/project && pip install -r requirements.txt && python run.py --epochs 2
|
|
194
194
|
```
|
|
195
195
|
|
|
196
|
-
|
|
196
|
+
**Note:** GPU support requires external configuration. For GPU-accelerated training, consider using a dedicated ML environment or cloud instance.
|
|
197
197
|
|
|
198
198
|
**Output:** `$WORKSPACE/project/` (code) + `$WORKSPACE/ml_res.md` (implementation report)
|
|
199
199
|
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
# Implementation Guide
|
|
2
|
+
|
|
3
|
+
You are implementing an ML research project based on the plan in `workspace/plan_res.md`. The goal is a self-contained, runnable codebase in `workspace/project/`.
|
|
4
|
+
|
|
5
|
+
## Core Principles
|
|
6
|
+
|
|
7
|
+
### 1. Self-Contained Project
|
|
8
|
+
|
|
9
|
+
ALL code must reside within `workspace/project/`. No direct imports from `workspace/repos/`. Reference code should be studied, understood, and rewritten to fit the project's architecture.
|
|
10
|
+
|
|
11
|
+
When adapting reference code:
|
|
12
|
+
|
|
13
|
+
- Understand the core logic and algorithm, not just copy the syntax.
|
|
14
|
+
- Rewrite to fit consistent naming conventions and coding style.
|
|
15
|
+
- Document the origin: add a comment like `# Adapted from repos/xyz/model/attention.py`.
|
|
16
|
+
- Include all necessary utility functions — do not rely on external helpers.
|
|
17
|
+
|
|
18
|
+
### 2. Follow the Plan Exactly
|
|
19
|
+
|
|
20
|
+
Implement every component listed in `workspace/plan_res.md`:
|
|
21
|
+
|
|
22
|
+
- Every atomic definition from the Model Plan becomes a class or module.
|
|
23
|
+
- The dataset pipeline matches the Dataset Plan.
|
|
24
|
+
- The loss function matches the Training Plan formula.
|
|
25
|
+
- The evaluation matches the Testing Plan metrics.
|
|
26
|
+
|
|
27
|
+
Do not skip components. Do not substitute simpler alternatives. If a component seems wrong, flag it rather than silently changing it.
|
|
28
|
+
|
|
29
|
+
### 3. Real Data, Not Toy Data
|
|
30
|
+
|
|
31
|
+
Use the actual datasets specified in the plan. If the dataset requires downloading, write the download logic. Never substitute with random data or tiny synthetic datasets for the implementation (the quick validation uses real data with 2 epochs, not fake data).
|
|
32
|
+
|
|
33
|
+
## Project Structure
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
workspace/project/
|
|
37
|
+
model/
|
|
38
|
+
__init__.py
|
|
39
|
+
[component files matching Model Plan]
|
|
40
|
+
data/
|
|
41
|
+
__init__.py
|
|
42
|
+
dataset.py # Dataset class
|
|
43
|
+
loader.py # DataLoader configuration
|
|
44
|
+
preprocess.py # Preprocessing logic
|
|
45
|
+
training/
|
|
46
|
+
__init__.py
|
|
47
|
+
trainer.py # Training loop
|
|
48
|
+
loss.py # Loss functions
|
|
49
|
+
testing/
|
|
50
|
+
__init__.py
|
|
51
|
+
evaluator.py # Evaluation logic
|
|
52
|
+
metrics.py # Metric implementations
|
|
53
|
+
utils/
|
|
54
|
+
__init__.py
|
|
55
|
+
[shared utilities]
|
|
56
|
+
run.py # Main entry point
|
|
57
|
+
requirements.txt # All dependencies with versions
|
|
58
|
+
README.md # Brief description of the project
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Implementation Sequence
|
|
62
|
+
|
|
63
|
+
Follow this order to catch issues early:
|
|
64
|
+
|
|
65
|
+
1. **requirements.txt**: List all dependencies. Pin major versions.
|
|
66
|
+
2. **Data pipeline**: Implement dataset loading first. Verify with a small print test.
|
|
67
|
+
3. **Model architecture**: Implement each component. Verify shapes with dummy input.
|
|
68
|
+
4. **Loss function**: Implement and verify with dummy predictions.
|
|
69
|
+
5. **Training loop**: Wire everything together. Include logging.
|
|
70
|
+
6. **Evaluation**: Implement metrics and test evaluation pipeline.
|
|
71
|
+
7. **run.py**: Main entry point with argument parsing.
|
|
72
|
+
|
|
73
|
+
After each step, run a quick sanity check before moving on.
|
|
74
|
+
|
|
75
|
+
## Quick Validation Run
|
|
76
|
+
|
|
77
|
+
The first run uses 2 epochs only:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
cd workspace/project
|
|
81
|
+
pip install -r requirements.txt
|
|
82
|
+
python run.py --epochs 2
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Expected outcomes:
|
|
86
|
+
- No import errors or missing dependencies.
|
|
87
|
+
- Loss decreases (even slightly) over 2 epochs.
|
|
88
|
+
- No NaN or Inf in loss or gradients.
|
|
89
|
+
- Evaluation metrics produce reasonable (not necessarily good) numbers.
|
|
90
|
+
- Memory usage stays within limits.
|
|
91
|
+
|
|
92
|
+
If the run fails, debug and fix before reporting. Common issues:
|
|
93
|
+
- Shape mismatches: print tensor shapes at each step.
|
|
94
|
+
- OOM: reduce batch size or model size for validation.
|
|
95
|
+
- Data loading errors: verify file paths and formats.
|
|
96
|
+
|
|
97
|
+
## Debugging Tips
|
|
98
|
+
|
|
99
|
+
- Add `print(f"tensor.shape = {tensor.shape}")` at critical points during initial debugging.
|
|
100
|
+
- Use `torch.autograd.set_detect_anomaly(True)` to catch gradient issues.
|
|
101
|
+
- If training is unstable, check learning rate and gradient norms.
|
|
102
|
+
- Remove debugging prints before the final version.
|
|
103
|
+
|
|
104
|
+
## Implementation Report
|
|
105
|
+
|
|
106
|
+
After the quick validation succeeds, write `workspace/ml_res.md`:
|
|
107
|
+
|
|
108
|
+
```markdown
|
|
109
|
+
# Implementation Report
|
|
110
|
+
|
|
111
|
+
## Components Implemented
|
|
112
|
+
- [List each module with brief description]
|
|
113
|
+
|
|
114
|
+
## Quick Validation Results
|
|
115
|
+
- Epochs: 2
|
|
116
|
+
- Final training loss: [value]
|
|
117
|
+
- Validation metrics: [values]
|
|
118
|
+
- Runtime: [time]
|
|
119
|
+
- GPU memory: [peak usage]
|
|
120
|
+
|
|
121
|
+
## Deviations from Plan
|
|
122
|
+
- [Any changes made and why]
|
|
123
|
+
|
|
124
|
+
## Known Issues
|
|
125
|
+
- [Any issues encountered]
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
## Rules
|
|
129
|
+
|
|
130
|
+
1. Never import from `workspace/repos/` — adapt and rewrite instead.
|
|
131
|
+
2. Never use toy/synthetic data — use real datasets from the plan.
|
|
132
|
+
3. Never skip plan components — implement everything or flag the issue.
|
|
133
|
+
4. Always validate with 2 epochs before declaring success.
|
|
134
|
+
5. Always write `requirements.txt` with pinned versions.
|
|
135
|
+
6. If you cannot resolve an issue after 3 attempts, document the problem and ask the user.
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
# Implementation Planning Guide
|
|
2
|
+
|
|
3
|
+
You are creating a detailed, actionable implementation plan. This plan must be specific enough that the implementation step can follow it without ambiguity.
|
|
4
|
+
|
|
5
|
+
## Prerequisites
|
|
6
|
+
|
|
7
|
+
Before planning, you must have:
|
|
8
|
+
|
|
9
|
+
- `workspace/task.json` — the research idea
|
|
10
|
+
- `workspace/survey_res.md` — the literature survey with theory-to-code mappings
|
|
11
|
+
- `workspace/prepare_res.md` — selected reference repositories
|
|
12
|
+
|
|
13
|
+
Read ALL of these files thoroughly before writing the plan. Also browse the reference codebases in `workspace/repos/` to understand their structure and reusable components.
|
|
14
|
+
|
|
15
|
+
## Plan Structure
|
|
16
|
+
|
|
17
|
+
The plan has four mandatory sections. Write all four to `workspace/plan_res.md`.
|
|
18
|
+
|
|
19
|
+
### 1. Dataset Plan
|
|
20
|
+
|
|
21
|
+
```markdown
|
|
22
|
+
## Dataset Plan
|
|
23
|
+
|
|
24
|
+
### Data Source
|
|
25
|
+
- Dataset name and where to obtain it
|
|
26
|
+
- Size and format
|
|
27
|
+
- Any preprocessing requirements
|
|
28
|
+
|
|
29
|
+
### Data Loading Pipeline
|
|
30
|
+
1. **Read**: How to load raw data (file format, library to use)
|
|
31
|
+
2. **Preprocess**: Transformations, tokenization, normalization, feature extraction
|
|
32
|
+
3. **DataLoader**: Batch construction, sampling strategy, collate function
|
|
33
|
+
|
|
34
|
+
### Data Splits
|
|
35
|
+
- Train/validation/test split ratios
|
|
36
|
+
- Any special handling (e.g., cold-start users, temporal splits)
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Refer to the reference codebases for data loading patterns. Cite specific files: "See `repos/xyz/data/loader.py` for the graph construction approach."
|
|
40
|
+
|
|
41
|
+
### 2. Model Plan
|
|
42
|
+
|
|
43
|
+
```markdown
|
|
44
|
+
## Model Plan
|
|
45
|
+
|
|
46
|
+
### Architecture Overview
|
|
47
|
+
[High-level description of the model architecture]
|
|
48
|
+
|
|
49
|
+
### Components (one per atomic definition)
|
|
50
|
+
|
|
51
|
+
#### [Atomic Definition 1]
|
|
52
|
+
- **Math**: $formula$
|
|
53
|
+
- **Implementation**: Class name, input/output shapes, key methods
|
|
54
|
+
- **Reference**: repos/xyz/model/attention.py, class MultiHeadAttention
|
|
55
|
+
- **Adaptation notes**: [What to change from the reference]
|
|
56
|
+
|
|
57
|
+
#### [Atomic Definition 2]
|
|
58
|
+
...
|
|
59
|
+
|
|
60
|
+
### Forward Pass
|
|
61
|
+
[Step-by-step description of the forward pass, connecting all components]
|
|
62
|
+
|
|
63
|
+
### Parameter Count Estimate
|
|
64
|
+
[Rough estimate to sanity-check the architecture]
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Every atomic definition from `survey_res.md` must appear as a component. If a definition doesn't map to a model component, explain why.
|
|
68
|
+
|
|
69
|
+
### 3. Training Plan
|
|
70
|
+
|
|
71
|
+
```markdown
|
|
72
|
+
## Training Plan
|
|
73
|
+
|
|
74
|
+
### Loss Function
|
|
75
|
+
- Formula: $L = ...$
|
|
76
|
+
- Components: [list each loss term and its purpose]
|
|
77
|
+
- Reference: repos/xyz/training/loss.py
|
|
78
|
+
|
|
79
|
+
### Optimizer
|
|
80
|
+
- Algorithm: [Adam, AdamW, SGD, etc.]
|
|
81
|
+
- Learning rate: [value] with [schedule: cosine, step, warmup, etc.]
|
|
82
|
+
- Weight decay: [value]
|
|
83
|
+
|
|
84
|
+
### Hyperparameters
|
|
85
|
+
| Parameter | Value | Rationale |
|
|
86
|
+
|-----------|-------|-----------|
|
|
87
|
+
| Batch size | ... | ... |
|
|
88
|
+
| Hidden dim | ... | ... |
|
|
89
|
+
| Num layers | ... | ... |
|
|
90
|
+
| Dropout | ... | ... |
|
|
91
|
+
|
|
92
|
+
### Training Loop
|
|
93
|
+
1. Forward pass
|
|
94
|
+
2. Compute loss
|
|
95
|
+
3. Backward pass
|
|
96
|
+
4. Gradient clipping (if applicable)
|
|
97
|
+
5. Optimizer step
|
|
98
|
+
6. Logging (every N steps)
|
|
99
|
+
7. Validation (every M epochs)
|
|
100
|
+
|
|
101
|
+
### Quick Validation
|
|
102
|
+
- Epochs: 2 (for initial validation)
|
|
103
|
+
- Expected behavior: loss should decrease, no NaN/Inf
|
|
104
|
+
|
|
105
|
+
### Full Training
|
|
106
|
+
- Epochs: [value from reference papers]
|
|
107
|
+
- Early stopping: [criteria]
|
|
108
|
+
- Checkpoint: save best model by validation metric
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### 4. Testing Plan
|
|
112
|
+
|
|
113
|
+
```markdown
|
|
114
|
+
## Testing Plan
|
|
115
|
+
|
|
116
|
+
### Metrics
|
|
117
|
+
- Primary: [e.g., NDCG@10, BLEU, F1]
|
|
118
|
+
- Secondary: [e.g., Recall@20, Hit Rate]
|
|
119
|
+
- Reference: repos/xyz/evaluation/metrics.py
|
|
120
|
+
|
|
121
|
+
### Evaluation Protocol
|
|
122
|
+
1. Load best checkpoint
|
|
123
|
+
2. Run inference on test set
|
|
124
|
+
3. Compute metrics
|
|
125
|
+
4. Compare against baselines (from papers)
|
|
126
|
+
|
|
127
|
+
### Baselines
|
|
128
|
+
| Method | Metric | Value | Source |
|
|
129
|
+
|--------|--------|-------|--------|
|
|
130
|
+
| ... | ... | ... | [paper] |
|
|
131
|
+
|
|
132
|
+
### Expected Results
|
|
133
|
+
[Reasonable range for the proposed method based on paper claims]
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
## Quality Criteria
|
|
137
|
+
|
|
138
|
+
- Every section must reference specific files from `workspace/repos/` where applicable.
|
|
139
|
+
- Hyperparameter values should come from reference papers or standard practice, not guesses.
|
|
140
|
+
- The plan must be implementable end-to-end without additional research.
|
|
141
|
+
- If any information is missing (e.g., dataset not publicly available), flag it explicitly.
|
|
142
|
+
- Do not over-engineer: plan what's needed for a solid implementation, not a production system.
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# Code Review Guide
|
|
2
|
+
|
|
3
|
+
You are reviewing the implementation in `workspace/project/` to verify it correctly implements the research idea. This is a quality gate before full training.
|
|
4
|
+
|
|
5
|
+
## Review Process
|
|
6
|
+
|
|
7
|
+
### Phase 1: Verify Against Survey
|
|
8
|
+
|
|
9
|
+
Read `workspace/survey_res.md` and extract the list of atomic definitions. For each atomic definition:
|
|
10
|
+
|
|
11
|
+
1. Find the corresponding code in `workspace/project/`.
|
|
12
|
+
2. Compare the code implementation against the mathematical formula.
|
|
13
|
+
3. Check: does the code faithfully implement the math? Watch for:
|
|
14
|
+
- Missing terms in equations.
|
|
15
|
+
- Incorrect tensor operations (e.g., sum vs mean, wrong axis).
|
|
16
|
+
- Hardcoded values where parameters should be used.
|
|
17
|
+
- Simplifications that change the method's behavior.
|
|
18
|
+
|
|
19
|
+
### Phase 2: Verify Against Plan
|
|
20
|
+
|
|
21
|
+
Read `workspace/plan_res.md`. Check each section:
|
|
22
|
+
|
|
23
|
+
**Dataset Plan:**
|
|
24
|
+
- Is the correct dataset used (not a substitute)?
|
|
25
|
+
- Does the preprocessing match the plan?
|
|
26
|
+
- Is the DataLoader configured correctly (batch size, sampling)?
|
|
27
|
+
|
|
28
|
+
**Model Plan:**
|
|
29
|
+
- Are all components present?
|
|
30
|
+
- Does the forward pass match the described architecture?
|
|
31
|
+
- Are parameter counts reasonable?
|
|
32
|
+
|
|
33
|
+
**Training Plan:**
|
|
34
|
+
- Is the loss function correct (all terms present, correct weighting)?
|
|
35
|
+
- Is the optimizer configured as planned?
|
|
36
|
+
- Are hyperparameters matching?
|
|
37
|
+
|
|
38
|
+
**Testing Plan:**
|
|
39
|
+
- Are the correct metrics implemented?
|
|
40
|
+
- Is the evaluation protocol correct?
|
|
41
|
+
|
|
42
|
+
### Phase 3: Code Quality
|
|
43
|
+
|
|
44
|
+
Check for implementation quality issues:
|
|
45
|
+
|
|
46
|
+
- **Not a toy**: The implementation should be substantive, not a simplified stub.
|
|
47
|
+
- **Correctness**: No obvious bugs (wrong indices, missing gradients, data leakage).
|
|
48
|
+
- **Completeness**: All imports resolved, all functions implemented (no `pass` or `TODO`).
|
|
49
|
+
- **Runnability**: The code should run end-to-end without errors.
|
|
50
|
+
|
|
51
|
+
### Phase 4: Cross-Reference with Codebases
|
|
52
|
+
|
|
53
|
+
If needed, compare against reference codebases in `workspace/repos/`:
|
|
54
|
+
|
|
55
|
+
- Are key algorithmic patterns correctly adapted?
|
|
56
|
+
- Were critical implementation details preserved during adaptation?
|
|
57
|
+
|
|
58
|
+
## Review Output
|
|
59
|
+
|
|
60
|
+
Write the review to `workspace/iterations/judge_vN.md` (increment N for each review iteration):
|
|
61
|
+
|
|
62
|
+
```markdown
|
|
63
|
+
# Review vN
|
|
64
|
+
|
|
65
|
+
## Verdict: PASS / NEEDS_REVISION
|
|
66
|
+
|
|
67
|
+
## Atomic Definition Checklist
|
|
68
|
+
|
|
69
|
+
| Definition | Implemented | Correct | Notes |
|
|
70
|
+
|-----------|-------------|---------|-------|
|
|
71
|
+
| [def 1] | Yes/No | Yes/No | [details] |
|
|
72
|
+
| [def 2] | Yes/No | Yes/No | [details] |
|
|
73
|
+
| ... | ... | ... | ... |
|
|
74
|
+
|
|
75
|
+
## Plan Compliance
|
|
76
|
+
|
|
77
|
+
| Section | Status | Notes |
|
|
78
|
+
|---------|--------|-------|
|
|
79
|
+
| Dataset | OK / Issue | ... |
|
|
80
|
+
| Model | OK / Issue | ... |
|
|
81
|
+
| Training | OK / Issue | ... |
|
|
82
|
+
| Testing | OK / Issue | ... |
|
|
83
|
+
|
|
84
|
+
## Issues (if NEEDS_REVISION)
|
|
85
|
+
|
|
86
|
+
### Issue 1: [Title]
|
|
87
|
+
- **Location**: `project/model/attention.py`, line ~42
|
|
88
|
+
- **Problem**: [Description of what's wrong]
|
|
89
|
+
- **Expected**: [What the correct implementation should do]
|
|
90
|
+
- **Suggestion**: [Specific fix]
|
|
91
|
+
|
|
92
|
+
### Issue 2: [Title]
|
|
93
|
+
...
|
|
94
|
+
|
|
95
|
+
## Summary
|
|
96
|
+
[Brief overall assessment: what's good, what needs work]
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## Verdict Criteria
|
|
100
|
+
|
|
101
|
+
**PASS** if:
|
|
102
|
+
- All atomic definitions are implemented and correct.
|
|
103
|
+
- All plan sections are satisfied.
|
|
104
|
+
- Code runs end-to-end with decreasing loss.
|
|
105
|
+
- No critical bugs.
|
|
106
|
+
|
|
107
|
+
**NEEDS_REVISION** if:
|
|
108
|
+
- Any atomic definition is missing or incorrectly implemented.
|
|
109
|
+
- Any plan section has significant gaps.
|
|
110
|
+
- Code has bugs that prevent correct execution.
|
|
111
|
+
- Implementation is a toy/stub rather than a genuine attempt.
|
|
112
|
+
|
|
113
|
+
## Iteration Rules
|
|
114
|
+
|
|
115
|
+
- Each review is independent: re-evaluate everything, not just previously flagged issues.
|
|
116
|
+
- Be specific in suggestions: cite file names, line numbers, and concrete fixes.
|
|
117
|
+
- After 3 iterations of NEEDS_REVISION, escalate to the user with a summary of remaining issues.
|
|
118
|
+
- Never approve code that fails to run or that produces NaN/Inf.
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# Literature Survey Guide
|
|
2
|
+
|
|
3
|
+
You are performing a literature survey to bridge theory and implementation. Your goal is to extract actionable knowledge from papers and codebases that will directly inform the implementation.
|
|
4
|
+
|
|
5
|
+
## Process
|
|
6
|
+
|
|
7
|
+
### Phase 1: Decompose the Idea
|
|
8
|
+
|
|
9
|
+
Before reading any papers, break the research idea (from `task.json`) into **atomic academic definitions**. Each atomic definition must be:
|
|
10
|
+
|
|
11
|
+
- A single, self-contained concept (e.g., "multi-head attention", "contrastive loss", "graph convolution").
|
|
12
|
+
- Grounded in clear mathematical foundations.
|
|
13
|
+
- Implementable as a code module.
|
|
14
|
+
- Traceable to specific papers.
|
|
15
|
+
|
|
16
|
+
Write down your list of atomic definitions before proceeding. This ensures systematic coverage.
|
|
17
|
+
|
|
18
|
+
### Phase 2: Paper Reading (per paper)
|
|
19
|
+
|
|
20
|
+
For each paper in `workspace/papers/`:
|
|
21
|
+
|
|
22
|
+
1. **Skim first**: Read title, abstract, introduction, and conclusion to understand the paper's scope.
|
|
23
|
+
2. **Targeted reading**: For each atomic definition relevant to this paper, find:
|
|
24
|
+
- The formal definition (usually in a "Method" or "Approach" section).
|
|
25
|
+
- Mathematical formulas (equations, loss functions, update rules).
|
|
26
|
+
- Key theoretical claims or properties.
|
|
27
|
+
3. **Search strategically**: Use keyword search within the .tex file. Look for `\begin{equation}`, `\mathcal`, `\text{loss}`, etc.
|
|
28
|
+
|
|
29
|
+
### Phase 3: Code Reading (per repo)
|
|
30
|
+
|
|
31
|
+
For each reference codebase in `workspace/repos/`:
|
|
32
|
+
|
|
33
|
+
1. **Understand structure**: List the directory tree first.
|
|
34
|
+
2. **Find implementations**: Map each mathematical formula to its code implementation:
|
|
35
|
+
- Model architecture classes → model definition formulas
|
|
36
|
+
- Loss function implementations → loss formulas
|
|
37
|
+
- Data processing pipelines → input/output specifications
|
|
38
|
+
3. **Document the mapping**: For each formula, note the exact file, class, and function that implements it.
|
|
39
|
+
|
|
40
|
+
### Phase 4: Write Notes
|
|
41
|
+
|
|
42
|
+
For each paper, create `workspace/notes/paper_NNN.md` with this structure:
|
|
43
|
+
|
|
44
|
+
```markdown
|
|
45
|
+
# [Paper Title]
|
|
46
|
+
ArXiv: [id] | Authors: [first author et al.]
|
|
47
|
+
|
|
48
|
+
## Core Method
|
|
49
|
+
[1-2 paragraph summary of the paper's main contribution]
|
|
50
|
+
|
|
51
|
+
## Atomic Definitions Covered
|
|
52
|
+
[List which atomic definitions from Phase 1 this paper addresses]
|
|
53
|
+
|
|
54
|
+
## Math Formulas
|
|
55
|
+
|
|
56
|
+
### [Definition Name 1]
|
|
57
|
+
$$formula$$
|
|
58
|
+
- Variables: [explain each variable]
|
|
59
|
+
- Context: [when/where this is applied]
|
|
60
|
+
|
|
61
|
+
### [Definition Name 2]
|
|
62
|
+
...
|
|
63
|
+
|
|
64
|
+
## Code Implementation
|
|
65
|
+
|
|
66
|
+
### [Definition Name 1]
|
|
67
|
+
- **Repo**: repos/[name]
|
|
68
|
+
- **File**: path/to/file.py, class ClassName, method method_name
|
|
69
|
+
- **Key logic**:
|
|
70
|
+
```python
|
|
71
|
+
# Excerpt of the most relevant 10-30 lines
|
|
72
|
+
```
|
|
73
|
+
- **Notes**: [any adaptations, simplifications, or deviations from the paper]
|
|
74
|
+
|
|
75
|
+
## Key Insights
|
|
76
|
+
- [Insight 1: anything surprising or important for implementation]
|
|
77
|
+
- [Insight 2: ...]
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Phase 5: Synthesize
|
|
81
|
+
|
|
82
|
+
After all papers are surveyed, write `workspace/survey_res.md`:
|
|
83
|
+
|
|
84
|
+
```markdown
|
|
85
|
+
# Literature Survey: [Research Idea]
|
|
86
|
+
|
|
87
|
+
## Atomic Definitions
|
|
88
|
+
[Complete list with brief descriptions]
|
|
89
|
+
|
|
90
|
+
## Theory-to-Code Mapping
|
|
91
|
+
[For each atomic definition: the formula, which papers define it, which repos implement it, and the recommended implementation approach]
|
|
92
|
+
|
|
93
|
+
## Implementation Recommendations
|
|
94
|
+
[Which reference implementations to adapt, potential pitfalls, suggested architecture]
|
|
95
|
+
|
|
96
|
+
## Open Questions
|
|
97
|
+
[Anything unclear that may need user input]
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## Quality Criteria
|
|
101
|
+
|
|
102
|
+
- Every atomic definition must appear in at least one paper note.
|
|
103
|
+
- Every formula must have a corresponding code reference (or be flagged as "no reference found").
|
|
104
|
+
- Do not skip papers. If a paper is not relevant, note why and move on.
|
|
105
|
+
- Err on the side of extracting more detail rather than less. The implementation step depends on this survey.
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# Workspace Directory Specification
|
|
2
|
+
|
|
3
|
+
All research pipeline artifacts live in a `workspace/` directory. The location is either specified by the user or defaults to the current working directory plus `workspace/`.
|
|
4
|
+
|
|
5
|
+
## Directory Layout
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
workspace/
|
|
9
|
+
task.json # Input: research task definition
|
|
10
|
+
search_results.md # Step 2: arxiv + github search results
|
|
11
|
+
prepare_res.md # Step 3: selected repos and rationale
|
|
12
|
+
survey_res.md # Step 5: synthesized literature survey
|
|
13
|
+
plan_res.md # Step 6: four-part implementation plan
|
|
14
|
+
ml_res.md # Step 7: implementation report
|
|
15
|
+
experiment_res.md # Step 10: full training results
|
|
16
|
+
|
|
17
|
+
repos/ # Step 3: cloned reference repositories
|
|
18
|
+
repo-name-1/
|
|
19
|
+
repo-name-2/
|
|
20
|
+
|
|
21
|
+
papers/ # Step 4: downloaded paper sources
|
|
22
|
+
2401.12345.tex
|
|
23
|
+
2401.67890.tex
|
|
24
|
+
|
|
25
|
+
notes/ # Step 5: per-paper survey notes
|
|
26
|
+
paper_001.md
|
|
27
|
+
paper_002.md
|
|
28
|
+
|
|
29
|
+
iterations/ # Steps 8-9: review history
|
|
30
|
+
judge_v1.md
|
|
31
|
+
judge_v2.md
|
|
32
|
+
|
|
33
|
+
project/ # Step 7: implementation code
|
|
34
|
+
model/
|
|
35
|
+
data/
|
|
36
|
+
training/
|
|
37
|
+
testing/
|
|
38
|
+
utils/
|
|
39
|
+
run.py
|
|
40
|
+
requirements.txt
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Conventions
|
|
44
|
+
|
|
45
|
+
### File Existence = Step Completion
|
|
46
|
+
|
|
47
|
+
The research pipeline uses file existence as the checkpoint mechanism. Before executing any step, check whether its output file already exists. If it does, skip the step.
|
|
48
|
+
|
|
49
|
+
This enables:
|
|
50
|
+
- **Crash recovery**: resume from the last completed step.
|
|
51
|
+
- **Incremental progress**: re-running the pipeline skips completed work.
|
|
52
|
+
- **Transparency**: a human can inspect progress by listing the directory.
|
|
53
|
+
|
|
54
|
+
### Naming Rules
|
|
55
|
+
|
|
56
|
+
- Markdown files (`.md`) for human-readable outputs.
|
|
57
|
+
- JSON files (`.json`) for structured data (task definition).
|
|
58
|
+
- Paper notes use sequential numbering: `paper_001.md`, `paper_002.md`.
|
|
59
|
+
- Review iterations use version numbering: `judge_v1.md`, `judge_v2.md`.
|
|
60
|
+
|
|
61
|
+
### Immutability
|
|
62
|
+
|
|
63
|
+
Once a step's output is written, do NOT modify it unless the user explicitly asks. If a step needs to be re-done, delete the output file first, then re-execute.
|
|
64
|
+
|
|
65
|
+
Exception: `workspace/project/` is mutable during the implement-review-iterate loop (Steps 7-9).
|
|
66
|
+
|
|
67
|
+
### task.json Schema
|
|
68
|
+
|
|
69
|
+
```json
|
|
70
|
+
{
|
|
71
|
+
"idea": "A 1-3 sentence description of the research idea",
|
|
72
|
+
"references": ["2401.12345", "paper title string"],
|
|
73
|
+
"domain": "recommendation systems",
|
|
74
|
+
"date_limit": "2024-01-01"
|
|
75
|
+
}
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
- `idea` (required): The core research idea to implement.
|
|
79
|
+
- `references` (optional): ArXiv IDs or paper titles as starting points.
|
|
80
|
+
- `domain` (optional): Research domain for focused searching.
|
|
81
|
+
- `date_limit` (optional): Only consider papers published after this date.
|