oh-my-openidea 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +130 -0
  3. package/dist/agents/architect.d.ts +2 -0
  4. package/dist/agents/critic.d.ts +2 -0
  5. package/dist/agents/designer.d.ts +2 -0
  6. package/dist/agents/explorer.d.ts +2 -0
  7. package/dist/agents/fixer.d.ts +2 -0
  8. package/dist/agents/index.d.ts +22 -0
  9. package/dist/agents/librarian.d.ts +2 -0
  10. package/dist/agents/oracle.d.ts +2 -0
  11. package/dist/agents/orchestrator.d.ts +15 -0
  12. package/dist/agents/surveyor.d.ts +2 -0
  13. package/dist/agents/synthesizer.d.ts +2 -0
  14. package/dist/agents/writer.d.ts +2 -0
  15. package/dist/background/background-manager.d.ts +175 -0
  16. package/dist/background/index.d.ts +2 -0
  17. package/dist/background/tmux-session-manager.d.ts +63 -0
  18. package/dist/cli/chutes-selection.d.ts +3 -0
  19. package/dist/cli/config-io.d.ts +26 -0
  20. package/dist/cli/config-manager.d.ts +12 -0
  21. package/dist/cli/custom-skills.d.ts +29 -0
  22. package/dist/cli/dynamic-model-selection.d.ts +14 -0
  23. package/dist/cli/external-rankings.d.ts +8 -0
  24. package/dist/cli/index.d.ts +2 -0
  25. package/dist/cli/index.js +17102 -0
  26. package/dist/cli/install.d.ts +2 -0
  27. package/dist/cli/model-key-normalization.d.ts +1 -0
  28. package/dist/cli/model-selection.d.ts +30 -0
  29. package/dist/cli/opencode-models.d.ts +18 -0
  30. package/dist/cli/opencode-selection.d.ts +3 -0
  31. package/dist/cli/paths.d.ts +9 -0
  32. package/dist/cli/precedence-resolver.d.ts +16 -0
  33. package/dist/cli/providers.d.ts +204 -0
  34. package/dist/cli/research.d.ts +2 -0
  35. package/dist/cli/scoring-v2/engine.d.ts +4 -0
  36. package/dist/cli/scoring-v2/features.d.ts +3 -0
  37. package/dist/cli/scoring-v2/index.d.ts +4 -0
  38. package/dist/cli/scoring-v2/types.d.ts +17 -0
  39. package/dist/cli/scoring-v2/weights.d.ts +2 -0
  40. package/dist/cli/skills.d.ts +52 -0
  41. package/dist/cli/system.d.ts +6 -0
  42. package/dist/cli/types.d.ts +140 -0
  43. package/dist/config/agent-mcps.d.ts +15 -0
  44. package/dist/config/constants.d.ts +14 -0
  45. package/dist/config/index.d.ts +4 -0
  46. package/dist/config/loader.d.ts +30 -0
  47. package/dist/config/schema.d.ts +218 -0
  48. package/dist/config/utils.d.ts +10 -0
  49. package/dist/hooks/auto-update-checker/cache.d.ts +6 -0
  50. package/dist/hooks/auto-update-checker/checker.d.ts +28 -0
  51. package/dist/hooks/auto-update-checker/constants.d.ts +11 -0
  52. package/dist/hooks/auto-update-checker/index.d.ts +17 -0
  53. package/dist/hooks/auto-update-checker/types.d.ts +23 -0
  54. package/dist/hooks/delegate-task-retry/guidance.d.ts +2 -0
  55. package/dist/hooks/delegate-task-retry/hook.d.ts +8 -0
  56. package/dist/hooks/delegate-task-retry/index.d.ts +4 -0
  57. package/dist/hooks/delegate-task-retry/patterns.d.ts +11 -0
  58. package/dist/hooks/idea-quality-gate/index.d.ts +22 -0
  59. package/dist/hooks/index.d.ts +7 -0
  60. package/dist/hooks/json-error-recovery/hook.d.ts +18 -0
  61. package/dist/hooks/json-error-recovery/index.d.ts +1 -0
  62. package/dist/hooks/phase-reminder/index.d.ts +25 -0
  63. package/dist/hooks/post-read-nudge/index.d.ts +18 -0
  64. package/dist/index.d.ts +5 -0
  65. package/dist/index.js +29330 -0
  66. package/dist/mcp/arxiv.d.ts +10 -0
  67. package/dist/mcp/context7.d.ts +6 -0
  68. package/dist/mcp/google-scholar.d.ts +10 -0
  69. package/dist/mcp/grep-app.d.ts +6 -0
  70. package/dist/mcp/index.d.ts +6 -0
  71. package/dist/mcp/semantic-scholar.d.ts +12 -0
  72. package/dist/mcp/types.d.ts +12 -0
  73. package/dist/mcp/websearch.d.ts +6 -0
  74. package/dist/research/connectors.d.ts +6 -0
  75. package/dist/research/defaults.d.ts +2 -0
  76. package/dist/research/index.d.ts +4 -0
  77. package/dist/research/pipeline.d.ts +9 -0
  78. package/dist/research/store.d.ts +31 -0
  79. package/dist/research/text.d.ts +8 -0
  80. package/dist/research/types.d.ts +194 -0
  81. package/dist/tools/arxiv/index.d.ts +7 -0
  82. package/dist/tools/ast-grep/cli.d.ts +15 -0
  83. package/dist/tools/ast-grep/constants.d.ts +25 -0
  84. package/dist/tools/ast-grep/downloader.d.ts +5 -0
  85. package/dist/tools/ast-grep/index.d.ts +10 -0
  86. package/dist/tools/ast-grep/tools.d.ts +3 -0
  87. package/dist/tools/ast-grep/types.d.ts +30 -0
  88. package/dist/tools/ast-grep/utils.d.ts +4 -0
  89. package/dist/tools/background.d.ts +13 -0
  90. package/dist/tools/google-scholar/index.d.ts +8 -0
  91. package/dist/tools/grep/cli.d.ts +3 -0
  92. package/dist/tools/grep/constants.d.ts +18 -0
  93. package/dist/tools/grep/downloader.d.ts +3 -0
  94. package/dist/tools/grep/index.d.ts +5 -0
  95. package/dist/tools/grep/tools.d.ts +2 -0
  96. package/dist/tools/grep/types.d.ts +35 -0
  97. package/dist/tools/grep/utils.d.ts +2 -0
  98. package/dist/tools/idea-store/index.d.ts +7 -0
  99. package/dist/tools/index.d.ts +6 -0
  100. package/dist/tools/lsp/client.d.ts +42 -0
  101. package/dist/tools/lsp/config.d.ts +4 -0
  102. package/dist/tools/lsp/constants.d.ts +8 -0
  103. package/dist/tools/lsp/index.d.ts +3 -0
  104. package/dist/tools/lsp/tools.d.ts +5 -0
  105. package/dist/tools/lsp/types.d.ts +28 -0
  106. package/dist/tools/lsp/utils.d.ts +21 -0
  107. package/dist/tools/paper-reader/index.d.ts +8 -0
  108. package/dist/tools/research.d.ts +3 -0
  109. package/dist/tools/semantic-scholar/index.d.ts +12 -0
  110. package/dist/utils/agent-variant.d.ts +47 -0
  111. package/dist/utils/env.d.ts +1 -0
  112. package/dist/utils/index.d.ts +6 -0
  113. package/dist/utils/logger.d.ts +1 -0
  114. package/dist/utils/polling.d.ts +21 -0
  115. package/dist/utils/tmux.d.ts +32 -0
  116. package/dist/utils/zip-extractor.d.ts +1 -0
  117. package/package.json +66 -0
  118. package/src/skills/experiment-design/SKILL.md +153 -0
  119. package/src/skills/hypothesis-generation/SKILL.md +102 -0
  120. package/src/skills/idea-critique/SKILL.md +129 -0
  121. package/src/skills/literature-review/SKILL.md +95 -0
  122. package/src/skills/paper-outline/SKILL.md +137 -0
@@ -0,0 +1,28 @@
1
+ import type { CreateFile, DeleteFile, Diagnostic, DocumentSymbol, Location, LocationLink, Position, Range, RenameFile, SymbolInformation as SymbolInfo, TextDocumentEdit, TextDocumentIdentifier, TextEdit, VersionedTextDocumentIdentifier, WorkspaceEdit } from 'vscode-languageserver-protocol';
2
+ export interface LSPServerConfig {
3
+ id: string;
4
+ command: string[];
5
+ extensions: string[];
6
+ disabled?: boolean;
7
+ env?: Record<string, string>;
8
+ initialization?: Record<string, unknown>;
9
+ }
10
+ export interface ResolvedServer {
11
+ id: string;
12
+ command: string[];
13
+ extensions: string[];
14
+ env?: Record<string, string>;
15
+ initialization?: Record<string, unknown>;
16
+ }
17
+ export type ServerLookupResult = {
18
+ status: 'found';
19
+ server: ResolvedServer;
20
+ } | {
21
+ status: 'not_configured';
22
+ extension: string;
23
+ } | {
24
+ status: 'not_installed';
25
+ server: ResolvedServer;
26
+ installHint: string;
27
+ };
28
+ export type { Position, Range, Location, LocationLink, Diagnostic, TextDocumentIdentifier, VersionedTextDocumentIdentifier, TextEdit, TextDocumentEdit, CreateFile, RenameFile, DeleteFile, WorkspaceEdit, SymbolInfo, DocumentSymbol, };
@@ -0,0 +1,21 @@
1
+ import type { LSPClient } from './client';
2
+ import type { Diagnostic, Location, LocationLink, ServerLookupResult, WorkspaceEdit } from './types';
3
+ export declare function findWorkspaceRoot(filePath: string): string;
4
+ export declare function uriToPath(uri: string): string;
5
+ export declare function formatServerLookupError(result: Exclude<ServerLookupResult, {
6
+ status: 'found';
7
+ }>): string;
8
+ export declare function withLspClient<T>(filePath: string, fn: (client: LSPClient) => Promise<T>): Promise<T>;
9
+ export declare function formatLocation(loc: Location | LocationLink): string;
10
+ export declare function formatSymbolKind(kind: number): string;
11
+ export declare function formatSeverity(severity: number | undefined): string;
12
+ export declare function formatDiagnostic(diag: Diagnostic): string;
13
+ export declare function filterDiagnosticsBySeverity(diagnostics: Diagnostic[], severityFilter?: 'error' | 'warning' | 'information' | 'hint' | 'all'): Diagnostic[];
14
+ export interface ApplyResult {
15
+ success: boolean;
16
+ filesModified: string[];
17
+ totalEdits: number;
18
+ errors: string[];
19
+ }
20
+ export declare function applyWorkspaceEdit(edit: WorkspaceEdit | null): ApplyResult;
21
+ export declare function formatApplyResult(result: ApplyResult): string;
@@ -0,0 +1,8 @@
1
+ import { type ToolDefinition } from '@opencode-ai/plugin';
2
+ /**
3
+ * Retrieve paper content from an arXiv ID or arXiv abstract URL.
4
+ * Uses arXiv's HTML rendering endpoint (available for most papers since ~2023)
5
+ * to extract structured text without requiring a PDF parser.
6
+ * Falls back to fetching the abstract page for older papers.
7
+ */
8
+ export declare const paper_reader: ToolDefinition;
@@ -0,0 +1,3 @@
1
+ import { type ToolDefinition } from '@opencode-ai/plugin';
2
+ import { type ResearchConfig } from '../research';
3
+ export declare function createResearchTools(directory: string, researchConfig?: Partial<ResearchConfig>): Record<string, ToolDefinition>;
@@ -0,0 +1,12 @@
1
+ import { type ToolDefinition } from '@opencode-ai/plugin';
2
+ /**
3
+ * Search Semantic Scholar for academic papers.
4
+ * Returns papers with citation counts, influential citation flags, and AI-generated TLDRs.
5
+ * Free API — no key required for basic access (rate limit: 100 req/5min).
6
+ * Set SEMANTIC_SCHOLAR_API_KEY env var for higher limits.
7
+ */
8
+ export declare const semantic_scholar_search: ToolDefinition;
9
+ /**
10
+ * Retrieve citation and reference graph for a paper via Semantic Scholar.
11
+ */
12
+ export declare const citation_graph: ToolDefinition;
@@ -0,0 +1,47 @@
1
+ import type { PluginConfig } from '../config';
2
+ /**
3
+ * Normalizes an agent name by trimming whitespace and removing the optional @ prefix.
4
+ *
5
+ * @param agentName - The agent name to normalize (e.g., "@oracle" or "oracle")
6
+ * @returns The normalized agent name without @ prefix and trimmed of whitespace
7
+ *
8
+ * @example
9
+ * normalizeAgentName("@oracle") // returns "oracle"
10
+ * normalizeAgentName(" explore ") // returns "explore"
11
+ */
12
+ export declare function normalizeAgentName(agentName: string): string;
13
+ /**
14
+ * Resolves the variant configuration for a specific agent.
15
+ *
16
+ * Looks up the agent's variant in the plugin configuration. Returns undefined if:
17
+ * - No config is provided
18
+ * - The agent has no variant configured
19
+ * - The variant is not a string
20
+ * - The variant is empty or whitespace-only
21
+ *
22
+ * @param config - The plugin configuration object
23
+ * @param agentName - The name of the agent (with or without @ prefix)
24
+ * @returns The trimmed variant string, or undefined if no valid variant is found
25
+ *
26
+ * @example
27
+ * resolveAgentVariant(config, "@oracle") // returns "high" if configured
28
+ */
29
+ export declare function resolveAgentVariant(config: PluginConfig | undefined, agentName: string): string | undefined;
30
+ /**
31
+ * Applies a variant to a request body if the body doesn't already have one.
32
+ *
33
+ * This function will NOT override an existing variant in the body. If no variant
34
+ * is provided or the body already has a variant, the original body is returned.
35
+ *
36
+ * @template T - The type of the body object, must have an optional variant property
37
+ * @param variant - The variant string to apply (or undefined)
38
+ * @param body - The request body object
39
+ * @returns The body with the variant applied (new object) or the original body unchanged
40
+ *
41
+ * @example
42
+ * applyAgentVariant("high", { agent: "oracle" }) // returns { agent: "oracle", variant: "high" }
43
+ * applyAgentVariant("high", { agent: "oracle", variant: "low" }) // returns original body with variant: "low"
44
+ */
45
+ export declare function applyAgentVariant<T extends {
46
+ variant?: string;
47
+ }>(variant: string | undefined, body: T): T;
@@ -0,0 +1 @@
1
+ export declare function getEnv(name: string): string | undefined;
@@ -0,0 +1,6 @@
1
+ export * from './agent-variant';
2
+ export * from './env';
3
+ export { log } from './logger';
4
+ export * from './polling';
5
+ export * from './tmux';
6
+ export { extractZip } from './zip-extractor';
@@ -0,0 +1 @@
1
+ export declare function log(message: string, data?: unknown): void;
@@ -0,0 +1,21 @@
1
+ export interface PollOptions {
2
+ pollInterval?: number;
3
+ maxPollTime?: number;
4
+ stableThreshold?: number;
5
+ signal?: AbortSignal;
6
+ }
7
+ export interface PollResult<T> {
8
+ success: boolean;
9
+ data?: T;
10
+ timedOut?: boolean;
11
+ aborted?: boolean;
12
+ }
13
+ /**
14
+ * Generic polling utility that waits for a condition to be met.
15
+ * Returns when the condition is satisfied or timeout/abort occurs.
16
+ */
17
+ export declare function pollUntilStable<T>(fetchFn: () => Promise<T>, isStable: (current: T, previous: T | null, stableCount: number) => boolean, opts?: PollOptions): Promise<PollResult<T>>;
18
+ /**
19
+ * Simple delay utility
20
+ */
21
+ export declare function delay(ms: number): Promise<void>;
@@ -0,0 +1,32 @@
1
+ import type { TmuxConfig } from '../config/schema';
2
+ /**
3
+ * Reset the server availability cache (useful when server might have started)
4
+ */
5
+ export declare function resetServerCheck(): void;
6
+ /**
7
+ * Get cached tmux path, initializing if needed
8
+ */
9
+ export declare function getTmuxPath(): Promise<string | null>;
10
+ /**
11
+ * Check if we're running inside tmux
12
+ */
13
+ export declare function isInsideTmux(): boolean;
14
+ export interface SpawnPaneResult {
15
+ success: boolean;
16
+ paneId?: string;
17
+ }
18
+ /**
19
+ * Spawn a new tmux pane running `opencode attach <serverUrl> --session <sessionId>`
20
+ * This connects the new TUI to the existing server so it receives streaming updates.
21
+ * After spawning, applies the configured layout to auto-rebalance all panes.
22
+ * Returns the pane ID so it can be closed later.
23
+ */
24
+ export declare function spawnTmuxPane(sessionId: string, description: string, config: TmuxConfig, serverUrl: string): Promise<SpawnPaneResult>;
25
+ /**
26
+ * Close a tmux pane by its ID and reapply layout to rebalance remaining panes
27
+ */
28
+ export declare function closeTmuxPane(paneId: string): Promise<boolean>;
29
+ /**
30
+ * Start background check for tmux availability
31
+ */
32
+ export declare function startTmuxCheck(): void;
@@ -0,0 +1 @@
1
+ export declare function extractZip(archivePath: string, destDir: string): Promise<void>;
package/package.json ADDED
@@ -0,0 +1,66 @@
1
+ {
2
+ "name": "oh-my-openidea",
3
+ "version": "0.1.0",
4
+ "description": "Multi-agent framework for CS/ML research idea generation — literature survey, hypothesis generation, novelty checking, methodology design, and paper outlining",
5
+ "main": "dist/index.js",
6
+ "types": "dist/index.d.ts",
7
+ "bin": {
8
+ "oh-my-openidea": "./dist/cli/index.js"
9
+ },
10
+ "type": "module",
11
+ "license": "MIT",
12
+ "keywords": [
13
+ "opencode",
14
+ "opencode-plugin",
15
+ "ai",
16
+ "agents",
17
+ "research",
18
+ "llm",
19
+ "arxiv",
20
+ "semantic-scholar",
21
+ "research-ideas",
22
+ "hypothesis-generation"
23
+ ],
24
+ "repository": {
25
+ "type": "git",
26
+ "url": "https://github.com/xiaozg/oh-my-openidea"
27
+ },
28
+ "bugs": {
29
+ "url": "https://github.com/xiaozg/oh-my-openidea/issues"
30
+ },
31
+ "homepage": "https://github.com/xiaozg/oh-my-openidea#readme",
32
+ "files": [
33
+ "dist",
34
+ "src/skills",
35
+ "README.md",
36
+ "LICENSE"
37
+ ],
38
+ "scripts": {
39
+ "build": "bun build src/index.ts --outdir dist --target bun --format esm && bun build src/cli/index.ts --outdir dist/cli --target bun --format esm && tsc --emitDeclarationOnly",
40
+ "typecheck": "tsc --noEmit",
41
+ "test": "bun test",
42
+ "lint": "biome lint .",
43
+ "format": "biome format . --write",
44
+ "check": "biome check --write .",
45
+ "check:ci": "biome check .",
46
+ "dev": "bun run build && opencode",
47
+ "prepublishOnly": "bun run build",
48
+ "release:patch": "npm version patch && git push --follow-tags && npm publish",
49
+ "release:minor": "npm version minor && git push --follow-tags && npm publish",
50
+ "release:major": "npm version major && git push --follow-tags && npm publish"
51
+ },
52
+ "dependencies": {
53
+ "@modelcontextprotocol/sdk": "^1.26.0",
54
+ "@opencode-ai/plugin": "^1.2.6",
55
+ "@opencode-ai/sdk": "^1.2.6",
56
+ "zod": "^4.3.6"
57
+ },
58
+ "devDependencies": {
59
+ "@biomejs/biome": "2.4.2",
60
+ "bun-types": "1.3.9",
61
+ "typescript": "^5.9.3"
62
+ },
63
+ "trustedDependencies": [
64
+ "@ast-grep/cli"
65
+ ]
66
+ }
@@ -0,0 +1,153 @@
1
+ ```skill
2
+ ---
3
+ name: experiment-design
4
+ description: Design a complete, publication-ready experimental plan for a validated research idea — including method, baselines, datasets, evaluation metrics, ablations, and compute estimate.
5
+ ---
6
+ ```
7
+
8
+ # Experiment Design Skill
9
+
10
+ This skill produces a **concrete, executable experimental plan** for a validated research idea. It is designed to run after `idea-critique` confirms an idea is worth pursuing (Overall ≥ 6/10).
11
+
12
+ ## When to Use
13
+
14
+ Use this skill when:
15
+ - A research idea has passed the critique quality gate
16
+ - You need to specify exactly how to run experiments before writing any code
17
+ - Preparing a research proposal, grant application, or lab presentation
18
+ - Collaborating with team members who need a precise implementation spec
19
+
20
+ ## Prerequisites
21
+
22
+ - Validated research idea (from `idea-critique` skill, status: `validated`)
23
+ - Literature landscape map (to know what baselines exist)
24
+ - Rough sense of compute available
25
+
26
+ ## Workflow
27
+
28
+ ### Step 1: Baseline Identification (Surveyor + Architect)
29
+
30
+ Launch **@surveyor** to find the current state-of-the-art on the target benchmark(s).
31
+ Specifically:
32
+ - Find the top-performing methods from the last 2 years
33
+ - Retrieve their code/checkpoint links
34
+ - Identify what exact evaluation protocol they use
35
+
36
+ Then launch **@architect** with:
37
+ - The validated idea description
38
+ - The baseline landscape from Surveyor
39
+
40
+ ### Step 2: Dataset Selection (Architect)
41
+
42
+ For each candidate dataset, **@architect** evaluates:
43
+ 1. **Relevance**: Does it directly test the core claim?
44
+ 2. **Accessibility**: Is it publicly available? License?
45
+ 3. **Standard**: Is it used in related work (essential for comparison)?
46
+ 4. **Size**: Is it large enough to show statistical significance?
47
+
48
+ Select:
49
+ - 1–2 **primary datasets** (the ones the paper centers on)
50
+ - 1–2 **secondary datasets** (for generalization / ablation)
51
+
52
+ ### Step 3: Metric Definition (Architect)
53
+
54
+ Define evaluation metrics:
55
+ - **Primary metric**: Used for the main table headline (must match the primary metric used in related work)
56
+ - **Secondary metrics**: Complementary signals
57
+ - **Statistical considerations**: Significance test, number of seeds, confidence intervals
58
+
59
+ **Anti-patterns to avoid**:
60
+ - Reporting only metrics where the method looks best
61
+ - Using a non-standard metric without justification
62
+ - No statistical significance testing for close results
63
+
64
+ ### Step 4: Ablation Plan (Architect)
65
+
66
+ Design ablations that prove each claimed contribution independently.
67
+
68
+ Rule: **Every claim in the paper must be supported by at least one ablation.**
69
+
70
+ Template:
71
+ ```
72
+ Ablation plan:
73
+ 1. Full model (proposed method) — baseline for ablation comparison
74
+ 2. Remove [component X] — tests that X is necessary
75
+ 3. Replace [component X] with [standard alternative] — tests that X is better than the obvious alternative
76
+ 4. Replace [component Y] with [simpler version] — tests sensitivity
77
+ 5. [Domain generalization]: apply to [different dataset] — tests claim of general applicability
78
+ ```
79
+
80
+ ### Step 5: Compute Estimate (Architect)
81
+
82
+ For each experiment:
83
+ - Model size and architecture
84
+ - Training/inference hardware requirements
85
+ - Estimated wall-clock time
86
+ - Total GPU-hours
87
+
88
+ Flag if total compute exceeds a reasonable academic budget (>1000 GPU-hours on A100).
89
+
90
+ ### Step 6: Implementation Notes (Architect)
91
+
92
+ Key decisions to document:
93
+ - Framework (PyTorch version, key libraries)
94
+ - Any non-obvious hyperparameter choices
95
+ - Potential implementation traps (numerical stability, memory optimization)
96
+ - Code structure recommendations
97
+
98
+ ### Step 7: Output and Save
99
+
100
+ ```markdown
101
+ # Experimental Plan: [Idea Title]
102
+
103
+ ## Method Overview
104
+ [2–3 sentences describing the approach precisely enough to implement]
105
+
106
+ ## Baselines
107
+ | Method | Paper | Code | Why essential |
108
+ |--------|-------|------|---------------|
109
+ | ... | ... | ... | ... |
110
+
111
+ ## Datasets
112
+ | Dataset | Task | Size | Primary/Secondary | License | URL |
113
+ |---------|------|------|-------------------|---------|-----|
114
+ | ... | ... | ... | ... | ... | ... |
115
+
116
+ ## Evaluation Protocol
117
+ - Primary metric: [name] — [definition and justification]
118
+ - Secondary metrics: [list]
119
+ - Statistical test: [test name], [min N seeds], [CI level]
120
+
121
+ ## Ablation Plan
122
+ 1. ...
123
+ 2. ...
124
+ (see Step 4: Ablation Plan)
125
+
126
+ ## Compute Estimate
127
+ | Experiment | Hardware | Time | GPU-Hours |
128
+ |------------|----------|------|-----------|
129
+ | Main training | 4× A100 | Xh | X |
130
+ | Ablation | 2× A100 | Xh each | X total |
131
+ | Total | | | X |
132
+
133
+ ## Implementation Notes
134
+ - Framework: PyTorch X.X + HuggingFace Transformers X.X
135
+ - Key dependencies: [list]
136
+ - Critical decisions: [list non-obvious choices]
137
+
138
+ ## Expected Results
139
+ Research hypothesis: [specific predicted improvement]
140
+ Confidence: [High / Medium / Low]
141
+ ```
142
+
143
+ Update the idea in `idea_store` (action: `update`):
144
+ - Store the experimental plan in the `methodology` field
145
+ - Update status to `in_progress`
146
+
147
+ ## Quality Criteria
148
+
149
+ - At least 3 baselines (including the most recent SOTA)
150
+ - At least 3 ablations covering all claimed contributions
151
+ - Primary dataset must be the same as related work (for fair comparison)
152
+ - Compute estimate must be realistic (not "1 GPU, 1 hour")
153
+ - Implementation notes must mention the training framework and key libraries
@@ -0,0 +1,102 @@
1
+ ```skill
2
+ ---
3
+ name: hypothesis-generation
4
+ description: Generate, score, and rank novel CS/ML research hypotheses based on identified literature gaps. Produces 3–5 concrete, differentiated ideas with preliminary novelty assessments.
5
+ ---
6
+ ```
7
+
8
+ # Hypothesis Generation Skill
9
+
10
+ This skill takes a **literature landscape map** (from the literature-review skill or a direct description of the field) and generates **concrete, original research hypotheses** ranked by novelty and feasibility.
11
+
12
+ ## When to Use
13
+
14
+ Use this skill when:
15
+ - You have a gap analysis and want to convert it into testable research ideas
16
+ - Brainstorming multiple research directions to compare before committing
17
+ - Generating variations of an initial idea to find the strongest formulation
18
+
19
+ ## Prerequisites
20
+
21
+ You should have:
22
+ - A research topic or field description
23
+ - Ideally: output from the `literature-review` skill (landscape map + gaps)
24
+
25
+ If no prior literature review is available, the orchestrator will run a quick survey first.
26
+
27
+ ## Workflow
28
+
29
+ ### Step 1: Ingest Landscape
30
+
31
+ Parse the provided landscape map or gap analysis. Extract:
32
+ - The 3 most promising gaps (evidence-backed, non-trivial)
33
+ - The dominant methodology paradigm in the field
34
+ - Key open benchmarks or evaluation axes
35
+
36
+ ### Step 2: Generate Hypotheses (Self — Orchestrator)
37
+
38
+ For each gap, generate 1–2 research hypotheses using this template:
39
+
40
+ ```
41
+ **Hypothesis [N]**: [One-sentence claim]
42
+ - **Problem**: [What is missing or broken]
43
+ - **Proposed approach**: [Core idea in 2–3 sentences]
44
+ - **Motivation**: [Why this might work — theoretical or empirical grounding]
45
+ - **Distinguishing factor**: [What makes this different from existing work]
46
+ - **Preliminary novelty confidence**: [High / Medium / Low] — [Reason]
47
+ ```
48
+
49
+ **Diversity constraint**: Hypotheses must differ in at least one of: (a) methodology family (e.g., don't generate 5 fine-tuning ideas), (b) problem framing, (c) target application domain.
50
+
51
+ ### Step 3: Quick Deduplication (Surveyor)
52
+
53
+ For each hypothesis, run a targeted arXiv search to check if an obvious direct solution exists.
54
+ Use `arxiv_search` with the core claim as the query. This is a quick check — deep validation happens in the `idea-critique` skill.
55
+
56
+ Example: For "hypothesis: sparse attention patterns can be learned from dense attention", search:
57
+ `arxiv_search(query="learning sparse attention patterns", categories=["cs.LG"], max_results=10)`
58
+
59
+ ### Step 4: Score and Rank
60
+
61
+ Score each surviving hypothesis on:
62
+ | Criterion | Weight | Description |
63
+ |-----------|--------|-------------|
64
+ | Novelty | 40% | How differentiated from existing work |
65
+ | Feasibility | 30% | Can be done in 6–12 months with standard compute |
66
+ | Significance | 20% | Would matter if it worked |
67
+ | Clarity | 10% | Is the claim testable and well-defined |
68
+
69
+ Rank by weighted score. Present top 3–5.
70
+
71
+ ### Step 5: Output
72
+
73
+ ```markdown
74
+ # Research Hypotheses: [Topic]
75
+
76
+ Generated: [date]
77
+ Based on: [literature source]
78
+
79
+ ## Ranked Hypotheses
80
+
81
+ ### 🥇 Hypothesis 1: [Title]
82
+ **Score**: Novelty X/10 | Feasibility X/10 | Significance X/10 | Clarity X/10 | Overall X/10
83
+ **Problem**: ...
84
+ **Approach**: ...
85
+ **Why novel**: ...
86
+ **Key risk**: ...
87
+ **Recommended next step**: Run `idea-critique` skill to validate novelty
88
+
89
+ ### 🥈 Hypothesis 2: [Title]
90
+ ...
91
+ ```
92
+
93
+ ### Step 6: Save
94
+
95
+ Save the top 3 hypotheses to the idea store using `idea_store` with action `save` (status: `draft`).
96
+
97
+ ## Quality Criteria
98
+
99
+ - Each hypothesis must be **specific and testable** (not "explore X" but "propose method Y to improve Z by doing W")
100
+ - Must cite at least one supporting gap from the previous literature review
101
+ - No two hypotheses may address the same gap in the same way
102
+ - Quick deduplication search is mandatory before ranking
@@ -0,0 +1,129 @@
1
+ ```skill
2
+ ---
3
+ name: idea-critique
4
+ description: Run adversarial multi-round critique on a research idea through @critic and @architect. Produces a full review report with scores, weaknesses, improvement suggestions, and a final verdict.
5
+ ---
6
+ ```
7
+
8
+ # Idea Critique Skill
9
+
10
+ This skill orchestrates a **rigorous adversarial review** of a research idea, mimicking the NeurIPS/ICML program committee process. It is a prerequisite before investing effort in experiment design or writing.
11
+
12
+ ## When to Use
13
+
14
+ Use this skill when:
15
+ - A research hypothesis needs novelty validation before proceeding
16
+ - Preparing an idea for discussion with collaborators
17
+ - Stress-testing assumptions before writing a paper proposal
18
+ - Comparing multiple ideas to select the strongest one
19
+
20
+ ## Prerequisites
21
+
22
+ You need at minimum:
23
+ - The idea's **problem statement** (one clear sentence)
24
+ - The **core proposed approach** (2–3 sentences)
25
+ - The **research context** (field + related work references)
26
+
27
+ ## Workflow
28
+
29
+ ### Round 1: Novelty Check (Critic)
30
+
31
+ Launch **@critic** with:
32
+ - The full idea description
33
+ - The list of related papers found during the literature survey
34
+
35
+ **@critic's job**:
36
+ 1. Search arXiv and Semantic Scholar for papers that directly address this idea
37
+ 2. Identify degree of overlap: exact duplicate / partial overlap / related but different
38
+ 3. Report specific conflicting papers with their arXiv IDs
39
+
40
+ **Overlap classification**:
41
+ - **Exact** (≥90% overlap): Idea should be rejected or significantly reformulated
42
+ - **High** (≥50% and <90%): Core claim is already addressed; the idea must differentiate further
43
+ - **Moderate** (≥20% and <50%): Existing work is related, but the gap is real
44
+ - **Low** (<20%): Genuinely novel direction
45
+
46
+ ### Round 2: Full Review (Critic)
47
+
48
+ If novelty check passes (overlap < 50%), run the full NeurIPS-style review:
49
+
50
+ ```
51
+ Scores (1–10):
52
+ - Novelty: X — [justification with paper citations]
53
+ - Feasibility: X — [compute/data requirements assessment]
54
+ - Significance: X — [impact if successful]
55
+ - Clarity: X — [how well-defined is the contribution]
56
+ - Overall: X
57
+
58
+ Weaknesses (must identify at least 3):
59
+ 1. ...
60
+ 2. ...
61
+ 3. ...
62
+
63
+ Strengths (must identify at least 2):
64
+ 1. ...
65
+ 2. ...
66
+
67
+ Concrete improvements:
68
+ 1. ...
69
+ 2. ...
70
+
71
+ Verdict: ACCEPT / WEAK_ACCEPT / WEAK_REJECT / REJECT
72
+ ```
73
+
74
+ **Advancement threshold**: Overall ≥ 6/10.
75
+
76
+ ### Round 3: Improvement (Orchestrator)
77
+
78
+ If the verdict is WEAK_REJECT or the overall score is 5–6:
79
+ 1. Present @critic's weaknesses to the user
80
+ 2. Generate one improved version of the idea addressing the top 2 weaknesses
81
+ 3. Re-run the critique (only once — reject if still below threshold)
82
+
83
+ ### Round 4: Update Idea Store
84
+
85
+ Update the idea in the idea store using `idea_store` with action `update`:
86
+ - Set `critique_done: true`
87
+ - Record the scores in the `scores` field
88
+ - Update `status` to `validated` if Overall ≥ 6, or `abandoned` if rejected
89
+ - Save @critic's full review in `notes`
90
+
91
+ ### Output Format
92
+
93
+ ```markdown
94
+ # Critique Report: [Idea Title]
95
+
96
+ ## Novelty Check
97
+ [Overlap classification + conflicting papers found]
98
+
99
+ ## Review Scores
100
+ | Dimension | Score | Key Reason |
101
+ |--------------|-------|-------------------------------------|
102
+ | Novelty | X/10 | ... |
103
+ | Feasibility | X/10 | ... |
104
+ | Significance | X/10 | ... |
105
+ | Clarity | X/10 | ... |
106
+ | Overall | X/10 | ... |
107
+
108
+ ## Weaknesses
109
+ 1. ...
110
+ 2. ...
111
+ 3. ...
112
+
113
+ ## Strengths
114
+ 1. ...
115
+ 2. ...
116
+
117
+ ## Improvements Applied
118
+ [If a revised version was generated]
119
+
120
+ ## Verdict
121
+ [ACCEPT / WEAK_ACCEPT / WEAK_REJECT / REJECT] — Proceed to `experiment-design`? [Yes / No]
122
+ ```
123
+
124
+ ## Quality Criteria
125
+
126
+ - @critic must run at least 3 targeted searches (not just general queries)
127
+ - Novelty must be verified with specific paper IDs, not general statements
128
+ - At least 3 weaknesses must be identified even for strong ideas
129
+ - The skill must produce a binary decision (proceed / don't proceed) at the end