@research-copilot/plugin 1.1.15 → 1.1.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. package/dist/.claude-plugin/plugin.json +3 -2
  2. package/dist/.codex-plugin/plugin.toml +2 -1
  3. package/dist/.cursor-plugin/plugin.json +3 -2
  4. package/dist/.gemini-plugin/plugin.json +3 -2
  5. package/dist/.opencode-plugin/plugin.json +3 -2
  6. package/dist/.windsurf-plugin/plugin.json +3 -2
  7. package/dist/agents/copilot-conductor.agent.md +60 -0
  8. package/dist/agents/copilot-experiment.agent.md +56 -0
  9. package/dist/agents/copilot-ideation.agent.md +45 -0
  10. package/dist/agents/copilot-literature.agent.md +34 -0
  11. package/dist/agents/copilot-polisher.agent.md +30 -0
  12. package/dist/agents/copilot-rebuttal.agent.md +35 -0
  13. package/dist/agents/copilot-reviewer.agent.md +35 -0
  14. package/dist/agents/copilot-writer.agent.md +39 -0
  15. package/dist/hooks/dispatch-reminder.json +17 -0
  16. package/dist/hooks/loop-armer.json +17 -0
  17. package/dist/hooks/research-copilot-guard.hook.md +51 -0
  18. package/dist/hooks/scientist-guardrails.json +17 -0
  19. package/dist/hooks/scripts/__tests__/__init__.py +0 -0
  20. package/dist/hooks/scripts/__tests__/test_post_tool_loop_armer.py +88 -0
  21. package/dist/hooks/scripts/__tests__/test_research_copilot_guard_main_session.py +150 -0
  22. package/dist/hooks/scripts/__tests__/test_session_start_memory_injector.py +66 -0
  23. package/dist/hooks/scripts/__tests__/test_user_prompt_dispatch_reminder.py +37 -0
  24. package/dist/hooks/scripts/_copilot_hook_lib.py +564 -0
  25. package/dist/hooks/scripts/copilot_subagent_stop.py +203 -0
  26. package/dist/hooks/scripts/copilot_write_guard.py +96 -0
  27. package/dist/hooks/scripts/post_tool_loop_armer.py +61 -0
  28. package/dist/hooks/scripts/research_copilot_guard.py +208 -0
  29. package/dist/hooks/scripts/scientist_guardrails.py +29 -0
  30. package/dist/hooks/scripts/session_start_memory_injector.py +188 -0
  31. package/dist/hooks/scripts/user_prompt_dispatch_reminder.py +40 -0
  32. package/dist/hooks/session-memory-injector.json +17 -0
  33. package/dist/hooks/tests/__init__.py +0 -0
  34. package/dist/hooks/tests/conftest.py +61 -0
  35. package/dist/hooks/tests/fixtures/transcript_copilot_experiment_complete.jsonl +2 -0
  36. package/dist/hooks/tests/fixtures/transcript_copilot_experiment_state_jump.jsonl +2 -0
  37. package/dist/hooks/tests/fixtures/transcript_copilot_literature.jsonl +2 -0
  38. package/dist/hooks/tests/fixtures/transcript_main_only.jsonl +2 -0
  39. package/dist/hooks/tests/fixtures/transcript_malformed_state_output.jsonl +2 -0
  40. package/dist/hooks/tests/integration_run.ps1 +65 -0
  41. package/dist/hooks/tests/test_copilot_hook_lib.py +398 -0
  42. package/dist/hooks/tests/test_copilot_subagent_stop.py +186 -0
  43. package/dist/hooks/tests/test_copilot_write_guard.py +137 -0
  44. package/dist/hooks/tests/test_session_start_snapshot.py +116 -0
  45. package/dist/hooks/tests/test_state_machine_consistency.py +75 -0
  46. package/dist/skills/arxivsub-skill/SKILL.md +98 -0
  47. package/dist/skills/arxivsub-skill/skill.json +5 -0
  48. package/dist/skills/de-ai-checker/SKILL.md +110 -0
  49. package/dist/skills/de-ai-checker/skill.json +5 -0
  50. package/dist/skills/deep-interview/SKILL.md +91 -0
  51. package/dist/skills/deep-interview/skill.json +5 -0
  52. package/dist/skills/grill-with-docs/SKILL.md +120 -0
  53. package/dist/skills/grill-with-docs/skill.json +5 -0
  54. package/dist/skills/init-mcp/SKILL.md +83 -0
  55. package/dist/skills/init-mcp/skill.json +5 -0
  56. package/dist/skills/model-escalation/SKILL.md +93 -0
  57. package/dist/skills/model-escalation/skill.json +5 -0
  58. package/dist/skills/paper-architecture-web-drawing/SKILL.md +282 -0
  59. package/dist/skills/paper-architecture-web-drawing/skill.json +5 -0
  60. package/dist/skills/paper-deai/SKILL.md +53 -0
  61. package/dist/skills/paper-deai/skill.json +5 -0
  62. package/dist/skills/paper-en2zh/SKILL.md +29 -0
  63. package/dist/skills/paper-en2zh/skill.json +5 -0
  64. package/dist/skills/paper-expand/SKILL.md +43 -0
  65. package/dist/skills/paper-expand/skill.json +5 -0
  66. package/dist/skills/paper-experiment-analysis/SKILL.md +38 -0
  67. package/dist/skills/paper-experiment-analysis/skill.json +5 -0
  68. package/dist/skills/paper-figure-caption/SKILL.md +29 -0
  69. package/dist/skills/paper-figure-caption/skill.json +5 -0
  70. package/dist/skills/paper-logic-check/SKILL.md +30 -0
  71. package/dist/skills/paper-logic-check/skill.json +5 -0
  72. package/dist/skills/paper-polish/SKILL.md +34 -305
  73. package/dist/skills/paper-polish/skill.json +5 -0
  74. package/dist/skills/paper-review/SKILL.md +49 -0
  75. package/dist/skills/paper-review/skill.json +5 -0
  76. package/dist/skills/paper-sanity-check/SKILL.md +122 -0
  77. package/dist/skills/paper-sanity-check/skill.json +5 -0
  78. package/dist/skills/paper-shorten/SKILL.md +42 -0
  79. package/dist/skills/paper-shorten/skill.json +5 -0
  80. package/dist/skills/paper-table-caption/SKILL.md +29 -0
  81. package/dist/skills/paper-table-caption/skill.json +5 -0
  82. package/dist/skills/paper-translate/SKILL.md +48 -0
  83. package/dist/skills/paper-translate/skill.json +5 -0
  84. package/dist/skills/plugin-dev-agent-development/SKILL.md +95 -0
  85. package/dist/skills/plugin-dev-agent-development/skill.json +5 -0
  86. package/dist/skills/research-workflow/SKILL.md +116 -0
  87. package/dist/skills/research-workflow/skill.json +5 -0
  88. package/dist/skills/scientist-experiment-runner/SKILL.md +76 -0
  89. package/dist/skills/scientist-experiment-runner/skill.json +5 -0
  90. package/dist/skills/scientist-ideation/SKILL.md +52 -0
  91. package/dist/skills/scientist-ideation/skill.json +5 -0
  92. package/dist/skills/scientist-plotting/SKILL.md +49 -0
  93. package/dist/skills/scientist-plotting/skill.json +5 -0
  94. package/dist/skills/scientist-review/SKILL.md +40 -0
  95. package/dist/skills/scientist-review/skill.json +5 -0
  96. package/dist/skills/scientist-runtime-init/SKILL.md +46 -0
  97. package/dist/skills/scientist-runtime-init/skill.json +5 -0
  98. package/dist/skills/scientist-writeup/SKILL.md +60 -0
  99. package/dist/skills/scientist-writeup/skill.json +5 -0
  100. package/dist/skills/talk-normal/SKILL.md +73 -0
  101. package/dist/skills/talk-normal/skill.json +5 -0
  102. package/package.json +1 -1
  103. package/dist/agents/rc-experiment.md +0 -203
  104. package/dist/agents/rc-ideation.md +0 -224
  105. package/dist/agents/rc-literature.md +0 -228
  106. package/dist/agents/rc-plan.md +0 -189
  107. package/dist/agents/rc-polisher.md +0 -166
  108. package/dist/agents/rc-rebuttal.md +0 -194
  109. package/dist/agents/rc-reviewer.md +0 -187
  110. package/dist/agents/rc-update-spec.md +0 -231
  111. package/dist/agents/rc-verify.md +0 -234
  112. package/dist/agents/rc-writer.md +0 -161
  113. package/dist/skills/experiment-design/SKILL.md +0 -331
  114. package/dist/skills/full-research-workflow/SKILL.md +0 -363
  115. package/dist/skills/literature-search/SKILL.md +0 -244
  116. package/dist/skills/sanity-check/SKILL.md +0 -449
  117. package/dist/skills/submission-sprint/SKILL.md +0 -361
@@ -0,0 +1,120 @@
1
+ ---
2
+ name: grill-with-docs
3
+ description: "Post-plan stress test. Use AFTER a plan is drafted (Goal anchor, ideation candidate, rebuttal strategy, pipeline template) to gap-check the plan against the project's existing documentation, terminology, and recent reviewer / handoff history in `.copilot/`. Sharpens fuzzy terms inline, cross-references the codebase / tex / logs, and offers ADRs only when a decision is hard to reverse. Never used to draft the plan itself. Triggers on: '校验计划', '对着文档拷问', '把计划放到文档里盘一遍', 'grill the plan', 'stress-test plan', 'audit plan against docs', 'check plan for gaps'."
4
+ version: 0.2.0
5
+ ---
6
+
7
+ # Grill with docs — Post-plan gap check
8
+
9
+ Run **after** a plan exists (Goal anchor in `experiments.md`, selected direction in `ideas.md`, response strategy in `rebuttal/round-N.md`, or routing decision in `decisions.md`). Purpose: stress-test that plan against the project's existing language and documented state, surface contradictions, and update the docs inline as terminology is resolved.
10
+
11
+ This skill is **not for plan drafting**. If no plan exists yet, run `deep-interview` first, hand off to the planning agent, then return here.
12
+
13
+ ## When this skill fires
14
+
15
+ Fire automatically when the most recent disk write was one of:
16
+
17
+ - `## Goal anchor` block freshly written to `.copilot/experiments.md`
18
+ - A new `## Selected direction` in `.copilot/ideas.md`
19
+ - A new `## Reviewer N strategy` block in `rebuttal/round-N.md`
20
+ - A new pipeline template entry in `.copilot/decisions.md`
21
+
22
+ Also fires on user request: "校验一下这个计划" / "grill this plan."
23
+
24
+ ## Documentation surface (auto-detected)
25
+
26
+ ```
27
+ .copilot/
28
+ ├── state.md ← stage cursor + loop counters
29
+ ├── literature.md ← locked baseline + related work
30
+ ├── ideas.md ← user preferences + candidates + selected
31
+ ├── experiments.md ← Goal anchor + Run-N history
32
+ ├── handoff.md ← writer / polisher / reviewer / rebuttal facts
33
+ ├── decisions.md ← approval-gate decisions
34
+ ├── glossary.md ← created lazily by THIS skill on first term resolve
35
+ ├── adr/ ← created lazily by THIS skill on first ADR
36
+ └── reviews/round-N.md ← independent review rounds
37
+ ```
38
+
39
+ `glossary.md` and `adr/` are created **only when** the first term / first ADR appears — do not pre-create empty scaffolds.
40
+
41
+ ## Procedure
42
+
43
+ ### Step 1 — Read the plan + the docs
44
+
45
+ Load the just-written plan block and the relevant `.copilot/` files. For multi-doc projects (rare in this repo), also load any sibling `CONTEXT.md` / `CONTEXT-MAP.md` if present.
46
+
47
+ ### Step 2 — Run the four challenges, in order
48
+
49
+ | Challenge | What you do |
50
+ |---|---|
51
+ | **Glossary clash** | For every noun phrase in the plan, check `.copilot/glossary.md` (and the existing tex / `ideas.md` / `literature.md`). If a term collides with prior usage or is fuzzy ("module," "robustness," "improvement"), propose a precise canonical term and ask the user to confirm. Update `glossary.md` inline when resolved. |
52
+ | **Sharpen fuzzy language** | For every claim ("works better," "more robust," "faster"), demand the metric / unit / baseline / threshold. Push the user to a number or a falsifiable shape. |
53
+ | **Concrete scenario stress test** | For every relationship in the plan ("Module A feeds Module B"), spell out one concrete scenario end-to-end. If the scenario breaks, flag it before any experiment burns compute. |
54
+ | **Cross-reference with code / data** | For every "how it works" claim, grep the codebase / tex / logs and confirm the code agrees. If the plan describes behaviour the code does not exhibit, the plan is wrong — flag it. |
55
+
56
+ Each challenge runs **once** per pass. Ask one question at a time, and give each question a recommended answer plus the file:line that motivated it.
57
+
58
+ ### Step 3 — Update docs inline (lazy creation)
59
+
60
+ When a term is resolved, write to `.copilot/glossary.md` immediately:
61
+
62
+ ```markdown
63
+ ## <Canonical term>
64
+ - Definition: <one sentence>
65
+ - First defined: <YYYY-MM-DD> (during grill-with-docs of <plan slug>)
66
+ - Aliases to avoid: <fuzzy or colliding terms now retired>
67
+ - Used in: <file paths / sections>
68
+ ```
69
+
70
+ Create `glossary.md` if it does not yet exist.
71
+
72
+ ### Step 4 — Offer an ADR only when all three are true
73
+
74
+ Create an ADR at `.copilot/adr/NNNN-<slug>.md` only when all three of the following hold:
75
+
76
+ 1. **Hard to reverse** — changing this mid-project means redoing experiments / rewriting sections
77
+ 2. **Surprising without context** — a future reader (or a reviewer) will ask "why this way?"
78
+ 3. **Result of a real trade-off** — there were ≥2 alternatives, one was picked for a specific reason
79
+
80
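+ If even one is false, skip the ADR. Most decisions live in `decisions.md` already and do not need promotion. Number ADRs sequentially with four-digit zero padding, based on the number of ADR files already in `adr/`: the first ADR is `0001-<slug>.md`, the next is `0002-<slug>.md`, and so on.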
81
+
82
+ ADR template:
83
+
84
+ ```markdown
85
+ # <NNNN> — <Title>
86
+ - Status: accepted | superseded by NNNN
87
+ - Date: <YYYY-MM-DD>
88
+ - Context: <2-3 sentences — why this came up, which plan triggered it>
89
+ - Decision: <the chosen alternative, one sentence>
90
+ - Alternatives considered: <list with one-line "why not">
91
+ - Consequences: <experiments / sections / future ablations this commits us to>
92
+ ```
93
+
94
+ ## Output
95
+
96
+ When the pass finishes, emit:
97
+
98
+ ```markdown
99
+ ## Grill-with-docs report — <plan slug>
100
+ - Date: <YYYY-MM-DD>
101
+ - Plan reviewed: <file:line>
102
+ - Glossary entries added / updated: <count> → <glossary.md anchors>
103
+ - Fuzzy claims sharpened: <count> → <plan file:line edits>
104
+ - Scenarios stress-tested: <count> → <list of scenarios + outcomes>
105
+ - Code cross-references: <count> → <files / functions verified>
106
+ - ADRs created: <count> → <adr/NNNN-*.md anchors, or "none — bar not met">
107
+ - Plan changes proposed: <list of edits to the plan file, with file:line>
108
+ - Residual risks: <list anything you grilled and could not resolve>
109
+ - Hand off to: <agent who acts on the changes, or "user approval" if changes need confirmation>
110
+ ```
111
+
112
+ ## Hard constraints
113
+
114
+ - **Post-plan only** — if no plan block exists, exit and recommend `deep-interview` first
115
+ - **Read before challenging** — every challenge must cite a concrete file:line, not "in general"
116
+ - **Update docs inline** — never batch glossary updates "for the end"
117
+ - **ADR bar is strict** — three conditions, ALL must hold; otherwise leave the decision in `decisions.md`
118
+ - **Do not edit the plan unilaterally** — propose edits with file:line; the writing agent (or user) applies them
119
+ - **One challenge round per pass** — do not loop the four challenges; if more passes are needed, the user explicitly re-invokes
120
+ - **Lazy file creation** — `glossary.md` and `adr/` directory created only on first real entry
@@ -0,0 +1,5 @@
1
+ {
2
+ "name": "grill-with-docs",
3
+ "description": "Post-plan stress test. Use AFTER a plan is drafted (Goal anchor, ideation candidate, rebuttal strategy, pipeline template) to gap-check the plan against the project's existing documentation, terminology, and recent reviewer / handoff history in `.copilot/`. Sharpens fuzzy terms inline, cross-references the codebase / tex / logs, and offers ADRs only when a decision is hard to reverse. Never used to draft the plan itself. Triggers on: '校验计划', '对着文档拷问', '把计划放到文档里盘一遍', 'grill the plan', 'stress-test plan', 'audit plan against docs', 'check plan for gaps'.",
4
+ "entry": "SKILL.md"
5
+ }
@@ -0,0 +1,83 @@
1
+ ---
2
+ name: init-mcp
3
+ description: "Use when setting up the plugin for the first time, installing dependencies, configuring MCP servers, or when the user says '初始化', 'init', 'setup', '装环境', '配置', 'install', 'configure', 'first time', '首次使用'. Handles both dependency marketplace installation and MCP server setup."
4
+ version: 0.3.0
5
+ ---
6
+
7
+ # Init MCP
8
+
9
+ One-shot plugin setup: install dependency marketplaces → install Python deps → write `.mcp.json` → register hooks → register the conductor agent → regenerate skill.json metadata → verify each server → report optional secrets.
10
+
11
+ ## Step 1: Install dependency marketplaces
12
+
13
+ This plugin depends on skills from 5 third-party marketplaces. If they are not added, plugin dependencies will stay unresolved (skills from those sources will be missing). The `superpowers` dependency uses Claude Code's built-in `claude-plugins-official` marketplace.
14
+
15
+ Check whether each marketplace is already added by looking at the user's installed plugins. For each missing marketplace, instruct the user to run:
16
+
17
+ ```
18
+ /plugin marketplace add Imbad0202/academic-research-skills
19
+ /plugin marketplace add Lylll9436/Paper-Polish-Workflow-skill
20
+ /plugin marketplace add multica-ai/andrej-karpathy-skills
21
+ /plugin marketplace add anthropics/skills
22
+ /plugin marketplace add Orchestra-Research/AI-Research-SKILLs
23
+ ```
24
+
25
+ These are `/plugin` commands that must be typed by the user in the Claude Code prompt — they cannot be run via Bash. After the user adds all marketplaces, proceed to Step 2.
26
+
27
+ If all marketplaces are already present, skip this step.
28
+
29
+ ## Step 2: Run the installer script
30
+
31
+ `self/install.py` is a cross-platform Python script that handles MCP and hook setup.
32
+
33
+ ```bash
34
+ python self/install.py
35
+ ```
36
+
37
+ Supported flags:
38
+ - `--target /path` install to a non-default workspace
39
+ - `--dry-run` print plan without writing files
40
+ - `--skip-deps` skip pip install
41
+ - `--skip-verify` skip the MCP startup handshake
42
+
43
+ ## What the script does
44
+
45
+ 1. **Report dependency marketplaces** — print the prerequisite `/plugin marketplace add` commands.
46
+ 2. **Install Python deps** — read `self/mcp/requirements.txt`, run `pip install` (default: `pdfplumber`).
47
+ 3. **Write `.mcp.json`** — scan `self/mcp/servers/` for every `server.py`, generate a Claude-Code-style `.mcp.json` with **absolute paths** to avoid `${workspaceFolder}`-expansion failures.
48
+ 4. **Register hooks** — inject SessionStart, PreToolUse, UserPromptSubmit, and PostToolUse hooks into `.claude/settings.json`. Idempotent; no duplicates.
49
+ 5. **Register conductor agent** — set `agent: copilot-conductor` in `.claude/settings.json`.
50
+ 6. **Regenerate skill.json metadata** — required by Claude Code 2.1.142+. Calls `self/scripts/generate-skill-json.py` to walk every skill and write a sibling `skill.json` from its SKILL.md frontmatter.
51
+ 7. **Verify MCP startup** — send an `initialize` JSON-RPC request to each server and confirm that it responds.
52
+ 8. **Report optional secrets** — check `ARXIVSUB_SKILL_KEY`; if unset, warn but do not block install.
53
+
54
+ ## Trigger scenarios
55
+
56
+ - First-time use after a fresh clone → `/init-mcp`
57
+ - An MCP server is unresponsive → `/init-mcp` (the script rewrites config and re-verifies)
58
+ - After adding a new dependency marketplace → `/init-mcp` to re-verify everything
59
+
60
+ ## Servers currently under `self/mcp/servers/`
61
+
62
+ The repo-root `.mcp.json` is generated by scanning `self/mcp/servers/` — there is no static `self/mcp/mcp.json`.
63
+
64
+ | Server | Deps | Description |
65
+ |---|---|---|
66
+ | `ai-scientist` | stdlib only | runtime check, experiment directory browsing (non-model) |
67
+ | `arxiv-search` | stdlib only | arXiv search, 3-second rate-limit + 429 retry |
68
+ | `arxivsub-search` | stdlib + `ARXIVSUB_SKILL_KEY` | arXiv + top-venue joint search |
69
+ | `dblp-bib` | stdlib only | DBLP BibTeX query, 1.5-second rate-limit |
70
+ | `google-scholar` | stdlib only | Scholar metadata / citation formats |
71
+ | `pdf-text` | `pdfplumber` (preferred) / `PyPDF2` (fallback) | Local PDF text extraction |
72
+
73
+ ## After installation
74
+
75
+ 1. **Restart Claude Code** (or run `/clear`) so the new MCP config takes effect.
76
+ 2. In a fresh session, verify: call `arxiv-search.search_arxiv` or `dblp-bib.search_dblp_bibtex` to confirm tool registration.
77
+ 3. If `ARXIVSUB_SKILL_KEY` is unset, `arxivsub-search` returns `missing_api_key`; configure via env var or `.env` as the warning suggests.
78
+
79
+ ## Notes
80
+
81
+ - **Idempotent**: the script can run multiple times; existing hooks are not duplicated; existing `.mcp.json` is overwritten to stay in sync with `self/mcp/servers/`.
82
+ - **Does not touch global settings**: writes only project-level `.claude/settings.json`; never touches `~/.claude/settings.json`.
83
+ - **Other MCP entries**: the current implementation overwrites `.mcp.json`; if the user has non-`self/` MCP entries there, they must be merged manually. Use `python self/install.py --dry-run` to inspect the planned write.
@@ -0,0 +1,5 @@
1
+ {
2
+ "name": "init-mcp",
3
+ "description": "Use when setting up the plugin for the first time, installing dependencies, configuring MCP servers, or when the user says '初始化', 'init', 'setup', '装环境', '配置', 'install', 'configure', 'first time', '首次使用'. Handles both dependency marketplace installation and MCP server setup.",
4
+ "entry": "SKILL.md"
5
+ }
@@ -0,0 +1,93 @@
1
+ ---
2
+ name: model-escalation
3
+ description: "Use when repeated debugging or writing iterations fail, root cause is unclear, environment limits block progress, the user is still dissatisfied after multiple attempts, or the user says '疑难杂症', '卡住', '多轮迭代无解', '反复失败', '更强模型', '升级求助', 'stuck', 'escalate', 'stronger model'. Produces a hand-off summary suitable for a stronger model to pick up."
4
+ version: 0.2.0
5
+ ---
6
+
7
+ # Model Escalation
8
+
9
+ ## Role
10
+ When a problem has resisted multiple solid attempts in the current session, or you can clearly perceive that the current model / environment / context cannot continue to make high-quality progress, your job is to **stop low-yield trial-and-error** and produce a high-quality help summary suitable for handoff to a stronger model.
11
+
12
+ ## Use this skill when
13
+ - You have already made at least 2–3 rounds of substantive attempts and the problem is still unresolved
14
+ - The root cause is unclear; continuing edits will significantly raise the risk of accidental damage
15
+ - Environment / permission / tool / context limits block verification
16
+ - The user remains dissatisfied and you have no high-confidence improvement path
17
+ - You can clearly describe the impasse but cannot reliably converge within this session
18
+
19
+ ## Core requirements
20
+ - Be honest about the current state; do not exaggerate, do not cover up
21
+ - Write only verified information; mark anything unverified explicitly as a "current hypothesis"
22
+ - Separate goal, current state, attempts, results, and blocker
23
+ - Preserve executable context: error messages, file paths, commands, I/O, blast radius
24
+ - Do not push responsibility onto the user; your job is to make the handoff as easy to pick up as possible
25
+
26
+ ## Output format
27
+
28
+ Output strictly in the structure below.
29
+
30
+ ### Recommend Escalating
31
+ The current problem has entered a high-cost iteration zone; continued trial-and-error in this session has low yield. Forward the following summary to a stronger model to continue.
32
+
33
+ ### 1. Goal
34
+ - 1–3 sentences on the desired end state
35
+ - Acceptance criteria or the user's expected outcome
36
+
37
+ ### 2. Current state
38
+ - Where you currently are
39
+ - Actual behavior or error symptom
40
+ - Files / modules / commands / data directly related to the issue
41
+
42
+ ### 3. Attempts so far
43
+ List in chronological order; each item includes:
44
+ 1. What was done
45
+ 2. Observed result
46
+ 3. What this rules out, or why it still fails
47
+
48
+ ### 4. Current judgment
49
+ - Confirmed facts
50
+ - Current hypotheses
51
+ - The actual blocker location
52
+
53
+ ### 5. Suggested questions for the stronger model
54
+ - 1–3 most central questions
55
+ - MUST be specific. NEVER "help me see what's wrong."
56
+
57
+ ### 6. Forwardable help prompt
58
+
59
+ ```text
60
+ I am working on a problem; please continue from the information below and prioritize a minimum verifiable plan.
61
+
62
+ Goal:
63
+ ...
64
+
65
+ Current state:
66
+ ...
67
+
68
+ Attempts so far:
69
+ ...
70
+
71
+ Confirmed facts:
72
+ ...
73
+
74
+ Current hypotheses:
75
+ ...
76
+
77
+ Blocker:
78
+ ...
79
+
80
+ Please focus on:
81
+ 1. ...
82
+ 2. ...
83
+ 3. ...
84
+
85
+ If you recommend code changes, prefer a minimum-change plan and state how to verify it.
86
+ ```
87
+
88
+ ## Execution checklist before output
89
+ 1. Have you clearly separated facts from hypotheses?
90
+ 2. Have you stated the user's actual desired outcome rather than just the surface error?
91
+ 3. Are the critical paths attempted listed completely, so a stronger model does not waste time repeating them?
92
+ 4. Are the suggested questions specific enough to act on?
93
+ 5. Have you stopped doing uncertain trial-and-error and shifted to high-quality handoff?
@@ -0,0 +1,5 @@
1
+ {
2
+ "name": "model-escalation",
3
+ "description": "Use when repeated debugging or writing iterations fail, root cause is unclear, environment limits block progress, the user is still dissatisfied after multiple attempts, or the user says '疑难杂症', '卡住', '多轮迭代无解', '反复失败', '更强模型', '升级求助', 'stuck', 'escalate', 'stronger model'. Produces a hand-off summary suitable for a stronger model to pick up.",
4
+ "entry": "SKILL.md"
5
+ }
@@ -0,0 +1,282 @@
1
+ ---
2
+ name: paper-architecture-web-drawing
3
+ description: "Use when the user wants a paper's abstract + method turned into a publication-ready architecture diagram, rendered as a single HTML file with inline SVG (and Python-generated SVG sub-figures for heatmaps / distributions / scatter / matrices). Triggers on: \"架构图\", \"结构图\", \"method figure\", \"overview figure\", \"pipeline diagram\", \"draw methodology\", \"网页绘图\". Enforces compactness numerics and a mandatory 10-round self-check loop. Do NOT use for line / bar / scatter data plots, posters, art, pure Mermaid sketches, or before the method is settled."
4
+ version: 0.5.1
5
+ ---
6
+
7
+ # Paper Architecture Web Drawing
8
+
9
+ Input: the paper's Abstract + Method (or a `.tex` / `.md` / `.txt` paper file in the workspace).
10
+ Output: a single HTML file with inline SVG rendering a top-conference-grade method figure; optionally an independent companion `.svg` of the same content.
11
+ Not for: line / bar / scatter data plots, posters, illustrations, pure Mermaid sketches, or drafts written before the method is settled.
12
+
13
+ ## 0. Seven non-negotiable rules
14
+
15
+ 1. **White background + vector-first**: pure white background, inline SVG as the dominant medium. **Banned: gradients, shadows, glassmorphism, glow, decorative backgrounds.** Only MathJax / KaTeX may go online.
16
+ 2. **At least 3 real glyphs**: weights / distribution / tokens / cache / codebook / attention / scatter etc. must be drawn as matrix grids, heatmaps, histograms, boxplots, or scatter. **Text boxes only = fail.** Glyphs expressible in Python (see §2.7) **prefer Python**; do not hand-write complex heatmaps or curves.
17
+ 3. **Equations near their module**: LaTeX equations anchor as local labels on their corresponding module. **Do not pile them in a bottom strip.** Render via MathJax / KaTeX in HTML; never write ASCII pseudo-equations (`sum(...)/sum(...)`).
18
+ 4. **Font-size floor (top-conference density)**: main title ≥ 26 px, section title ≥ 22 px, module label ≥ 18 px, equation label ≥ 16 px, auxiliary ≥ 14 px. **When tight, delete words before shrinking type.** viewBox must give enough room — no 1060×330 strip that crushes the type.
19
+ 5. **English labels only**: no Chinese labels; no single-component description longer than 10 words.
20
+ 6. **Browser verification**: after writing the HTML, open it in a browser and take a screenshot. On Windows: `start "" "$(pwd -W)/path.html"` or `python -m http.server`.
21
+ 7. **Every arrow has paper grounding**: NEVER invent modules, losses, or feedback loops.
22
+
23
+ ## 1. Banned visual modes
24
+
25
+ - **SmartArt / PowerPoint flowcharts**: equal-width rounded cards chained linearly, all nodes with the same corner radius and border.
26
+ - **Dashboard / poster style**: right-side KPI column, result-card stack, marketing badges, statistic stickers, glow emphasis.
27
+ - **Web-UI collage**: title bar + subtitle bar + content card patterns; pill-badge arrays.
28
+ - **Big box + arrow + bottom legend** as the dominant frame.
29
+ - **Top stage-label + bottom caption / problem statement / method summary**. The figure should be self-explanatory.
30
+ - **Bypass / feedback / dashed lines crossing** through module titles, equations, badges, or result numbers.
31
+ - **Small font + large whitespace** in exchange for content capacity.
32
+ - High-saturation red / green / purple, or five or more salient light-color blocks at once.
33
+
34
+ ## 2. Workflow
35
+
36
+ ### 2.1 Read the paper
37
+
38
+ - Look for `.tex` / `.md` / `.txt` in the workspace; read Abstract / Method / Approach / Overview / Framework.
39
+ - If multiple candidate files exist, **confirm with the user first** — do not guess.
40
+ - Only read context needed to reconstruct the main pipeline.
41
+
42
+ ### 2.2 Structure extraction (mandatory before drawing)
43
+
44
+ For every key module fill out these 5 fields. If you cannot articulate one, **do not draw the module**:
45
+
46
+ | Field | Content |
47
+ |---|---|
48
+ | `Name` | Short stable English module name, no slogans |
49
+ | `Type` | input / encoder / alignment / retrieval / fusion / optimization / loss / output |
50
+ | `Is novel?` | Is this a contribution that needs visual highlight? |
51
+ | `Internal elements` | The objects / operations worth visualizing inside it (attention, MLP, codebook, feature map, cache update) |
52
+ | `Topology role` | main-chain node / parallel branch / merge point / feedback point / training-only branch |
53
+
54
+ ### 2.3 Pick a layout family (in order of priority)
55
+
56
+ 1. Explicit feedback / iteration / alternating optimization / until convergence → **Loop / U-shape**
57
+ 2. train/infer or stage1/stage2 or coarse/fine or retrieve/generate → **Two-stage**
58
+ 3. ≥2 semantically independent branches merging into a shared main module → **Multi-branch with merge**
59
+ 4. Narrow column / single-column vertical reading → **Linear vertical**
60
+ 5. 3-6 serial stages, no strong feedback → **Linear horizontal** (default)
61
+ 6. Local complex substructure embedded in the main chain → **Hybrid composition**
62
+
63
+ **Tie-breakers**: prefer the layout that preserves a strong visual center, gives the key mechanism panel enough area, minimizes arrow crossings and diagonal text overlaps, and forms a natural input-vs-output contrast.
64
+
65
+ **Veto conditions** (any of these → switch layout):
66
+ - No room for a main illustration; all modules forced into equal-weight small boxes.
67
+ - Need >2 long cross-region connectors to convey the main flow.
68
+ - Key equations forced into corners.
69
+ - Output region and auxiliary text fighting for space.
70
+ - Must shrink font or add whitespace to fit.
71
+
72
+ ### 2.4 Default blueprint
73
+
74
+ **Input object → 2-3 mechanism panels → output object**
75
+
76
+ - **Left**: tensor / weights / KV cache / tokens / feature grid as a visualized input object (NOT a text box).
77
+ - **Middle**: each panel centers on a **main illustration**, not uniformly-sized small cards. Auxiliary objects (codebook / sensitivity map / objective / memory) anchor to their mechanism.
78
+ - **Right**: the transformed object of the same kind, preferring **structural change** over KPI summary.
79
+ - Input ↔ output **reuse the same graphic motif** to show state change (e.g. same-shape cache blocks before/after compression).
80
+ - Highlight only 1–2 core contribution modules: same-family slightly heavier border / slightly darker fill / local bracket / callout. **Never** via high saturation or large badges.
81
+
82
+ ### 2.5 Palette (pick 1 of 5; one family across the whole figure)
83
+
84
+ | Family | Use |
85
+ |---|---|
86
+ | **Blue-Gray** | Generic pipeline / system figure (default) |
87
+ | **Warm Tones** | Moderate emphasis on novelty |
88
+ | **Green-Cyan** | Generative / biological / light themes |
89
+ | **Purple-Blue** | Theory / math-heavy |
90
+ | **Monochrome** | Minimalist / B&W-print-friendly |
91
+
92
+ **Color roles** (consistent across any chosen family): Primary background (normal modules), Secondary background (minor modules), Accent background (contribution modules), Input/Output background (lighter), Primary border, Accent border, **Arrow color: one dark color across the whole figure**, Main text, Secondary text. **Never mix families.**
93
+
94
+ ### 2.6 Typography
95
+
96
+ - 2–3 stroke widths: main flow / secondary structure / coordinate auxiliaries.
97
+ - Small / medium corner radius; avoid web-card-style large rounding.
98
+ - **Sans for labels + serif for equations.**
99
+ - Short stable module titles, no slogans; subtitles default to omitted.
100
+ - Training-only branches: lighter fill + dashed arrows.
101
+ - Long paths (bypass, feedback, training-only branches) follow the outer edge of regions; do not cross dense text inside sub-panels.
102
+ - Compactness first: align, share edges, tighten via grouping. NEVER reduce font size to fit content.
103
+ - Asymmetric layouts allowed; area reflects importance; never chase column parity.
104
+
105
+ ### 2.7 Abstract object → glyph (Python vs hand-written SVG)
106
+
107
+ | Paper object | Glyph | Recommended source |
108
+ |---|---|---|
109
+ | `weights` / `kernels` / `parameters` matrix | matrix grid + outlier column / point highlight | matplotlib `imshow` |
110
+ | `distribution` / `density` / histogram | histogram / KDE curve | matplotlib `hist` + seaborn `kdeplot` (or a `scipy.stats.gaussian_kde` curve) |
111
+ | `outliers` / `IQR` / `boxplot` | boxplot + Q1 / Q3 / whisker annotations | matplotlib `boxplot` |
112
+ | `scatter` / two-variable relation / error comparison | scatter + diagonal + highlight region | matplotlib `scatter` |
113
+ | `attention` / `similarity` / `heatmap` | 2D heatmap + colorbar | matplotlib `imshow` (cmap viridis / coolwarm) |
114
+ | `eigenvectors` / `subspace` / `basis` | disk + direction arrows / axes | matplotlib `quiver` or hand-SVG |
115
+ | `quantization` / `clustering` / `codebook` | bin partition lines / cluster centers / lookup blocks | matplotlib + `axvline` for centroids |
116
+ | `tokens` / `patches` / `cache blocks` | brick array + bit-width tag | hand-written SVG (structured) |
117
+ | `loss landscape` / 3D surface | contour / pcolormesh | matplotlib |
118
+ | `loss` / `objective` / `constraint` | short equation chip (attached to module) | hand-written SVG + MathJax |
119
+ | module boxes, arrows, formula chips, brackets | box / line / label | **hand-written SVG** (Python is not elegant here) |
120
+
121
+ **Rule**: if the glyph carries numerical / distribution / geometric content → Python-generated SVG. If the glyph is a structured layout (box, arrow, equation slot) → hand-written SVG.
122
+
123
+ ### 2.7.1 Python-generated SVG sub-figures (matplotlib)
124
+
125
+ Place a `figures/<paper>_components.py` script; each subplot saves to a separate `.svg`; the main HTML embeds them inline or references them from inside the main SVG via `<image href="...svg">`. **Prefer inline** (single-file delivery, second-pass editable).
126
+
127
+ Minimum skeleton:
128
+
129
+ ```python
130
+ import matplotlib
131
+ matplotlib.use("Agg")
132
+ import matplotlib.pyplot as plt
133
+ import numpy as np
134
+
135
+ plt.rcParams.update({
136
+ "font.family": "DejaVu Sans",
137
+ "font.size": 14,
138
+ "axes.linewidth": 1.0,
139
+ "axes.spines.top": False,
140
+ "axes.spines.right": False,
141
+ "svg.fonttype": "none", # keep text as <text>, do not outline
142
+ })
143
+
144
+ def save(fig, path):
145
+ fig.savefig(path, format="svg", bbox_inches="tight",
146
+ pad_inches=0.05, transparent=True)
147
+ plt.close(fig)
148
+ ```
149
+
150
+ **Hard rules**:
151
+
152
+ - `svg.fonttype="none"`: text stays as editable `<text>`, not paths.
153
+ - `transparent=True` + main HTML white background, avoiding double background layers.
154
+ - Per-subplot font size ≥ 12 (after embedding scale, still readable).
155
+ - Use only your palette family (see §2.5); never matplotlib's default `tab:blue`.
156
+ - One figure per function; never `plt.show()` in the script.
157
+
158
+ ### 2.7.2 Inline embedding
159
+
160
+ After Python produces `comp_a.svg`, in the main HTML:
161
+
162
+ ```html
163
+ <g transform="translate(120, 80)">
164
+ <!-- inline-svg-include: comp_a.svg -->
165
+ </g>
166
+ ```
167
+
168
+ Two delivery paths:
169
+
170
+ - **Copy inline**: paste `comp_a.svg`'s inner `<g>...</g>` into the main SVG at the corresponding spot, together with any `<defs>` (clip paths, styles) that the group references; drop the outer `<svg>` header.
171
+ - **Object reference**: in main HTML, `<image href="comp_a.svg" x=.. y=.. width=..>` or `<foreignObject>`. **MUST** verify rendering in the browser before delivery.
172
+
173
+ After inline embedding, **manually adjust size / position**: matplotlib's default viewBox differs from the main figure's coordinate system; wrap in a `<g transform="translate(x,y) scale(s)">`.
174
+
175
+ ### 2.8 Equation placement
176
+
177
+ - Every key equation has a dedicated slot (local white-background equation slot / module-internal equation strip / anchor aligned to glyph). **Never a floating web-sticker.**
178
+ - Long equations split into two short labels or shorter equivalent forms; never let a single long line crush a panel.
179
+ - objective / update / normalization / threshold equations live inside their module's slot, not piled in a unified bottom strip.
180
+ - The equation chip sits ≤ 15 px below its main illustration; no big air gap between them.
181
+
182
+ ### 2.9 Compactness numerics (hard targets)
183
+
184
+ Most "doesn't look like a method figure" failures come from **loose layout**. Enforce these targets (upper bounds unless a `≥` or a range is given):
185
+
186
+ | Metric | Upper bound | Meaning |
187
+ |---|---|---|
188
+ | Whitespace ratio inside a panel | ≤ 15% | Title + glyphs + equations + labels cover ≥ 85% of panel area |
189
+ | Main illustration's share of panel visible area | ≥ 65% | "The figure dominates; text is secondary" |
190
+ | Cross-panel horizontal gap | 20-40 px (viewBox units) | >40 = loose |
191
+ | Cross-panel vertical gap | 15-30 px | between adjacent rows |
192
+ | Title bottom edge → first panel | ≤ 30 px | No top-air |
193
+ | viewBox aspect ratio (height ÷ width) | 0.45 - 0.65 (single-row 4-panel) | Top-venue `figure*` ≈ 2:1 (width : height) |
194
+ | Panel top padding (above title) | ≤ 16 px | Title touches edge |
195
+ | Panel bottom padding (below last element) | ≤ 16 px | No large dead space |
196
+ | Distance: glyph ↔ shape / label | ≤ 10 px | Tightly aligned, not floating |
197
+ | Nested `<rect>` levels in one panel | ≤ 2 | 3-level nesting = card bloat |
198
+
199
+ **Panel-equal-height trap**: 4 panels in one row need not be equal height. Input/Output typically 200–300 px shorter than mechanism panels. **Forcing equal height = manufacturing whitespace.**
200
+
201
+ **How to measure**: after rendering, use browser dev tools. Or temporarily draw `<rect>` boundary markers inside the SVG to eyeball dead-space ratios.
202
+
203
+ ### 2.10 Mandatory ≥ 10-round self-check loop
204
+
205
+ After writing the HTML, **never deliver directly**. Run 10 iterations from the table below; each round:
206
+
207
+ 1. Render PNG with `chrome --headless --screenshot`.
208
+ 2. Open the PNG; write down "this round's focus dimension and 3 most non-compliant spots" (concrete only; "looks fine" is banned).
209
+ 3. Fix HTML / Python / regenerate the sub-figure.
210
+ 4. Re-render.
211
+
212
+ | Round | Focus | Must check |
213
+ |---|---|---|
214
+ | 1 | **Topology fidelity** | Aligned to paper's method? No missing / extra modules, arrows, losses, feedback? |
215
+ | 2 | **Compactness** | Measure each panel's whitespace; tighten one by one; trim viewBox |
216
+ | 3 | **Font sizes** | Spot-check every text against 26/22/18/16/14 floor; if below floor, delete words, do not shrink |
217
+ | 4 | **Color discipline** | Single palette family? No gradients / shadows / glass / glow / decorative backgrounds? |
218
+ | 5 | **Equation anchoring** | LaTeX truly rendered via MathJax? Chip glued to its module? |
219
+ | 6 | **Arrow routing** | Main flow / bypass / feedback do not cross text / equations / badges |
220
+ | 7 | **Python sub-figures** | `svg.fonttype="none"`, palette consistent with main, aspect matched to panel |
221
+ | 8 | **Visual hierarchy** | Main reading path obvious at first glance? 1–2 contribution panels highlighted? Non-contribution modules restrained? |
222
+ | 9 | **Anti-pattern scan** | Does it resemble SmartArt / PowerPoint / dashboard / web UI / poster? |
223
+ | 10 | **Paper context** | Drop into an ICML/CVPR two-column layout — does it feel native? Cover the title — does it still feel like this paper? |
224
+
225
+ **Never skip a round.** If a round finds nothing because earlier rounds already fixed it, explicitly record "0 issues this round" — do not silently skip.
226
+
227
+ **Round 11+ optional**: only when a previous fix triggered a new violation (e.g. tightening clipped some text); add rounds until stable.
228
+
229
+ ### 2.11 Quick-rollback conditions
230
+
231
+ During iteration, if **any** of the following holds, roll back to §2.4 default blueprint and redraw; do not keep tuning:
232
+
233
+ - 3 rounds of compactness work still cannot hit the §2.9 numbers.
234
+ - The main illustration is essentially stacked text, not graphics.
235
+ - viewBox shrinking always leaves large whitespace → usually caused by forced panel-equal-height (§2.9 trap).
236
+
237
+ ## 3. File locations
238
+
239
+ - If the repo has `figures/` or `dist/figures/` → output there.
240
+ - Otherwise put the files next to the paper source.
241
+ - Default artifacts:
242
+ - `method_architecture.html` (main figure, inline SVG)
243
+ - `method_architecture.svg` (standalone SVG with the same content, optional)
244
+ - `<paper>_components.py` (script generating Python sub-figures)
245
+ - `comp_*.svg` (Python sub-figure source files, for later editing)
246
+ - The main HTML must NOT load any online resources (the MathJax / KaTeX CDN is the only exception).
247
+
248
+ ## 4. Completion checklist
249
+
250
+ Confirm every item. Any miss → continue iterating:
251
+
252
+ - [ ] Single HTML file opens directly; pure white background; inline SVG dominates
253
+ - [ ] ≥ 3 real glyphs (heatmap / histogram / boxplot / scatter / matrix), with **≥ 1 Python-generated** if numerical
254
+ - [ ] Each main mechanism panel has a main illustration, NOT just title + equation + cards
255
+ - [ ] Python sub-figures use `svg.fonttype="none"`; text is editable
256
+ - [ ] Python sub-figures use the same palette family as the main figure; no matplotlib defaults
257
+ - [ ] **Compactness (§2.9)**: panel whitespace ≤ 15%, main illustration ≥ 65% of panel, cross-panel gap 20–40 px
258
+ - [ ] **Panels NOT forced equal-height**: Input/Output 200–300 px shorter than mechanism panels
259
+ - [ ] **viewBox aspect ratio 0.45–0.65** (height ÷ width, single-row 4-panel); panel top/bottom padding ≤ 16 px; title bottom edge → first panel ≤ 30 px
260
+ - [ ] Equations sit in dedicated slots, aligned with their module; not floating; not crushed by codebook / arrow / badge
261
+ - [ ] LaTeX equations render correctly (MathJax / KaTeX); no ASCII pseudo-equations
262
+ - [ ] Font sizes meet the floor: 26 / 22 / 18 / 16 / 14
263
+ - [ ] All English labels; single-component description ≤ 10 words
264
+ - [ ] 1–2 core contribution modules highlighted (restrained: no high saturation, no big badges)
265
+ - [ ] Palette: single family, 3–5 shades; **no gradients / shadows / glow / glass**
266
+ - [ ] Main-flow arrows do not cross text / equations / badges; long paths follow outer edges
267
+ - [ ] Input ↔ Output reuse a graphic motif or form a sensible before/after contrast
268
+ - [ ] No top stage-label / phase-label; no bottom caption / problem statement / footer text
269
+ - [ ] No right-side KPI column / result-card stack / dashboard / poster summary strip
270
+ - [ ] Does not resemble SmartArt / PowerPoint / web component / product banner
271
+ - [ ] **≥ 10 rounds of self-check completed (§2.10)**, with a PNG screenshot and 3 spot notes per round
272
+ - [ ] Every arrow maps to a real data-flow / control-flow / supervision signal
273
+ - [ ] No invented modules / losses / feedback loops absent from the paper
274
+
275
+ ## 5. Delivery contract
276
+
277
+ 1. Single HTML file as the main figure (inline-SVG-dominant).
278
+ 2. `<paper>_components.py`: script generating all Python numerical / geometric sub-figures.
279
+ 3. `comp_*.svg`: Python sub-figure source files (so the user can edit later).
280
+ 4. Optional: standalone `method_architecture.svg`.
281
+ 5. Brief notes: main flow, auxiliary branches, Python sub-figure manifest, source files referenced.
282
+ 6. When ambiguity remains, **explicitly flag the undefined modules**; do not invent.
@@ -0,0 +1,5 @@
1
+ {
2
+ "name": "paper-architecture-web-drawing",
3
+ "description": "Use when the user wants a paper's abstract + method turned into a publication-ready architecture diagram, rendered as a single HTML file with inline SVG (and Python-generated SVG sub-figures for heatmaps / distributions / scatter / matrices). Triggers on: \"架构图\", \"结构图\", \"method figure\", \"overview figure\", \"pipeline diagram\", \"draw methodology\", \"网页绘图\". Enforces compactness numerics and a mandatory 10-round self-check loop. Do NOT use for line / bar / scatter data plots, posters, art, pure Mermaid sketches, or before the method is settled.",
4
+ "entry": "SKILL.md"
5
+ }