@research-copilot/plugin 1.1.15 → 1.1.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. package/dist/.claude-plugin/plugin.json +3 -2
  2. package/dist/.codex-plugin/plugin.toml +2 -1
  3. package/dist/.cursor-plugin/plugin.json +3 -2
  4. package/dist/.gemini-plugin/plugin.json +3 -2
  5. package/dist/.opencode-plugin/plugin.json +3 -2
  6. package/dist/.windsurf-plugin/plugin.json +3 -2
  7. package/dist/agents/copilot-conductor.agent.md +60 -0
  8. package/dist/agents/copilot-experiment.agent.md +56 -0
  9. package/dist/agents/copilot-ideation.agent.md +45 -0
  10. package/dist/agents/copilot-literature.agent.md +34 -0
  11. package/dist/agents/copilot-polisher.agent.md +30 -0
  12. package/dist/agents/copilot-rebuttal.agent.md +35 -0
  13. package/dist/agents/copilot-reviewer.agent.md +35 -0
  14. package/dist/agents/copilot-writer.agent.md +39 -0
  15. package/dist/hooks/dispatch-reminder.json +17 -0
  16. package/dist/hooks/loop-armer.json +17 -0
  17. package/dist/hooks/research-copilot-guard.hook.md +51 -0
  18. package/dist/hooks/scientist-guardrails.json +17 -0
  19. package/dist/hooks/scripts/__tests__/__init__.py +0 -0
  20. package/dist/hooks/scripts/__tests__/test_post_tool_loop_armer.py +88 -0
  21. package/dist/hooks/scripts/__tests__/test_research_copilot_guard_main_session.py +150 -0
  22. package/dist/hooks/scripts/__tests__/test_session_start_memory_injector.py +66 -0
  23. package/dist/hooks/scripts/__tests__/test_user_prompt_dispatch_reminder.py +37 -0
  24. package/dist/hooks/scripts/_copilot_hook_lib.py +564 -0
  25. package/dist/hooks/scripts/copilot_subagent_stop.py +203 -0
  26. package/dist/hooks/scripts/copilot_write_guard.py +96 -0
  27. package/dist/hooks/scripts/post_tool_loop_armer.py +61 -0
  28. package/dist/hooks/scripts/research_copilot_guard.py +208 -0
  29. package/dist/hooks/scripts/scientist_guardrails.py +29 -0
  30. package/dist/hooks/scripts/session_start_memory_injector.py +188 -0
  31. package/dist/hooks/scripts/user_prompt_dispatch_reminder.py +40 -0
  32. package/dist/hooks/session-memory-injector.json +17 -0
  33. package/dist/hooks/tests/__init__.py +0 -0
  34. package/dist/hooks/tests/conftest.py +61 -0
  35. package/dist/hooks/tests/fixtures/transcript_copilot_experiment_complete.jsonl +2 -0
  36. package/dist/hooks/tests/fixtures/transcript_copilot_experiment_state_jump.jsonl +2 -0
  37. package/dist/hooks/tests/fixtures/transcript_copilot_literature.jsonl +2 -0
  38. package/dist/hooks/tests/fixtures/transcript_main_only.jsonl +2 -0
  39. package/dist/hooks/tests/fixtures/transcript_malformed_state_output.jsonl +2 -0
  40. package/dist/hooks/tests/integration_run.ps1 +65 -0
  41. package/dist/hooks/tests/test_copilot_hook_lib.py +398 -0
  42. package/dist/hooks/tests/test_copilot_subagent_stop.py +186 -0
  43. package/dist/hooks/tests/test_copilot_write_guard.py +137 -0
  44. package/dist/hooks/tests/test_session_start_snapshot.py +116 -0
  45. package/dist/hooks/tests/test_state_machine_consistency.py +75 -0
  46. package/dist/skills/arxivsub-skill/SKILL.md +98 -0
  47. package/dist/skills/arxivsub-skill/skill.json +5 -0
  48. package/dist/skills/de-ai-checker/SKILL.md +110 -0
  49. package/dist/skills/de-ai-checker/skill.json +5 -0
  50. package/dist/skills/deep-interview/SKILL.md +91 -0
  51. package/dist/skills/deep-interview/skill.json +5 -0
  52. package/dist/skills/grill-with-docs/SKILL.md +120 -0
  53. package/dist/skills/grill-with-docs/skill.json +5 -0
  54. package/dist/skills/init-mcp/SKILL.md +83 -0
  55. package/dist/skills/init-mcp/skill.json +5 -0
  56. package/dist/skills/model-escalation/SKILL.md +93 -0
  57. package/dist/skills/model-escalation/skill.json +5 -0
  58. package/dist/skills/paper-architecture-web-drawing/SKILL.md +282 -0
  59. package/dist/skills/paper-architecture-web-drawing/skill.json +5 -0
  60. package/dist/skills/paper-deai/SKILL.md +53 -0
  61. package/dist/skills/paper-deai/skill.json +5 -0
  62. package/dist/skills/paper-en2zh/SKILL.md +29 -0
  63. package/dist/skills/paper-en2zh/skill.json +5 -0
  64. package/dist/skills/paper-expand/SKILL.md +43 -0
  65. package/dist/skills/paper-expand/skill.json +5 -0
  66. package/dist/skills/paper-experiment-analysis/SKILL.md +38 -0
  67. package/dist/skills/paper-experiment-analysis/skill.json +5 -0
  68. package/dist/skills/paper-figure-caption/SKILL.md +29 -0
  69. package/dist/skills/paper-figure-caption/skill.json +5 -0
  70. package/dist/skills/paper-logic-check/SKILL.md +30 -0
  71. package/dist/skills/paper-logic-check/skill.json +5 -0
  72. package/dist/skills/paper-polish/SKILL.md +34 -305
  73. package/dist/skills/paper-polish/skill.json +5 -0
  74. package/dist/skills/paper-review/SKILL.md +49 -0
  75. package/dist/skills/paper-review/skill.json +5 -0
  76. package/dist/skills/paper-sanity-check/SKILL.md +122 -0
  77. package/dist/skills/paper-sanity-check/skill.json +5 -0
  78. package/dist/skills/paper-shorten/SKILL.md +42 -0
  79. package/dist/skills/paper-shorten/skill.json +5 -0
  80. package/dist/skills/paper-table-caption/SKILL.md +29 -0
  81. package/dist/skills/paper-table-caption/skill.json +5 -0
  82. package/dist/skills/paper-translate/SKILL.md +48 -0
  83. package/dist/skills/paper-translate/skill.json +5 -0
  84. package/dist/skills/plugin-dev-agent-development/SKILL.md +95 -0
  85. package/dist/skills/plugin-dev-agent-development/skill.json +5 -0
  86. package/dist/skills/research-workflow/SKILL.md +116 -0
  87. package/dist/skills/research-workflow/skill.json +5 -0
  88. package/dist/skills/scientist-experiment-runner/SKILL.md +76 -0
  89. package/dist/skills/scientist-experiment-runner/skill.json +5 -0
  90. package/dist/skills/scientist-ideation/SKILL.md +52 -0
  91. package/dist/skills/scientist-ideation/skill.json +5 -0
  92. package/dist/skills/scientist-plotting/SKILL.md +49 -0
  93. package/dist/skills/scientist-plotting/skill.json +5 -0
  94. package/dist/skills/scientist-review/SKILL.md +40 -0
  95. package/dist/skills/scientist-review/skill.json +5 -0
  96. package/dist/skills/scientist-runtime-init/SKILL.md +46 -0
  97. package/dist/skills/scientist-runtime-init/skill.json +5 -0
  98. package/dist/skills/scientist-writeup/SKILL.md +60 -0
  99. package/dist/skills/scientist-writeup/skill.json +5 -0
  100. package/dist/skills/talk-normal/SKILL.md +73 -0
  101. package/dist/skills/talk-normal/skill.json +5 -0
  102. package/package.json +1 -1
  103. package/dist/agents/rc-experiment.md +0 -203
  104. package/dist/agents/rc-ideation.md +0 -224
  105. package/dist/agents/rc-literature.md +0 -228
  106. package/dist/agents/rc-plan.md +0 -189
  107. package/dist/agents/rc-polisher.md +0 -166
  108. package/dist/agents/rc-rebuttal.md +0 -194
  109. package/dist/agents/rc-reviewer.md +0 -187
  110. package/dist/agents/rc-update-spec.md +0 -231
  111. package/dist/agents/rc-verify.md +0 -234
  112. package/dist/agents/rc-writer.md +0 -161
  113. package/dist/skills/experiment-design/SKILL.md +0 -331
  114. package/dist/skills/full-research-workflow/SKILL.md +0 -363
  115. package/dist/skills/literature-search/SKILL.md +0 -244
  116. package/dist/skills/sanity-check/SKILL.md +0 -449
  117. package/dist/skills/submission-sprint/SKILL.md +0 -361
@@ -0,0 +1,203 @@
1
+ """SubagentStop hook: HANDOFF freshness (HARD), STATE_OUTPUT 6-field (SOFT),
2
+ state-machine no-jump (SOFT).
3
+
4
+ Decision contract:
5
+ - block: {"decision": "block", "reason": "..."}
6
+ - allow: {"hookSpecificOutput": {"permissionDecision": "allow"}}
7
+
8
+ 3-strike fuse: if CHECK 1 fails 3 times for the same (agent, file), the hook
9
+ releases the 3rd attempt with [HARD/RELEASE] to avoid lockout.
10
+
11
+ Falls open via safe_main().
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ import sys
17
+ from pathlib import Path
18
+
19
+ import _copilot_hook_lib as lib
20
+
21
+
22
# Per-agent list of .copilot/ files whose __HANDOFF__ block is subject to the
# HARD freshness check. An empty list means the agent gets SOFT checks only.
HANDOFF_FILES: dict[str, list[str]] = {
    # No entry for research-copilot: it is no longer a sub-agent but the
    # main-session conductor, and the main session never fires SubagentStop.
    # Keeping state.md/decisions.md fresh is a CONDUCTOR-PROTOCOL standing
    # order rather than something this hook enforces.
    "copilot-literature": ["literature.md"],
    "copilot-ideation": ["ideas.md"],
    "copilot-experiment": ["experiments.md"],
    # The four agents below write handoff.md, which is append-only and has
    # multiple writers, so it is exempt from HARD freshness.
    "copilot-writer": [],
    "copilot-polisher": [],
    "copilot-reviewer": [],
    "copilot-rebuttal": [],
}
36
+
37
+
38
def _file_handoff_last_updated(workspace: Path, filename: str) -> str | None:
    """Return the ``last_updated`` value of a handoff block, if there is one.

    Reads ``<workspace>/.copilot/<filename>`` and passes its text to
    ``lib.extract_handoff``. Returns ``None`` when the file is missing or
    unreadable, or when no handoff block is extracted.
    """
    target = workspace / ".copilot" / filename
    if not target.is_file():
        return None
    try:
        # errors="replace": a stray undecodable byte must not abort the hook.
        contents = target.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return None
    handoff = lib.extract_handoff(contents)
    if not handoff:
        return None
    return handoff.get("last_updated")
48
+
49
+
50
+ def _iso_strictly_later(current: str | None, snapshot: str | None) -> bool:
51
+ """ISO 8601 strings sort lexicographically. None snapshot ('first boot')
52
+ is treated as 'any current value counts as later'."""
53
+ if not current:
54
+ return False
55
+ if snapshot is None:
56
+ return True
57
+ return current > snapshot
58
+
59
+
60
def _check_handoff_freshness(workspace: Path, agent: str
                             ) -> tuple[str, str, str | None]:
    """Check that ``agent`` refreshed the handoff block of every file it owns.

    Returns ``(status, message, failing_file)``:

    * ``PASS`` - the agent owns no files in ``HANDOFF_FILES``, or every owned
      file has a ``last_updated`` strictly later than the snapshot's value.
    * ``HARD_FAIL`` - an owned file is stale or missing and the snapshot file
      exists on disk.
    * ``SOFT_FAIL`` - an owned file is stale or missing but no snapshot file
      exists (first boot / hook re-enabled).

    Only the first failing file is reported.
    """
    owned = HANDOFF_FILES.get(agent, [])
    if not owned:
        return "PASS", "", None

    has_snapshot = (workspace / ".copilot" / lib.SNAPSHOT_NAME).is_file()
    baseline = lib.read_snapshot(workspace)

    for name in owned:
        latest = _file_handoff_last_updated(workspace, name)
        if _iso_strictly_later(latest, baseline.get(name)):
            continue
        message = (f"{agent} did not update .copilot/{name} __HANDOFF__ "
                   f"block this session. Append/refresh the block and "
                   f"re-emit STATE_OUTPUT before exiting.")
        status = "HARD_FAIL" if has_snapshot else "SOFT_FAIL"
        return status, message, name

    return "PASS", "", None
85
+
86
+
87
+ def _read_last_assistant_text(transcript_path: str) -> str:
88
+ """Return the concatenated text content of the most recent assistant
89
+ message in the transcript JSONL. Empty string if none found.
90
+ """
91
+ if not transcript_path:
92
+ return ""
93
+ p = Path(transcript_path)
94
+ if not p.is_file():
95
+ return ""
96
+ try:
97
+ lines = p.read_text(encoding="utf-8", errors="replace").splitlines()
98
+ except OSError:
99
+ return ""
100
+ for line in reversed(lines[-100:]):
101
+ if not line.strip():
102
+ continue
103
+ try:
104
+ entry = json.loads(line)
105
+ except json.JSONDecodeError:
106
+ continue
107
+ if entry.get("role") != "assistant":
108
+ continue
109
+ chunks: list[str] = []
110
+ content = entry.get("content")
111
+ if isinstance(content, list):
112
+ for item in content:
113
+ if isinstance(item, dict) and item.get("type") == "text":
114
+ chunks.append(item.get("text", ""))
115
+ elif isinstance(content, str):
116
+ chunks.append(content)
117
+ if chunks:
118
+ return "\n".join(chunks)
119
+ return ""
120
+
121
+
122
def _run_soft_checks(workspace: Path, agent: str, transcript: str) -> None:
    """Run CHECK 3 (STATE_OUTPUT 6 fields) and CHECK 4 (transition legality).

    Both checks are SOFT: findings are recorded through ``lib.log_violation``
    and nothing is returned, so they can never block the agent.
    """
    reply = _read_last_assistant_text(transcript)
    state_output = lib.extract_state_output(reply)

    # CHECK 3: the block must exist and carry every required field.
    absent_fields = lib.state_output_missing_fields(state_output)
    if absent_fields:
        if state_output is None:
            detail = "STATE_OUTPUT block absent from final reply"
        else:
            detail = f"STATE_OUTPUT missing fields: {absent_fields}"
        lib.log_violation(workspace, "SOFT", "WARN", agent, detail)

    # CHECK 4: only meaningful when both ends of the transition are present.
    if not state_output:
        return
    previous = state_output.get("Previous")
    current = state_output.get("Current")
    if not (previous and current):
        return
    if lib.is_transition_legal(agent, previous, current):
        return
    allowed = lib.STATE_MACHINE.get(agent, {}).get(previous, [])
    lib.log_violation(workspace, "SOFT", "WARN", agent,
                      f"transition {previous} -> {current} not in allowed "
                      f"set {allowed}")
141
+
142
+
143
def real_main() -> int:
    """SubagentStop entry point: read the hook payload on stdin, print a decision.

    Always returns 0; the decision is the JSON object written to stdout.
    Order of evaluation:

    1. Empty or non-JSON stdin -> allow.
    2. Active agent is not a copilot agent -> allow.
    3. Guard disabled by environment, or ``skip-handoff-check`` override
       active -> log and allow.
    4. CHECK 1 (handoff freshness). A HARD failure blocks until the strike
       counter reaches 3, at which point it is logged as RELEASE, the counter
       is reset and the stop is allowed.
    5. CHECK 3+4 soft checks, then allow.
    """
    def _allow() -> int:
        print(json.dumps(lib.allow_decision()))
        return 0

    stdin_text = sys.stdin.read()
    if not stdin_text.strip():
        return _allow()
    try:
        payload = json.loads(stdin_text)
    except json.JSONDecodeError:
        return _allow()

    agent = lib.detect_active_agent(payload.get("transcript_path", ""))
    if not lib.is_copilot_agent(agent):
        return _allow()

    workspace = Path.cwd()

    if lib.env_guard_disabled():
        lib.log_violation(workspace, "INFO", "DISABLED", agent,
                          "SubagentStop guard bypassed by env var")
        return _allow()

    if lib.override_match(workspace, agent, "skip-handoff-check"):
        lib.log_violation(workspace, "INFO", "OVERRIDE", agent,
                          "skip-handoff-check active")
        return _allow()

    # CHECK 1 - HARD freshness with 3-strike fuse, or SOFT degrade at first boot
    status, fail_msg, fail_file = _check_handoff_freshness(workspace, agent)
    if status == "HARD_FAIL":
        strikes = lib.counter_inc(workspace, agent, fail_file)
        if strikes < 3:
            lib.log_violation(workspace, "HARD", "BLOCK", agent,
                              f"{fail_msg} (strike {strikes}/3)",
                              file=fail_file)
            print(json.dumps(lib.block_decision(fail_msg)))
            return 0
        # Third strike: release instead of locking the agent out.
        lib.log_violation(workspace, "HARD", "RELEASE", agent,
                          "3-strike fuse triggered, releasing", file=fail_file)
        lib.counter_reset(workspace, agent, fail_file)
        return _allow()

    if status == "SOFT_FAIL":
        lib.log_violation(workspace, "INFO", "NO-SNAPSHOT", agent,
                          f"{fail_msg} (degraded: no .session_snapshot.json)",
                          file=fail_file)
    elif status == "PASS":
        lib.counter_reset_all(workspace, agent)

    # CHECK 3+4 SOFT - never block
    _run_soft_checks(workspace, agent, payload.get("transcript_path", ""))
    return _allow()
200
+
201
+
202
+ if __name__ == "__main__":
203
+ raise SystemExit(lib.safe_main(real_main))
@@ -0,0 +1,96 @@
1
+ """PreToolUse hook: enforce owned-file partition (PIPELINE-OS §8).
2
+
3
+ When the active sub-agent is copilot-*, denies Write/Edit to non-owned
4
+ artifacts. Paths outside the research-artifact universe (.copilot/,
5
+ sections/*.tex, references.bib) are unconditionally allowed.
6
+
7
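+ handoff.md special case (Task 10): append-only agents are denied Write (overwrite) and otherwise fall through to the owned-file check; all other copilot-* agents are denied.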
8
+
9
+ Falls open on any exception via safe_main().
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import sys
15
+ from pathlib import Path
16
+
17
+ import _copilot_hook_lib as lib
18
+
19
+
20
+ def real_main() -> int:
21
+ raw = sys.stdin.read()
22
+ if not raw.strip():
23
+ print(json.dumps(lib.allow_decision()))
24
+ return 0
25
+ try:
26
+ payload = json.loads(raw)
27
+ except json.JSONDecodeError:
28
+ print(json.dumps(lib.allow_decision()))
29
+ return 0
30
+
31
+ agent = lib.detect_active_agent(payload.get("transcript_path", ""))
32
+ if not lib.is_copilot_agent(agent):
33
+ print(json.dumps(lib.allow_decision()))
34
+ return 0
35
+
36
+ workspace = Path.cwd()
37
+
38
+ if lib.env_guard_disabled():
39
+ lib.log_violation(workspace, "INFO", "DISABLED", agent,
40
+ "guard bypassed by env var")
41
+ print(json.dumps(lib.allow_decision()))
42
+ return 0
43
+
44
+ if lib.override_match(workspace, agent, "skip-owned-check"):
45
+ lib.log_violation(workspace, "INFO", "OVERRIDE", agent,
46
+ "skip-owned-check active")
47
+ print(json.dumps(lib.allow_decision()))
48
+ return 0
49
+
50
+ file_path = str((payload.get("tool_input") or {}).get("file_path", ""))
51
+ if not file_path:
52
+ print(json.dumps(lib.allow_decision()))
53
+ return 0
54
+
55
+ norm = lib.normalize_path(file_path, workspace=workspace)
56
+
57
+ # handoff.md special case: append-only for the 4 multi-writers.
58
+ if norm.endswith(".copilot/handoff.md"):
59
+ tool_name = payload.get("tool_name", "")
60
+ if agent in lib.HANDOFF_APPEND_ONLY_AGENTS:
61
+ if tool_name == "Write":
62
+ lib.log_violation(workspace, "HARD", "DENY", agent,
63
+ "Write (overwrite) to handoff.md; "
64
+ "use Edit to append", file=norm)
65
+ msg = ("Blocked by copilot-write-guard: handoff.md is "
66
+ "append-only. Use Edit to add a new block, not Write.")
67
+ print(json.dumps(lib.deny_decision(msg)))
68
+ return 0
69
+ # Edit allowed for these 4 agents — fall through to owned check
70
+ else:
71
+ lib.log_violation(workspace, "HARD", "DENY", agent,
72
+ "agent has no write right to handoff.md",
73
+ file=norm)
74
+ msg = (f"Blocked by copilot-write-guard: {agent} is not an "
75
+ f"owner of handoff.md.")
76
+ print(json.dumps(lib.deny_decision(msg)))
77
+ return 0
78
+
79
+ if lib.is_owned(agent, norm):
80
+ print(json.dumps(lib.allow_decision()))
81
+ return 0
82
+
83
+ if lib.is_known_research_artifact(norm):
84
+ lib.log_violation(workspace, "HARD", "DENY", agent,
85
+ "writing to non-owned artifact", file=norm)
86
+ msg = (f"Blocked by copilot-write-guard: {agent} may not write "
87
+ f"{norm}. See PIPELINE-OS §8.")
88
+ print(json.dumps(lib.deny_decision(msg)))
89
+ return 0
90
+
91
+ print(json.dumps(lib.allow_decision()))
92
+ return 0
93
+
94
+
95
+ if __name__ == "__main__":
96
+ raise SystemExit(lib.safe_main(real_main))
@@ -0,0 +1,61 @@
1
+ """PostToolUse hook: detect long background experiments and recommend
2
+ arming a CronCreate-based self-poll so the main session continues after
3
+ notifications. Sets `.copilot/.loop-armed` to avoid duplicate suggestions."""
4
+ from __future__ import annotations
5
+
6
+ import json
7
+ import re
8
+ import sys
9
+ from pathlib import Path
10
+
11
# Command fragments that mark a Bash invocation as a long-running experiment.
LONGRUN_PATTERNS = (
    re.compile(r"\btrain(\.py|_)"),
    re.compile(r"\bmain\.py\b"),
    re.compile(r"\bai_scientist\b"),
    re.compile(r"\btorchrun\b"),
    re.compile(r"\bdeepspeed\b"),
    re.compile(r"\bexperiments?/"),
    re.compile(r"\baccelerate launch\b"),
)


def should_arm(event: dict) -> bool:
    """Return True when ``event`` is a background Bash call that looks long-running.

    All of the following must hold: ``tool_name`` is ``"Bash"``,
    ``tool_input.run_in_background`` is truthy, and ``tool_input.command``
    matches at least one of ``LONGRUN_PATTERNS``.

    Malformed payloads (event or ``tool_input`` not a mapping, ``command``
    not a string) return False instead of raising, so an unexpected payload
    shape cannot crash the hook.
    """
    if not isinstance(event, dict):
        return False
    if event.get("tool_name") != "Bash":
        return False
    inp = event.get("tool_input") or {}
    if not isinstance(inp, dict):
        return False
    if not inp.get("run_in_background"):
        return False
    cmd = inp.get("command", "") or ""
    if not isinstance(cmd, str):
        return False
    return any(p.search(cmd) for p in LONGRUN_PATTERNS)
30
+
31
+
32
def main() -> int:
    """PostToolUse entry point: suggest arming a self-poll, at most once.

    Reads the hook event from stdin. When ``should_arm`` accepts it and
    ``.copilot/.loop-armed`` does not exist yet in the current directory, the
    recommendation is written to stdout and the flag file is created so the
    suggestion is not repeated.

    Always returns 0. Invalid JSON and JSON values that are not objects are
    ignored. Failing to create the flag file is reported on stderr rather
    than raised, since this hook only gives advice.
    """
    try:
        raw = sys.stdin.read()
        event = json.loads(raw) if raw.strip() else {}
    except json.JSONDecodeError:
        return 0

    # Valid JSON that is not an object cannot be a hook event.
    if not isinstance(event, dict) or not should_arm(event):
        return 0

    flag = Path.cwd() / ".copilot" / ".loop-armed"
    if flag.exists():
        return 0

    sys.stdout.write(
        "[loop-armer] Detected long-running background experiment.\n"
        "[loop-armer] Recommend arming a self-poll so the loop continues across notifications:\n"
        " CronCreate(cron=\"*/3 * * * *\", prompt=\"<<autonomous-loop>>\", recurring=true, durable=false)\n"
        "[loop-armer] Or the user can paste:\n"
        " /loop 1m If a background experiment task is still running, check its log tail and decide next step. Otherwise, delete this scheduled task.\n"
        "[loop-armer] On EXECUTING -> END the agent MUST CronDelete the returned id and remove .copilot/.loop-armed.\n"
    )
    sys.stdout.flush()
    try:
        flag.parent.mkdir(parents=True, exist_ok=True)
        flag.write_text("", encoding="utf-8")
    except OSError as exc:
        # Without the flag the suggestion may repeat; that beats crashing.
        sys.stderr.write(f"[loop-armer] could not create {flag}: {exc}\n")
    return 0
58
+
59
+
60
+ if __name__ == "__main__":
61
+ raise SystemExit(main())
@@ -0,0 +1,208 @@
1
+ """Research Copilot Workflow Guard hook (PreToolUse).
2
+
3
+ Polices the MAIN SESSION acting as conductor. The main session must delegate
4
+ domain work to copilot-* sub-agents and must publish a TaskCreate plan list
5
+ before dispatching. copilot-* sub-agents run freely (exempt).
6
+
7
+ Origin attribution uses the authoritative `agent_id` payload field: it is
8
+ present ONLY inside a sub-agent call, so its absence => main session. Any
9
+ ambiguity resolves to main (conservative — never silently exempt the conductor).
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import re
15
+ import sys
16
+ from pathlib import Path
17
+ from typing import Any
18
+
19
# Shell commands treated as read-only by is_read_only().
READ_ONLY_PREFIXES = (
    "grep", "cat", "ls", "head", "tail", "find",
    "Get-Content", "Select-String", "Get-ChildItem",
)
# Substrings that mark a shell command as an experiment run.
EXPERIMENT_KEYWORDS = (
    "train.py", "run_experiment", "wandb", "mlflow",
    "tensorboard", "torchrun", "deepspeed", "accelerate",
)
# Catches 'python ... train|experiment|run_exp' invocations, case-insensitively.
EXPERIMENT_REGEX = re.compile(
    r"python[\w\s.-]*\b(train|experiment|run_exp)\b",
    re.IGNORECASE,
)
# Tool-name prefixes of the paper-retrieval MCP servers.
RESEARCH_MCP_PREFIXES = (
    "mcp__arxiv-search__",
    "mcp__arxivsub-search__",
    "mcp__google-scholar__",
    "mcp__dblp-bib__",
)
# Conductor-owned artifacts: the main session MAY write these.
CONDUCTOR_OWNED_ARTIFACTS = (
    ".copilot/state.md",
    ".copilot/decisions.md",
)
# Delegated artifact files the main session must NOT write. They are matched
# segment-anchored (see _path_matches), so 'references.bib' does NOT match
# 'old_references.bib'. sections/*.tex is handled separately by a
# path-segment check.
DELEGATED_ARTIFACT_FILES = (
    ".copilot/ideas.md",
    ".copilot/experiments.md",
    ".copilot/literature.md",
    "references.bib",
)
# Tools the main session may always call; _decide() allows them up front.
READ_ONLY_TOOLS = (
    "Read", "Grep", "Glob", "TaskCreate", "TaskUpdate",
    "TaskList", "TaskGet", "Skill", "AskUserQuestion",
)
# Name prefix shared by every copilot sub-agent type.
COPILOT_SUBAGENT_PREFIX = "copilot-"
41
+
42
+
43
def allow() -> dict[str, Any]:
    """Build the hook response that permits the tool call."""
    decision = {"permissionDecision": "allow"}
    return {"hookSpecificOutput": decision}
45
+
46
+
47
def deny(message: str) -> dict[str, Any]:
    """Build the hook response that rejects the tool call.

    ``message`` is used both as the permission-decision reason and as the
    top-level ``systemMessage``.
    """
    decision = {
        "permissionDecision": "deny",
        "permissionDecisionReason": message,
    }
    return {"hookSpecificOutput": decision, "systemMessage": message}
51
+
52
+
53
def is_main_session(payload: dict[str, Any]) -> bool:
    """Return True when the payload has no (or an empty) ``agent_id``.

    Per the Claude Code hooks docs cited by the original author, ``agent_id``
    is only present inside a sub-agent call.
    """
    agent_id = payload.get("agent_id")
    if agent_id:
        return False
    return True
56
+
57
+
58
def is_exempt_subagent(payload: dict[str, Any]) -> bool:
    """Return True only for a sub-agent call whose ``agent_type`` is copilot-*.

    The main session is never exempt; a missing or empty ``agent_type`` is
    treated as an empty string.
    """
    if is_main_session(payload):
        return False
    agent_type = str(payload.get("agent_type") or "")
    return agent_type.startswith(COPILOT_SUBAGENT_PREFIX)
62
+
63
+
64
# Shell control operators that start a new command: && || ; | newline, and a
# lone & (background). The look-behind keeps redirections like 2>&1 intact.
_COMMAND_SEPARATORS = re.compile(r"&&|\|\||[;|\n]|(?<!>)&")
# find(1) actions that execute or delete, so the command is not read-only.
_FIND_MUTATING_ACTIONS = ("-exec", "-execdir", "-ok", "-okdir", "-delete")


def is_read_only(command: str) -> bool:
    """Return True when every command in ``command`` is a read-only one.

    The command line is split on shell control operators and each resulting
    command must begin with a word that is exactly one of
    ``READ_ONLY_PREFIXES``. This closes two gaps of a plain ``startswith``
    test on the whole string:

    * chaining - ``ls; python train.py`` used to count as read-only because
      it starts with ``ls``;
    * word boundaries - ``lsblk`` used to match the ``ls`` prefix.

    Command substitution (``$(...)`` or backticks) and ``find`` with an
    executing/deleting action are never considered read-only. An empty
    command returns False. Pipelines made only of read-only commands
    (``grep x f | head``) still return True.
    """
    stripped = command.strip()
    if not stripped:
        return False
    # Substitutions can run arbitrary commands inside an innocent-looking one.
    if "$(" in stripped or "`" in stripped:
        return False

    seen_command = False
    for segment in _COMMAND_SEPARATORS.split(stripped):
        words = segment.split()
        if not words:
            # Trailing separator (e.g. 'ls;' or 'ls &'): nothing to check.
            continue
        seen_command = True
        if words[0] not in READ_ONLY_PREFIXES:
            return False
        if words[0] == "find" and any(w in _FIND_MUTATING_ACTIONS for w in words):
            return False
    return seen_command
67
+
68
+
69
+ def _norm(path: str) -> str:
70
+ return str(path).replace("\\", "/")
71
+
72
+
73
def _path_matches(path: str, target: str) -> bool:
    """Segment-anchored match of ``path`` against ``target``.

    True when the normalised path is exactly ``target`` or ends with
    ``/target``. Anchoring on a path separator avoids substring
    false-positives: 'old_references.bib' does not match 'references.bib',
    and an unrelated path does not match '.copilot/ideas.md'.
    """
    normalized = _norm(path)
    if normalized == target:
        return True
    return normalized.endswith(f"/{target}")
79
+
80
+
81
+ def _iter_transcript_tool_uses(transcript_path: str | None):
82
+ if not transcript_path:
83
+ return
84
+ p = Path(transcript_path)
85
+ if not p.is_file():
86
+ return
87
+ try:
88
+ text = p.read_text(encoding="utf-8", errors="replace")
89
+ except OSError:
90
+ return
91
+ for line in text.splitlines():
92
+ line = line.strip()
93
+ if not line:
94
+ continue
95
+ try:
96
+ rec = json.loads(line)
97
+ except json.JSONDecodeError:
98
+ continue
99
+ if isinstance(rec, dict) and rec.get("type") == "tool_use":
100
+ yield {"name": rec.get("name", ""), "input": rec.get("input", {}) or {}}
101
+ continue
102
+ content = None
103
+ if isinstance(rec, dict):
104
+ content = rec.get("content")
105
+ if content is None:
106
+ msg = rec.get("message")
107
+ if isinstance(msg, dict):
108
+ content = msg.get("content")
109
+ if isinstance(content, list):
110
+ for item in content:
111
+ if isinstance(item, dict) and item.get("type") == "tool_use":
112
+ yield {"name": item.get("name", ""),
113
+ "input": item.get("input", {}) or {}}
114
+
115
+
116
def check_m1_delegation(tool_name: str, tool_input: dict[str, Any]) -> str | None:
    """M1 delegation gate: deny main-session execution-class work.

    Returns the denial message, or ``None`` when the call is acceptable.
    Three kinds of call are denied:

    * a Bash/PowerShell command that is not read-only and mentions an
      experiment keyword or matches ``EXPERIMENT_REGEX``;
    * any tool whose name starts with one of ``RESEARCH_MCP_PREFIXES``;
    * a Write/Edit to ``sections/*.tex`` or to one of
      ``DELEGATED_ARTIFACT_FILES``, unless the path is conductor-owned.
    """
    params = tool_input or {}

    # Experiment scripts via shell.
    if tool_name in ("Bash", "PowerShell"):
        command = str(params.get("command", ""))
        if not command or is_read_only(command):
            return None
        mentions_experiment = any(kw in command for kw in EXPERIMENT_KEYWORDS)
        if mentions_experiment or EXPERIMENT_REGEX.search(command):
            return ("Blocked by research-copilot-guard (M1 delegation gate): the "
                    "conductor must not run experiments inline. Delegate via "
                    "Agent(subagent_type='copilot-experiment').")
        return None

    # Paper-retrieval MCP tools.
    if tool_name.startswith(RESEARCH_MCP_PREFIXES):
        return ("Blocked by research-copilot-guard (M1 delegation gate): the "
                "conductor must not search papers inline. Delegate via "
                "Agent(subagent_type='copilot-literature').")

    # Writes to delegated research artifacts (segment-anchored, not substring).
    if tool_name not in ("Write", "Edit"):
        return None
    path = _norm(params.get("file_path", ""))
    for owned in CONDUCTOR_OWNED_ARTIFACTS:
        if _path_matches(path, owned):
            return None  # conductor owns state.md / decisions.md
    under_sections = "sections" in path.split("/")
    if (under_sections and path.endswith(".tex")) or any(
            _path_matches(path, f) for f in DELEGATED_ARTIFACT_FILES):
        return ("Blocked by research-copilot-guard (M1 delegation gate): the "
                "conductor must not write research artifacts (sections/*.tex, "
                "references.bib, .copilot/{ideas,experiments,literature}.md) "
                "inline. Delegate to the matching copilot-* sub-agent.")
    return None
146
+
147
+
148
def check_m2_task_list(tool_name: str, tool_input: dict[str, Any],
                       transcript_path: str | None) -> str | None:
    """M2 task-list gate: deny a copilot-* dispatch made without a TaskCreate.

    Applies only to ``Agent`` calls whose ``subagent_type`` starts with
    ``COPILOT_SUBAGENT_PREFIX``. Returns ``None`` (allow) when no transcript
    path is given, or when the transcript contains at least one
    ``TaskCreate`` tool use; otherwise returns the denial message.

    NOTE(review): the message says "in this turn", but the scan covers every
    tool use found in the transcript file - confirm which is intended.
    """
    is_copilot_dispatch = (
        tool_name == "Agent"
        and str((tool_input or {}).get("subagent_type", "")).startswith(
            COPILOT_SUBAGENT_PREFIX)
    )
    if not is_copilot_dispatch:
        return None
    if not transcript_path:
        return None  # fail-open: cannot inspect
    has_plan = any(use["name"] == "TaskCreate"
                   for use in _iter_transcript_tool_uses(transcript_path))
    if has_plan:
        return None
    return ("Blocked by research-copilot-guard (M2 task-list gate): dispatching "
            "a copilot-* sub-agent requires a TaskCreate plan list (one task per "
            "planned dispatch) in this turn. Call TaskCreate first, then Agent().")
164
+
165
+
166
def main() -> int:
    """PreToolUse entry point: print exactly one decision as JSON, return 0.

    Empty stdin, invalid JSON and any exception raised by ``_decide`` all
    produce an allow decision (fail-open). For an exception the traceback is
    written to stderr first.
    """
    stdin_text = sys.stdin.read()
    decision = allow()
    if stdin_text:
        try:
            payload = json.loads(stdin_text)
        except json.JSONDecodeError:
            payload = None
        else:
            try:
                decision = _decide(payload)
            except Exception:
                # Fail-open: any unexpected error yields allow, never traps
                # the user (mirrors _copilot_hook_lib.safe_main's contract).
                import traceback
                sys.stderr.write(traceback.format_exc())
                decision = allow()
    print(json.dumps(decision))
    return 0
186
+
187
+
188
def _decide(payload: dict[str, Any]) -> dict[str, Any]:
    """Pure decision logic for a parsed payload.

    copilot-* sub-agents and tools listed in ``READ_ONLY_TOOLS`` are allowed
    outright. Everything else is treated as the main session and goes
    through the M1 and M2 gates; the first gate that returns a message
    produces a deny decision. Raising is safe - main() catches and fails open.
    """
    # Exempt copilot-* sub-agents outright (they run experiments/searches/writes).
    if is_exempt_subagent(payload):
        return allow()

    # Everything else (incl. ambiguous) is treated as MAIN SESSION -> police.
    tool_name = payload.get("tool_name", "")
    tool_input = payload.get("tool_input", {}) or {}
    if tool_name in READ_ONLY_TOOLS:
        return allow()

    transcript_path = payload.get("transcript_path")
    # Both gates are evaluated before either result is inspected.
    m1_reason = check_m1_delegation(tool_name, tool_input)
    m2_reason = check_m2_task_list(tool_name, tool_input, transcript_path)
    reason = m1_reason or m2_reason
    if reason:
        return deny(reason)
    return allow()
205
+
206
+
207
+ if __name__ == "__main__":
208
+ raise SystemExit(main())
@@ -0,0 +1,29 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ import sys
5
+
6
+
7
def main() -> int:
    """Print the scientist-guardrails banner for the current directory.

    The banner always carries three notices, then one line naming the first
    runtime directory that exists (the ``.github`` copy is checked before the
    ``self`` copy) or stating that none was found, then the recommended first
    action. Always returns 0.
    """
    root = Path.cwd()
    candidates = (
        root / ".github" / "runtimes" / "scientist-support" / "runtime",
        root / "self" / "runtimes" / "scientist-support" / "runtime",
    )
    detected = next((c for c in candidates if c.exists()), None)

    if detected is None:
        runtime_line = "[scientist-guardrails] Scientist-support runtime was not found in the workspace."
    else:
        runtime_line = f"[scientist-guardrails] Detected skill runtime: {detected}"

    banner = (
        "[scientist-guardrails] AI Scientist workflow loaded.",
        "[scientist-guardrails] Upstream defaults assume Linux + CUDA and may not fully run on Windows hosts.",
        "[scientist-guardrails] AI Scientist executes LLM-written code; prefer a container or sandbox before running full experiments.",
        runtime_line,
        "[scientist-guardrails] Recommended first action: run scientist runtime validation before ideation or experiment launch.",
    )
    sys.stdout.write("".join(f"{line}\n" for line in banner))
    sys.stdout.flush()
    return 0
26
+
27
+
28
+ if __name__ == "__main__":
29
+ raise SystemExit(main())