@research-copilot/plugin 1.1.15 → 1.1.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.claude-plugin/plugin.json +3 -2
- package/dist/.codex-plugin/plugin.toml +2 -1
- package/dist/.cursor-plugin/plugin.json +3 -2
- package/dist/.gemini-plugin/plugin.json +3 -2
- package/dist/.opencode-plugin/plugin.json +3 -2
- package/dist/.windsurf-plugin/plugin.json +3 -2
- package/dist/agents/copilot-conductor.agent.md +60 -0
- package/dist/agents/copilot-experiment.agent.md +56 -0
- package/dist/agents/copilot-ideation.agent.md +45 -0
- package/dist/agents/copilot-literature.agent.md +34 -0
- package/dist/agents/copilot-polisher.agent.md +30 -0
- package/dist/agents/copilot-rebuttal.agent.md +35 -0
- package/dist/agents/copilot-reviewer.agent.md +35 -0
- package/dist/agents/copilot-writer.agent.md +39 -0
- package/dist/hooks/dispatch-reminder.json +17 -0
- package/dist/hooks/loop-armer.json +17 -0
- package/dist/hooks/research-copilot-guard.hook.md +51 -0
- package/dist/hooks/scientist-guardrails.json +17 -0
- package/dist/hooks/scripts/__tests__/__init__.py +0 -0
- package/dist/hooks/scripts/__tests__/test_post_tool_loop_armer.py +88 -0
- package/dist/hooks/scripts/__tests__/test_research_copilot_guard_main_session.py +150 -0
- package/dist/hooks/scripts/__tests__/test_session_start_memory_injector.py +66 -0
- package/dist/hooks/scripts/__tests__/test_user_prompt_dispatch_reminder.py +37 -0
- package/dist/hooks/scripts/_copilot_hook_lib.py +564 -0
- package/dist/hooks/scripts/copilot_subagent_stop.py +203 -0
- package/dist/hooks/scripts/copilot_write_guard.py +96 -0
- package/dist/hooks/scripts/post_tool_loop_armer.py +61 -0
- package/dist/hooks/scripts/research_copilot_guard.py +208 -0
- package/dist/hooks/scripts/scientist_guardrails.py +29 -0
- package/dist/hooks/scripts/session_start_memory_injector.py +188 -0
- package/dist/hooks/scripts/user_prompt_dispatch_reminder.py +40 -0
- package/dist/hooks/session-memory-injector.json +17 -0
- package/dist/hooks/tests/__init__.py +0 -0
- package/dist/hooks/tests/conftest.py +61 -0
- package/dist/hooks/tests/fixtures/transcript_copilot_experiment_complete.jsonl +2 -0
- package/dist/hooks/tests/fixtures/transcript_copilot_experiment_state_jump.jsonl +2 -0
- package/dist/hooks/tests/fixtures/transcript_copilot_literature.jsonl +2 -0
- package/dist/hooks/tests/fixtures/transcript_main_only.jsonl +2 -0
- package/dist/hooks/tests/fixtures/transcript_malformed_state_output.jsonl +2 -0
- package/dist/hooks/tests/integration_run.ps1 +65 -0
- package/dist/hooks/tests/test_copilot_hook_lib.py +398 -0
- package/dist/hooks/tests/test_copilot_subagent_stop.py +186 -0
- package/dist/hooks/tests/test_copilot_write_guard.py +137 -0
- package/dist/hooks/tests/test_session_start_snapshot.py +116 -0
- package/dist/hooks/tests/test_state_machine_consistency.py +75 -0
- package/dist/skills/arxivsub-skill/SKILL.md +98 -0
- package/dist/skills/arxivsub-skill/skill.json +5 -0
- package/dist/skills/de-ai-checker/SKILL.md +110 -0
- package/dist/skills/de-ai-checker/skill.json +5 -0
- package/dist/skills/deep-interview/SKILL.md +91 -0
- package/dist/skills/deep-interview/skill.json +5 -0
- package/dist/skills/grill-with-docs/SKILL.md +120 -0
- package/dist/skills/grill-with-docs/skill.json +5 -0
- package/dist/skills/init-mcp/SKILL.md +83 -0
- package/dist/skills/init-mcp/skill.json +5 -0
- package/dist/skills/model-escalation/SKILL.md +93 -0
- package/dist/skills/model-escalation/skill.json +5 -0
- package/dist/skills/paper-architecture-web-drawing/SKILL.md +282 -0
- package/dist/skills/paper-architecture-web-drawing/skill.json +5 -0
- package/dist/skills/paper-deai/SKILL.md +53 -0
- package/dist/skills/paper-deai/skill.json +5 -0
- package/dist/skills/paper-en2zh/SKILL.md +29 -0
- package/dist/skills/paper-en2zh/skill.json +5 -0
- package/dist/skills/paper-expand/SKILL.md +43 -0
- package/dist/skills/paper-expand/skill.json +5 -0
- package/dist/skills/paper-experiment-analysis/SKILL.md +38 -0
- package/dist/skills/paper-experiment-analysis/skill.json +5 -0
- package/dist/skills/paper-figure-caption/SKILL.md +29 -0
- package/dist/skills/paper-figure-caption/skill.json +5 -0
- package/dist/skills/paper-logic-check/SKILL.md +30 -0
- package/dist/skills/paper-logic-check/skill.json +5 -0
- package/dist/skills/paper-polish/SKILL.md +34 -305
- package/dist/skills/paper-polish/skill.json +5 -0
- package/dist/skills/paper-review/SKILL.md +49 -0
- package/dist/skills/paper-review/skill.json +5 -0
- package/dist/skills/paper-sanity-check/SKILL.md +122 -0
- package/dist/skills/paper-sanity-check/skill.json +5 -0
- package/dist/skills/paper-shorten/SKILL.md +42 -0
- package/dist/skills/paper-shorten/skill.json +5 -0
- package/dist/skills/paper-table-caption/SKILL.md +29 -0
- package/dist/skills/paper-table-caption/skill.json +5 -0
- package/dist/skills/paper-translate/SKILL.md +48 -0
- package/dist/skills/paper-translate/skill.json +5 -0
- package/dist/skills/plugin-dev-agent-development/SKILL.md +95 -0
- package/dist/skills/plugin-dev-agent-development/skill.json +5 -0
- package/dist/skills/research-workflow/SKILL.md +116 -0
- package/dist/skills/research-workflow/skill.json +5 -0
- package/dist/skills/scientist-experiment-runner/SKILL.md +76 -0
- package/dist/skills/scientist-experiment-runner/skill.json +5 -0
- package/dist/skills/scientist-ideation/SKILL.md +52 -0
- package/dist/skills/scientist-ideation/skill.json +5 -0
- package/dist/skills/scientist-plotting/SKILL.md +49 -0
- package/dist/skills/scientist-plotting/skill.json +5 -0
- package/dist/skills/scientist-review/SKILL.md +40 -0
- package/dist/skills/scientist-review/skill.json +5 -0
- package/dist/skills/scientist-runtime-init/SKILL.md +46 -0
- package/dist/skills/scientist-runtime-init/skill.json +5 -0
- package/dist/skills/scientist-writeup/SKILL.md +60 -0
- package/dist/skills/scientist-writeup/skill.json +5 -0
- package/dist/skills/talk-normal/SKILL.md +73 -0
- package/dist/skills/talk-normal/skill.json +5 -0
- package/package.json +1 -1
- package/dist/agents/rc-experiment.md +0 -203
- package/dist/agents/rc-ideation.md +0 -224
- package/dist/agents/rc-literature.md +0 -228
- package/dist/agents/rc-plan.md +0 -189
- package/dist/agents/rc-polisher.md +0 -166
- package/dist/agents/rc-rebuttal.md +0 -194
- package/dist/agents/rc-reviewer.md +0 -187
- package/dist/agents/rc-update-spec.md +0 -231
- package/dist/agents/rc-verify.md +0 -234
- package/dist/agents/rc-writer.md +0 -161
- package/dist/skills/experiment-design/SKILL.md +0 -331
- package/dist/skills/full-research-workflow/SKILL.md +0 -363
- package/dist/skills/literature-search/SKILL.md +0 -244
- package/dist/skills/sanity-check/SKILL.md +0 -449
- package/dist/skills/submission-sprint/SKILL.md +0 -361
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
"""SubagentStop hook: HANDOFF freshness (HARD), STATE_OUTPUT 6-field (SOFT),
|
|
2
|
+
state-machine no-jump (SOFT).
|
|
3
|
+
|
|
4
|
+
Decision contract:
|
|
5
|
+
- block: {"decision": "block", "reason": "..."}
|
|
6
|
+
- allow: {"hookSpecificOutput": {"permissionDecision": "allow"}}
|
|
7
|
+
|
|
8
|
+
3-strike fuse: if CHECK 1 fails 3 times for the same (agent, file), the hook
|
|
9
|
+
releases the 3rd attempt with [HARD/RELEASE] to avoid lockout.
|
|
10
|
+
|
|
11
|
+
Falls open via safe_main().
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
import sys
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
import _copilot_hook_lib as lib
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# Per-agent list of .copilot/ files whose __HANDOFF__ block must be refreshed
# before the sub-agent stops. An empty list means the HARD freshness check
# passes immediately for that agent.
HANDOFF_FILES: dict[str, list[str]] = {
    # research-copilot retired as a sub-agent (now the main-session conductor);
    # state.md/decisions.md freshness is a CONDUCTOR-PROTOCOL standing order,
    # not SubagentStop-enforced (the main session never fires SubagentStop).
    "copilot-literature": ["literature.md"],
    "copilot-ideation": ["ideas.md"],
    "copilot-experiment": ["experiments.md"],
    # writer/polisher/reviewer/rebuttal write handoff.md (append-only multi-writer)
    # — not subject to HARD freshness; SOFT checks only.
    "copilot-writer": [],
    "copilot-polisher": [],
    "copilot-reviewer": [],
    "copilot-rebuttal": [],
}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _file_handoff_last_updated(workspace: Path, filename: str) -> str | None:
    """Return the ``last_updated`` value of a .copilot file's handoff block.

    Yields None when ``<workspace>/.copilot/<filename>`` is not a regular
    file, cannot be read, or ``lib.extract_handoff`` returns nothing for it.
    """
    target = workspace / ".copilot" / filename
    if not target.is_file():
        return None
    try:
        contents = target.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return None
    handoff = lib.extract_handoff(contents)
    if not handoff:
        return None
    return handoff.get("last_updated")
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _iso_strictly_later(current: str | None, snapshot: str | None) -> bool:
    """Return True when ``current`` is strictly later than ``snapshot``.

    A missing/empty ``current`` is never later. A None ``snapshot``
    ('first boot') means any current value counts as later.

    Both values are parsed as ISO 8601 so that timestamps carrying different
    UTC offsets (or ``Z`` versus ``+00:00``) are ordered by instant. Plain
    string comparison only orders ISO strings correctly when they share one
    format and one offset, so it is kept solely as the fallback for values
    that cannot be parsed or compared (non-ISO text, naive mixed with aware).
    """
    if not current:
        return False
    if snapshot is None:
        return True

    # Local import: the block stays self-contained without touching the
    # file's import section.
    from datetime import datetime

    def _parse(stamp: str) -> datetime:
        text = stamp.strip()
        # datetime.fromisoformat() only accepts a trailing 'Z' on newer
        # interpreters; rewrite it to the equivalent explicit offset.
        if text[-1:] in ("Z", "z"):
            text = text[:-1] + "+00:00"
        return datetime.fromisoformat(text)

    try:
        return _parse(current) > _parse(snapshot)
    except (AttributeError, TypeError, ValueError):
        # Unparseable or incomparable values: original lexicographic rule.
        return current > snapshot
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _check_handoff_freshness(workspace: Path, agent: str
                             ) -> tuple[str, str, str | None]:
    """Check that ``agent`` refreshed the handoff block of every file it owns.

    Returns ``(status, message, file_that_failed)`` where status is one of:

    - ``PASS``: the agent owns no files, or every owned file has a
      ``last_updated`` strictly later than the snapshot value.
    - ``HARD_FAIL``: an owned file is stale/missing and the snapshot file
      exists to compare against.
    - ``SOFT_FAIL``: an owned file is stale/missing but no snapshot file
      exists (first boot / hook re-enabled).

    Only the first failing file is reported.
    """
    owned = HANDOFF_FILES.get(agent, [])
    if not owned:
        return "PASS", "", None

    have_snapshot = (workspace / ".copilot" / lib.SNAPSHOT_NAME).is_file()
    baseline = lib.read_snapshot(workspace)

    # Lazily walk the owned files in order and stop at the first stale one.
    stale = next(
        (name for name in owned
         if not _iso_strictly_later(
             _file_handoff_last_updated(workspace, name), baseline.get(name))),
        None,
    )
    if stale is None:
        return "PASS", "", None

    severity = "HARD_FAIL" if have_snapshot else "SOFT_FAIL"
    detail = (f"{agent} did not update .copilot/{stale} __HANDOFF__ "
              f"block this session. Append/refresh the block and "
              f"re-emit STATE_OUTPUT before exiting.")
    return severity, detail, stale
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _read_last_assistant_text(transcript_path: str) -> str:
    """Return the text of the most recent assistant message in a JSONL transcript.

    Only the last 100 lines are scanned, newest first. A line counts as an
    assistant message when its JSON object has ``role == "assistant"`` either
    at the top level or inside a nested ``"message"`` object. ``content`` may
    be a plain string or a list of blocks; only ``type == "text"`` blocks are
    used, joined with newlines.

    Returns an empty string when the path is empty, is not a file, cannot be
    read, or holds no assistant message with text content. Blank lines,
    invalid JSON and JSON values that are not objects are skipped.
    """
    if not transcript_path:
        return ""
    p = Path(transcript_path)
    if not p.is_file():
        return ""
    try:
        lines = p.read_text(encoding="utf-8", errors="replace").splitlines()
    except OSError:
        return ""
    for line in reversed(lines[-100:]):
        if not line.strip():
            continue
        try:
            entry = json.loads(line)
        except json.JSONDecodeError:
            continue
        # A valid JSON line may still be a list/string/number; it has no role.
        if not isinstance(entry, dict):
            continue
        # Accept both the flat shape and the one nesting under "message".
        candidates = [entry]
        nested = entry.get("message")
        if isinstance(nested, dict):
            candidates.append(nested)
        record = next(
            (c for c in candidates if c.get("role") == "assistant"), None)
        if record is None:
            continue
        chunks: list[str] = []
        content = record.get("content")
        if isinstance(content, list):
            for item in content:
                if isinstance(item, dict) and item.get("type") == "text":
                    text = item.get("text", "")
                    # Non-string text would break the join below; skip it.
                    if isinstance(text, str):
                        chunks.append(text)
        elif isinstance(content, str):
            chunks.append(content)
        if chunks:
            return "\n".join(chunks)
    return ""
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _run_soft_checks(workspace: Path, agent: str, transcript: str) -> None:
    """Run CHECK 3 (STATE_OUTPUT fields) and CHECK 4 (state transition legality).

    Both checks are SOFT: a problem is recorded through ``lib.log_violation``
    and nothing is returned or blocked.
    """
    state_output = lib.extract_state_output(_read_last_assistant_text(transcript))

    # CHECK 3: the block must exist and carry every required field.
    absent_fields = lib.state_output_missing_fields(state_output)
    if absent_fields:
        detail = ("STATE_OUTPUT block absent from final reply"
                  if state_output is None
                  else f"STATE_OUTPUT missing fields: {absent_fields}")
        lib.log_violation(workspace, "SOFT", "WARN", agent, detail)

    # CHECK 4: only meaningful when both ends of the transition are present.
    if not state_output:
        return
    previous = state_output.get("Previous")
    current = state_output.get("Current")
    if not (previous and current):
        return
    if lib.is_transition_legal(agent, previous, current):
        return
    allowed = lib.STATE_MACHINE.get(agent, {}).get(previous, [])
    lib.log_violation(workspace, "SOFT", "WARN", agent,
                      f"transition {previous} -> {current} not in allowed "
                      f"set {allowed}")
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def real_main() -> int:
    """SubagentStop entry point: read the hook payload and print one decision.

    Always returns 0; the decision is the JSON object printed to stdout.
    Empty or malformed input, a non-copilot agent, the env-var kill switch and
    the ``skip-handoff-check`` override all produce an allow decision. A HARD
    freshness failure blocks until the third strike for the same
    (agent, file), which is released. SOFT checks are logged only.
    """
    def _emit_allow() -> int:
        print(json.dumps(lib.allow_decision()))
        return 0

    raw = sys.stdin.read()
    if not raw.strip():
        return _emit_allow()
    try:
        payload = json.loads(raw)
    except json.JSONDecodeError:
        return _emit_allow()

    transcript_path = payload.get("transcript_path", "")
    agent = lib.detect_active_agent(transcript_path)
    if not lib.is_copilot_agent(agent):
        return _emit_allow()

    workspace = Path.cwd()

    if lib.env_guard_disabled():
        lib.log_violation(workspace, "INFO", "DISABLED", agent,
                          "SubagentStop guard bypassed by env var")
        return _emit_allow()

    if lib.override_match(workspace, agent, "skip-handoff-check"):
        lib.log_violation(workspace, "INFO", "OVERRIDE", agent,
                          "skip-handoff-check active")
        return _emit_allow()

    # CHECK 1 — HARD freshness with 3-strike fuse, or SOFT degrade at first boot
    status, fail_msg, fail_file = _check_handoff_freshness(workspace, agent)
    if status == "HARD_FAIL":
        strikes = lib.counter_inc(workspace, agent, fail_file)
        if strikes >= 3:
            # Third consecutive failure: release instead of locking the agent out.
            lib.log_violation(workspace, "HARD", "RELEASE", agent,
                              "3-strike fuse triggered, releasing", file=fail_file)
            lib.counter_reset(workspace, agent, fail_file)
            return _emit_allow()
        lib.log_violation(workspace, "HARD", "BLOCK", agent,
                          f"{fail_msg} (strike {strikes}/3)", file=fail_file)
        print(json.dumps(lib.block_decision(fail_msg)))
        return 0
    elif status == "SOFT_FAIL":
        lib.log_violation(workspace, "INFO", "NO-SNAPSHOT", agent,
                          f"{fail_msg} (degraded: no .session_snapshot.json)",
                          file=fail_file)
    elif status == "PASS":
        lib.counter_reset_all(workspace, agent)

    # CHECK 3+4 SOFT — never block
    _run_soft_checks(workspace, agent, transcript_path)
    return _emit_allow()
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
if __name__ == "__main__":
|
|
203
|
+
raise SystemExit(lib.safe_main(real_main))
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""PreToolUse hook: enforce owned-file partition (PIPELINE-OS §8).
|
|
2
|
+
|
|
3
|
+
When the active sub-agent is copilot-*, denies Write/Edit to non-owned
|
|
4
|
+
artifacts. Paths outside the research-artifact universe (.copilot/,
|
|
5
|
+
sections/*.tex, references.bib) are unconditionally allowed.
|
|
6
|
+
|
|
7
|
+
handoff.md special case: the append-only multi-writer agents may Edit it but may not Write (overwrite) it; every other copilot-* agent is denied.
|
|
8
|
+
|
|
9
|
+
Falls open on any exception via safe_main().
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
import sys
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
import _copilot_hook_lib as lib
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def real_main() -> int:
    """PreToolUse entry point: enforce the owned-file partition for copilot-* agents.

    Always returns 0; the decision is the JSON object printed to stdout.

    Allowed without further checks: empty or malformed input, a non-copilot
    active agent, the env-var kill switch, the ``skip-owned-check`` override,
    and tool calls that carry no ``file_path``.

    ``.copilot/handoff.md`` is special: agents in
    ``lib.HANDOFF_APPEND_ONLY_AGENTS`` may not use ``Write`` on it (other
    tools fall through to the ownership check); every other agent is denied.
    The handoff path is matched on a whole path segment, so a file such as
    ``x.copilot/handoff.md`` is not mistaken for it.

    Otherwise the call is allowed when the agent owns the path, denied when
    the path is a known research artifact it does not own, and allowed for
    anything outside the research-artifact universe.
    """
    raw = sys.stdin.read()
    if not raw.strip():
        print(json.dumps(lib.allow_decision()))
        return 0
    try:
        payload = json.loads(raw)
    except json.JSONDecodeError:
        print(json.dumps(lib.allow_decision()))
        return 0

    agent = lib.detect_active_agent(payload.get("transcript_path", ""))
    if not lib.is_copilot_agent(agent):
        print(json.dumps(lib.allow_decision()))
        return 0

    workspace = Path.cwd()

    if lib.env_guard_disabled():
        lib.log_violation(workspace, "INFO", "DISABLED", agent,
                          "guard bypassed by env var")
        print(json.dumps(lib.allow_decision()))
        return 0

    if lib.override_match(workspace, agent, "skip-owned-check"):
        lib.log_violation(workspace, "INFO", "OVERRIDE", agent,
                          "skip-owned-check active")
        print(json.dumps(lib.allow_decision()))
        return 0

    file_path = str((payload.get("tool_input") or {}).get("file_path", ""))
    if not file_path:
        print(json.dumps(lib.allow_decision()))
        return 0

    norm = lib.normalize_path(file_path, workspace=workspace)

    # handoff.md special case: append-only for the 4 multi-writers.
    # Segment-anchored match: a bare endswith() would also catch paths whose
    # directory merely ends in ".copilot" (e.g. "x.copilot/handoff.md").
    handoff_rel = ".copilot/handoff.md"
    if norm == handoff_rel or norm.endswith("/" + handoff_rel):
        tool_name = payload.get("tool_name", "")
        if agent in lib.HANDOFF_APPEND_ONLY_AGENTS:
            if tool_name == "Write":
                lib.log_violation(workspace, "HARD", "DENY", agent,
                                  "Write (overwrite) to handoff.md; "
                                  "use Edit to append", file=norm)
                msg = ("Blocked by copilot-write-guard: handoff.md is "
                       "append-only. Use Edit to add a new block, not Write.")
                print(json.dumps(lib.deny_decision(msg)))
                return 0
            # Edit allowed for these 4 agents — fall through to owned check
        else:
            lib.log_violation(workspace, "HARD", "DENY", agent,
                              "agent has no write right to handoff.md",
                              file=norm)
            msg = (f"Blocked by copilot-write-guard: {agent} is not an "
                   f"owner of handoff.md.")
            print(json.dumps(lib.deny_decision(msg)))
            return 0

    if lib.is_owned(agent, norm):
        print(json.dumps(lib.allow_decision()))
        return 0

    if lib.is_known_research_artifact(norm):
        lib.log_violation(workspace, "HARD", "DENY", agent,
                          "writing to non-owned artifact", file=norm)
        msg = (f"Blocked by copilot-write-guard: {agent} may not write "
               f"{norm}. See PIPELINE-OS §8.")
        print(json.dumps(lib.deny_decision(msg)))
        return 0

    # Not a research artifact at all: outside this guard's jurisdiction.
    print(json.dumps(lib.allow_decision()))
    return 0
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
if __name__ == "__main__":
|
|
96
|
+
raise SystemExit(lib.safe_main(real_main))
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""PostToolUse hook: detect long background experiments and recommend
|
|
2
|
+
arming a CronCreate-based self-poll so the main session continues after
|
|
3
|
+
notifications. Sets `.copilot/.loop-armed` to avoid duplicate suggestions."""
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
import re
|
|
8
|
+
import sys
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
# Command fragments that mark a shell command as a long-running experiment.
LONGRUN_PATTERNS = (
    re.compile(r"\btrain(\.py|_)"),
    re.compile(r"\bmain\.py\b"),
    re.compile(r"\bai_scientist\b"),
    re.compile(r"\btorchrun\b"),
    re.compile(r"\bdeepspeed\b"),
    re.compile(r"\bexperiments?/"),
    re.compile(r"\baccelerate launch\b"),
)


def should_arm(event: dict) -> bool:
    """Return True when ``event`` is a background Bash call that looks long-running.

    All three must hold: ``tool_name`` is ``"Bash"``, ``tool_input`` has a
    truthy ``run_in_background``, and ``command`` matches one of
    ``LONGRUN_PATTERNS``.

    Malformed payloads (a non-dict event, a non-dict ``tool_input``, a
    non-string ``command``) return False instead of raising, so the hook
    never crashes on unexpected input.
    """
    if not isinstance(event, dict) or event.get("tool_name") != "Bash":
        return False
    inp = event.get("tool_input")
    if not isinstance(inp, dict) or not inp.get("run_in_background"):
        return False
    cmd = inp.get("command")
    if not isinstance(cmd, str):
        return False
    return any(p.search(cmd) for p in LONGRUN_PATTERNS)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def main() -> int:
    """PostToolUse entry point: suggest arming a self-poll loop once per workspace.

    Reads the hook event from stdin. When ``should_arm`` accepts it and
    ``.copilot/.loop-armed`` does not exist yet, prints the recommendation to
    stdout and creates the flag file so the advice is not repeated.

    The hook is advisory and always returns 0: invalid JSON, a JSON payload
    that is not an object, and a failure to write the flag file are all
    tolerated rather than allowed to crash the hook.
    """
    try:
        raw = sys.stdin.read()
        event = json.loads(raw) if raw.strip() else {}
    except json.JSONDecodeError:
        return 0

    # Valid JSON that is not an object cannot describe a tool call.
    if not isinstance(event, dict) or not should_arm(event):
        return 0

    flag = Path.cwd() / ".copilot" / ".loop-armed"
    if flag.exists():
        return 0

    sys.stdout.write(
        "[loop-armer] Detected long-running background experiment.\n"
        "[loop-armer] Recommend arming a self-poll so the loop continues across notifications:\n"
        " CronCreate(cron=\"*/3 * * * *\", prompt=\"<<autonomous-loop>>\", recurring=true, durable=false)\n"
        "[loop-armer] Or the user can paste:\n"
        " /loop 1m If a background experiment task is still running, check its log tail and decide next step. Otherwise, delete this scheduled task.\n"
        "[loop-armer] On EXECUTING -> END the agent MUST CronDelete the returned id and remove .copilot/.loop-armed.\n"
    )
    sys.stdout.flush()
    try:
        flag.parent.mkdir(parents=True, exist_ok=True)
        flag.write_text("", encoding="utf-8")
    except OSError as exc:
        # Best effort: the advice was already printed; an unwritable flag
        # only means it may be repeated on a later call.
        sys.stderr.write(f"[loop-armer] could not write {flag}: {exc}\n")
    return 0
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
if __name__ == "__main__":
|
|
61
|
+
raise SystemExit(main())
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
"""Research Copilot Workflow Guard hook (PreToolUse).
|
|
2
|
+
|
|
3
|
+
Polices the MAIN SESSION acting as conductor. The main session must delegate
|
|
4
|
+
domain work to copilot-* sub-agents and must publish a TaskCreate plan list
|
|
5
|
+
before dispatching. copilot-* sub-agents run freely (exempt).
|
|
6
|
+
|
|
7
|
+
Origin attribution uses the authoritative `agent_id` payload field: it is
|
|
8
|
+
present ONLY inside a sub-agent call, so its absence => main session. Any
|
|
9
|
+
ambiguity resolves to main (conservative — never silently exempt the conductor).
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
import re
|
|
15
|
+
import sys
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
# Shell / PowerShell command prefixes that is_read_only() accepts as read-only.
READ_ONLY_PREFIXES = ("grep", "cat", "ls", "head", "tail", "find",
                      "Get-Content", "Select-String", "Get-ChildItem")
# Substrings that mark a shell command as experiment work (plain `in` match).
EXPERIMENT_KEYWORDS = ("train.py", "run_experiment", "wandb", "mlflow",
                       "tensorboard", "torchrun", "deepspeed", "accelerate")
# Catches `python ... train|experiment|run_exp` invocations, case-insensitively.
EXPERIMENT_REGEX = re.compile(r"python[\w\s.-]*\b(train|experiment|run_exp)\b",
                              re.IGNORECASE)
# Tool-name prefixes of the paper-retrieval MCP tools the M1 gate denies.
RESEARCH_MCP_PREFIXES = (
    "mcp__arxiv-search__",
    "mcp__arxivsub-search__",
    "mcp__google-scholar__",
    "mcp__dblp-bib__",
)
# Conductor-owned artifacts: the main session MAY write these.
CONDUCTOR_OWNED_ARTIFACTS = (".copilot/state.md", ".copilot/decisions.md")
# Delegated artifact files the main session must NOT write. Matched segment-anchored
# (see _path_matches) so 'references.bib' does NOT match 'old_references.bib'. The
# sections/*.tex case is handled separately by a path-segment check.
DELEGATED_ARTIFACT_FILES = (".copilot/ideas.md", ".copilot/experiments.md",
                            ".copilot/literature.md", "references.bib")
# Tools _decide() allows for the main session without running any gate.
READ_ONLY_TOOLS = ("Read", "Grep", "Glob", "TaskCreate", "TaskUpdate",
                   "TaskList", "TaskGet", "Skill", "AskUserQuestion")
# agent_type / subagent_type prefix that identifies a copilot sub-agent.
COPILOT_SUBAGENT_PREFIX = "copilot-"
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def allow() -> dict[str, Any]:
    """Build the hook response that permits the pending tool call."""
    hook_output = {"permissionDecision": "allow"}
    return {"hookSpecificOutput": hook_output}
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def deny(message: str) -> dict[str, Any]:
    """Build the hook response that refuses the pending tool call.

    ``message`` is used both as the permission-decision reason and as the
    top-level system message.
    """
    response: dict[str, Any] = {}
    response["hookSpecificOutput"] = {
        "permissionDecision": "deny",
        "permissionDecisionReason": message,
    }
    response["systemMessage"] = message
    return response
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def is_main_session(payload: dict[str, Any]) -> bool:
    """Report whether the payload comes from the main session.

    True exactly when ``agent_id`` is missing or falsy in the payload.
    """
    agent_id = payload.get("agent_id")
    return False if agent_id else True
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def is_exempt_subagent(payload: dict[str, Any]) -> bool:
    """Report whether the call originates from a copilot-* sub-agent.

    The main session is never exempt. For a sub-agent call, the exemption
    applies when its ``agent_type`` starts with ``COPILOT_SUBAGENT_PREFIX``.
    """
    if is_main_session(payload):
        return False
    agent_type = payload.get("agent_type") or ""
    return str(agent_type).startswith(COPILOT_SUBAGENT_PREFIX)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def is_read_only(command: str) -> bool:
    """Return True only when every stage of ``command`` is a read-only tool.

    The command is split on the shell control operators ``;``, ``&``, ``|``,
    ``&&``, ``||`` and newlines. Each non-empty stage must start with a word
    that is exactly one of ``READ_ONLY_PREFIXES``.

    A bare ``startswith`` test is not enough: it accepts ``lsof`` or
    ``catalog`` because they begin with ``ls`` / ``cat``, and it lets
    ``ls && python train.py`` skip the experiment gate. For the same reason
    the following are never read-only:

    - commands containing ``>``, ``$(`` or a backtick (redirection or command
      substitution);
    - ``find`` with an action that runs a program or writes/deletes files.

    The split is purely textual, so an operator inside quotes also splits.
    That errs on the strict side: the caller then applies its keyword scan
    instead of skipping it.
    """
    stripped = command.strip()
    if not stripped:
        return False
    # Redirection and substitution can write files or run arbitrary programs.
    if any(marker in stripped for marker in (">", "$(", "`")):
        return False
    find_actions = {"-exec", "-execdir", "-ok", "-okdir", "-delete",
                    "-fprint", "-fprint0", "-fprintf", "-fls"}
    stages = [part.strip() for part in re.split(r"\|\||&&|[;|&\n]", stripped)]
    stages = [part for part in stages if part]
    if not stages:
        return False
    for stage in stages:
        words = stage.split()
        if words[0] not in READ_ONLY_PREFIXES:
            return False
        if words[0] == "find" and find_actions.intersection(words):
            return False
    return True
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _norm(path: str) -> str:
    """Return ``path`` as a string with every backslash turned into ``/``."""
    return "/".join(str(path).split("\\"))
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _path_matches(path: str, target: str) -> bool:
    """Segment-anchored path comparison.

    True when ``path`` (with backslashes treated as ``/``) is exactly
    ``target`` or ends with ``/`` + ``target``. A mere substring or suffix
    inside a file name does not count, so ``old_references.bib`` does not
    match ``references.bib``.
    """
    normalized = str(path).replace("\\", "/")
    if normalized == target:
        return True
    return normalized.endswith(f"/{target}")
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _iter_transcript_tool_uses(transcript_path: str | None):
    """Yield ``{"name", "input"}`` dicts for every tool use in a JSONL transcript.

    Two record shapes are recognised per line: a record that is itself a
    ``type == "tool_use"`` object, and a record whose ``content`` list (taken
    from the record, or from its ``message`` object when the record has no
    ``content``) holds ``tool_use`` items. A missing name becomes ``""`` and
    a missing or falsy input becomes ``{}``.

    Yields nothing when the path is empty, is not a file or cannot be read.
    Blank lines, invalid JSON and non-object JSON values are skipped.
    """
    if not transcript_path:
        return
    source = Path(transcript_path)
    if not source.is_file():
        return
    try:
        raw_lines = source.read_text(encoding="utf-8", errors="replace").splitlines()
    except OSError:
        return

    for raw_line in raw_lines:
        candidate = raw_line.strip()
        if not candidate:
            continue
        try:
            record = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if not isinstance(record, dict):
            continue

        # Shape 1: the record itself is a tool_use entry.
        if record.get("type") == "tool_use":
            yield {"name": record.get("name", ""),
                   "input": record.get("input", {}) or {}}
            continue

        # Shape 2: tool_use items inside a content list.
        parts = record.get("content")
        if parts is None:
            wrapper = record.get("message")
            if isinstance(wrapper, dict):
                parts = wrapper.get("content")
        if not isinstance(parts, list):
            continue
        for part in parts:
            if isinstance(part, dict) and part.get("type") == "tool_use":
                yield {"name": part.get("name", ""),
                       "input": part.get("input", {}) or {}}
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def check_m1_delegation(tool_name: str, tool_input: dict[str, Any]) -> str | None:
    """M1 delegation gate: refuse execution-class work in the main session.

    Returns a denial message, or None when the call is acceptable. Three
    kinds of call are denied:

    - ``Bash`` / ``PowerShell`` commands that are not read-only and mention
      an experiment keyword or match ``EXPERIMENT_REGEX``;
    - any tool whose name starts with one of ``RESEARCH_MCP_PREFIXES``;
    - ``Write`` / ``Edit`` on a delegated artifact: a ``.tex`` file with a
      ``sections`` path segment, or one of ``DELEGATED_ARTIFACT_FILES``.
      Conductor-owned artifacts are always permitted.
    """
    params = tool_input or {}

    # Experiment scripts via shell.
    if tool_name in ("Bash", "PowerShell"):
        command = str(params.get("command", ""))
        if not command or is_read_only(command):
            return None
        mentions_experiment = any(kw in command for kw in EXPERIMENT_KEYWORDS)
        if mentions_experiment or EXPERIMENT_REGEX.search(command):
            return ("Blocked by research-copilot-guard (M1 delegation gate): the "
                    "conductor must not run experiments inline. Delegate via "
                    "Agent(subagent_type='copilot-experiment').")
        return None

    # Paper-retrieval MCP tools.
    for mcp_prefix in RESEARCH_MCP_PREFIXES:
        if tool_name.startswith(mcp_prefix):
            return ("Blocked by research-copilot-guard (M1 delegation gate): the "
                    "conductor must not search papers inline. Delegate via "
                    "Agent(subagent_type='copilot-literature').")

    # Writes to delegated research artifacts (segment-anchored, not substring).
    if tool_name not in ("Write", "Edit"):
        return None
    path = _norm(params.get("file_path", ""))
    for owned in CONDUCTOR_OWNED_ARTIFACTS:
        if _path_matches(path, owned):
            return None  # conductor owns state.md / decisions.md
    in_sections_dir = "sections" in path.split("/")
    delegated = (in_sections_dir and path.endswith(".tex")) or any(
        _path_matches(path, artifact) for artifact in DELEGATED_ARTIFACT_FILES)
    if not delegated:
        return None
    return ("Blocked by research-copilot-guard (M1 delegation gate): the "
            "conductor must not write research artifacts (sections/*.tex, "
            "references.bib, .copilot/{ideas,experiments,literature}.md) "
            "inline. Delegate to the matching copilot-* sub-agent.")
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def check_m2_task_list(tool_name: str, tool_input: dict[str, Any],
                       transcript_path: str | None) -> str | None:
    """M2 task-list gate: deny a copilot-* dispatch that has no TaskCreate.

    Returns a denial message, or None when the call is acceptable. Only
    ``Agent`` calls whose ``subagent_type`` starts with
    ``COPILOT_SUBAGENT_PREFIX`` are checked; they pass when the transcript
    contains at least one ``TaskCreate`` tool use.

    NOTE(review): the scan covers the whole transcript, not just the current
    turn, although the denial message says "in this turn" — confirm which is
    intended.

    The gate fails open whenever the transcript cannot be inspected: no path
    was supplied, or the path is not an existing file. Without that, a
    missing transcript would yield no tool uses and every dispatch would be
    denied no matter what the conductor did.
    """
    if tool_name != "Agent":
        return None
    sub_type = str((tool_input or {}).get("subagent_type", ""))
    if not sub_type.startswith(COPILOT_SUBAGENT_PREFIX):
        return None
    if not transcript_path or not Path(transcript_path).is_file():
        return None  # fail-open: cannot inspect
    if any(entry["name"] == "TaskCreate"
           for entry in _iter_transcript_tool_uses(transcript_path)):
        return None
    return ("Blocked by research-copilot-guard (M2 task-list gate): dispatching "
            "a copilot-* sub-agent requires a TaskCreate plan list (one task per "
            "planned dispatch) in this turn. Call TaskCreate first, then Agent().")
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def main() -> int:
    """PreToolUse entry point: read the payload from stdin, print one decision.

    Always returns 0. Empty input, invalid JSON and any exception raised
    while deciding all produce an allow decision (fail-open); in the
    exception case the traceback is written to stderr.
    """
    raw = sys.stdin.read()
    if not raw:
        decision = allow()
    else:
        try:
            payload = json.loads(raw)
        except json.JSONDecodeError:
            decision = allow()
        else:
            try:
                decision = _decide(payload)
            except Exception:
                # Fail-open: any unexpected error yields allow, never traps
                # the user (mirrors _copilot_hook_lib.safe_main's contract).
                import traceback
                sys.stderr.write(traceback.format_exc())
                decision = allow()
    print(json.dumps(decision))
    return 0
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _decide(payload: dict[str, Any]) -> dict[str, Any]:
    """Turn a parsed hook payload into an allow/deny response.

    copilot-* sub-agents are exempt. Everything else is policed as the main
    session: tools in ``READ_ONLY_TOOLS`` are allowed, then the M1 delegation
    gate and the M2 task-list gate are consulted, and the first denial
    message wins. May raise on a malformed payload; main() catches that and
    fails open.
    """
    # Exempt copilot-* sub-agents outright (they run experiments/searches/writes).
    if is_exempt_subagent(payload):
        return allow()

    # Everything else (incl. ambiguous) is treated as MAIN SESSION -> police.
    tool_name = payload.get("tool_name", "")
    tool_input = payload.get("tool_input", {}) or {}
    if tool_name in READ_ONLY_TOOLS:
        return allow()

    transcript_path = payload.get("transcript_path")
    m1_reason = check_m1_delegation(tool_name, tool_input)
    m2_reason = check_m2_task_list(tool_name, tool_input, transcript_path)
    reason = m1_reason or m2_reason
    if reason:
        return deny(reason)
    return allow()
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
if __name__ == "__main__":
|
|
208
|
+
raise SystemExit(main())
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def main() -> int:
    """Print the scientist-guardrails banner for the current workspace.

    Looks for the scientist-support runtime under ``.github/runtimes`` first
    and ``self/runtimes`` second, reports the first one that exists (or that
    none was found), writes the five-line banner to stdout and returns 0.
    """
    root = Path.cwd()
    runtime_tail = ("runtimes", "scientist-support", "runtime")
    candidates = (
        root.joinpath(".github", *runtime_tail),
        root.joinpath("self", *runtime_tail),
    )
    # Lazy scan: the second location is only probed when the first is absent.
    found = next((path for path in candidates if path.exists()), None)

    if found is not None:
        runtime_line = f"[scientist-guardrails] Detected skill runtime: {found}"
    else:
        runtime_line = "[scientist-guardrails] Scientist-support runtime was not found in the workspace."

    banner = (
        "[scientist-guardrails] AI Scientist workflow loaded.",
        "[scientist-guardrails] Upstream defaults assume Linux + CUDA and may not fully run on Windows hosts.",
        "[scientist-guardrails] AI Scientist executes LLM-written code; prefer a container or sandbox before running full experiments.",
        runtime_line,
        "[scientist-guardrails] Recommended first action: run scientist runtime validation before ideation or experiment launch.",
    )
    sys.stdout.write("\n".join(banner) + "\n")
    sys.stdout.flush()
    return 0
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
if __name__ == "__main__":
|
|
29
|
+
raise SystemExit(main())
|