typeclaw 0.12.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/scripts/dump-system-prompt.ts +12 -11
- package/src/agent/index.ts +15 -22
- package/src/agent/loop-guard.ts +170 -0
- package/src/agent/model-fallback.ts +2 -1
- package/src/agent/multimodal/index.ts +1 -1
- package/src/agent/multimodal/look-at.ts +118 -55
- package/src/agent/plugin-tools.ts +57 -0
- package/src/agent/subagents.ts +2 -1
- package/src/agent/system-prompt.ts +39 -26
- package/src/agent/tools/channel-fetch-attachment.ts +45 -16
- package/src/agent/tools/normalize-ref.ts +11 -0
- package/src/agent/tools/skip-response.ts +24 -32
- package/src/agent/tools/spawn-subagent.ts +2 -0
- package/src/bundled-plugins/reviewer/index.ts +11 -0
- package/src/bundled-plugins/reviewer/reviewer.ts +171 -0
- package/src/bundled-plugins/reviewer/skills/code-review.ts +73 -0
- package/src/bundled-plugins/reviewer/skills/general.ts +68 -0
- package/src/channels/adapters/discord-bot-classify.ts +32 -24
- package/src/channels/adapters/github/inbound.ts +63 -7
- package/src/channels/adapters/github/index.ts +32 -0
- package/src/channels/adapters/kakaotalk-attachment.ts +140 -133
- package/src/channels/adapters/kakaotalk-classify.ts +8 -1
- package/src/channels/adapters/kakaotalk.ts +19 -11
- package/src/channels/adapters/slack-bot-classify.ts +30 -14
- package/src/channels/adapters/slack-bot.ts +3 -2
- package/src/channels/adapters/telegram-bot-classify.ts +36 -13
- package/src/channels/adapters/telegram-bot.ts +3 -3
- package/src/channels/outbound-flood-filter.ts +57 -0
- package/src/channels/router.ts +114 -15
- package/src/channels/types.ts +52 -1
- package/src/cli/builtins.ts +1 -0
- package/src/cli/index.ts +1 -0
- package/src/cli/mount.ts +157 -0
- package/src/cli/update.ts +6 -4
- package/src/config/mounts-mutation.ts +161 -0
- package/src/doctor/channel-checks.ts +328 -0
- package/src/doctor/checks.ts +2 -0
- package/src/init/dockerfile.ts +24 -7
- package/src/init/hatching.ts +1 -1
- package/src/plugin/index.ts +6 -0
- package/src/plugin/load-skill.ts +99 -0
- package/src/run/bundled-plugins.ts +2 -0
- package/src/run/index.ts +31 -1
- package/src/secrets/claude-credentials-json.ts +129 -0
- package/src/secrets/codex-auth-json.ts +67 -0
- package/src/secrets/export-claude-credentials-file.ts +279 -0
- package/src/secrets/export-codex-auth-file.ts +243 -0
- package/src/secrets/index.ts +16 -0
- package/src/server/command-runner.ts +2 -1
- package/src/server/index.ts +3 -2
- package/src/shared/index.ts +7 -1
- package/src/shared/local-time.ts +32 -0
- package/src/skills/typeclaw-channel-github/SKILL.md +47 -13
- package/src/skills/typeclaw-channel-kakaotalk/SKILL.md +10 -11
- package/src/skills/typeclaw-channel-telegram-bot/SKILL.md +8 -0
- package/src/skills/typeclaw-claude-code/SKILL.md +5 -4
- package/src/skills/typeclaw-claude-code/references/auth-flow.md +35 -0
- package/src/skills/typeclaw-codex-cli/SKILL.md +2 -1
- package/src/skills/typeclaw-codex-cli/references/auth-flow.md +22 -0
- package/src/skills/typeclaw-kaomoji/SKILL.md +116 -0
- package/src/update/index.ts +95 -26
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
import { z } from 'zod'
|
|
2
|
+
|
|
3
|
+
import {
|
|
4
|
+
bashTool,
|
|
5
|
+
createLoadSkillTool,
|
|
6
|
+
findTool,
|
|
7
|
+
grepTool,
|
|
8
|
+
type LoadableSkill,
|
|
9
|
+
lsTool,
|
|
10
|
+
readTool,
|
|
11
|
+
type Subagent,
|
|
12
|
+
webfetchTool,
|
|
13
|
+
websearchTool,
|
|
14
|
+
} from '@/plugin'
|
|
15
|
+
|
|
16
|
+
import { CODE_REVIEW_SKILL } from './skills/code-review'
|
|
17
|
+
import { GENERAL_REVIEW_SKILL } from './skills/general'
|
|
18
|
+
|
|
19
|
+
// Review-domain skills the reviewer may pull in on demand through its
// `load_skill` tool. The array order is the order in which the skills are
// listed in the tool description shown to the model, so the most common
// case goes first to give the typical caller the right first impression.
//
// The list is deliberately short for the first release. To add a skill,
// create a new file under `./skills/` and append its export here; no
// runtime change is required.
export const REVIEWER_SKILLS: readonly LoadableSkill[] = [
  CODE_REVIEW_SKILL,
  GENERAL_REVIEW_SKILL,
]
|
|
28
|
+
|
|
29
|
+
// TODO(#452): Restrict the reviewer's `bash` to git and a curated set of
|
|
30
|
+
// read-only `gh` subcommands once per-subagent bash allowlist support lands.
|
|
31
|
+
// Today the read-only contract is enforced only by this system prompt, the
|
|
32
|
+
// same way `explorer` enforces its own read-only bash usage. The reviewer
|
|
33
|
+
// inherits TypeClaw's global bash guards (`secret-exfil-bash`, `git-exfil`)
|
|
34
|
+
// but has no positive allowlist. See https://github.com/typeclaw/typeclaw/issues/452.
|
|
35
|
+
export const REVIEWER_SYSTEM_PROMPT = `You are a review specialist running inside TypeClaw. Your job: produce a careful, structured review of a target the caller hands you — a code change, a written plan, a design document, a docs update, a draft argument, or anything else that benefits from another pair of eyes — and return findings the caller can act on.
|
|
36
|
+
|
|
37
|
+
You exist to do what \`explorer\` and \`scout\` cannot: deep, model-heavy analysis. Your model has been chosen for quality, not speed — spend tokens on thinking. Read carefully. Cross-check. Form a real opinion.
|
|
38
|
+
|
|
39
|
+
=== READ-ONLY — NO SIDE EFFECTS ===
|
|
40
|
+
You are STRICTLY PROHIBITED from:
|
|
41
|
+
- Creating, modifying, or deleting files (no write/edit tools available)
|
|
42
|
+
- Posting to GitHub, Slack, Discord, email, or any channel — the parent owns posting
|
|
43
|
+
- Pushing, merging, rebasing, or otherwise mutating remote state
|
|
44
|
+
- Using bash for: mkdir, touch, rm, cp, mv, git add, git commit, git push, git rebase, git reset, npm install, pip install, or any write operation
|
|
45
|
+
- Spawning further subagents — you are at the end of the delegation chain
|
|
46
|
+
|
|
47
|
+
Your role is EXCLUSIVELY to analyze and report. The parent agent decides what to do with your findings.
|
|
48
|
+
|
|
49
|
+
## Tools
|
|
50
|
+
|
|
51
|
+
The runtime exposes these tools to you by these EXACT names — call them by name, do not paraphrase:
|
|
52
|
+
|
|
53
|
+
- \`read\` — read a file when you know the path
|
|
54
|
+
- \`grep\` — search file contents by text or regex
|
|
55
|
+
- \`find\` — locate files by name pattern
|
|
56
|
+
- \`ls\` — list a directory's immediate contents
|
|
57
|
+
- \`bash\` — read-only commands ONLY. Read-only \`git\` (\`git log\`, \`git diff\`, \`git show\`, \`git blame\`, \`git status\`, \`git grep\`, \`git rev-parse\`, \`git ls-files\`, \`git cat-file\`) and one-shot pipelines that do not mutate state (\`cat\`, \`head\`, \`tail\`, \`wc\`, \`sort\`, \`uniq\`, \`jq\`, \`yq\`). For platform-specific reads (a PR diff, a vendor API), use the canonical read-only invocation of the platform's CLI and consult your loaded skill for which subcommands are appropriate.
|
|
58
|
+
- \`websearch\` — search the public web (e.g. for OWASP guidance, RFCs, library changelogs, framework docs, prior art)
|
|
59
|
+
- \`webfetch\` — fetch a single URL (e.g. to read a linked spec, vendor doc, or article cited in the target)
|
|
60
|
+
- \`load_skill\` — load a curated review skill by name. See the section below.
|
|
61
|
+
|
|
62
|
+
Launch independent tools in parallel. A finding backed by reading the artifact AND a primary source AND an adjacent piece of context is stronger than any one of them alone.
|
|
63
|
+
|
|
64
|
+
## Loading a review skill
|
|
65
|
+
|
|
66
|
+
You are domain-neutral. Specific review craft — what to look for in code, in a plan, in a design, in docs, in a piece of writing — lives in dedicated skills you load on demand.
|
|
67
|
+
|
|
68
|
+
The first thing you do for any review is:
|
|
69
|
+
|
|
70
|
+
1. **Read the payload and identify the target's domain.** What kind of artifact is this? A pull request? A design doc? An RFC? A plan? A piece of marketing copy? Inspect the payload, glance at the target if necessary (one \`read\` or one \`gh pr view\` is fine), then decide.
|
|
71
|
+
2. **Call \`load_skill\` with the matching skill name.** The \`load_skill\` tool's description lists the available skills and what each is for — pick the one whose description fits the target. If none of the domain skills fit, load \`general\`.
|
|
72
|
+
3. **Apply that skill's guidance on top of the universal contract below.** The skill tells you what to look for in this domain, what to ignore, and how to map severity for this kind of artifact. The universal output contract (severity, evidence, suggestion, verdict, \`<review>\` block) does not change.
|
|
73
|
+
|
|
74
|
+
You can load more than one skill if the target genuinely spans domains (e.g. a design doc with code examples — load \`design\`-something AND \`code-review\`). Do this sparingly; each extra skill loaded costs context for marginal gain.
|
|
75
|
+
|
|
76
|
+
Do NOT proceed past step 1 without loading a skill unless you have explicitly decided that no domain skill applies AND that the universal contract alone is sufficient. State the decision in your \`<summary>\` if you take this path.
|
|
77
|
+
|
|
78
|
+
## Universal review philosophy
|
|
79
|
+
|
|
80
|
+
These rules apply to every review regardless of domain.
|
|
81
|
+
|
|
82
|
+
1. **Form findings, not opinions.** Each finding is one issue. State severity (\`blocker\` / \`concern\` / \`nit\` / \`praise\`). Cite specific evidence — a file:line, a diff hunk, a quoted passage. Suggest a concrete alternative.
|
|
83
|
+
2. **Evidence is mandatory.** If you cannot point at a specific location and quote the offending content, the finding is too vague — sharpen it or drop it.
|
|
84
|
+
3. **Verify external claims.** If the target cites a spec, RFC, library behavior, benchmark, prior art, or "common practice", look it up with \`websearch\`/\`webfetch\` before agreeing or disagreeing. Cite the source in the finding.
|
|
85
|
+
4. **One finding, one concern.** Do not bundle unrelated issues into a single finding. The parent parses findings; mixed-concern findings break that.
|
|
86
|
+
5. **Praise is rare.** Call out non-obvious good work — a tricky invariant carefully preserved, a clear name for a subtle concept, a test that catches an easy-to-miss regression. Do not pad reviews with positivity.
|
|
87
|
+
6. **No generic LLM review noise.** "Consider adding tests" / "improve error handling" / "use better variable names" with no specific location to point at is noise. If you cannot point at a line, do not raise the finding.
|
|
88
|
+
7. **Do not restate the target.** "This function reads a file" is not a finding. "This document discusses X" is not a finding.
|
|
89
|
+
8. **Respect settled conventions.** Style/formatting that a formatter would catch (\`prettier\`, \`oxfmt\`, \`gofmt\`, \`black\`, \`ruff\`, etc.) is not your concern. Project conventions that the target follows are not findings; only deviations are.
|
|
90
|
+
|
|
91
|
+
## Severity scale (universal)
|
|
92
|
+
|
|
93
|
+
- \`blocker\` — Must fix before this lands. Correctness defect, security hole, broken contract, fatal logical error, deal-breaking design flaw, audience-fit problem so severe the artifact cannot be used.
|
|
94
|
+
- \`concern\` — Should fix. Likely-bad outcome, unsupported load-bearing claim, missing test on new behavior, convention violation that will compound, ambiguity that will mislead.
|
|
95
|
+
- \`nit\` — Optional. Style, naming, micro-improvement. The author can decline; do not push back.
|
|
96
|
+
- \`praise\` — Non-obvious good design or careful work worth calling out. Rare on purpose.
|
|
97
|
+
|
|
98
|
+
The loaded skill may refine what counts as each severity for its domain.
|
|
99
|
+
|
|
100
|
+
## Output discipline
|
|
101
|
+
|
|
102
|
+
End every response with a single \`<review>\` block. Use this exact structure:
|
|
103
|
+
|
|
104
|
+
<review>
|
|
105
|
+
<summary>
|
|
106
|
+
[One paragraph: what the target is (in your words), what it is trying to achieve, your overall read. Name the skill(s) you loaded and why. If the target is too large to review meaningfully in one pass, say so here and propose a chunking strategy; produce findings for what you did review.]
|
|
107
|
+
</summary>
|
|
108
|
+
<findings>
|
|
109
|
+
<finding severity="blocker|concern|nit|praise" location="path/to/file.ts:42, diff hunk, paragraph reference, or general">
|
|
110
|
+
<issue>One-sentence statement of the problem.</issue>
|
|
111
|
+
<evidence>Specific quote from the target or a brief description of the observed behavior.</evidence>
|
|
112
|
+
<suggestion>Concrete fix: what to do instead.</suggestion>
|
|
113
|
+
</finding>
|
|
114
|
+
<!-- Repeat per finding. Order: blocker > concern > nit > praise. -->
|
|
115
|
+
</findings>
|
|
116
|
+
<verdict>approve | request-changes | comment</verdict>
|
|
117
|
+
</review>
|
|
118
|
+
|
|
119
|
+
\`approve\` = no blockers; concerns are minor or already addressed.
|
|
120
|
+
\`request-changes\` = at least one blocker, or a load-bearing concern that needs an answer before this lands.
|
|
121
|
+
\`comment\` = neither — useful observations without a clear approve/reject signal (typical for early drafts, exploratory documents, partial reviews).
|
|
122
|
+
|
|
123
|
+
## Rules
|
|
124
|
+
|
|
125
|
+
- Every path you cite MUST be absolute (start with \`/\`) when reviewing local files. PR-diff locations use the diff's own \`path:line\` form. Document references quote the passage.
|
|
126
|
+
- If the target requires information you cannot access (a private system, a file outside this checkout, the caller's stated intent), say so explicitly in \`<summary>\` and review what you can.
|
|
127
|
+
- If you cannot identify the target at all from the payload, return one \`blocker\` finding asking the caller to clarify the target, and a \`comment\` verdict.
|
|
128
|
+
|
|
129
|
+
You have one shot. The parent receives your final assistant message verbatim — make it complete and self-contained.`
|
|
130
|
+
|
|
131
|
+
// Known payload fields. All three are optional strings; unknown keys are
// preserved by `.passthrough()` below rather than stripped.
const reviewerPayloadShape = {
  requestId: z.string().optional(),
  prompt: z.string().optional(),
  description: z.string().optional(),
}

/** Zod schema for the payload a caller hands to the reviewer subagent. */
export const reviewerPayloadSchema = z.object(reviewerPayloadShape).passthrough()

/** Static type inferred from {@link reviewerPayloadSchema}. */
export type ReviewerPayload = z.infer<typeof reviewerPayloadSchema>
|
|
140
|
+
|
|
141
|
+
/**
 * Builds the reviewer subagent definition.
 *
 * The subagent pairs `REVIEWER_SYSTEM_PROMPT` with the read/search/web tools
 * and a `load_skill` custom tool whose description enumerates every entry of
 * `REVIEWER_SKILLS`.
 *
 * @returns A fresh `Subagent<ReviewerPayload>` descriptor.
 */
export function createReviewerSubagent(): Subagent<ReviewerPayload> {
  // One bullet per curated skill, in REVIEWER_SKILLS order.
  const skillMenu = REVIEWER_SKILLS.map(({ name, description }) => `- \`${name}\` — ${description}`).join('\n')

  const loadSkillDescription = `Load a curated review skill by name. Each skill explains what to look for in one kind of artifact (code, plan, design, docs, etc.) and refines the universal severity scale for that domain. Call this BEFORE forming findings so your review is grounded in the right craft, not generic prose.

Available skills:
${skillMenu}

If none of the listed skills fit the target, load \`general\` and explain in \`<summary>\` why no domain skill applied.`

  const loadSkillTool = createLoadSkillTool({
    skills: REVIEWER_SKILLS,
    description: loadSkillDescription,
  })

  // Used only when the payload carries no `requestId`: a timestamp plus a
  // short random base-36 suffix.
  const anonymousKey = (): string => `anon-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`

  // Budget sits between explorer (256KB) and operator (1MB): a reviewer reads
  // larger diffs, several files and web sources, but it is read-only and
  // produces analysis rather than building anything.
  const toolResultBudget = {
    maxTotalBytes: 512_000,
    toolNames: ['read', 'grep', 'find', 'ls', 'bash', 'websearch', 'webfetch', 'load_skill'],
  }

  const reviewer: Subagent<ReviewerPayload> = {
    systemPrompt: REVIEWER_SYSTEM_PROMPT,
    // `deep` is a conventional profile name (see src/config/config.ts). When
    // `models.deep` is not configured in typeclaw.json, `resolveProfile`
    // falls back to `default` with a one-time warning, which is a safe
    // degradation.
    profile: 'deep',
    tools: [readTool, grepTool, findTool, lsTool, bashTool, websearchTool, webfetchTool],
    customTools: [loadSkillTool],
    payloadSchema: reviewerPayloadSchema,
    visibility: 'public',
    inFlightKey: (payload) => payload?.requestId ?? anonymousKey(),
    toolResultBudget,
  }
  return reviewer
}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import type { LoadableSkill } from '@/plugin'
|
|
2
|
+
|
|
3
|
+
/** Name under which the code-review skill is exposed. */
export const CODE_REVIEW_SKILL_NAME = 'code-review'

/** One-line summary of what the code-review skill covers. */
export const CODE_REVIEW_SKILL_DESCRIPTION =
  'Review code: a pull request, a commit, a single file, or a module. Covers correctness, security, architecture fit, test coverage, performance, error handling, API surface, naming, and project conventions.'
|
|
7
|
+
|
|
8
|
+
export const CODE_REVIEW_SKILL_CONTENT = `# code-review
|
|
9
|
+
|
|
10
|
+
You have been asked to review code. Apply this guidance on top of the reviewer's neutral output contract (severity-tagged findings, evidence quotes, suggestions, verdict).
|
|
11
|
+
|
|
12
|
+
## How to acquire the target
|
|
13
|
+
|
|
14
|
+
- **PR URL or number** — fetch the diff and the description:
|
|
15
|
+
- \`gh pr diff <n>\` for the unified diff
|
|
16
|
+
- \`gh pr view <n>\` for title, body, labels, linked issues, checks
|
|
17
|
+
- \`gh api /repos/<owner>/<repo>/pulls/<n>\` for the structured payload when you need machine-readable fields
|
|
18
|
+
- **Commit SHA** — \`git show <sha>\` and \`git show <sha> --stat\` for the scope.
|
|
19
|
+
- **File path / module path** — \`read\` the file directly; \`ls\` the parent directory to understand its neighbors; \`grep\` for callers of any function the file exports.
|
|
20
|
+
- **Branch name** — \`git log <branch> ^main --oneline\` to enumerate commits, then \`git diff main...<branch>\` for the cumulative change.
|
|
21
|
+
|
|
22
|
+
## How to build context
|
|
23
|
+
|
|
24
|
+
A finding without context is noise. Before forming findings:
|
|
25
|
+
|
|
26
|
+
1. **Read the change description.** PR body, commit messages, linked issues. The author told you what they intended — verify the code matches.
|
|
27
|
+
2. **Read adjacent code.** A change to one function means reading callers and callees. A change to a class means reading the rest of the class and its subclasses.
|
|
28
|
+
3. **Read the project's conventions.** \`AGENTS.md\`, \`CONTRIBUTING.md\`, \`CLAUDE.md\`, \`README.md\`, the test layout, the linter config. Deviation from established convention is a finding worth raising; following convention is not worth praising.
|
|
29
|
+
4. **Read the tests.** Existing tests show what the project considers important to verify. New tests show what the author considers important to lock in. The gap between them is often where the bugs hide.
|
|
30
|
+
|
|
31
|
+
## What to look for
|
|
32
|
+
|
|
33
|
+
Prioritize in this order:
|
|
34
|
+
|
|
35
|
+
1. **Correctness.** Does the change do what its description claims? Off-by-one errors, missing null/undefined handling, race conditions, incorrect error propagation, broken invariants.
|
|
36
|
+
2. **Security.** Injection vectors (SQL, shell, HTML), missing authz/authn checks, secret leakage in logs or error messages, unsafe deserialization, SSRF, path traversal, time-of-check-time-of-use. Cite OWASP / CWE / RFC by number when relevant; verify with \`websearch\` or \`webfetch\` before asserting.
|
|
37
|
+
3. **Architecture fit.** Does the change respect existing layering? Does it introduce a new dependency where the existing pattern would have worked? Does it duplicate logic that already exists elsewhere in the repo?
|
|
38
|
+
4. **Test coverage.** New behavior should have new tests. Edge cases the description names should be tested. If existing tests were deleted or skipped, that is a blocker absent a stated reason.
|
|
39
|
+
5. **Error handling.** Empty catch blocks, swallowed errors, errors converted to silent fallbacks, retry loops without bounded backoff, missing timeouts on external calls.
|
|
40
|
+
6. **Performance.** Quadratic loops in hot paths, missing indexes, unbounded memory accumulation, N+1 queries, blocking I/O in async hot paths. Performance findings need evidence: cite the loop, the data scale, the actual hot path. "Could be slow" without evidence is not a finding.
|
|
41
|
+
7. **API surface.** Breaking changes to exported types, function signatures, CLI flags, env vars, on-disk schemas. Are they documented? Versioned? Migration noted in CHANGELOG / release notes?
|
|
42
|
+
8. **Naming.** Names that lie (a function called \`getUser\` that mutates), names that hide intent (\`data\`, \`info\`, \`tmp\`), names that don't match the project's vocabulary.
|
|
43
|
+
|
|
44
|
+
## What NOT to find
|
|
45
|
+
|
|
46
|
+
- **Formatter / linter territory.** If the project has \`prettier\`, \`oxfmt\`, \`gofmt\`, \`black\`, \`ruff\`, \`eslint\`, etc., assume it ran. Do not raise spacing, trailing commas, single-vs-double quotes, line length, or import order.
|
|
47
|
+
- **Settled convention objections.** If the project uses tabs, four-space indent, camelCase vs snake_case, etc., and the change matches, that is not a finding. Only the deviation is.
|
|
48
|
+
- **Generic best-practice essays.** "Consider adding more tests" without naming a specific untested branch is noise. "Improve error handling" without pointing at a specific swallowed error is noise.
|
|
49
|
+
- **Restating the code.** "This function reads the file and returns its contents" is not a finding.
|
|
50
|
+
|
|
51
|
+
## Severity hints specific to code
|
|
52
|
+
|
|
53
|
+
- **blocker** — Correctness bug that will misbehave for users. Security vulnerability. Broken backward compatibility without migration. Crashing path on common input. Deleted tests without justification.
|
|
54
|
+
- **concern** — Likely-bad outcome that hasn't bitten yet (missing timeout, unbounded retry, edge case ignored). Test gap on the new behavior. Architectural deviation that compounds.
|
|
55
|
+
- **nit** — Naming, micro-readability, suboptimal-but-correct code. Optional. The author can decline and you should not push back.
|
|
56
|
+
- **praise** — Non-obvious good design: a tricky invariant carefully preserved, a test that catches a subtle regression, a name that captures the domain precisely. Rare on purpose.
|
|
57
|
+
|
|
58
|
+
## Verdict mapping
|
|
59
|
+
|
|
60
|
+
- **approve** — Zero blockers. Concerns are minor, isolated, or already discussed.
|
|
61
|
+
- **request-changes** — At least one blocker, OR a load-bearing concern that needs an answer before this lands.
|
|
62
|
+
- **comment** — Mixed signal: useful observations without a clear approve/reject. Common on large refactors where you reviewed part of the change, or on early-draft PRs where the author asked for direction more than approval.
|
|
63
|
+
|
|
64
|
+
## Final output
|
|
65
|
+
|
|
66
|
+
Return findings inside the reviewer's neutral \`<review>\` block. Do NOT invent your own output format. The parent agent parses the structured shape.
|
|
67
|
+
`
|
|
68
|
+
|
|
69
|
+
/**
 * Loadable skill descriptor for code review: bundles the skill's name,
 * its short description, and the full guidance text.
 */
export const CODE_REVIEW_SKILL: LoadableSkill = {
  name: CODE_REVIEW_SKILL_NAME,
  description: CODE_REVIEW_SKILL_DESCRIPTION,
  content: CODE_REVIEW_SKILL_CONTENT,
}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import type { LoadableSkill } from '@/plugin'
|
|
2
|
+
|
|
3
|
+
/** Name under which the fallback (general) review skill is exposed. */
export const GENERAL_REVIEW_SKILL_NAME = 'general'

/** One-line summary of when the general review skill applies. */
export const GENERAL_REVIEW_SKILL_DESCRIPTION =
  'Fallback for review targets that do not fit a specific domain skill: a written argument, a proposal, a draft, a mixed-format artifact. Apply the universal review philosophy without domain-specific shortcuts.'
|
|
7
|
+
|
|
8
|
+
export const GENERAL_REVIEW_SKILL_CONTENT = `# general
|
|
9
|
+
|
|
10
|
+
You have been asked to review something that does not clearly fit a specific domain skill (not a code PR, not a plan, not a design doc, not docs — or it is a mix). Apply the universal review philosophy on top of the reviewer's neutral output contract.
|
|
11
|
+
|
|
12
|
+
## How to acquire the target
|
|
13
|
+
|
|
14
|
+
- **A URL** — \`webfetch\` it. If it is a private resource the fetch cannot reach, say so in \`<summary>\` and review what was provided in the payload.
|
|
15
|
+
- **A file path** — \`read\` it. \`ls\` the parent directory if siblings might be relevant.
|
|
16
|
+
- **Inline text in the payload** — read the payload carefully; quote from it when forming evidence.
|
|
17
|
+
- **A reference to something the caller has** — ask the caller to provide it. Return a single \`blocker\` finding describing what you need and a \`comment\` verdict.
|
|
18
|
+
|
|
19
|
+
## How to read carefully
|
|
20
|
+
|
|
21
|
+
A general review is the hardest because there are no domain shortcuts. Replace shortcuts with discipline:
|
|
22
|
+
|
|
23
|
+
1. **State the target's purpose in your own words.** What is the artifact trying to achieve? Who is it for? Put this in \`<summary>\`. If you cannot state it after reading, that itself is a finding — the artifact does not communicate its purpose.
|
|
24
|
+
2. **Identify the load-bearing claims.** What does the artifact assert that, if wrong, would invalidate the whole thing? List them mentally before looking for issues.
|
|
25
|
+
3. **Stress-test the load-bearing claims.** For each one: is the evidence sufficient? Are the assumptions stated? Are the counter-arguments addressed?
|
|
26
|
+
4. **Stress-test the boundaries.** Where does the artifact's argument or design stop applying? Does it acknowledge that boundary, or does it overgeneralize?
|
|
27
|
+
5. **Stress-test the audience fit.** Will the intended reader understand it? Is the prerequisite knowledge stated? Are the unstated assumptions reasonable for that audience?
|
|
28
|
+
|
|
29
|
+
## What to look for
|
|
30
|
+
|
|
31
|
+
- **Internal contradiction.** Two statements that cannot both be true. The artifact must reconcile them or pick one.
|
|
32
|
+
- **Unsupported claims.** Any assertion the artifact relies on but does not justify. The author may have a reason — say so and ask, do not assume incompetence.
|
|
33
|
+
- **Hidden assumptions.** Things the argument quietly requires to be true but does not state. These are the most common failure mode in general writing.
|
|
34
|
+
- **Missing alternatives.** If the artifact recommends X, did it explain why not Y? A serious proposal acknowledges the alternatives it rejected.
|
|
35
|
+
- **Scope drift.** The artifact promises to cover A but spends half its bytes on B. Either the scope is wrong or the title is wrong.
|
|
36
|
+
- **Verifiability.** If the artifact claims success criteria, are they measurable? "Better performance" with no metric is unverifiable.
|
|
37
|
+
- **Logical structure.** Premises → reasoning → conclusion. Where the chain breaks, point at the break.
|
|
38
|
+
|
|
39
|
+
## What NOT to find
|
|
40
|
+
|
|
41
|
+
- **Stylistic preferences.** Sentence rhythm, word choice variation, paragraph length. Skip unless they actively impede understanding.
|
|
42
|
+
- **Re-summarizing the artifact as a finding.** "This document discusses X" is not a review.
|
|
43
|
+
- **Generic feedback.** "Could be clearer" without pointing at a specific passage is noise.
|
|
44
|
+
- **Disagreements that are taste, not error.** If the author chose path A and you would have chosen B, that is not a finding unless A is actually worse for a stated reason.
|
|
45
|
+
|
|
46
|
+
## Severity hints
|
|
47
|
+
|
|
48
|
+
- **blocker** — A logical break, a fatal contradiction, a load-bearing claim that is verifiably false, an audience-fit problem so severe the intended reader cannot use the artifact.
|
|
49
|
+
- **concern** — An unsupported claim that needs justification, a missing alternative that weakens the recommendation, a scope ambiguity that will mislead readers.
|
|
50
|
+
- **nit** — A small clarity issue, a passage that could be tightened, a minor inconsistency.
|
|
51
|
+
- **praise** — A non-obvious insight, a tricky trade-off well-handled, a passage that earns the reader's trust. Rare.
|
|
52
|
+
|
|
53
|
+
## Verdict mapping
|
|
54
|
+
|
|
55
|
+
- **approve** — No blockers. The artifact stands on its own.
|
|
56
|
+
- **request-changes** — At least one blocker.
|
|
57
|
+
- **comment** — Useful observations without a clean accept/reject. Common for early drafts, exploratory documents, or partial reviews.
|
|
58
|
+
|
|
59
|
+
## Final output
|
|
60
|
+
|
|
61
|
+
Return findings inside the reviewer's neutral \`<review>\` block. Do NOT invent your own output format.
|
|
62
|
+
`
|
|
63
|
+
|
|
64
|
+
/**
 * Loadable skill descriptor for the general (fallback) review: bundles the
 * skill's name, its short description, and the full guidance text.
 */
export const GENERAL_REVIEW_SKILL: LoadableSkill = {
  name: GENERAL_REVIEW_SKILL_NAME,
  description: GENERAL_REVIEW_SKILL_DESCRIPTION,
  content: GENERAL_REVIEW_SKILL_CONTENT,
}
|
|
@@ -6,7 +6,7 @@ import type {
|
|
|
6
6
|
} from 'agent-messenger/discordbot'
|
|
7
7
|
|
|
8
8
|
import type { ChannelAdapterConfig } from '@/channels/schema'
|
|
9
|
-
import type { InboundMessage } from '@/channels/types'
|
|
9
|
+
import type { InboundAttachment, InboundMessage } from '@/channels/types'
|
|
10
10
|
|
|
11
11
|
export type InboundDropReason =
|
|
12
12
|
| 'self_author' // event.author.id === botUserId; we never route our own messages back to ourselves
|
|
@@ -35,7 +35,7 @@ export function classifyInbound(
|
|
|
35
35
|
if (botUserId !== null && event.author.id === botUserId) {
|
|
36
36
|
return { kind: 'drop', reason: 'self_author' }
|
|
37
37
|
}
|
|
38
|
-
const text =
|
|
38
|
+
const { text, attachments } = splitInbound(event)
|
|
39
39
|
if (text === '') return { kind: 'drop', reason: 'empty_content' }
|
|
40
40
|
|
|
41
41
|
const isDm = event.guild_id === undefined
|
|
@@ -80,6 +80,7 @@ export function classifyInbound(
|
|
|
80
80
|
chat: event.channel_id,
|
|
81
81
|
thread: null,
|
|
82
82
|
text,
|
|
83
|
+
...(attachments.length > 0 ? { attachments } : {}),
|
|
83
84
|
externalMessageId: event.id,
|
|
84
85
|
authorId: event.author.id,
|
|
85
86
|
// Discord's post-2023 username system allows pure-numeric handles (e.g.
|
|
@@ -107,38 +108,45 @@ function isReplyToBot(event: DiscordGatewayMessageCreateEvent, botUserId: string
|
|
|
107
108
|
return (event.mentions ?? []).some((m) => m.id === botUserId)
|
|
108
109
|
}
|
|
109
110
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
const
|
|
114
|
-
|
|
111
|
+
/** Message text plus the structured attachments extracted from one event. */
type SplitInbound = { text: string; attachments: InboundAttachment[] }

/**
 * Separates a Discord message event into its text and its attachments.
 *
 * With no media the event's content is returned unchanged. Otherwise one
 * placeholder line per attachment is appended after the content, or used as
 * the whole text when the content is empty.
 *
 * @param event - The gateway MESSAGE_CREATE event to split.
 * @returns The text to route and the described attachments.
 */
function splitInbound(event: DiscordGatewayMessageCreateEvent): SplitInbound {
  const media = describeDiscordMedia(event)
  if (media.length === 0) {
    return { text: event.content, attachments: [] }
  }

  const placeholders = media.map((item) => renderPlaceholder(item)).join('\n')
  if (event.content === '') {
    return { text: placeholders, attachments: media }
  }
  return { text: `${event.content}\n${placeholders}`, attachments: media }
}
|
|
116
120
|
|
|
117
|
-
function
|
|
121
|
+
/**
 * Collects every media item on a Discord message as an `InboundAttachment`.
 *
 * Items are ordered file attachments first, then embeds, then stickers, and
 * are numbered with 1-based ids across that combined order. Missing lists
 * are treated as empty.
 *
 * @param event - The gateway MESSAGE_CREATE event to inspect.
 * @returns The described attachments; empty when the event has no media.
 */
function describeDiscordMedia(event: DiscordGatewayMessageCreateEvent): InboundAttachment[] {
  const files = (event.attachments ?? []).map((file) => describeAttachment(file))
  const embeds = (event.embeds ?? []).map((embed) => describeEmbed(embed))
  const stickers = (event.sticker_items ?? []).map((sticker) => describeSticker(sticker))

  const described = [...files, ...embeds, ...stickers]
  return described.map((entry, position) => ({ ...entry, id: position + 1 }))
}
|
|
124
128
|
|
|
125
|
-
function
|
|
126
|
-
return
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
attachment.
|
|
130
|
-
|
|
129
|
+
// Maps an uploaded Discord file to an id-less attachment record. `mimetype` is
// only present when Discord reported a content type for the file.
function describeAttachment(attachment: DiscordFile): Omit<InboundAttachment, 'id'> {
  const { url, filename, content_type: mimetype } = attachment
  const described = { kind: 'file' as const, ref: url, filename }
  return mimetype === undefined ? described : { ...described, mimetype }
}
|
|
132
137
|
|
|
133
|
-
function
|
|
138
|
+
// Maps a Discord embed to an id-less attachment record. The display name is the
// first defined value among title, description, url and type, falling back to
// the literal 'embed'; the ref is the embed url, or '' when there is none.
function describeEmbed(embed: DiscordGatewayEmbed): Omit<InboundAttachment, 'id'> {
  const { title, description, url, type } = embed
  const filename = title ?? description ?? url ?? type ?? 'embed'
  return { kind: 'embed', ref: url ?? '', filename }
}
|
|
137
142
|
|
|
138
|
-
function
|
|
139
|
-
return
|
|
143
|
+
// Maps a Discord sticker to an id-less attachment record. The ref is always the
// empty string; only the sticker's name is carried over.
function describeSticker(sticker: DiscordGatewayStickerItem): Omit<InboundAttachment, 'id'> {
  const { name } = sticker
  return { kind: 'sticker', ref: '', filename: name }
}
|
|
141
146
|
|
|
142
|
-
function
|
|
143
|
-
|
|
147
|
+
// Renders the one-line, bracketed placeholder that stands in for an attachment
// in the inbound text, e.g. `[Discord attachment #1: file image/png name=a.png]`.
//
// `mimetype` and `filename` originate from the message sender (an embed's
// filename may be its multi-line description). Placeholders are joined one per
// line, so any line break inside those values is collapsed to a single space;
// otherwise a crafted value could split the placeholder or forge extra
// placeholder lines.
function renderPlaceholder(attachment: InboundAttachment): string {
  const toSingleLine = (value: string): string => value.replace(/[\r\n\u2028\u2029]+/g, ' ')

  const parts: string[] = [`Discord attachment #${attachment.id}: ${attachment.kind}`]
  if (attachment.mimetype !== undefined) parts.push(toSingleLine(attachment.mimetype))
  if (attachment.filename !== undefined) parts.push(`name=${toSingleLine(attachment.filename)}`)
  return `[${parts.join(' ')}]`
}
|
|
@@ -44,11 +44,17 @@ export function createGithubWebhookHandler(options: GithubWebhookHandlerOptions)
|
|
|
44
44
|
if (!isGithubEventAllowed(options.allowlist(), event, action)) return ok()
|
|
45
45
|
|
|
46
46
|
const selfId = options.selfId()
|
|
47
|
-
const
|
|
48
|
-
|
|
47
|
+
const selfLogin = options.selfLogin()
|
|
48
|
+
const author = readAuthor(event, payload)
|
|
49
|
+
if (author !== null && isSelfAuthor(author, selfId, selfLogin)) {
|
|
50
|
+
options.logger.info(
|
|
51
|
+
`[github] dropped self-authored ${event}${action !== null ? `.${action}` : ''} from @${author.login}`,
|
|
52
|
+
)
|
|
53
|
+
return ok()
|
|
54
|
+
}
|
|
49
55
|
|
|
50
56
|
const teamIsBotMember = await resolveTeamMembership(event, payload, options)
|
|
51
|
-
const classified = classifyGithubInbound(event, payload,
|
|
57
|
+
const classified = classifyGithubInbound(event, payload, selfLogin, {
|
|
52
58
|
teamIsBotMember,
|
|
53
59
|
})
|
|
54
60
|
if (classified === null) return ok()
|
|
@@ -357,13 +363,63 @@ function readRepository(payload: Record<string, unknown>): { owner: string; name
|
|
|
357
363
|
return { owner: ownerLogin, name }
|
|
358
364
|
}
|
|
359
365
|
|
|
360
|
-
function readAuthor(payload: Record<string, unknown>): GithubUser | null {
|
|
361
|
-
const
|
|
362
|
-
for (const candidate of candidates) {
|
|
366
|
+
// Resolves the user considered the author of this delivery: the first
// event-specific container whose `user` parses as a user wins; otherwise the
// payload's `sender` is used.
function readAuthor(event: string, payload: Record<string, unknown>): GithubUser | null {
  const containers = eventAuthorCandidates(event, payload)
  for (let i = 0; i < containers.length; i += 1) {
    const container = readRecord(containers[i])
    const entityAuthor = readUser(container?.user)
    if (entityAuthor !== null) {
      return entityAuthor
    }
  }
  // `sender` — the actor who triggered the delivery — is present on every
  // GitHub webhook payload. Using it as the universal fallback means events
  // without an explicit mapping (including ones a user later adds to
  // eventAllowlist) are still checked by the self-author guard.
  return readUser(payload.sender)
}
|
|
377
|
+
|
|
378
|
+
// Maps each event to the entity whose `user` is the true author of THIS event,
|
|
379
|
+
// listed before broader containers. A pull_request_review payload ships both
|
|
380
|
+
// `pull_request` (the PR author) and `review` (the reviewer); the self-author
|
|
381
|
+
// drop must see the reviewer, so `review` must come first. PR #455's flat order
|
|
382
|
+
// (`pull_request` before `review`) made a self-review on someone else's PR
|
|
383
|
+
// resolve to the PR author, slip past the drop, and loop (see PR #460).
|
|
384
|
+
//
|
|
385
|
+
// `pull_request` and `pull_request_review_thread` carry only the `pull_request`
|
|
386
|
+
// container, whose `user` is the PR OPENER — not the actor of this delivery.
|
|
387
|
+
// For these events the self-author question is "who triggered the action?"
|
|
388
|
+
// (review_requested, edited, reopened, resolved, …), which is always
|
|
389
|
+
// `payload.sender`, never the opener. Mapping them to `[]` makes readAuthor
|
|
390
|
+
// skip the opener and fall through to the `sender` fallback. PR #462's
|
|
391
|
+
// `['pull_request']` resolved to the opener, so a human action on a
|
|
392
|
+
// bot-opened PR matched the bot and was wrongly dropped (the inbound landed
|
|
393
|
+
// as awareness-only "Recent context" and the agent never replied).
|
|
394
|
+
// Per-event payload keys whose `user` is the author of that event. An empty
// list means "no entity author; use the sender fallback".
const PRIMARY_AUTHOR_KEYS: Record<string, readonly string[]> = {
  issue_comment: ['comment'],
  pull_request_review_comment: ['comment'],
  discussion_comment: ['comment'],
  commit_comment: ['comment'],
  pull_request_review: ['review'],
  pull_request_review_thread: [],
  issues: ['issue'],
  pull_request: [],
  discussion: ['discussion'],
  release: ['release'],
}

// Keys probed, in order, for events that have no entry in PRIMARY_AUTHOR_KEYS.
const FALLBACK_AUTHOR_KEYS = ['comment', 'review', 'issue', 'pull_request', 'discussion', 'release'] as const

// Returns the payload values to probe for an author, in priority order.
//
// The lookup is restricted to the map's OWN keys: a plain object also answers
// for inherited names such as `constructor` or `toString`, and an event with
// such a name would otherwise yield a non-array and make `.map` throw.
function eventAuthorCandidates(event: string, payload: Record<string, unknown>): unknown[] {
  const mapped = Object.prototype.hasOwnProperty.call(PRIMARY_AUTHOR_KEYS, event)
    ? PRIMARY_AUTHOR_KEYS[event]
    : undefined
  const keys: readonly string[] = mapped ?? FALLBACK_AUTHOR_KEYS
  return keys.map((key) => payload[key])
}
|
|
413
|
+
|
|
414
|
+
// Matches by id OR login. Issue #452 captured a self-responding loop where
|
|
415
|
+
// the id-only guard didn't fire and the bot replied to its own comments ~8
|
|
416
|
+
// times in a row. Login is the second line of defense and aligns with the
|
|
417
|
+
// slack/discord/telegram/kakaotalk adapters, which all drop self-authored
|
|
418
|
+
// events at the classifier layer.
|
|
419
|
+
// Reports whether `author` is the bot itself, matching by numeric id OR login.
//
// - `selfId` is compared against the stringified author id.
// - `selfLogin` is compared case-insensitively: GitHub logins are
//   case-insensitive, so a differently cased login must not defeat the guard.
// A null `selfId` / `selfLogin` disables that particular comparison.
function isSelfAuthor(author: GithubUser, selfId: string | null, selfLogin: string | null): boolean {
  if (selfId !== null && String(author.id) === selfId) return true
  if (selfLogin !== null && author.login.toLowerCase() === selfLogin.toLowerCase()) return true
  return false
}
|
|
368
424
|
|
|
369
425
|
type GithubUser = { login: string; id: number; type?: string }
|
|
@@ -53,6 +53,14 @@ export type GithubAdapterOptions = {
|
|
|
53
53
|
// Test-only: replaces the wall-clock sleep used for the registration
|
|
54
54
|
// delay above. Production leaves it undefined and we use `setTimeout`.
|
|
55
55
|
sleep?: (ms: number) => Promise<void>
|
|
56
|
+
// How often to proactively refresh the token and update GH_TOKEN
|
|
57
|
+
// when the adapter is running but has not made an outbound API call
|
|
58
|
+
// recently. Zero disables the background refresh entirely.
|
|
59
|
+
// Default: 30 minutes.
|
|
60
|
+
tokenRefreshIntervalMs?: number
|
|
61
|
+
// Test-only: replaces `setInterval` so tests can control when the
|
|
62
|
+
// background refresh fires without waiting on real wall-clock time.
|
|
63
|
+
setInterval?: (handler: () => void, ms: number) => { clear: () => void }
|
|
56
64
|
}
|
|
57
65
|
|
|
58
66
|
export type GithubAdapter = {
|
|
@@ -68,6 +76,7 @@ const consoleLogger: GithubAdapterLogger = {
|
|
|
68
76
|
}
|
|
69
77
|
|
|
70
78
|
const DEFAULT_WEBHOOK_REGISTRATION_DELAY_MS = 2_000
|
|
79
|
+
const DEFAULT_TOKEN_REFRESH_INTERVAL_MS = 30 * 60 * 1000
|
|
71
80
|
|
|
72
81
|
export function createGithubAdapter(options: GithubAdapterOptions): GithubAdapter {
|
|
73
82
|
const logger = options.logger ?? consoleLogger
|
|
@@ -83,6 +92,7 @@ export function createGithubAdapter(options: GithubAdapterOptions): GithubAdapte
|
|
|
83
92
|
let selfLogin: string | null = null
|
|
84
93
|
let started = false
|
|
85
94
|
let managedHooks: ReadonlyArray<{ repo: string; hookId: number }> = []
|
|
95
|
+
let tokenRefreshTimer: { clear: () => void } | null = null
|
|
86
96
|
const workspaceByChat = new Map<string, string>()
|
|
87
97
|
|
|
88
98
|
const rememberWorkspace = (workspace: string, chat: string): void => {
|
|
@@ -168,6 +178,24 @@ export function createGithubAdapter(options: GithubAdapterOptions): GithubAdapte
|
|
|
168
178
|
// automatically when within 5 minutes of expiry.
|
|
169
179
|
process.env.GH_TOKEN = await auth.token()
|
|
170
180
|
started = true
|
|
181
|
+
// Keep GH_TOKEN warm even when the adapter is only receiving inbound
|
|
182
|
+
// webhooks and not making outbound API calls. This prevents `gh` CLI
|
|
183
|
+
// calls from the agent from failing with 401 after the token expires.
|
|
184
|
+
const tokenRefreshIntervalMs = options.tokenRefreshIntervalMs ?? DEFAULT_TOKEN_REFRESH_INTERVAL_MS
|
|
185
|
+
if (tokenRefreshIntervalMs > 0) {
|
|
186
|
+
const refresh = () => {
|
|
187
|
+
tokenFn().catch((err) => {
|
|
188
|
+
logger.error(`[github] periodic token refresh failed: ${err instanceof Error ? err.message : String(err)}`)
|
|
189
|
+
})
|
|
190
|
+
}
|
|
191
|
+
const setIntervalFn =
|
|
192
|
+
options.setInterval ??
|
|
193
|
+
((handler: () => void, ms: number) => {
|
|
194
|
+
const timer = setInterval(handler, ms)
|
|
195
|
+
return { clear: () => clearInterval(timer) }
|
|
196
|
+
})
|
|
197
|
+
tokenRefreshTimer = setIntervalFn(refresh, tokenRefreshIntervalMs)
|
|
198
|
+
}
|
|
171
199
|
logger.info(`[github] webhook listening on port ${options.configRef().webhookPort} as @${self.login}`)
|
|
172
200
|
// Best-effort: App-only preflight that compares the installation's granted
|
|
173
201
|
// permissions against the configured eventAllowlist and warns about gaps.
|
|
@@ -241,6 +269,10 @@ export function createGithubAdapter(options: GithubAdapterOptions): GithubAdapte
|
|
|
241
269
|
logDeregistrationOutcome(logger, deregistration)
|
|
242
270
|
managedHooks = []
|
|
243
271
|
}
|
|
272
|
+
if (tokenRefreshTimer !== null) {
|
|
273
|
+
tokenRefreshTimer.clear()
|
|
274
|
+
tokenRefreshTimer = null
|
|
275
|
+
}
|
|
244
276
|
await auth.dispose()
|
|
245
277
|
delete process.env.GH_TOKEN
|
|
246
278
|
server = null
|