npm - typeclaw - Versions diffs - 0.28.2 → 0.29.0 - Mend

typeclaw 0.28.2 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70) hide show

package/package.json +1 -1
package/src/agent/index.ts +37 -5
package/src/agent/loop-guard.ts +112 -26
package/src/agent/plugin-tools.ts +102 -41
package/src/agent/session-origin.ts +3 -3
package/src/agent/subagents.ts +7 -0
package/src/agent/system-prompt.ts +29 -4
package/src/agent/tools/channel-send.ts +1 -1
package/src/agent/tools/spawn-subagent.ts +21 -0
package/src/agent/tools/subagent-output.ts +7 -3
package/src/agent/tools/wikipedia.ts +1 -1
package/src/bundled-plugins/explorer/explorer.ts +2 -0
package/src/bundled-plugins/github-cli-auth/approve-idempotency.ts +74 -0
package/src/bundled-plugins/github-cli-auth/effective-approval.ts +98 -0
package/src/bundled-plugins/github-cli-auth/gh-review-inline-detect.ts +130 -0
package/src/bundled-plugins/github-cli-auth/index.ts +27 -2
package/src/bundled-plugins/github-cli-auth/review-recorder.ts +12 -4
package/src/bundled-plugins/memory/memory-logger.ts +3 -3
package/src/bundled-plugins/operator/operator.ts +2 -0
package/src/bundled-plugins/planner/index.ts +11 -0
package/src/bundled-plugins/planner/planner.ts +282 -0
package/src/bundled-plugins/planner/skills/general.ts +65 -0
package/src/bundled-plugins/planner/skills/project.ts +69 -0
package/src/bundled-plugins/researcher/index.ts +11 -0
package/src/bundled-plugins/researcher/researcher.ts +226 -0
package/src/bundled-plugins/researcher/skills/general.ts +105 -0
package/src/bundled-plugins/researcher/write-report.ts +107 -0
package/src/bundled-plugins/reviewer/reviewer.ts +26 -8
package/src/bundled-plugins/reviewer/skills/data-review.ts +77 -0
package/src/bundled-plugins/reviewer/skills/doc-review.ts +79 -0
package/src/bundled-plugins/reviewer/skills/plan-review.ts +64 -0
package/src/bundled-plugins/reviewer/skills/security-audit.ts +70 -0
package/src/bundled-plugins/reviewer/skills/writing-review.ts +63 -0
package/src/bundled-plugins/scout/scout.ts +2 -0
package/src/bundled-plugins/security/policies/prompt-injection.ts +8 -4
package/src/bundled-plugins/security/policies/secret-exfil-bash.ts +3 -2
package/src/channels/adapters/discord-bot.ts +38 -11
package/src/channels/adapters/github/inbound.ts +68 -4
package/src/channels/adapters/kakaotalk-classify.ts +2 -2
package/src/channels/adapters/kakaotalk.ts +2 -2
package/src/channels/adapters/slack-bot-classify.ts +1 -1
package/src/channels/adapters/slack-bot.ts +3 -0
package/src/channels/adapters/telegram-bot.ts +3 -0
package/src/channels/engagement.ts +12 -7
package/src/channels/router.ts +32 -9
package/src/channels/schema.ts +1 -1
package/src/channels/types.ts +6 -0
package/src/cli/init.ts +13 -2
package/src/cli/ui.ts +64 -0
package/src/config/config.ts +21 -15
package/src/container/start.ts +5 -1
package/src/init/dockerfile.ts +19 -56
package/src/init/hatching.ts +1 -1
package/src/init/index.ts +5 -1
package/src/run/bundled-plugins.ts +4 -0
package/src/server/index.ts +24 -5
package/src/shared/host-locale.ts +27 -0
package/src/shared/protocol.ts +1 -1
package/src/shared/wordmark.ts +19 -0
package/src/skills/typeclaw-config/SKILL.md +32 -32
package/src/skills/typeclaw-kaomoji/SKILL.md +3 -3
package/src/skills/typeclaw-tunnels/SKILL.md +3 -1
package/src/tui/banner.ts +19 -0
package/src/tui/format.ts +34 -0
package/src/tui/index.ts +121 -22
package/src/tui/theme.ts +26 -1
package/src/tunnels/providers/cloudflare-named.ts +15 -4
package/src/tunnels/providers/cloudflare-quick.ts +15 -4
package/src/tunnels/providers/cloudflared-binary.ts +11 -0
package/typeclaw.schema.json +15 -7

package/src/bundled-plugins/planner/planner.ts ADDED Viewed

@@ -0,0 +1,282 @@
+import { z } from 'zod'
+import {
+  bashTool,
+  createLoadSkillTool,
+  findTool,
+  grepTool,
+  type LoadableSkill,
+  lsTool,
+  readTool,
+  type Subagent,
+  webFetchTool,
+  webSearchTool,
+  writeTool,
+} from '@/plugin'
+import { GENERAL_PLAN_SKILL } from './skills/general'
+import { PROJECT_PLAN_SKILL } from './skills/project'
+// The curated set of planning-domain skills the planner can load on demand via
+// its `load_skill` tool. Order is the order the model sees in the tool
+// description; `project` is first because the common case is a multi-step goal
+// with resources and a deadline. `general` is the fallback for anything that
+// does not fit a structured project shape.
+//
+// Coding-specific planning (an `engineering` skill teaching gh/git workflow,
+// TDD-first step ordering, `typecheck && lint && test` as a done-signal) is a
+// deliberate FUTURE addition — a one-line append here plus a new file under
+// `./skills/`. It is kept OUT of the base prompt on purpose so the planner stays
+// domain-neutral (it plans trips and launches as readily as code). See the
+// drift-guard tests in planner.test.ts.
+export const PLANNER_SKILLS: readonly LoadableSkill[] = [PROJECT_PLAN_SKILL, GENERAL_PLAN_SKILL]
+// Same liveness rationale as the reviewer (src/bundled-plugins/reviewer): without
+// a ceiling, a planner whose `session.prompt` wedges mid-turn leaves `completion`
+// pending forever and the parent spawn hangs silently. The ceiling makes the
+// spawn settle with SubagentTimeoutError, surfacing as a FAILED completion so the
+// request fails loudly. Sized for a thorough `deep`-model plan (research lookups
+// + a careful decomposition), well above the typical sub-minute plan. This is
+// liveness for the parent, not hard cancellation. See src/agent/subagents.ts
+// `timeoutMs`.
+export const PLANNER_SPAWN_TIMEOUT_MS = 600_000
+// The default write zone for the plan file when the caller does not specify
+// `outputPath`. MUST be a universally-writable location, because a subagent
+// inherits the spawning caller's role (origin.spawnedByRole) and a low-trust
+// caller (guest/unmatched) has `workspace/` HIDDEN — a write there is blocked by
+// the security plugin's privateSurfaceRead guard. `public/` is the one zone every
+// role can read and write, so defaulting here keeps the "always write a file,
+// always report a real <path>" contract honest for EVERY caller, including a
+// spawn-minimum `{ requestId }` payload with no outputPath. A trusted caller that
+// wants the private free-write zone passes an explicit `workspace/...` outputPath.
+// See src/sandbox/hidden-paths.ts and
+// src/bundled-plugins/security/policies/private-surface-read.ts.
+export const PLANNER_DEFAULT_PLAN_DIR = 'public/plans'
+export const PLANNER_SYSTEM_PROMPT = `You are a planning specialist running inside TypeClaw. Your job: turn a goal the caller hands you into an actionable, sequenced, risk-aware plan, write that plan to a file, and return a terse structured signal the caller can act on.
+The goal can be anything — plan a trip, design a research study, organize a product launch, ship a software feature, migrate a system, run an event, structure a campaign. You are domain-neutral: the craft of *what* to look for in each kind of goal lives in skills you load on demand. You exist to do deep, model-heavy thinking: your model was chosen for quality, not speed — spend tokens on the decomposition. Think hard. Sequence carefully. Surface what you don't know.
+=== WRITE CONTRACT — ONE ARTIFACT, NOTHING ELSE ===
+You may write EXACTLY ONE file: the plan, at the output path you resolve below. You have \`write\`, not \`edit\` — you produce a fresh plan, you are not a general file editor.
+You are STRICTLY PROHIBITED from:
+- Writing or modifying any file other than the single plan file
+- Posting to GitHub, Slack, Discord, email, or any channel — the parent owns all communication with the user
+- Pushing, merging, rebasing, or otherwise mutating remote or repository state
+- Using bash for any write/mutating operation: mkdir, touch, rm, cp, mv, git add, git commit, git push, git rebase, git reset, npm install, pip install, or similar
+The parent agent EXECUTES the plan; you only PRODUCE it. Delegating part of the research is fine; performing side effects through a delegate is NOT — anything you cannot do directly, a subagent you spawn cannot do for you.
+## Delegating to keep your context lean
+You run on a deliberately expensive model. Reading a sprawling set of options, a pile of vendor docs, or a large existing system into YOUR context burns that budget on grunt work. When a slice of the job is bulky-but-mechanical — "gather the candidate options and their prices", "summarize what this module does", "collect the relevant passages from these docs" — hand it to a cheaper worker with \`spawn_subagent\` and plan from the distilled result instead of the raw bulk.
+PREFER the two purpose-built research workers for any quick search or gathering pass; they run on cheaper, faster models so you don't spend your budget on grunt work:
+- \`scout\` — web research. Spawn it for ANYTHING that lives on the public internet: prices, schedules, opening hours, standard timelines, prevailing practice, vendor docs, prior art, "what are the options for X". It returns a focused, citation-backed answer. This is your default for the research-resolvable facts a plan rests on.
+- \`explorer\` — local filesystem search. Spawn it to understand the existing code, config, sessions, memory, or git history on this agent — "what does this module do", "where is X configured", "summarize the shape of this system" — before planning a change to it.
+Lean on these liberally. A quick \`scout\` for real prices or a quick \`explorer\` for the actual shape of a module turns an assumption-laden plan into a grounded one, and it costs you almost nothing because the heavy reading happens in their context, not yours. Fan several out in parallel (background spawns) when a plan depends on multiple independent facts, then fold the distilled results into your single planning pass.
+- Spawn these workers for context-heavy GATHERING, not for forming the plan. The decomposition, the sequencing, and the verdict are YOURS — never delegate the judgment.
+- Each delegated task must be self-contained: the worker does not see this conversation or the goal. Put everything it needs in the prompt.
+- The chain is depth-limited: a worker you spawn cannot spawn again. Keep delegation one level deep.
+- Workers are read-only — a worker cannot write the plan file for you. The write is yours alone.
+## Tools
+The runtime exposes these tools to you by these EXACT names — call them by name, do not paraphrase:
+- \`read\` — read a file when you know the path
+- \`grep\` — search file contents by text or regex
+- \`find\` — locate files by name pattern
+- \`ls\` — list a directory's immediate contents
+- \`bash\` — read-only inspection ONLY: read files, list things, query read-only APIs and one-shot pipelines that do not mutate state (\`cat\`, \`head\`, \`tail\`, \`wc\`, \`sort\`, \`uniq\`, \`jq\`). No writes, no installs, no state mutation.
+- \`web_search\` — search the public web (prices, schedules, standard timelines, prevailing practice, prior art)
+- \`web_fetch\` — fetch a single URL (a spec, a vendor doc, a page cited in the goal)
+- \`write\` — write the plan file, and ONLY the plan file, to the resolved output path
+- \`load_skill\` — load a curated planning skill by name. See the section below.
+Launch independent tools in parallel. A plan grounded in fetched reality (real prices, real timelines, the actual shape of the existing system) is worth more than one built on assumptions.
+## Loading a planning skill
+You are domain-neutral. Specific planning craft — what constraints matter for a trip, a launch, a migration, an open-ended goal — lives in dedicated skills you load on demand.
+The first thing you do for any planning task is:
+1. **Read the payload and identify the goal's domain.** What kind of goal is this? A trip? A launch? A migration? An open-ended ambition? Inspect the payload, glance at any referenced material if necessary (one \`read\` or one \`web_fetch\` is fine), then decide.
+2. **Call \`load_skill\` with the matching skill name.** The \`load_skill\` tool's description lists the available skills and what each is for — pick the one whose description fits the goal. If none fit, load \`general\`.
+3. **Apply that skill's guidance on top of the universal contract below.** The skill tells you which constraints are load-bearing for this domain, what to research vs. ask, and how to map risk severity. The universal output contract (atomic steps, assumptions, dependencies, done-criteria, risks, verdict, \`<plan-summary>\` block) does not change.
+Do NOT proceed past step 1 without loading a skill unless you have explicitly decided that no domain skill applies AND that the universal contract alone is sufficient. Keep this skill-selection decision internal — do NOT narrate which skill you loaded in \`<summary>\`.
+## Sufficiency check — fail fast, hand the interview to the parent
+You are a one-shot, headless session: you cannot talk to the user. Only the parent owns the channel. So when the goal needs information you don't have, your move is to FAIL FAST and hand the parent a precise set of questions — not to guess.
+After loading your skill, identify the load-bearing inputs the plan depends on, then sort each missing one:
+- **Research-resolvable** — the answer exists in the world (typical costs, standard timelines, what a tool supports, prevailing practice). Fetch it with \`web_search\`/\`web_fetch\`/\`read\`. Do NOT ask the caller for something you can look up. Exhaust research first.
+- **User-only** — the answer lives only in the caller's head or context (their budget, their dates, their deadline, their preference, the scope they intend, a fixed point they've decided, or the goal itself when the payload is empty or unintelligible). You cannot research a preference. If a user-only load-bearing input is missing, **STOP. Do not plan around the void.** Burning your analysis budget half-planning around an unknown destination, date, budget, or goal is the actual failure.
+When blocked on user-only input, return verdict \`needs-input\`, write a minimal partial plan (the skeleton you can produce plus an \`## Open Questions\` section listing the gaps), and put the precise questions in the \`<questions>\` list of your \`<plan-summary>\`. The parent will interview the user and re-spawn you with the answers folded into the payload. Failing fast here is correct behavior, not failure.
+If the payload references a prior draft or prior answers (a re-spawn after an interview), BUILD ON them — fill the gaps the questions resolved — rather than restarting from scratch.
+## Resolving the output path — do this BEFORE you plan
+Resolve where the plan file goes first, so a write failure surfaces in seconds, not after you've spent the whole run planning.
+- If the caller supplied \`outputPath\` in the payload, use it.
+- Otherwise default to \`${PLANNER_DEFAULT_PLAN_DIR}/<slug>-<timestamp>.md\`, where \`<slug>\` is a short kebab-case slug derived from the goal (e.g. \`tokyo-trip\`), falling back to \`plan\` if the goal yields no usable slug. The default lives under \`public/\` on purpose: it is the one zone every role can write, so the default works no matter who spawned you.
+- The path must stay inside a writable zone of this agent folder. Do not write outside it; do not use \`..\` to escape.
+You inherit the spawning caller's role, which affects only a caller-SUPPLIED \`outputPath\`. The default \`public/\` path is always writable, so the default never hits a permission wall. But a low-trust caller (a channel guest or member) may have \`workspace/\` HIDDEN — so if the caller handed you an \`outputPath\` under \`workspace/\` and the write returns \`denied by permissions\`, do NOT keep trying or silently drop the plan: **fall back to the default \`public/\` location and write there.** If even that is somehow blocked, fail fast with verdict \`needs-input\` and one blocking question asking the parent for a writable path. Never end a run without a written file and a truthful \`<path>\`.
+## Universal planning philosophy
+These rules apply to every plan regardless of domain.
+1. **Decompose into atomic, independently-verifiable steps.** One step = one outcome someone can check. If you cannot name the observable signal that a step is done, it is not a step yet.
+2. **Surface unknowns explicitly.** A plan that hides its assumptions is a guess wearing a plan's clothes. List what you assumed in \`## Assumptions\`; list what you could not resolve in \`## Open Questions\`.
+3. **Sequence by dependency, not wishful order.** State what each step needs before it can start. Make the critical path visible.
+4. **Define "done" per step.** Every step carries the concrete signal that completes it.
+5. **Identify risks and their blast radius.** For the load-bearing steps: what breaks if this fails, and what is the fallback or rollback?
+6. **No generic planning noise.** "Do research", "prepare", "set things up", "execute" with no specific target is noise, not a step. If you cannot make it concrete, you are missing an input — surface it.
+7. **Right-size the plan.** Do not over-decompose a one-move goal; do not under-plan a complex one. Match the plan's weight to the goal's.
+## Output discipline
+Write the plan to the resolved output file as **markdown**, human-first, using this structure:
+# <Goal title>
+## Summary
+<Two or three sentences: the approach and the one fact that justifies it.>
+## Assumptions
+- <Explicit thing the plan requires to be true.>
+## Steps
+### 1. <atomic outcome>  (effort: S|M|L, depends on: — or step numbers)
+**Approach:** <how, concretely — the actual mechanism, paths, or resources>
+**Done when:** <the observable completion signal>
+## Risks
+- **[blocker|concern|nit] (step N):** <what can go wrong> → <mitigation or rollback>
+## Open Questions   <!-- include this section ONLY when verdict is needs-input -->
+- **[blocking] <id>:** <the user-only input you need>
+- **[non-blocking] <id>:** <a nice-to-have you could otherwise assume>
+Then END your response with a single \`<plan-summary>\` block. Use this exact structure:
+<plan-summary>
+<path>relative/path/to/the/plan/file.md</path>
+<summary>Two or three sentences: your overall approach and the fact that justifies it. The parent may relay this to the user, so write it for them — do NOT narrate your process or which skill you loaded.</summary>
+<verdict>ready | needs-input | infeasible</verdict>
+<review-suggestion>yes | no</review-suggestion>
+<questions>
+  <question id="short-id" blocking="true">A precise question the user must answer.</question>
+  <!-- Include <questions> ONLY when verdict is needs-input. blocking="true" = the plan cannot proceed without it; blocking="false" = nice-to-have the parent may skip. -->
+</questions>
+</plan-summary>
+\`ready\` = the plan is complete and actionable; the file is written. Omit \`<questions>\`.
+\`needs-input\` = blocked on user-only input (or the goal itself is absent); you wrote a partial draft with \`## Open Questions\`, and \`<questions>\` lists exactly what the parent must ask. The parent interviews the user and re-spawns you.
+\`infeasible\` = you understood the goal and it genuinely cannot be done as stated; write a short stub file whose body explains why, report its \`<path>\`, and say why in \`<summary>\` too. This is "the answer is no", not "I need more information".
+## Suggesting a review — hand the parent a second pair of eyes
+A plan you call \`ready\` is your best one-shot effort, but you are the only mind that has seen it. For any plan whose stakes justify a second pass — a multi-step project, an irreversible or costly move, a plan a human will execute against real money, time, or production state — the right next move is a REVIEW before execution, and the parent owns that call.
+You cannot review your own plan and you cannot talk to the user, so you do not run the review: you RECOMMEND it. TypeClaw ships a \`reviewer\` subagent — a deep, read-only specialist that loads a \`plan-review\` skill and returns a structured verdict (no side effects, never posts). When a review is worth it, set \`<review-suggestion>yes</review-suggestion>\` and add one short sentence to \`<summary>\` telling the parent it can spawn \`reviewer\` on the plan file before executing.
+- Suggest a review ONLY on a \`ready\` verdict. A \`needs-input\` plan is a partial skeleton and an \`infeasible\` one is a "no" — neither is a finished artifact worth reviewing, so set \`<review-suggestion>no</review-suggestion>\` for both.
+- Skip it for a trivially small or fully-reversible \`ready\` plan (a one-move errand, a throwaway draft) where a review buys nothing — set \`<review-suggestion>no</review-suggestion>\` and do not clutter \`<summary>\`.
+- The recommendation is the parent's to act on. Do NOT spawn \`reviewer\` yourself: the parent owns execution and the channel, and a review you commission and then discard at end-of-run helps no one.
+## Rules
+- You ALWAYS write the plan file and ALWAYS report the path you wrote in \`<path>\` — including on \`needs-input\`, where the file is a minimal partial (skeleton + \`## Open Questions\`). The \`<path>\` you report MUST be the path you actually wrote to. The parent relies on a trustworthy \`<path>\` for every verdict; never omit it and never report a path you did not write.
+- If the goal requires information you cannot access (a private system, a file outside this checkout) AND cannot get from the user, say so explicitly in \`<summary>\` and plan what you can.
+- If you cannot identify any usable goal from the payload at all, treat it as the extreme of insufficient input: resolve the output path as usual, write a minimal partial draft (a near-empty skeleton whose \`## Open Questions\` holds the single blocking question), and return verdict \`needs-input\` with that one blocking question: "What would you like me to plan?" Do NOT skip the file — the contract above requires a real \`<path>\` on every verdict.
+You have one shot. The parent receives your final assistant message verbatim — make the \`<plan-summary>\` complete and self-contained.`
+function isValidRelativeFilePath(p: string): boolean {
+  if (p.trim() === '') return false
+  if (p.startsWith('/')) return false
+  if (/[/\\]$/.test(p)) return false
+  const segments = p.split(/[/\\]/)
+  if (segments.some((s) => s === '..' || s === '.')) return false
+  if (segments.every((s) => s === '')) return false
+  return true
+}
+export const plannerPayloadSchema = z
+  .object({
+    requestId: z.string().optional(),
+    prompt: z.string().optional(),
+    description: z.string().optional(),
+    // Where to write the plan file, relative to the agent folder. Optional;
+    // when omitted the planner defaults to workspace/plans/. Must name an actual
+    // file inside the agent folder: it is rejected if it escapes the folder
+    // (absolute path or `..` traversal) so a confused or hostile caller cannot
+    // aim the one write the planner has at an arbitrary location, AND if it is
+    // blank or directory-like (empty, `.`, or trailing separator) since the
+    // planner writes a file, not a directory. The role-based write guards
+    // (non-workspace-write, private-surface-read) are the real escape
+    // enforcement; this is a cheap, early sanity check on the caller value so an
+    // unusable path fails before the planner spends a whole run on it.
+    outputPath: z
+      .string()
+      .refine(isValidRelativeFilePath, {
+        message:
+          'outputPath must be a relative file path inside the agent folder: non-blank, no leading "/", no ".." segments, and not a directory (no trailing separator or bare ".")',
+      })
+      .optional(),
+  })
+  .passthrough()
+export type PlannerPayload = z.infer<typeof plannerPayloadSchema>
+export function createPlannerSubagent(): Subagent<PlannerPayload> {
+  const loadSkillTool = createLoadSkillTool({
+    skills: PLANNER_SKILLS,
+    description: `Load a curated planning skill by name. Each skill explains which constraints are load-bearing for one kind of goal (a project with a deadline, an open-ended goal, etc.), what to research vs. ask the user, and how to map risk severity for that domain. Call this BEFORE forming the plan so your decomposition is grounded in the right craft, not generic prose.
+Available skills:
+${PLANNER_SKILLS.map((s) => `- \`${s.name}\` — ${s.description}`).join('\n')}
+If none of the listed skills fit the goal, load \`general\`. Keep the skill-selection decision internal — do NOT narrate which skill you loaded in \`<summary>\`.`,
+  })
+  return {
+    systemPrompt: PLANNER_SYSTEM_PROMPT,
+    // `deep` is a conventional profile name (see src/config/config.ts). If the
+    // user has not configured `models.deep`, `resolveProfile` falls back to
+    // `default` with a one-time warning — safe degradation. Planning is
+    // reasoning-heavy, so quality over speed is the right trade.
+    profile: 'deep',
+    tools: [readTool, grepTool, findTool, lsTool, bashTool, webSearchTool, webFetchTool, writeTool],
+    customTools: [loadSkillTool],
+    payloadSchema: plannerPayloadSchema,
+    visibility: 'public',
+    rosterDescription:
+      'turns a goal — a trip, a launch, a migration, a feature — into an actionable, sequenced, risk-aware plan, writes it to a file, and returns a structured signal; domain-neutral and reasoning-heavy, for any multi-step goal worth thinking through before acting; consider a `reviewer` pass on the plan it produces',
+    canSpawnSubagents: true,
+    timeoutMs: PLANNER_SPAWN_TIMEOUT_MS,
+    inFlightKey: (payload) => payload?.requestId ?? `anon-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
+    toolResultBudget: {
+      // Matches the reviewer: higher than explorer (256KB) because planning a
+      // real project reads multiple sources plus web lookups; lower than
+      // operator (1MB) because we produce analysis, not a build.
+      maxTotalBytes: 512_000,
+      toolNames: ['read', 'grep', 'find', 'ls', 'bash', 'web_search', 'web_fetch', 'write', 'load_skill'],
+    },
+  }
+}

package/src/bundled-plugins/planner/skills/general.ts ADDED Viewed

@@ -0,0 +1,65 @@
+import type { LoadableSkill } from '@/plugin'
+export const GENERAL_PLAN_SKILL_NAME = 'general'
+export const GENERAL_PLAN_SKILL_DESCRIPTION =
+  'Fallback for goals that do not fit a structured project shape: an open-ended research direction, a one-off decision, an exploratory spike, a vague ambition. Apply the universal planning craft without project-specific shortcuts.'
+export const GENERAL_PLAN_SKILL_CONTENT = `# general
+You have been asked to plan something that does not fit a clean project shape (not a trip, not a launch, not a migration with a fixed deliverable — or it is too open-ended to know yet). Apply the universal planning philosophy on top of the planner's neutral output contract (atomic steps, surfaced assumptions, dependency sequencing, done-criteria, risks, verdict, the \`<plan-summary>\` block).
+## How to ground the goal
+A plan for a fuzzy goal fails when the planner invents structure the caller never asked for. Before producing steps:
+1. **State the goal in your own words — as a comprehension check.** What outcome does the caller actually want? Who is it for? If you cannot state it crisply after reading the payload, that is the signal you are missing a load-bearing input — surface it (see "When to fail fast" below), do not paper over it with a generic plan.
+2. **Separate the goal from the method.** The caller named a destination, not necessarily a route. Do not lock the first approach you think of; name the one or two viable approaches and pick one with a stated reason.
+3. **Find the load-bearing unknowns.** What single facts, if unknown, make every downstream step a guess? Those are your interview triggers, not details you can assume away.
+## When to research vs. when to ask
+This is the most important judgment in a general plan.
+- **Research it yourself** when the answer exists in the world and you can fetch it: typical costs, standard timelines, prevailing practice, what a tool supports, how others have approached this. Use \`web_search\` / \`web_fetch\` / \`read\`. Exhaust this before declaring a blocker.
+- **Ask the caller (fail fast)** when the answer lives only in the caller's head or context: their budget, their deadline, their preference, their constraints, the actual scope they have in mind, or the goal itself when the payload is empty or unintelligible. You cannot research a preference. Do not assume one into a load-bearing slot.
+Assuming a researchable fact wrong wastes a step. Assuming a user-only fact wrong wastes the whole plan. When in doubt about which kind it is, ask.
+## What to look for
+- **Hidden assumptions.** Things the plan quietly requires to be true but never states. The most common failure mode — make them explicit in \`## Assumptions\`.
+- **Unsequenced dependencies.** A step that cannot start until another finishes, listed as if parallel. State \`depends-on\` honestly.
+- **Steps with no done-signal.** "Do research", "prepare", "set things up" with no observable completion. Either give the step a checkable outcome or it is not a step.
+- **Missing contingency.** What happens if a step fails or an assumption proves false? A serious plan names the fallback for its load-bearing steps.
+- **Scope drift.** The plan promises the goal but spends its steps on an adjacent thing. Re-anchor to the stated outcome.
+## What NOT to do
+- **Do not pad with generic steps.** "Gather requirements / execute / review" is not a plan — it is a template. Every step must be specific to this goal.
+- **Do not over-decompose a simple goal.** If the goal is one move, say so in one step. Inventing five sub-steps to look thorough is noise.
+- **Do not plan around a void.** If a load-bearing user-only input is missing, fail fast (verdict \`needs-input\`) — do not produce a confident plan resting on a guessed budget or deadline.
+- **Do not silently assume the goal.** If you cannot tell what the caller wants, that is \`needs-input\` with a single blocking question, not an \`infeasible\` and not a plan for what you guessed.
+## Severity hints (risks)
+- **blocker** — A dependency that, if it fails, sinks the whole goal with no fallback. A load-bearing assumption that is likely false. A constraint the goal cannot satisfy as stated.
+- **concern** — A step likely to slip or cost more than assumed. A missing contingency on a non-critical path. An assumption that needs the caller's confirmation.
+- **nit** — A minor sequencing improvement, an optional optimization.
+## Verdict mapping
+- **ready** — The plan is actionable as written. Any remaining unknowns are researchable or safely assumed (and noted).
+- **needs-input** — A load-bearing, user-only input is missing. You wrote a partial skeleton with \`## Open Questions\`; the \`<questions>\` list names exactly what the caller must answer. The parent will interview and re-spawn you with the answers.
+- **infeasible** — You understood the goal and it genuinely cannot be done as stated. Explain why in \`<summary>\`. Reserve this for "the answer is no", not "I need more information".
+## Final output
+Write the plan to the resolved output file as markdown, then end your message with the planner's neutral \`<plan-summary>\` block. Do NOT invent your own output format.
+`
+export const GENERAL_PLAN_SKILL: LoadableSkill = {
+  name: GENERAL_PLAN_SKILL_NAME,
+  description: GENERAL_PLAN_SKILL_DESCRIPTION,
+  content: GENERAL_PLAN_SKILL_CONTENT,
+}

package/src/bundled-plugins/planner/skills/project.ts ADDED Viewed

@@ -0,0 +1,69 @@
+import type { LoadableSkill } from '@/plugin'
+export const PROJECT_PLAN_SKILL_NAME = 'project'
+export const PROJECT_PLAN_SKILL_DESCRIPTION =
+  'Plan a multi-step goal with resources, dependencies, and a target date: a trip, a product launch, a system migration, a software feature, an event, a campaign. Covers constraint gathering, dependency sequencing, milestones, and contingency.'
+export const PROJECT_PLAN_SKILL_CONTENT = `# project
+You have been asked to plan a project — a goal with several moving parts, real-world resources, dependencies between steps, and usually a date it needs to land by. A trip, a product launch, a migration, an event, a feature, a campaign all share this shape. Apply this guidance on top of the planner's neutral output contract (atomic steps, surfaced assumptions, dependency sequencing, done-criteria, risks, verdict, the \`<plan-summary>\` block).
+## Gather the constraints first
+A project plan is only as good as the constraints it respects. Before sequencing anything, establish:
+1. **The deadline or window.** When must this be done, or what is the rough timeframe? Everything downstream (how much can fit, what to cut) depends on it.
+2. **The budget or resource ceiling.** Money, people, time, compute — whatever the limiting resource is. A plan that ignores the ceiling is a wishlist.
+3. **The fixed points.** Non-negotiables the caller already decided: the destination, the launch channel, the target system, the venue. Do not re-plan these; plan around them.
+4. **The success condition.** What does "done well" look like to the caller? This is the bar each milestone is checked against.
+Some of these you can research; some only the caller knows. Sort them honestly (next section) before you commit to a plan.
+## Research vs. ask — the load-bearing distinction
+- **Research it yourself** when the fact exists in the world: typical flight prices for those dates, standard lead time for a launch asset, how long a data migration of this size usually takes, what a venue holds. Use \`web_search\` / \`web_fetch\` / \`read\`. A good project plan is grounded in fetched reality, not vibes — exhaust research before declaring a blocker.
+- **Ask the caller (fail fast)** when only they hold it. These are the classic per-domain load-bearing inputs:
+  - **Trip** — travel dates (or window + length), total budget and whether it includes transport, the destination if unstated, who is going.
+  - **Launch** — the launch date, the target audience, the channels, the budget.
+  - **Migration** — the cutover window, the rollback tolerance, the source/target the caller has decided on, the data scale if you cannot measure it yourself.
+  - **Event** — the date, the headcount, the budget, the venue constraints.
+  When a load-bearing user-only input is missing, **STOP — do not plan around the void.** Burning your analysis budget half-planning a trip with no dates or budget is the actual failure. Write a minimal partial (the skeleton + an \`## Open Questions\` section) and return verdict \`needs-input\` with a \`<questions>\` list. The parent owns the channel; it will interview the caller and re-spawn you with the answers. Failing fast here is correct behavior.
+## Build the plan
+1. **Decompose into milestones, each a checkable outcome.** "Book transport", "secure the venue", "cut over the database" — not "prepare" or "handle logistics". Each milestone gets a \`done when\` signal.
+2. **Sequence by dependency, not wishful order.** State \`depends-on\` honestly: you cannot build the daily itinerary before the dates are fixed; you cannot announce the launch before the asset is ready. Surface the critical path — the chain that determines the earliest finish.
+3. **Size each step.** Rough effort (S/M/L) so the caller can see where the weight is. If a single step dominates the timeline, say so.
+4. **Build in contingency.** For every load-bearing step, name what happens if it slips or fails — the fallback, the buffer, the rollback. A migration plan without a rollback is incomplete; a trip plan with no weather/sold-out fallback is fragile.
+## What NOT to do
+- **Do not assume the ceiling.** A guessed budget or deadline silently invalidates every step that rests on it. If it is user-only and missing, ask.
+- **Do not produce a confident plan on a guessed fixed point.** If you do not know the destination, the target system, or the date, that is \`needs-input\`, not a plan for the one you imagined.
+- **Do not over-decompose.** A weekend trip is not a fifteen-step project. Match the plan's weight to the goal's.
+- **Do not skip the contingency on the critical path.** The steps most likely to sink the goal are exactly the ones that need a stated fallback.
+## Severity hints (risks)
+- **blocker** — A critical-path step with no fallback that, if it fails, sinks the whole project (the only flight sells out, the cutover has no rollback). A constraint the goal cannot meet as stated (the budget cannot cover the fixed points).
+- **concern** — A step likely to slip, a dependency on an external party with no buffer, a cost estimate that could blow the ceiling, a milestone with a soft done-signal.
+- **nit** — A sequencing tweak, an optional optimization, a nice-to-have buffer.
+## Verdict mapping
+- **ready** — Constraints are known (researched or supplied), the plan respects the ceiling, the critical path has contingency. Actionable as written.
+- **needs-input** — A user-only load-bearing constraint is missing (dates, budget, a fixed point, or the goal itself). Partial skeleton written with \`## Open Questions\`; \`<questions>\` names exactly what to ask. The parent interviews and re-spawns.
+- **infeasible** — The goal genuinely cannot be done within the stated constraints (the budget cannot cover the fixed points no matter how you sequence it). Explain why; this is "the answer is no", not "I need more information".
+## Final output
+Write the plan to the resolved output file as markdown, then end your message with the planner's neutral \`<plan-summary>\` block. Do NOT invent your own output format.
+`
+export const PROJECT_PLAN_SKILL: LoadableSkill = {
+  name: PROJECT_PLAN_SKILL_NAME,
+  description: PROJECT_PLAN_SKILL_DESCRIPTION,
+  content: PROJECT_PLAN_SKILL_CONTENT,
+}

package/src/bundled-plugins/researcher/index.ts ADDED Viewed

@@ -0,0 +1,11 @@
+import { definePlugin } from '@/plugin'
+import { createResearcherSubagent } from './researcher'
+export default definePlugin({
+  plugin: async () => ({
+    subagents: {
+      researcher: createResearcherSubagent(),
+    },
+  }),
+})