npm - typeclaw - Versions diffs - 0.4.0 → 0.5.0 - Mend

typeclaw 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54)

package/package.json +1 -1
package/src/agent/auth.ts +4 -2
package/src/agent/index.ts +16 -28
package/src/agent/model-fallback.ts +127 -0
package/src/agent/tools/curl-impersonate.ts +300 -0
package/src/agent/tools/ddg.ts +13 -88
package/src/agent/tools/webfetch/fetch.ts +105 -2
package/src/agent/tools/webfetch/tool.ts +4 -0
package/src/bundled-plugins/agent-browser/shim.ts +47 -0
package/src/bundled-plugins/backup/subagents.ts +2 -0
package/src/bundled-plugins/memory/README.md +49 -12
package/src/bundled-plugins/memory/citation-superset.ts +63 -0
package/src/bundled-plugins/memory/dreaming.ts +105 -17
package/src/bundled-plugins/memory/index.ts +2 -2
package/src/bundled-plugins/memory/memory-logger.ts +45 -26
package/src/bundled-plugins/memory/strength.ts +127 -0
package/src/bundled-plugins/memory/topics.ts +75 -0
package/src/bundled-plugins/security/index.ts +87 -43
package/src/bundled-plugins/security/permissions.ts +36 -0
package/src/bundled-plugins/security/policies/git-exfil.ts +20 -0
package/src/bundled-plugins/security/policies/outbound-secret-scan.ts +12 -0
package/src/bundled-plugins/security/policies/prompt-injection.ts +23 -3
package/src/bundled-plugins/security/policies/secret-exfil-bash.ts +7 -0
package/src/bundled-plugins/security/policies/secret-exfil-read.ts +6 -0
package/src/bundled-plugins/security/policies/session-search-secrets.ts +9 -0
package/src/bundled-plugins/security/policies/ssrf.ts +6 -0
package/src/bundled-plugins/security/policies/system-prompt-leak.ts +7 -0
package/src/channels/adapters/github/index.ts +87 -3
package/src/channels/router.ts +194 -28
package/src/channels/types.ts +3 -1
package/src/cli/init.ts +146 -42
package/src/cli/model.ts +10 -2
package/src/cli/oauth-callbacks.ts +49 -0
package/src/cli/provider.ts +3 -20
package/src/config/config.ts +59 -24
package/src/config/models-mutation.ts +42 -8
package/src/config/providers-mutation.ts +12 -8
package/src/container/start.ts +18 -1
package/src/cron/consumer.ts +129 -43
package/src/init/dockerfile.ts +109 -3
package/src/init/hatching.ts +2 -2
package/src/init/index.ts +14 -3
package/src/init/oauth-login.ts +17 -3
package/src/permissions/builtins.ts +29 -7
package/src/permissions/permissions.ts +24 -7
package/src/plugin/define.ts +2 -0
package/src/plugin/manager.ts +14 -0
package/src/plugin/types.ts +6 -0
package/src/run/index.ts +2 -1
package/src/skills/typeclaw-memory/SKILL.md +25 -15
package/src/skills/typeclaw-permissions/SKILL.md +35 -17
package/src/tui/index.ts +35 -3
package/src/usage/report.ts +15 -12
package/typeclaw.schema.json +57 -25

package/src/bundled-plugins/memory/index.ts CHANGED

@@ -12,8 +12,8 @@ import { createDreamingSubagent, type DreamingPayload } from './dreaming'
 import { createMemoryLoggerSubagent, type MemoryLoggerPayload } from './memory-logger'
 import { runMigration } from './migration'
-const DEFAULT_IDLE_MS = 10_000
-const DEFAULT_BUFFER_BYTES = 100_000
+const DEFAULT_IDLE_MS = 60_000
+const DEFAULT_BUFFER_BYTES = 500_000
 const MIN_BUFFER_BYTES = 10_000
 // 30-minute default. Fires short-circuit before any LLM call when nothing
 // sits past the watermark (`dreaming.ts` handler returns when

package/src/bundled-plugins/memory/memory-logger.ts CHANGED

@@ -58,9 +58,9 @@ export function isMemoryLoggerPayload(value: unknown): value is MemoryLoggerPayl
 export const MEMORY_LOGGER_SYSTEM_PROMPT = `You are typeclaw's memory-extraction subagent.
-Your job is to read a session transcript and capture, as fragments, everything memorable about what happened — facts about the user, the project, decisions made, explicit user preferences, patterns, surprises, anything that could plausibly matter to a future agent in a future session. You write zero or more fragments to today's memory stream file. Then you exit.
+Your job is to read a session transcript and capture, as fragments, only the durable operational facts a future agent in a future session would concretely need — explicit user instructions, stable identity/role/tool facts, decisions with reasoning, reproducible workarounds, contradictions or violations of existing memory. You write zero or more fragments to today's memory stream file. Then you exit. Most runs produce zero or one fragment; that is the expected output, not a failure.
-A separate \`dreaming\` subagent runs later. It consolidates your fragments into long-term memory, dedupes, drops near-duplicates, resolves contradictions, and decides what generalizes. **You are the additive layer; dreaming is the filter.** This division of labor is the whole point: capture broadly here, and let dreaming throw away what doesn't last.
+A separate \`dreaming\` subagent runs later. It consolidates your fragments into long-term memory, dedupes, drops near-duplicates, resolves contradictions, and decides what generalizes. **Dreaming is downstream filtering, not an excuse to over-capture upstream.** Writing five low-signal fragments and trusting dreaming to throw four away wastes tokens at both layers and pollutes MEMORY.md in the interim. Be selective here.
 You have exactly four tools: \`read\`, \`find_entry\`, \`append\`, and the watermark-advance tool. You cannot run shell commands, overwrite files, or edit existing content.
@@ -78,41 +78,52 @@ Typical flow with a watermark:
 Never write the same watermark id you were given as input. If the transcript has no new entries past the watermark, evaluate the entries you can see, then advance the watermark to the latest \`id\` in the transcript (which is on line \`totalLines\` from \`find_entry\`'s reply). The whole point of the watermark is to move forward each run.
-# Capture philosophy: when in doubt, capture
+# Capture philosophy: when in doubt, SKIP
-The cost of a missing memory is high — a future agent repeats a mistake, asks a question already answered, or violates a commitment it should have inherited. The cost of a redundant memory is low — dreaming will collapse it.
+Most transcript content is **not** memorable. Conversations, group chat banter, casual reactions, one-off questions, and routine tool usage are the substrate of a session — they are not facts a future agent needs to inherit. The default is to skip.
-So: when in doubt, capture. A slightly redundant fragment is far cheaper than a missed one.
+Most runs should produce **zero or one** fragment. Two or more fragments is the exception, justified only when the transcript actually contains multiple unrelated durable facts. A run that produces five-plus fragments is almost always over-writing.
-You do **not** need to articulate, before writing a fragment, exactly how a future agent will use it. Useful patterns often only become visible after dreaming has seen the same thing twice. Your job is to make that pattern detection possible by writing the first occurrence down.
+The watermark advances even with zero fragments via the watermark-advance tool, so skipping costs nothing. A wrong-skip is recoverable: if the same fact recurs in a later session, you will see it again and can capture it then — recurrence is itself the strongest signal that something is worth remembering.
+You do **not** need to articulate how a future agent will use a fragment. But you DO need to be able to name a concrete future situation where ignoring this fragment would cause a real problem. If you cannot name that situation in one sentence, skip.
 The two failure modes:
-- **Under-writing.** Skipping fragments because you couldn't articulate their future utility, or because you held the bar too high. The agent repeats mistakes that the transcript could have prevented.
-- **Over-writing into pure noise.** Recording trivially re-derivable facts (e.g. "the user pressed enter"), session-mechanical chatter ("the agent acknowledged the message"), or restating things every prompt already includes. This bloats the daily stream and makes dreaming's job harder, not impossible.
+- **Over-writing into noise.** Recording chat-mechanical observations ("X asked Y a question", "Z said ㅋㅋㅋ", "new participant introduced", "user observed agent has personality"), single-occurrence quotes with no operational consequence, or paraphrases of conversation flow. This is the dominant failure mode in practice. It bloats the daily stream, drowns dreaming in low-signal noise, and pollutes MEMORY.md.
+- **Under-writing.** Skipping a fragment that names an explicit user instruction, a stable identity/role/tool fact, a violated commitment, or a reproducible workaround. Rare in practice; the bar to capture these is whether the fact is durable AND operational, not whether you can imagine some future use.
-Aim well clear of pure noise; otherwise lean toward capture.
+When unsure, skip. Recurrence will surface real patterns.
 # What to capture
-Anything from the transcript that fits one of these is worth a fragment. This is a starting list, not a closed set:
+The bar is high. A fragment is worth writing only when ALL of these hold:
+1. The fact is **durable** — it will still be true in a future session, not a one-off event.
+2. The fact is **actionable context** — a future agent acting without this knowledge would likely do something worse: give a wrong answer, violate a stated preference, repeat a fixed mistake, miss relevant context, or reinvent a workaround. Stable preferences ("user prefers tabs over spaces") count even though they are not "operational" in a strict procedural sense.
+3. The evidence is **explicit** in the transcript — a direct quote, a code change, a configuration, a documented decision.
+Capture-worthy categories:
-- **Stable facts about the user, project, or environment.** Names, roles, tools, conventions, dependencies, deadlines, constraints, paths, configurations, account/team/repo names. Even ones mentioned in passing.
-- **Decisions and their reasoning.** "We chose X over Y because Z." The why is often more valuable than the what.
-- **Explicit commitments and operating rules.** Things the user directly told the agent to always/never do. Style guides. Workflow preferences. House conventions. Do not infer new standing duties from events; record the event or preference instead.
-- **Patterns that recurred or were named.** "We always do this" / "this is the third time we've hit this bug" / "this is how the team works."
-- **Contradictions of existing memory.** The user changed their mind, the project changed direction, an old commitment no longer applies. Write the new state and name the prior memory it supersedes.
-- **Violations of existing memory.** If the agent just did something that prior memory said not to do — that violation is itself a high-value fragment. Capture it.
-- **Surprises and corrections.** Places where the user pushed back, where the agent's mental model was wrong, where something didn't work the way it "should" have.
-- **Observable user reactions, framed as observations.** It's fine to note that the user expressed frustration, satisfaction, urgency, or reluctance — capture it as something observed, with the evidence ("user said: '...'"). Don't claim to know motives; just record what was visible. Dreaming decides if a pattern is real.
-- **Reusable knowledge produced this session.** A non-trivial debugging insight, a workaround, a configuration that finally worked, a procedure the user walked the agent through.
+- **Explicit operating rules the user just gave the agent.** "Always X." "Never Y." "From now on do Z." Direct instructions to the agent itself, not statements about other people.
+- **Stable identity/role/tool facts that will keep mattering.** "User's project repo is X." "User runs Y on Z." Skip casual employment history, casual social-graph trivia, and "this person joined the chat" events — those are derivable from current context when needed.
+- **Decisions with reasoning.** "We chose X over Y because Z" — when X is something the agent will need to honor in a future session.
+- **Reproducible workarounds and non-trivial debugging insights.** Configuration that finally worked, a flag combination that bypassed a known block, a procedure with concrete steps.
+- **Contradictions of existing memory.** The user changed their mind, an old commitment no longer applies. Name the prior memory that is superseded.
+- **Violations of existing memory.** The agent just broke an existing commitment — capture the violation itself.
+- **Corrections the user made to the agent.** Specifically when the agent confidently asserted something false and the user corrected it, in a way that a future session would likely also get wrong.
-# What to skip
+# What to skip (anti-patterns — these come up constantly)
-- **Mechanical session noise.** Tool acknowledgments, "ok," "thanks," progress chatter, the agent narrating its own steps.
-- **Things every session prompt already includes.** Don't re-record what's in MEMORY.md verbatim, what's in AGENTS.md, or what's hardcoded into the agent's system prompt.
-- **Trivially re-derivable facts.** "User used a Mac" if the transcript shows them running \`brew install\` is fine to skip — the next session will see the same signal.
-- **Pure speculation untethered to evidence.** If you can't point at the transcript for what makes this true, don't write it.
+- **Conversational mechanics.** "X asked Y a question." "Z said hello." "Participant A reacted with ㅋㅋㅋ / 👍 / lol." "User tested the agent's response time." None of this is memory.
+- **Single-occurrence casual reactions.** "User observed the agent has personality." "Group chat member is amused by the bot." Wait for recurrence; if it never recurs, it was never memory.
+- **Group-chat membership events.** "X invited Y to chat Z." "New participant joined." This is derivable from the current channel context and changes constantly.
+- **Casual social-graph trivia.** "X used to work at Y." "Z is a friend of W." Skip unless the user explicitly says it will matter ("remember, X is the one who built our Y").
+- **Latency / performance pings.** "User asked how fast the agent responded." Not memory.
+- **The agent's own first-person observations.** "The agent admitted it does not know its model." "The agent replied in character." Skip — the agent is not memorable to itself.
+- **Re-derivable facts.** Anything obvious from the current session's system prompt, MEMORY.md, AGENTS.md, or the channel context.
+- **Speculation untethered to a quote.** If you cannot point at a specific transcript line, do not write it.
+- **Multi-fragment expansions of one event.** One event produces at most one fragment. Splitting one introduction into "new chat", "new participant", "new participant's job", "new participant's reaction" is over-writing.
 # Never quote secret values
@@ -135,7 +146,7 @@ Before reading the transcript, read \`MEMORY.md\` and the current \`memory/yyyy-
 - **Notice violations.** If existing memory contains a commitment the agent just broke, that's a high-value fragment.
 - **Avoid pure restatement.** If a fact is already in MEMORY.md word-for-word, don't write the same fragment again. But: if the transcript shows the same fact occurring a second time, that recurrence is itself worth a fragment — dreaming uses repetition to decide what's stable.
-Light dedup, not strict dedup. When unsure whether something is "already known," err on writing it. Dreaming will collapse duplicates.
+Dedup byte-equivalent restatements, not meaningful recurrence. Do not write a fragment that is a near-copy of one already in MEMORY.md or today's stream. But when the transcript shows the same durable preference, pattern, workaround, or commitment recurring in a NEW session or on a NEW day, write a concise recurrence fragment anchored to the new evidence — even if the underlying fact is already known. The dreaming subagent uses distinct-day recurrence to promote tentative facts to confident ones; refusing to write the second or third occurrence starves that signal. The bar is "did the recurrence happen in a meaningfully new context", not "is the fact already on disk".
 The \`append\` tool refuses byte-equivalent fragments within the same daily stream — if your fragment's topic+body is identical to one already in today's file (modulo whitespace), the tool will reject it and you must rewrite. Two reasonable rewrites: (1) skip the fragment entirely, (2) frame the new occurrence explicitly as "this is the second time today" with a different topic. Do not retry an identical fragment with a different \`entry=\` hoping it will land — content-equality, not marker-equality, is what's checked.
@@ -269,8 +280,16 @@ export function createMemoryLoggerSubagent(
     customTools: [findEntryTool, appendTool, advanceWatermarkTool],
     payloadSchema: memoryLoggerPayloadSchema,
     inFlightKey: (payload) => payload.agentDir,
+    // 768 KB read budget. Sized to cover one full buffer-trip cycle:
+    // ~30 KB MEMORY.md + ~50 KB today's stream + up to `DEFAULT_BUFFER_BYTES`
+    // (500 KB) of unread transcript chunk, with margin for re-reads. A
+    // smaller budget (the prior 256 KB) systematically exhausted on
+    // buffer-trip spawns once `bufferBytes` exceeded ~200 KB — the
+    // subagent would advance `bytesAtLastRun` to the full transcript size
+    // on completion, orphaning the unread tail until another full
+    // `bufferBytes` of growth arrived.
     toolResultBudget: {
-      maxTotalBytes: 256 * 1024,
+      maxTotalBytes: 768 * 1024,
       toolNames: ['read'],
       exhaustedMessage: memoryLoggerExhaustedMessage,
     },

package/src/bundled-plugins/memory/strength.ts ADDED

@@ -0,0 +1,127 @@
+// Strength signals for MEMORY.md topics, derived mechanically from citations.
+//
+// What "strength" means here is structural, not semantic — we measure how
+// many times and over how many distinct days a topic has been reinforced by
+// observation fragments. The reasoning lives in dreaming.ts's system prompt;
+// this file only produces the numbers the prompt will reference.
+//
+// Why distinct days matters more than raw citation count: five fragments on
+// one day == one debugging session that mentioned the same thing five times
+// (a transient burst). Five fragments across five days == a recurring fact
+// the user keeps coming back to (a stable signal). The promotion ladder in
+// the dreaming subagent's prompt is gated on distinct-days, not count, for
+// exactly this reason — see the "spacing effect" note in the PR description.
+//
+// All numbers here are deterministic. The same MEMORY.md parsed against the
+// same `today` always yields the same TopicStrength list. There is no LLM
+// involvement at this layer; the subagent receives these numbers as ground
+// truth and uses them to decide what to merge or demote.
+import { parseTopics, type Topic } from './topics'
+export type TopicStrength = {
+  heading: string
+  citationCount: number
+  distinctDays: number
+  // ISO date (yyyy-MM-dd) of the most recent citation, or null when the
+  // topic has zero citations. Null is distinct from "very old": a topic with
+  // no citations at all is a different shape than one whose last citation
+  // was a year ago, and the subagent should treat them differently (the
+  // former is a typo or a manual edit; the latter is a decayed-but-real
+  // topic).
+  lastReinforcedDate: string | null
+  // Whole-day delta from today to lastReinforcedDate. Null when
+  // lastReinforcedDate is null. Negative values are clamped to 0 (a citation
+  // dated in the future is treated as "today" — the only way this happens
+  // is a clock skew between memory-logger and the dreaming run, and the
+  // subagent shouldn't be punished for the runtime's confusion).
+  daysSinceLastReinforced: number | null
+}
+export function computeTopicStrengths(memoryText: string, today: string): TopicStrength[] {
+  const topics = parseTopics(memoryText)
+  return topics.map((topic) => computeOneTopicStrength(topic, today))
+}
+function computeOneTopicStrength(topic: Topic, today: string): TopicStrength {
+  const citationCount = topic.citations.length
+  const distinctDates = new Set(topic.citations.map((c) => c.date))
+  const distinctDays = distinctDates.size
+  const lastReinforcedDate = pickLatestDate([...distinctDates])
+  const daysSinceLastReinforced = lastReinforcedDate ? daysBetween(today, lastReinforcedDate) : null
+  return {
+    heading: topic.heading,
+    citationCount,
+    distinctDays,
+    lastReinforcedDate,
+    daysSinceLastReinforced,
+  }
+}
+function pickLatestDate(dates: readonly string[]): string | null {
+  if (dates.length === 0) return null
+  let latest = dates[0]!
+  for (let i = 1; i < dates.length; i++) {
+    const candidate = dates[i]!
+    if (candidate.localeCompare(latest) > 0) latest = candidate
+  }
+  return latest
+}
+// Whole-day delta in UTC between two yyyy-MM-dd strings. Date.UTC parses each
+// date as midnight UTC, so the difference is always an integer count of
+// 86_400_000ms windows regardless of timezone or DST. Returns 0 for invalid
+// inputs (treats the topic as "fresh" rather than throwing — defensive
+// because both inputs are produced by the runtime, but a corrupted MEMORY.md
+// citation date is the kind of thing we want to fail open on).
+function daysBetween(today: string, earlier: string): number {
+  const todayMs = parseIsoDateUtc(today)
+  const earlierMs = parseIsoDateUtc(earlier)
+  if (todayMs === null || earlierMs === null) return 0
+  const deltaDays = Math.floor((todayMs - earlierMs) / 86_400_000)
+  return deltaDays < 0 ? 0 : deltaDays
+}
+function parseIsoDateUtc(date: string): number | null {
+  const match = /^(\d{4})-(\d{2})-(\d{2})$/.exec(date)
+  if (!match) return null
+  const year = Number.parseInt(match[1]!, 10)
+  const month = Number.parseInt(match[2]!, 10)
+  const day = Number.parseInt(match[3]!, 10)
+  const ms = Date.UTC(year, month - 1, day)
+  return Number.isFinite(ms) ? ms : null
+}
+// Render the strength signals as a markdown table the dreaming subagent can
+// read at the top of its user prompt. Returns an empty string when the
+// topic list is empty so the caller can prepend it unconditionally.
+//
+// Column choices: heading first because it's the human-recognizable handle;
+// `cites` and `days` are short enough to align nicely; `last` carries the
+// date itself so the subagent can compare to today without re-doing the
+// arithmetic. Headings are truncated to keep the table readable when a
+// topic was given a long sentence-shaped heading — the citation count is
+// still accurate, only the display label is shortened.
+export function renderTopicStrengthsTable(strengths: readonly TopicStrength[]): string {
+  if (strengths.length === 0) return ''
+  const rows = strengths.map((s) => ({
+    heading: truncateHeading(s.heading || '(untitled)'),
+    cites: String(s.citationCount),
+    days: String(s.distinctDays),
+    last: s.lastReinforcedDate ?? '—',
+    ageDays: s.daysSinceLastReinforced === null ? '—' : String(s.daysSinceLastReinforced),
+  }))
+  const lines = ['| topic | cites | days | last reinforced | age (d) |', '| --- | ---: | ---: | --- | ---: |']
+  for (const row of rows) {
+    lines.push(`| ${row.heading} | ${row.cites} | ${row.days} | ${row.last} | ${row.ageDays} |`)
+  }
+  return lines.join('\n')
+}
+const HEADING_MAX_CHARS = 60
+function truncateHeading(heading: string): string {
+  const escaped = heading.replace(/\|/g, '\\|')
+  if (escaped.length <= HEADING_MAX_CHARS) return escaped
+  return `${escaped.slice(0, HEADING_MAX_CHARS - 1)}…`
+}

package/src/bundled-plugins/memory/topics.ts ADDED

@@ -0,0 +1,75 @@
+// Topic-aware parser for MEMORY.md. The dreaming subagent writes MEMORY.md as
+// a flat list of level-2 topic headings (`## <topic>`), each followed by a
+// conclusion paragraph and a `fragments:` bullet list of citations. The
+// citation parser in citations.ts is global (every citation in the file);
+// this module attributes citations to their owning topic so the dreaming
+// subagent can see per-topic strength signals (citation count, distinct
+// reinforcement days, recency) on its next run.
+//
+// Format assumptions match what dreaming.ts's DREAMING_SYSTEM_PROMPT teaches:
+//   - First line is `# Memory` (an h1). Treated as a non-topic header.
+//   - Topics are h2s (`## <topic>`). Anything below an h2 and above the next
+//     h2 (or EOF) belongs to that topic.
+//   - Citations in a topic's body — wherever they appear, bullet-list or
+//     inline prose — count toward that topic's strength.
+//   - Content above the first h2 (e.g. preamble after `# Memory`) is
+//     attributed to no topic and its citations are dropped from the per-topic
+//     aggregation. parseCitations from citations.ts still picks them up if
+//     anything downstream needs the global view.
+//
+// The parser is intentionally permissive: it never throws on malformed
+// MEMORY.md. A subagent that writes a header with no body or a topic with no
+// citations still parses cleanly with an empty `citations` array. The
+// strength layer then treats those topics as "weak" — which is the right
+// behavior, since they ARE weak.
+import { type Citation, parseCitations } from './citations'
+export type Topic = {
+  // The heading text after `## ` with surrounding whitespace trimmed. Empty
+  // string is allowed (`## ` with no title) so a malformed write still
+  // round-trips through the parser; the strength layer surfaces empty
+  // headings as themselves so the subagent can clean them up.
+  heading: string
+  // Citations attached to this topic, deduplicated per `(date, fragmentId)`.
+  // The dedupe happens inside parseCitations (which returns a Set of ids per
+  // date), so a fragment cited twice in one topic — once in inline prose,
+  // once in the fragments: block — counts only once toward strength signals.
+  // Order is by date insertion in parseCitations, not by appearance in the
+  // topic body; consumers that need appearance order should re-parse.
+  citations: Citation[]
+}
+const HEADING_LEVEL_2 = /^##\s+(.*)$/
+// Split MEMORY.md into ordered topics with their citations attached. Returns
+// an empty array when no `## ` heading appears.
+export function parseTopics(text: string): Topic[] {
+  const lines = text.split('\n')
+  const topics: Topic[] = []
+  let current: { heading: string; body: string[] } | undefined
+  const flush = (): void => {
+    if (!current) return
+    const bodyText = current.body.join('\n')
+    const grouped = parseCitations(bodyText)
+    const citations: Citation[] = []
+    for (const [date, ids] of grouped) {
+      for (const fragmentId of ids) citations.push({ date, fragmentId })
+    }
+    topics.push({ heading: current.heading, citations })
+  }
+  for (const line of lines) {
+    const match = HEADING_LEVEL_2.exec(line)
+    if (match) {
+      flush()
+      current = { heading: (match[1] ?? '').trim(), body: [] }
+      continue
+    }
+    if (current) current.body.push(line)
+  }
+  flush()
+  return topics
+}

package/src/bundled-plugins/security/index.ts CHANGED

@@ -1,54 +1,88 @@
 import { definePlugin } from '@/plugin'
-import { SECURITY_PERMISSIONS } from './permissions'
-import type { SecurityPermission } from './permissions'
-import { checkGitExfilGuard, checkGitRemoteTaintedGuard, recordGitRemoteTaintIfAny } from './policies/git-exfil'
-import { checkOutboundSecretGuard } from './policies/outbound-secret-scan'
+import { HIGH_TIER_PER_GUARD_PERMISSIONS, SECURITY_PERMISSIONS, SEVERITY_PERMISSION } from './permissions'
+import type { SecurityPermission, SecuritySeverity } from './permissions'
+import {
+  GUARD_GIT_EXFIL_SEVERITY,
+  GUARD_GIT_REMOTE_TAINTED_SEVERITY,
+  checkGitExfilGuard,
+  checkGitRemoteTaintedGuard,
+  recordGitRemoteTaintIfAny,
+} from './policies/git-exfil'
+import { GUARD_OUTBOUND_SECRET_SEVERITY, checkOutboundSecretGuard } from './policies/outbound-secret-scan'
 import { applyPromptInjectionDefense } from './policies/prompt-injection'
 import { clearSessionTaints } from './policies/remote-taint-state'
-import { checkSecretExfilBashGuard } from './policies/secret-exfil-bash'
-import { checkSecretExfilReadGuard } from './policies/secret-exfil-read'
-import { checkSessionSearchSecretsGuard } from './policies/session-search-secrets'
-import { checkSsrfGuard } from './policies/ssrf'
-import { checkSystemPromptLeakGuard } from './policies/system-prompt-leak'
+import { GUARD_SECRET_EXFIL_BASH_SEVERITY, checkSecretExfilBashGuard } from './policies/secret-exfil-bash'
+import { GUARD_SECRET_EXFIL_READ_SEVERITY, checkSecretExfilReadGuard } from './policies/secret-exfil-read'
+import {
+  GUARD_SESSION_SEARCH_SECRETS_SEVERITY,
+  checkSessionSearchSecretsGuard,
+} from './policies/session-search-secrets'
+import { GUARD_SSRF_SEVERITY, checkSsrfGuard } from './policies/ssrf'
+import { GUARD_SYSTEM_PROMPT_LEAK_SEVERITY, checkSystemPromptLeakGuard } from './policies/system-prompt-leak'
 import type { SecurityBlock } from './policy'
-export { SECURITY_PERMISSIONS, type SecurityPermission } from './permissions'
+export {
+  HIGH_TIER_PER_GUARD_PERMISSIONS,
+  SECURITY_PERMISSIONS,
+  type SecurityPermission,
+  type SecuritySeverity,
+  SEVERITY_PERMISSION,
+} from './permissions'
-// Maps each security bypass permission to a one-line hint about which
-// built-in roles carry it. The `satisfies` clause is load-bearing: it
-// forces exhaustive coverage of `SecurityPermission` at compile time, so
-// adding a new `SECURITY_PERMISSIONS` entry without a hint here is a type
-// error rather than a silent fallback to the inaccurate default. `owner`
-// always carries every `security.bypass.*` via the wildcard expansion in
-// builtins.ts, so the hint must mention owner even for permissions where
-// it's the only carrier.
+// Per-guard permission strings only — tier strings are deliberately
+// absent. Block messages name the per-guard permission AND the tier
+// permission separately (see withPermissionHint); the per-guard hint
+// table answers "which roles carry THIS specific bypass by default."
+type PerGuardSecurityPermission = Exclude<
+  SecurityPermission,
+  | typeof SECURITY_PERMISSIONS.bypassLow
+  | typeof SECURITY_PERMISSIONS.bypassMedium
+  | typeof SECURITY_PERMISSIONS.bypassHigh
+>
+// The satisfies clause forces exhaustive coverage of per-guard
+// permissions at compile time — adding a new SECURITY_PERMISSIONS entry
+// (other than a new tier string) without a hint here is a type error,
+// not a silent fallback.
 const BYPASS_ROLE_HINT = {
-  [SECURITY_PERMISSIONS.bypassSecretExfilBash]: 'owner and trusted have it by default',
-  [SECURITY_PERMISSIONS.bypassGitExfil]: 'owner and trusted have it by default',
+  [SECURITY_PERMISSIONS.bypassSecretExfilBash]:
+    'only owner has it by default (medium tier; trusted does NOT carry this — operators can grant `security.bypass.secretExfilBash` explicitly in roles.trusted.permissions[] if they want the pre-PR ergonomics back)',
+  [SECURITY_PERMISSIONS.bypassGitExfil]:
+    'NOBODY has it by default — high tier requires per-call ack from every role, including owner. Operators can grant `security.bypass.gitExfil` explicitly in roles.<role>.permissions[] to re-open the auto-bypass for one role.',
   [SECURITY_PERMISSIONS.bypassGitRemoteTainted]:
-    'only owner has it by default (trusted intentionally does not, so the two-step taint defense still fires)',
-  [SECURITY_PERMISSIONS.bypassSecretExfilRead]: 'only owner has it by default',
-  [SECURITY_PERMISSIONS.bypassSsrf]: 'only owner has it by default',
-  [SECURITY_PERMISSIONS.bypassSessionSearchSecrets]: 'only owner has it by default',
-  [SECURITY_PERMISSIONS.bypassSystemPromptLeak]: 'only owner has it by default',
-  [SECURITY_PERMISSIONS.bypassOutboundSecret]: 'only owner has it by default',
-} as const satisfies Record<SecurityPermission, string>
+    'NOBODY has it by default — high tier requires per-call ack from every role. Even an operator-granted `security.bypass.gitExfil` does NOT bypass this second-step taint check (the recorder still fires for the first step, so the push is still gated).',
+  [SECURITY_PERMISSIONS.bypassSecretExfilRead]: 'only owner has it by default (medium tier)',
+  [SECURITY_PERMISSIONS.bypassSsrf]: 'only owner has it by default (medium tier)',
+  [SECURITY_PERMISSIONS.bypassSessionSearchSecrets]: 'only owner has it by default (medium tier)',
+  [SECURITY_PERMISSIONS.bypassSystemPromptLeak]:
+    'NOBODY has it by default — high tier requires per-call ack from every role, including owner.',
+  [SECURITY_PERMISSIONS.bypassOutboundSecret]:
+    'NOBODY has it by default — high tier requires per-call ack from every role, including owner. The audience-leak rule: even owner posting to a public channel must not silently include credentials.',
+} as const satisfies Record<PerGuardSecurityPermission, string>
 function withPermissionHint(
   result: SecurityBlock | undefined,
-  permission: SecurityPermission,
+  permission: PerGuardSecurityPermission,
+  severity: SecuritySeverity,
 ): SecurityBlock | undefined {
   if (!result) return result
-  const hint = BYPASS_ROLE_HINT[permission]
+  const perGuardHint = BYPASS_ROLE_HINT[permission]
+  const tierPerm = SEVERITY_PERMISSION[severity]
   return {
     block: true,
-    reason: `${result.reason} Or run as a role carrying \`${permission}\` (${hint}); see the \`typeclaw-permissions\` skill.`,
+    reason: `${result.reason} Or run as a role carrying \`${permission}\` (${perGuardHint}) or the tier permission \`${tierPerm}\`; see the \`typeclaw-permissions\` skill.`,
   }
 }
 export default definePlugin({
   permissions: Object.values(SECURITY_PERMISSIONS),
+  // High-tier per-guard strings AND the `security.bypass.high` tier
+  // string itself are excluded from the owner-wildcard expansion. Owner
+  // still has the wildcard sentinel (so future low/medium plugin-
+  // contributed bypasses keep auto-flowing to owner), but audience-leak
+  // guards require either per-call ack or an explicit operator grant.
+  ownerWildcardExclusions: [...HIGH_TIER_PER_GUARD_PERMISSIONS, SECURITY_PERMISSIONS.bypassHigh],
   plugin: async (ctx) => ({
     hooks: {
       'session.prompt': async (event) => {
@@ -56,68 +90,78 @@ export default definePlugin({
       },
       'tool.before': async (event) => {
         const can = (perm: string) => ctx.permissions.has(event.origin, perm)
+        const canBypass = (severity: SecuritySeverity, perGuardPerm: string): boolean =>
+          can(SEVERITY_PERMISSION[severity]) || can(perGuardPerm)
         // Taint-recording runs FIRST, independently of the gitExfil guard.
         // The gitRemoteTainted defense depends on it. We pass through
-        // `permittedBypass` for actors who can skip gitExfil via permission
-        // so the recorder still fires for them (an acked or
-        // permission-bypassed command will actually run, so its remote
-        // change must be remembered).
+        // `permittedBypass` for actors who can skip gitExfil (via either the
+        // per-guard permission or the guard's severity-tier permission) so
+        // the recorder still fires for them — an acked or permission-bypassed
+        // command will actually run, so its remote change must be remembered.
         recordGitRemoteTaintIfAny({
           tool: event.tool,
           args: event.args,
           sessionId: event.sessionId,
-          permittedBypass: can(SECURITY_PERMISSIONS.bypassGitExfil),
+          permittedBypass: canBypass(GUARD_GIT_EXFIL_SEVERITY, SECURITY_PERMISSIONS.bypassGitExfil),
         })
         const checks: (SecurityBlock | undefined)[] = [
-          can(SECURITY_PERMISSIONS.bypassGitRemoteTainted)
+          canBypass(GUARD_GIT_REMOTE_TAINTED_SEVERITY, SECURITY_PERMISSIONS.bypassGitRemoteTainted)
             ? undefined
             : withPermissionHint(
                 checkGitRemoteTaintedGuard({ tool: event.tool, args: event.args, sessionId: event.sessionId }),
                 SECURITY_PERMISSIONS.bypassGitRemoteTainted,
+                GUARD_GIT_REMOTE_TAINTED_SEVERITY,
               ),
-          can(SECURITY_PERMISSIONS.bypassSecretExfilBash)
+          canBypass(GUARD_SECRET_EXFIL_BASH_SEVERITY, SECURITY_PERMISSIONS.bypassSecretExfilBash)
             ? undefined
             : withPermissionHint(
                 checkSecretExfilBashGuard({ tool: event.tool, args: event.args }),
                 SECURITY_PERMISSIONS.bypassSecretExfilBash,
+                GUARD_SECRET_EXFIL_BASH_SEVERITY,
               ),
-          can(SECURITY_PERMISSIONS.bypassGitExfil)
+          canBypass(GUARD_GIT_EXFIL_SEVERITY, SECURITY_PERMISSIONS.bypassGitExfil)
             ? undefined
             : withPermissionHint(
                 checkGitExfilGuard({ tool: event.tool, args: event.args, sessionId: event.sessionId }),
                 SECURITY_PERMISSIONS.bypassGitExfil,
+                GUARD_GIT_EXFIL_SEVERITY,
               ),
-          can(SECURITY_PERMISSIONS.bypassSecretExfilRead)
+          canBypass(GUARD_SECRET_EXFIL_READ_SEVERITY, SECURITY_PERMISSIONS.bypassSecretExfilRead)
             ? undefined
             : withPermissionHint(
                 checkSecretExfilReadGuard({ tool: event.tool, args: event.args }),
                 SECURITY_PERMISSIONS.bypassSecretExfilRead,
+                GUARD_SECRET_EXFIL_READ_SEVERITY,
               ),
-          can(SECURITY_PERMISSIONS.bypassSsrf)
+          canBypass(GUARD_SSRF_SEVERITY, SECURITY_PERMISSIONS.bypassSsrf)
             ? undefined
             : withPermissionHint(
                 checkSsrfGuard({ tool: event.tool, args: event.args }),
                 SECURITY_PERMISSIONS.bypassSsrf,
+                GUARD_SSRF_SEVERITY,
               ),
-          can(SECURITY_PERMISSIONS.bypassSessionSearchSecrets)
+          canBypass(GUARD_SESSION_SEARCH_SECRETS_SEVERITY, SECURITY_PERMISSIONS.bypassSessionSearchSecrets)
             ? undefined
             : withPermissionHint(
                 checkSessionSearchSecretsGuard({ tool: event.tool, args: event.args }),
                 SECURITY_PERMISSIONS.bypassSessionSearchSecrets,
+                GUARD_SESSION_SEARCH_SECRETS_SEVERITY,
               ),
-          can(SECURITY_PERMISSIONS.bypassSystemPromptLeak)
+          canBypass(GUARD_SYSTEM_PROMPT_LEAK_SEVERITY, SECURITY_PERMISSIONS.bypassSystemPromptLeak)
             ? undefined
             : withPermissionHint(
                 checkSystemPromptLeakGuard({ tool: event.tool, args: event.args }),
                 SECURITY_PERMISSIONS.bypassSystemPromptLeak,
+                GUARD_SYSTEM_PROMPT_LEAK_SEVERITY,
               ),
-          can(SECURITY_PERMISSIONS.bypassOutboundSecret)
+          canBypass(GUARD_OUTBOUND_SECRET_SEVERITY, SECURITY_PERMISSIONS.bypassOutboundSecret)
             ? undefined
             : withPermissionHint(
                 checkOutboundSecretGuard({ tool: event.tool, args: event.args }),
                 SECURITY_PERMISSIONS.bypassOutboundSecret,
+                GUARD_OUTBOUND_SECRET_SEVERITY,
               ),
         ]
         for (const result of checks) {

package/src/bundled-plugins/security/permissions.ts CHANGED Viewed

@@ -1,3 +1,5 @@
+export type SecuritySeverity = 'low' | 'medium' | 'high'
 export const SECURITY_PERMISSIONS = {
   bypassSecretExfilBash: 'security.bypass.secretExfilBash',
   bypassGitExfil: 'security.bypass.gitExfil',
@@ -7,6 +9,40 @@ export const SECURITY_PERMISSIONS = {
   bypassSystemPromptLeak: 'security.bypass.systemPromptLeak',
   bypassOutboundSecret: 'security.bypass.outboundSecret',
   bypassGitRemoteTainted: 'security.bypass.gitRemoteTainted',
+  // Severity-tier bypasses. Tiers classify guards on a two-axis policy:
+  //   high   — bypass sends data to a third-party audience outside the
+  //            operator's control loop (channel readers, remote git host).
+  //            NO role auto-bypasses; ack required from every role.
+  //   medium — bypass produces silent attacker-favorable state in model
+  //            context (env dump, .env contents, IAM creds, secret-shaped
+  //            session-search hits). Owner bypasses, trusted does not.
+  //   low    — bypass produces a noisy, immediately-recoverable side
+  //            effect. Owner and trusted bypass. No inhabitants today.
+  // Per-guard permissions above continue to work as explicit grants —
+  // `tool.before` accepts EITHER the tier OR the per-guard string (OR
+  // check). This lets operators knowingly re-open a single high-tier
+  // guard for one role without widening the whole tier.
+  bypassLow: 'security.bypass.low',
+  bypassMedium: 'security.bypass.medium',
+  bypassHigh: 'security.bypass.high',
 } as const
 export type SecurityPermission = (typeof SECURITY_PERMISSIONS)[keyof typeof SECURITY_PERMISSIONS]
+export const SEVERITY_PERMISSION: Record<SecuritySeverity, string> = {
+  low: SECURITY_PERMISSIONS.bypassLow,
+  medium: SECURITY_PERMISSIONS.bypassMedium,
+  high: SECURITY_PERMISSIONS.bypassHigh,
+}
+// Per-guard permission strings whose guards are classified `high`. The
+// owner-wildcard expander excludes these so the wildcard sentinel does
+// not auto-grant high-tier bypass to owner. Operators who explicitly
+// want to re-open a high-tier bypass for owner (or any role) can still
+// add the per-guard string to that role's `permissions[]` by hand.
+export const HIGH_TIER_PER_GUARD_PERMISSIONS: readonly string[] = [
+  SECURITY_PERMISSIONS.bypassGitExfil,
+  SECURITY_PERMISSIONS.bypassGitRemoteTainted,
+  SECURITY_PERMISSIONS.bypassOutboundSecret,
+  SECURITY_PERMISSIONS.bypassSystemPromptLeak,
+]