npm - opencode-swarm - Versions diffs - 6.14.11 → 6.15.0 - Mend

opencode-swarm 6.14.11 → 6.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/README.md +3 -0
package/dist/commands/analyze.d.ts +5 -0
package/dist/commands/clarify.d.ts +5 -0
package/dist/commands/index.d.ts +3 -0
package/dist/commands/specify.d.ts +5 -0
package/dist/config/schema.d.ts +14 -0
package/dist/hooks/context-budget.d.ts +3 -0
package/dist/hooks/index.d.ts +2 -0
package/dist/hooks/message-priority.d.ts +105 -0
package/dist/hooks/model-limits.d.ts +96 -0
package/dist/index.js +734 -14
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -546,6 +546,9 @@ When truncation is active, a footer is appended:
 | `/swarm preflight` | Run phase preflight checks |
 | `/swarm config doctor [--fix]` | Config validation with optional auto-fix |
 | `/swarm sync-plan` | Force plan.md regeneration from plan.json |
+| `/swarm specify [description]` | Generate or import a feature specification |
+| `/swarm clarify [topic]` | Clarify and refine an existing feature specification |
+| `/swarm analyze` | Analyze spec.md vs plan.md for requirement coverage gaps |
 </details>

package/dist/commands/analyze.d.ts ADDED Viewed

@@ -0,0 +1,5 @@
+/**
+ * Handle /swarm analyze command.
+ * Returns a prompt that triggers the critic to enter MODE: ANALYZE.
+ */
+export declare function handleAnalyzeCommand(_directory: string, args: string[]): Promise<string>;

package/dist/commands/clarify.d.ts ADDED Viewed

@@ -0,0 +1,5 @@
+/**
+ * Handle /swarm clarify command.
+ * Returns a prompt that triggers the architect to enter MODE: CLARIFY-SPEC.
+ */
+export declare function handleClarifyCommand(_directory: string, args: string[]): Promise<string>;

package/dist/commands/index.d.ts CHANGED Viewed

@@ -1,7 +1,9 @@
 import type { AgentDefinition } from '../agents';
 export { handleAgentsCommand } from './agents';
+export { handleAnalyzeCommand } from './analyze';
 export { handleArchiveCommand } from './archive';
 export { handleBenchmarkCommand } from './benchmark';
+export { handleClarifyCommand } from './clarify';
 export { handleConfigCommand } from './config';
 export { handleDiagnoseCommand } from './diagnose';
 export { handleDoctorCommand } from './doctor';
@@ -12,6 +14,7 @@ export { handlePlanCommand } from './plan';
 export { handlePreflightCommand } from './preflight';
 export { handleResetCommand } from './reset';
 export { handleRetrieveCommand } from './retrieve';
+export { handleSpecifyCommand } from './specify';
 export { handleStatusCommand } from './status';
 export { handleSyncPlanCommand } from './sync-plan';
 /**

package/dist/commands/specify.d.ts ADDED Viewed

@@ -0,0 +1,5 @@
+/**
+ * Handle /swarm specify command.
+ * Returns a prompt that triggers the architect to enter MODE: SPECIFY.
+ */
+export declare function handleSpecifyCommand(_directory: string, args: string[]): Promise<string>;

package/dist/config/schema.d.ts CHANGED Viewed

@@ -99,6 +99,7 @@ export declare const ContextBudgetConfigSchema: z.ZodObject<{
     critical_threshold: z.ZodDefault<z.ZodNumber>;
     model_limits: z.ZodDefault<z.ZodRecord<z.ZodString, z.ZodNumber>>;
     max_injection_tokens: z.ZodDefault<z.ZodNumber>;
+    tracked_agents: z.ZodDefault<z.ZodArray<z.ZodString>>;
     scoring: z.ZodOptional<z.ZodObject<{
         enabled: z.ZodDefault<z.ZodBoolean>;
         max_candidates: z.ZodDefault<z.ZodNumber>;
@@ -126,6 +127,12 @@ export declare const ContextBudgetConfigSchema: z.ZodObject<{
             json: z.ZodDefault<z.ZodNumber>;
         }, z.core.$strip>>;
     }, z.core.$strip>>;
+    enforce: z.ZodDefault<z.ZodBoolean>;
+    prune_target: z.ZodDefault<z.ZodNumber>;
+    preserve_last_n_turns: z.ZodDefault<z.ZodNumber>;
+    recent_window: z.ZodDefault<z.ZodNumber>;
+    enforce_on_agent_switch: z.ZodDefault<z.ZodBoolean>;
+    tool_output_mask_threshold: z.ZodDefault<z.ZodNumber>;
 }, z.core.$strip>;
 export type ContextBudgetConfig = z.infer<typeof ContextBudgetConfigSchema>;
 export declare const EvidenceConfigSchema: z.ZodObject<{
@@ -451,6 +458,7 @@ export declare const PluginConfigSchema: z.ZodObject<{
         critical_threshold: z.ZodDefault<z.ZodNumber>;
         model_limits: z.ZodDefault<z.ZodRecord<z.ZodString, z.ZodNumber>>;
         max_injection_tokens: z.ZodDefault<z.ZodNumber>;
+        tracked_agents: z.ZodDefault<z.ZodArray<z.ZodString>>;
         scoring: z.ZodOptional<z.ZodObject<{
             enabled: z.ZodDefault<z.ZodBoolean>;
             max_candidates: z.ZodDefault<z.ZodNumber>;
@@ -478,6 +486,12 @@ export declare const PluginConfigSchema: z.ZodObject<{
                 json: z.ZodDefault<z.ZodNumber>;
             }, z.core.$strip>>;
         }, z.core.$strip>>;
+        enforce: z.ZodDefault<z.ZodBoolean>;
+        prune_target: z.ZodDefault<z.ZodNumber>;
+        preserve_last_n_turns: z.ZodDefault<z.ZodNumber>;
+        recent_window: z.ZodDefault<z.ZodNumber>;
+        enforce_on_agent_switch: z.ZodDefault<z.ZodBoolean>;
+        tool_output_mask_threshold: z.ZodDefault<z.ZodNumber>;
     }, z.core.$strip>>;
     guardrails: z.ZodOptional<z.ZodObject<{
         enabled: z.ZodDefault<z.ZodBoolean>;

package/dist/hooks/context-budget.d.ts CHANGED Viewed

@@ -10,6 +10,9 @@ interface MessageInfo {
     role: string;
     agent?: string;
     sessionID?: string;
+    modelID?: string;
+    providerID?: string;
+    [key: string]: unknown;
 }
 interface MessagePart {
     type: string;

package/dist/hooks/index.d.ts CHANGED Viewed

@@ -5,7 +5,9 @@ export { createDelegationGateHook } from './delegation-gate';
 export { createDelegationTrackerHook } from './delegation-tracker';
 export { extractCurrentPhase, extractCurrentPhaseFromPlan, extractCurrentTask, extractCurrentTaskFromPlan, extractDecisions, extractIncompleteTasks, extractIncompleteTasksFromPlan, extractPatterns, } from './extractors';
 export { createGuardrailsHooks } from './guardrails';
+export { classifyMessage, classifyMessages, containsPlanContent, isDuplicateToolRead, isStaleError, isToolResult, MessagePriority, type MessagePriorityType, type MessageWithParts, } from './message-priority';
 export { consolidateSystemMessages } from './messages-transform';
+export { extractModelInfo, NATIVE_MODEL_LIMITS, PROVIDER_CAPS, resolveModelLimit, } from './model-limits';
 export { createPhaseMonitorHook } from './phase-monitor';
 export { createPipelineTrackerHook } from './pipeline-tracker';
 export { createSystemEnhancerHook } from './system-enhancer';

package/dist/hooks/message-priority.d.ts ADDED Viewed

@@ -0,0 +1,105 @@
+/**
+ * Message Priority Classifier Hook
+ *
+ * Provides zero-cost message priority classification to enable intelligent
+ * context pruning. Messages are tagged with priority tiers (0-4) so that
+ * low-priority messages are removed first during context budget pressure.
+ *
+ * Priority tiers:
+ * - CRITICAL (0): System prompt, plan state, active instructions
+ * - HIGH (1): User messages, current task context, tool definitions
+ * - MEDIUM (2): Recent assistant responses, recent tool results
+ * - LOW (3): Old assistant responses, old tool results, confirmations
+ * - DISPOSABLE (4): Duplicate reads, superseded writes, stale errors
+ */
+/**
+ * Message priority tiers for context pruning decisions.
+ * Lower values = higher priority (kept longer during pruning).
+ */
+export declare const MessagePriority: {
+    /** System prompt, plan state, active instructions - never prune */
+    readonly CRITICAL: 0;
+    /** User messages, current task context, tool definitions */
+    readonly HIGH: 1;
+    /** Recent assistant responses, recent tool results (within recentWindowSize) */
+    readonly MEDIUM: 2;
+    /** Old assistant responses, old tool results */
+    readonly LOW: 3;
+    /** Duplicate reads, superseded writes, stale errors - prune first */
+    readonly DISPOSABLE: 4;
+};
+export type MessagePriorityType = (typeof MessagePriority)[keyof typeof MessagePriority];
+/** Message structure matching the format from context-budget.ts */
+interface MessageInfo {
+    role?: string;
+    agent?: string;
+    sessionID?: string;
+    modelID?: string;
+    providerID?: string;
+    toolName?: string;
+    toolArgs?: unknown;
+    [key: string]: unknown;
+}
+interface MessagePart {
+    type?: string;
+    text?: string;
+    [key: string]: unknown;
+}
+export interface MessageWithParts {
+    info?: MessageInfo;
+    parts?: MessagePart[];
+}
+/**
+ * Checks if text contains .swarm/plan or .swarm/context references
+ * indicating swarm state that should be preserved.
+ *
+ * @param text - The text content to check
+ * @returns true if the text contains plan/context references
+ */
+export declare function containsPlanContent(text: string): boolean;
+/**
+ * Checks if a message is a tool result (assistant message with tool call).
+ *
+ * @param message - The message to check
+ * @returns true if the message appears to be a tool result
+ */
+export declare function isToolResult(message: MessageWithParts): boolean;
+/**
+ * Checks if two consecutive tool read calls are duplicates
+ * (same tool with same first argument).
+ *
+ * @param current - The current message
+ * @param previous - The previous message
+ * @returns true if this is a duplicate tool read
+ */
+export declare function isDuplicateToolRead(current: MessageWithParts, previous: MessageWithParts): boolean;
+/**
+ * Checks if a message contains an error pattern and is stale
+ * (more than the specified number of turns old).
+ *
+ * @param text - The message text to check
+ * @param turnsAgo - How many turns ago the message was sent
+ * @returns true if the message is a stale error
+ */
+export declare function isStaleError(text: string, turnsAgo: number): boolean;
+/**
+ * Classifies a message by priority tier for intelligent pruning.
+ *
+ * @param message - The message to classify
+ * @param index - Position in messages array (0-indexed)
+ * @param totalMessages - Total number of messages
+ * @param recentWindowSize - Number of recent messages to consider MEDIUM (default 10)
+ * @returns Priority tier (0=CRITICAL, 1=HIGH, 2=MEDIUM, 3=LOW, 4=DISPOSABLE)
+ */
+export declare function classifyMessage(message: MessageWithParts, index: number, totalMessages: number, recentWindowSize?: number): MessagePriorityType;
+/**
+ * Classifies a batch of messages with duplicate detection.
+ * This function should be called in order (oldest to newest) to properly
+ * detect consecutive duplicate tool reads.
+ *
+ * @param messages - Array of messages to classify
+ * @param recentWindowSize - Number of recent messages to consider MEDIUM (default 10)
+ * @returns Array of priority classifications matching message order
+ */
+export declare function classifyMessages(messages: MessageWithParts[], recentWindowSize?: number): MessagePriorityType[];
+export {};

package/dist/hooks/model-limits.d.ts ADDED Viewed

@@ -0,0 +1,96 @@
+/**
+ * Provider-Aware Model Limit Resolution
+ *
+ * Resolves context window limits based on the model and provider platform.
+ * The same model has different context limits depending on the provider:
+ * - Claude Sonnet 4.6: 200k native, 128k on Copilot
+ * - GPT-5: 400k native, 128k on Copilot
+ * - Copilot caps ALL models at 128k prompt, regardless of native limit
+ */
+/**
+ * Native model context limits (in tokens) when used on their native platform.
+ */
+export declare const NATIVE_MODEL_LIMITS: Record<string, number>;
+/**
+ * Provider-specific context caps that override native limits.
+ * These are typically lower than native limits (e.g., Copilot caps at 128k).
+ */
+export declare const PROVIDER_CAPS: Record<string, number>;
+/**
+ * Message structure from experimental.chat.messages.transform hook.
+ */
+interface MessageInfo {
+    role: string;
+    agent?: string;
+    sessionID?: string;
+    modelID?: string;
+    providerID?: string;
+    [key: string]: unknown;
+}
+interface MessagePart {
+    type: string;
+    text?: string;
+    [key: string]: unknown;
+}
+interface MessageWithParts {
+    info: MessageInfo;
+    parts: MessagePart[];
+}
+/**
+ * Extracts modelID and providerID from the most recent assistant message.
+ *
+ * @param messages - Array of messages from experimental.chat.messages.transform hook
+ * @returns Object containing modelID and/or providerID if found
+ *
+ * @example
+ * const info = extractModelInfo(messages);
+ * // Returns: { modelID: 'claude-sonnet-4-6', providerID: 'anthropic' }
+ * // Or: {} if no assistant messages or fields not found
+ */
+export declare function extractModelInfo(messages: MessageWithParts[]): {
+    modelID?: string;
+    providerID?: string;
+};
+/**
+ * Resolves the context limit for a given model/provider combination.
+ *
+ * Resolution order (first match wins):
+ * 1. Check configOverrides["provider/model"] (e.g., "copilot/claude-sonnet-4-6": 200000)
+ * 2. Check configOverrides[modelID] (e.g., "claude-sonnet-4-6": 200000)
+ * 3. Check PROVIDER_CAPS[providerID] (e.g., copilot → 128000)
+ * 4. Check NATIVE_MODEL_LIMITS with prefix matching (e.g., "claude-sonnet-4" matches "claude-sonnet-4-6-20260301")
+ * 5. Check configOverrides.default
+ * 6. Fall back to 128000
+ *
+ * @param modelID - The model identifier (e.g., "claude-sonnet-4-6", "gpt-5")
+ * @param providerID - The provider identifier (e.g., "copilot", "anthropic")
+ * @param configOverrides - User configuration overrides
+ * @returns The resolved context limit in tokens
+ *
+ * @example
+ * // Provider cap (copilot)
+ * resolveModelLimit("claude-sonnet-4-6", "copilot", {})
+ * // Returns: 128000
+ *
+ * @example
+ * // Native limit (anthropic)
+ * resolveModelLimit("claude-sonnet-4-6", "anthropic", {})
+ * // Returns: 200000
+ *
+ * @example
+ * // Override beats cap
+ * resolveModelLimit("gpt-5", "copilot", { "copilot/gpt-5": 200000 })
+ * // Returns: 200000
+ *
+ * @example
+ * // Prefix match for model variants
+ * resolveModelLimit("claude-sonnet-4-6-20260301", "anthropic", {})
+ * // Returns: 200000
+ *
+ * @example
+ * // Full fallback
+ * resolveModelLimit(undefined, undefined, {})
+ * // Returns: 128000
+ */
+export declare function resolveModelLimit(modelID?: string, providerID?: string, configOverrides?: Record<string, number>): number;
+export {};

package/dist/index.js CHANGED Viewed

@@ -14341,6 +14341,12 @@ function validateSwarmPath(directory, filename) {
   if (/\.\.[/\\]/.test(filename)) {
     throw new Error("Invalid filename: path traversal detected");
   }
+  if (/^[A-Za-z]:[\\/]/.test(filename)) {
+    throw new Error("Invalid filename: path escapes .swarm directory");
+  }
+  if (filename.startsWith("/")) {
+    throw new Error("Invalid filename: path escapes .swarm directory");
+  }
   const baseDir = path2.normalize(path2.resolve(directory, ".swarm"));
   const resolved = path2.normalize(path2.resolve(baseDir, filename));
   if (process.platform === "win32") {
@@ -31794,7 +31800,14 @@ var ContextBudgetConfigSchema = exports_external.object({
   critical_threshold: exports_external.number().min(0).max(1).default(0.9),
   model_limits: exports_external.record(exports_external.string(), exports_external.number().min(1000)).default({ default: 128000 }),
   max_injection_tokens: exports_external.number().min(100).max(50000).default(4000),
-  scoring: ScoringConfigSchema.optional()
+  tracked_agents: exports_external.array(exports_external.string()).default(["architect"]),
+  scoring: ScoringConfigSchema.optional(),
+  enforce: exports_external.boolean().default(true),
+  prune_target: exports_external.number().min(0).max(1).default(0.7),
+  preserve_last_n_turns: exports_external.number().min(0).max(100).default(4),
+  recent_window: exports_external.number().min(1).max(100).default(10),
+  enforce_on_agent_switch: exports_external.boolean().default(true),
+  tool_output_mask_threshold: exports_external.number().min(100).max(1e5).default(2000)
 });
 var EvidenceConfigSchema = exports_external.object({
   enabled: exports_external.boolean().default(true),
@@ -32530,6 +32543,108 @@ OUTPUT: Code scaffold for src/pages/Settings.tsx with component tree, typed prop
 ## WORKFLOW
+### MODE DETECTION (Priority Order)
+Evaluate the user's request and context in this exact order \u2014 the FIRST matching rule wins:
+1. **RESUME** \u2014 \`.swarm/plan.md\` exists and contains incomplete (unchecked) tasks \u2192 Resume at current task.
+2. **SPECIFY** \u2014 User says "specify", "requirements", "write a spec", "define feature", or invokes \`/swarm specify\`; OR no \`.swarm/spec.md\` exists AND no \`.swarm/plan.md\` exists \u2192 Enter MODE: SPECIFY.
+3. **CLARIFY-SPEC** \u2014 \`.swarm/spec.md\` exists AND contains \`[NEEDS CLARIFICATION]\` markers; OR user explicitly asks to clarify or refine the spec; OR \`/swarm clarify\` is invoked \u2192 Enter MODE: CLARIFY-SPEC.
+4. **CLARIFY** \u2014 Request is ambiguous and cannot proceed without user input \u2192 Ask up to 3 questions.
+5. **DISCOVER** \u2014 Pre-planning codebase scan is needed \u2192 Delegate to \`{{AGENT_PREFIX}}explorer\`.
+6. All other modes (CONSULT, PLAN, CRITIC-GATE, EXECUTE, PHASE-WRAP) \u2014 Follow their respective sections below.
+PRIORITY RULES:
+- RESUME always wins \u2014 a user with an in-progress plan never accidentally triggers SPECIFY.
+- SPECIFY fires before DISCOVER when no spec exists, giving the architect a chance to capture requirements before generating code.
+- CLARIFY-SPEC fires between SPECIFY and CLARIFY; it only activates when no incomplete (unchecked) tasks exist in plan.md \u2014 RESUME takes priority if they do.
+- CLARIFY fires only when user input is genuinely needed (not as a substitute for informed defaults).
+### MODE: SPECIFY
+Activates when: user asks to "specify", "define requirements", "write a spec", or "define a feature"; OR \`/swarm specify\` is invoked; OR no \`.swarm/spec.md\` exists and no \`.swarm/plan.md\` exists.
+1. Check if \`.swarm/spec.md\` already exists.
+   - If YES: ask the user "A spec already exists. Do you want to overwrite it or refine it?"
+     - Overwrite \u2192 proceed to generation (step 2)
+     - Refine \u2192 transition to MODE: CLARIFY-SPEC
+   - If NO: proceed to generation (step 2)
+2. Delegate to \`{{AGENT_PREFIX}}explorer\` to scan the codebase for relevant context (existing patterns, related code, affected areas).
+3. Delegate to \`{{AGENT_PREFIX}}sme\` for domain research on the feature area to surface known constraints, best practices, and integration concerns.
+4. Generate \`.swarm/spec.md\` capturing:
+   - Feature description: WHAT users need and WHY \u2014 never HOW to implement
+   - User scenarios with acceptance criteria (Given/When/Then format)
+   - Functional requirements numbered FR-001, FR-002\u2026 using MUST/SHOULD language
+   - Success criteria numbered SC-001, SC-002\u2026 \u2014 measurable and technology-agnostic
+   - Key entities if data is involved (no schema or field definitions \u2014 entity names only)
+   - Edge cases and known failure modes
+   - \`[NEEDS CLARIFICATION]\` markers (max 3) for items where uncertainty could change scope, security, or core behavior; prefer informed defaults over asking
+5. Write the spec to \`.swarm/spec.md\`.
+6. Report a summary to the user (requirement count, scenario count, clarification markers) and suggest the next step: \`CLARIFY-SPEC\` (if markers exist) or \`PLAN\`.
+SPEC CONTENT RULES \u2014 the spec MUST NOT contain:
+- Technology stack, framework choices, library names
+- File paths, API endpoint designs, database schema, code structure
+- Implementation details or "how to build" language
+- Any reference to specific tools, languages, or platforms
+Each functional requirement MUST be independently testable.
+Focus on WHAT users need and WHY \u2014 never HOW to implement.
+No technology stack, APIs, or code structure in the spec.
+Each requirement must be independently testable.
+Prefer informed defaults over asking the user \u2014 use \`[NEEDS CLARIFICATION]\` only when uncertainty could change scope, security, or core behavior.
+EXTERNAL PLAN IMPORT PATH \u2014 when the user provides an existing implementation plan (markdown content, pasted text, or a reference to a file):
+1. Read and parse the provided plan content.
+2. Reverse-engineer \`.swarm/spec.md\` from the plan:
+   - Derive FR-### functional requirements from task descriptions
+   - Derive SC-### success criteria from acceptance criteria in tasks
+   - Identify user scenarios from the plan's phase/feature groupings
+   - Surface implicit assumptions as \`[NEEDS CLARIFICATION]\` markers
+3. Validate the provided plan against swarm task format requirements:
+   - Every task should have FILE, TASK, CONSTRAINT, and ACCEPTANCE fields
+   - No task should touch more than 1 file
+   - No compound verbs in TASK lines ("implement X and add Y" = 2 tasks)
+   - Dependencies should be declared explicitly
+   - Phase structure should match \`.swarm/plan.md\` format
+4. Report gaps, format issues, and improvement suggestions to the user.
+5. Ask: "Should I also flesh out any areas that seem underspecified?"
+   - If yes: delegate to \`{{AGENT_PREFIX}}sme\` for targeted research on weak areas, then propose specific improvements.
+6. Output: both a \`.swarm/spec.md\` (extracted from the plan) and a validated version of the user's plan.
+EXTERNAL PLAN RULES:
+- Surface ALL changes as suggestions \u2014 do not silently rewrite the user's plan.
+- The user's plan is the starting point, not a draft to replace.
+- Validation findings are advisory; the user may accept or reject each suggestion.
+### MODE: CLARIFY-SPEC
+Activates when: \`.swarm/spec.md\` exists AND contains \`[NEEDS CLARIFICATION]\` markers; OR user says "clarify", "refine spec", or "review spec"; OR \`/swarm clarify\` is invoked; OR architect transitions from MODE: SPECIFY with open markers.
+CONSTRAINT: CLARIFY-SPEC must NEVER create a spec. If \`.swarm/spec.md\` does not exist, tell the user: "No spec found. Use \`/swarm specify\` to generate one first." and stop.
+1. Read \`.swarm/spec.md\`.
+2. Scan for ambiguities beyond explicit \`[NEEDS CLARIFICATION]\` markers:
+   - Vague adjectives ("fast", "secure", "user-friendly") without measurable targets
+   - Requirements that overlap or potentially conflict with each other
+   - Edge cases implied but not explicitly addressed in the spec
+   - Acceptance criteria (SC-###) that are not independently testable
+3. Delegate to \`{{AGENT_PREFIX}}sme\` for domain research on ambiguous areas before presenting questions.
+4. Present questions to the user ONE AT A TIME (max 8 per session):
+   - Offer 2\u20134 multiple-choice options for each question
+   - Mark the recommended option with reasoning (e.g., "Recommended: Option 2 because\u2026")
+   - Allow free-form input as an alternative to the options
+5. After each accepted answer:
+   - Immediately update \`.swarm/spec.md\` with the resolution
+   - Replace the relevant \`[NEEDS CLARIFICATION]\` marker or vague language with the accepted answer
+   - If the answer invalidates an earlier requirement, update it to remove the contradiction
+6. Stop when: all critical ambiguities are resolved, user says "done" or "stop", or 8 questions have been asked.
+7. Report: total questions asked, sections updated, remaining open ambiguities (if any), and suggest next step (\`PLAN\` if spec is clear, or continue clarifying).
+CLARIFY-SPEC RULES:
+- One question at a time \u2014 never ask multiple questions in the same message.
+- Do not modify any part of the spec that was not affected by the accepted answer.
+- Always write the accepted answer back to spec.md before presenting the next question.
+- Max 8 questions per session \u2014 if limit reached, report remaining ambiguities and stop.
+- Do not create or overwrite the spec file \u2014 only refine what exists.
 ### MODE: RESUME
 If .swarm/plan.md exists:
   1. Read plan.md header for "Swarm:" field
@@ -32556,6 +32671,7 @@ For complex tasks, make a second explorer call focused on risk/gap analysis:
 After explorer returns:
 - Run \`symbols\` tool on key files identified by explorer to understand public API surfaces
 - Run \`complexity_hotspots\` if not already run in Phase 0 (check context.md for existing analysis). Note modules with recommendation "security_review" or "full_gates" in context.md.
+- Check for project governance files using the \`glob\` tool with patterns \`project-instructions.md\`, \`docs/project-instructions.md\`, and \`INSTRUCTIONS.md\` (checked in that priority order \u2014 first match wins). If a file is found: read it and extract all MUST (mandatory constraints) and SHOULD (recommended practices) rules. Write the extracted rules as a summary to \`.swarm/context.md\` under a \`## Project Governance\` section \u2014 append if the section already exists, create it if not. If no MUST or SHOULD rules are found in the file, skip writing. If no governance file is found: skip silently. Existing DISCOVER steps are unchanged.
 ### MODE: CONSULT
 Check .swarm/context.md for cached guidance first.
@@ -32597,6 +32713,20 @@ This briefing is a HARD REQUIREMENT for ALL phases. Skipping it is a process vio
 ### MODE: PLAN
+SPEC GATE (soft \u2014 check before planning):
+- If \`.swarm/spec.md\` does NOT exist:
+  - Warn: "No spec found. A spec helps ensure the plan covers all requirements and gives the critic something to verify against. Would you like to create one first?"
+  - Offer two options:
+    1. "Create a spec first" \u2192 transition to MODE: SPECIFY
+    2. "Skip and plan directly" \u2192 continue with the steps below unchanged
+- If \`.swarm/spec.md\` EXISTS:
+  - Read it and use it as the primary input for planning
+  - Cross-reference requirements (FR-###) when decomposing tasks
+  - Ensure every FR-### maps to at least one task
+  - If a task has no corresponding FR-###, flag it as a potential gold-plating risk
+This is a SOFT gate. When the user chooses "Skip and plan directly", proceed to the steps below exactly as before \u2014 do NOT modify any planning behavior.
 Use the \`save_plan\` tool to create the implementation plan. Required parameters:
 - \`title\`: The real project name from the spec (NOT a placeholder like [Project])
 - \`swarm_id\`: The swarm identifier (e.g. "mega", "local", "paid")
@@ -32823,7 +32953,8 @@ Use the evidence manager tool to write a bundle at \`retro-{N}\` (where N is the
 3. Update context.md
 4. Write retrospective evidence: record phase_number, total_tool_calls, coder_revisions, reviewer_rejections, test_failures, security_findings, integration_issues, task_count, task_complexity, top_rejection_reasons, lessons_learned to .swarm/evidence/ via the evidence manager. Reset Phase Metrics in context.md to 0.
 4.5. Run \`evidence_check\` to verify all completed tasks have required evidence (review + test). If gaps found, note in retrospective lessons_learned. Optionally run \`pkg_audit\` if dependencies were modified during this phase. Optionally run \`schema_drift\` if API routes were modified during this phase.
-5. Run \`sbom_generate\` with scope='changed' to capture post-implementation dependency snapshot (saved to .swarm/evidence/sbom/). This is a non-blocking step - always proceeds to summary.
+5. Run \`sbom_generate\` with scope='changed' to capture post-implementation dependency snapshot (saved to \`.swarm/evidence/sbom/\`). This is a non-blocking step - always proceeds to summary.
+5.5. If \`.swarm/spec.md\` exists: delegate to \`{{AGENT_PREFIX}}critic\` with DRIFT-CHECK context \u2014 include phase number, list of completed task IDs and descriptions, and evidence path (\`.swarm/evidence/\`). If SIGNIFICANT DRIFT is returned: surface it as a warning to the user before proceeding. If spec.md does not exist: skip silently.
 6. Summarize to user
 7. Ask: "Ready for Phase [N+1]?"
@@ -32969,6 +33100,7 @@ REVIEW CHECKLIST:
 - Risk: Are high-risk changes identified? Is there a rollback path?
 - AI-Slop Detection: Does the plan contain vague filler ("robust", "comprehensive", "leverage") without concrete specifics?
 - Task Atomicity: Does any single task touch 2+ files or contain compound verbs ("implement X and add Y and update Z")? Flag as MAJOR \u2014 oversized tasks blow coder's context and cause downstream gate failures. Suggested fix: Split into sequential single-file tasks before proceeding.
+- Governance Compliance (conditional): If \`.swarm/context.md\` contains a \`## Project Governance\` section, read the MUST and SHOULD rules and validate the plan against them. MUST rule violations are CRITICAL severity. SHOULD rule violations are recommendation-level (note them but do not block approval). If no \`## Project Governance\` section exists in context.md, skip this check silently.
 OUTPUT FORMAT:
 VERDICT: APPROVED | NEEDS_REVISION | REJECTED
@@ -32984,7 +33116,99 @@ RULES:
 - MINOR issues can be noted but don't block APPROVED
 - No code writing
 - Don't reject for style/formatting \u2014 focus on substance
-- If the plan is fundamentally sound with only minor concerns, APPROVE it`;
+- If the plan is fundamentally sound with only minor concerns, APPROVE it
+---
+### MODE: ANALYZE
+Activates when: user says "analyze", "check spec", "analyze spec vs plan", or \`/swarm analyze\` is invoked.
+Note: ANALYZE produces a coverage report \u2014 its verdict vocabulary is distinct from the plan review above.
+  CLEAN = all MUST FR-### have covering tasks; GAPS FOUND = one or more FR-### have no covering task; DRIFT DETECTED = spec\u2013plan terminology or scope divergence found.
+ANALYZE uses CRITICAL/HIGH/MEDIUM/LOW severity (not CRITICAL/MAJOR/MINOR used by plan review).
+INPUT: \`.swarm/spec.md\` (requirements) and \`.swarm/plan.md\` (tasks). If either file is missing, report which is absent and stop \u2014 do not attempt analysis with incomplete input.
+STEPS:
+1. Read \`.swarm/spec.md\`. Extract all FR-### functional requirements and SC-### success criteria.
+2. Read \`.swarm/plan.md\`. Extract all tasks with their IDs and descriptions.
+3. Map requirements to tasks:
+   - For each FR-###: find the task(s) whose description mentions or addresses it (semantic match, not exact phrase).
+   - Partial coverage counts: a task that partially addresses a requirement is counted as covering it.
+   - Build a two-column coverage table: FR-### \u2192 [task IDs that cover it].
+4. Flag GAPS \u2014 requirements with no covering task:
+   - FR-### with MUST language and no covering task: CRITICAL severity.
+   - FR-### with SHOULD language and no covering task: HIGH severity.
+   - SC-### with no covering task: HIGH severity (untestable success criteria = unverifiable requirement).
+5. Flag GOLD-PLATING \u2014 tasks with no corresponding requirement:
+   - Exclude: project setup, CI configuration, documentation, testing infrastructure.
+   - Tasks doing work not tied to any FR-### or SC-###: MEDIUM severity.
+6. Check terminology consistency: flag terms used differently across spec.md and plan.md (e.g., "user" vs "account" for the same entity): LOW severity.
+7. Validate task format compliance:
+   - Tasks missing FILE, TASK, CONSTRAINT, or ACCEPTANCE fields: LOW severity.
+   - Tasks with compound verbs: LOW severity.
+OUTPUT FORMAT:
+VERDICT: CLEAN | GAPS FOUND | DRIFT DETECTED
+COVERAGE TABLE: [FR-### | Covering Tasks \u2014 list the top 10 rows; if more than 10 items, show "showing 10 of N" and note total count]
+GAPS: [top 10 gaps with severity \u2014 if more than 10 items, show "showing 10 of N"]
+GOLD-PLATING: [top 10 gold-plating findings \u2014 if more than 10 items, show "showing 10 of N"]
+TERMINOLOGY DRIFT: [top 10 inconsistencies \u2014 if more than 10 items, show "showing 10 of N"]
+SUMMARY: [1-2 sentence overall assessment]
+ANALYZE RULES:
+- READ-ONLY: do not create, modify, or delete any file during analysis.
+- Report only \u2014 no plan edits, no spec edits.
+- Partial coverage counts as coverage (do not penalize partially addressed requirements).
+- Report the highest-severity findings first within each section.
+- If both spec.md and plan.md are present but empty, report CLEAN with a note that both files are empty.
+---
+### MODE: DRIFT-CHECK
+Activates when: Architect delegates critic with DRIFT-CHECK context after completing a phase.
+Note: DRIFT-CHECK detects spec-execution divergence after implementation \u2014 distinct from plan-review (APPROVED/NEEDS_REVISION/REJECTED) and ANALYZE (CLEAN/GAPS FOUND/DRIFT DETECTED).
+DRIFT-CHECK uses CRITICAL/HIGH/MEDIUM/LOW severity (not CRITICAL/MAJOR/MINOR used by plan review).
+SIGNIFICANT DRIFT verdict = at least one CRITICAL or HIGH finding.
+MINOR DRIFT verdict = only MEDIUM or LOW findings.
+CLEAN verdict = no findings.
+INPUT: Phase number (provided in TASK description as "DRIFT-CHECK phase N"). If not provided, ask the user for the phase number before proceeding.
+EDGE CASES:
+- spec.md is missing: report "spec.md is missing \u2014 DRIFT-CHECK requires a spec to compare against" and stop.
+- plan.md is missing: report "plan.md is missing \u2014 cannot identify completed tasks for this phase" and stop.
+- Evidence files are missing: note the absence in the report but proceed with available data.
+- Invalid phase number (no tasks found for that phase): report "no tasks found for phase N" and stop.
+STEPS:
+1. Read \`.swarm/spec.md\`. Extract all FR-### requirements relevant to the phase being checked.
+2. Read \`.swarm/plan.md\`. Extract all tasks marked complete ([x]) for the specified phase.
+3. Read evidence files in \`.swarm/evidence/\` for the phase (retrospective, review outputs, test outputs).
+4. For each completed task: compare what was implemented (from evidence) against the FR-### requirements it was supposed to address. Look for:
+   - Scope additions: task implemented more than the FR-### required.
+   - Scope omissions: task implemented less than the FR-### required.
+   - Assumption changes: task used a different approach that may affect other requirements.
+5. Classify each finding by severity:
+   - CRITICAL: core requirement not implemented, or implementation contradicts requirement.
+   - HIGH: significant scope addition or omission that affects other requirements.
+   - MEDIUM: minor scope difference unlikely to affect other requirements.
+   - LOW: stylistic or naming inconsistency between spec and implementation.
+6. Produce the full drift report in your response. The Architect will save it to \`.swarm/evidence/phase-{N}-drift.md\`.
+OUTPUT FORMAT:
+VERDICT: CLEAN | MINOR DRIFT | SIGNIFICANT DRIFT
+FINDINGS: [list findings with severity, task ID, FR-### reference, description]
+SUMMARY: [1-2 sentence assessment]
+DRIFT-CHECK RULES:
+- Advisory: DRIFT-CHECK does NOT block phase transitions. It surfaces information for the Architect and user.
+- READ-ONLY: do not create, modify, or delete any file.
+- Output the full report in your response \u2014 do not attempt to write files directly.
+- If no spec.md exists, stop immediately and report the missing file.
+- Do not modify the spec.md or plan.md based on findings.`;
 function createCriticAgent(model, customPrompt, customAppendPrompt) {
   let prompt = CRITIC_PROMPT;
   if (customPrompt) {
@@ -33383,7 +33607,17 @@ RULES:
 - Be specific: exact names, paths, parameters, versions
 - Be concise: under 1500 characters
 - Be actionable: info Coder can use directly
-- No code writing`;
+- No code writing
+RESEARCH CACHING:
+Before fetching any URL or performing external research, check \`.swarm/context.md\` for a \`## Research Sources\` section.
+- If \`.swarm/context.md\` does not exist or the \`## Research Sources\` section is absent: proceed with fresh research.
+- If the URL or topic is listed there: reuse the cached summary \u2014 do not fetch the URL again.
+- If not listed (cache miss): fetch the URL, produce your normal response, then append this line at the end of your response:
+  CACHE-UPDATE: \`[YYYY-MM-DD] [URL or topic]: [1-2 sentence summary]\`
+  The Architect will save this line to \`.swarm/context.md\` under \`## Research Sources\`.
+- Cache bypass: if the user explicitly requests fresh research ("re-fetch", "ignore cache", "latest"): skip the cache check and fetch directly; still include the CACHE-UPDATE line.
+- Do NOT write to any file \u2014 SME is read-only. Cache persistence is the Architect's responsibility.`;
 function createSMEAgent(model, customPrompt, customAppendPrompt) {
   let prompt = SME_PROMPT;
   if (customPrompt) {
@@ -34541,6 +34775,15 @@ function handleAgentsCommand(agents, guardrails) {
 `);
 }
+// src/commands/analyze.ts
+async function handleAnalyzeCommand(_directory, args2) {
+  const description = args2.join(" ").trim();
+  if (description) {
+    return `[MODE: ANALYZE] ${description}`;
+  }
+  return "[MODE: ANALYZE] Please analyze the spec against the plan using MODE: ANALYZE.";
+}
 // src/commands/archive.ts
 init_manager();
 async function handleArchiveCommand(directory, args2) {
@@ -35110,6 +35353,15 @@ async function handleBenchmarkCommand(directory, args2) {
 `);
 }
+// src/commands/clarify.ts
+async function handleClarifyCommand(_directory, args2) {
+  const description = args2.join(" ").trim();
+  if (description) {
+    return `[MODE: CLARIFY-SPEC] ${description}`;
+  }
+  return "[MODE: CLARIFY-SPEC] Please enter MODE: CLARIFY-SPEC and clarify the existing spec.";
+}
 // src/commands/config.ts
 import * as os2 from "os";
 import * as path8 from "path";
@@ -36007,6 +36259,15 @@ ${error93 instanceof Error ? error93.message : String(error93)}`;
   }
 }
+// src/commands/specify.ts
+async function handleSpecifyCommand(_directory, args2) {
+  const description = args2.join(" ").trim();
+  if (description) {
+    return `[MODE: SPECIFY] ${description}`;
+  }
+  return "[MODE: SPECIFY] Please enter MODE: SPECIFY and generate a spec for this project.";
+}
 // src/hooks/extractors.ts
 function extractCurrentPhase(planContent) {
   if (!planContent) {
@@ -36459,7 +36720,10 @@ var HELP_TEXT = [
   "- `/swarm benchmark [--cumulative] [--ci-gate]` \u2014 Show performance metrics",
   "- `/swarm export` \u2014 Export plan and context as JSON",
   "- `/swarm reset --confirm` \u2014 Clear swarm state files",
-  "- `/swarm retrieve <id>` \u2014 Retrieve full output from a summary"
+  "- `/swarm retrieve <id>` \u2014 Retrieve full output from a summary",
+  "- `/swarm clarify [topic]` \u2014 Clarify and refine an existing feature specification",
+  "- `/swarm analyze` \u2014 Analyze spec.md vs plan.md for requirement coverage gaps",
+  "- `/swarm specify [description]` \u2014 Generate or import a feature specification"
 ].join(`
 `);
 function createSwarmCommandHandler(directory, agents) {
@@ -36527,6 +36791,15 @@ function createSwarmCommandHandler(directory, agents) {
       case "retrieve":
         text = await handleRetrieveCommand(directory, args2);
         break;
+      case "clarify":
+        text = await handleClarifyCommand(directory, args2);
+        break;
+      case "analyze":
+        text = await handleAnalyzeCommand(directory, args2);
+        break;
+      case "specify":
+        text = await handleSpecifyCommand(directory, args2);
+        break;
       default:
         text = HELP_TEXT;
         break;
@@ -36698,8 +36971,232 @@ function createCompactionCustomizerHook(config3, directory) {
     })
   };
 }
+// src/hooks/context-budget.ts
+init_utils();
+// src/hooks/message-priority.ts
+var MessagePriority = {
+  CRITICAL: 0,
+  HIGH: 1,
+  MEDIUM: 2,
+  LOW: 3,
+  DISPOSABLE: 4
+};
+function containsPlanContent(text) {
+  if (!text)
+    return false;
+  const lowerText = text.toLowerCase();
+  return lowerText.includes(".swarm/plan") || lowerText.includes(".swarm/context") || lowerText.includes("swarm/plan.md") || lowerText.includes("swarm/context.md");
+}
+function isToolResult(message) {
+  if (!message?.info)
+    return false;
+  const role = message.info.role;
+  const toolName = message.info.toolName;
+  return role === "assistant" && !!toolName;
+}
+function isDuplicateToolRead(current, previous) {
+  if (!current?.info || !previous?.info)
+    return false;
+  const currentTool = current.info.toolName;
+  const previousTool = previous.info.toolName;
+  if (currentTool !== previousTool)
+    return false;
+  const isReadTool = currentTool?.toLowerCase().includes("read") && previousTool?.toLowerCase().includes("read");
+  if (!isReadTool)
+    return false;
+  const currentArgs = current.info.toolArgs;
+  const previousArgs = previous.info.toolArgs;
+  if (!currentArgs || !previousArgs)
+    return false;
+  const currentKeys = Object.keys(currentArgs);
+  const previousKeys = Object.keys(previousArgs);
+  if (currentKeys.length === 0 || previousKeys.length === 0)
+    return false;
+  const firstKey = currentKeys[0];
+  return currentArgs[firstKey] === previousArgs[firstKey];
+}
+function isStaleError(text, turnsAgo) {
+  if (!text)
+    return false;
+  if (turnsAgo <= 6)
+    return false;
+  const lowerText = text.toLowerCase();
+  const errorPatterns = [
+    "error:",
+    "failed to",
+    "could not",
+    "unable to",
+    "exception",
+    "errno",
+    "cannot read",
+    "not found",
+    "access denied",
+    "timeout"
+  ];
+  return errorPatterns.some((pattern) => lowerText.includes(pattern));
+}
+function extractMessageText(message) {
+  if (!message?.parts || message.parts.length === 0)
+    return "";
+  return message.parts.map((part) => part?.text || "").join("");
+}
+function classifyMessage(message, index, totalMessages, recentWindowSize = 10) {
+  const role = message?.info?.role;
+  const text = extractMessageText(message);
+  if (containsPlanContent(text)) {
+    return MessagePriority.CRITICAL;
+  }
+  if (role === "system") {
+    return MessagePriority.CRITICAL;
+  }
+  if (role === "user") {
+    return MessagePriority.HIGH;
+  }
+  if (isToolResult(message)) {
+    const positionFromEnd = totalMessages - 1 - index;
+    if (positionFromEnd < recentWindowSize) {
+      return MessagePriority.MEDIUM;
+    }
+    if (isStaleError(text, positionFromEnd)) {
+      return MessagePriority.DISPOSABLE;
+    }
+    return MessagePriority.LOW;
+  }
+  if (role === "assistant") {
+    const positionFromEnd = totalMessages - 1 - index;
+    if (positionFromEnd < recentWindowSize) {
+      return MessagePriority.MEDIUM;
+    }
+    if (isStaleError(text, positionFromEnd)) {
+      return MessagePriority.DISPOSABLE;
+    }
+    return MessagePriority.LOW;
+  }
+  return MessagePriority.LOW;
+}
+function classifyMessages(messages, recentWindowSize = 10) {
+  const results = [];
+  const totalMessages = messages.length;
+  for (let i2 = 0;i2 < messages.length; i2++) {
+    const message = messages[i2];
+    const priority = classifyMessage(message, i2, totalMessages, recentWindowSize);
+    if (i2 > 0) {
+      const current = messages[i2];
+      const previous = messages[i2 - 1];
+      if (isDuplicateToolRead(current, previous)) {
+        if (results[i2 - 1] >= MessagePriority.MEDIUM) {
+          results[i2 - 1] = MessagePriority.DISPOSABLE;
+        }
+      }
+    }
+    results.push(priority);
+  }
+  return results;
+}
+// src/hooks/model-limits.ts
+init_utils();
+var NATIVE_MODEL_LIMITS = {
+  "claude-sonnet-4": 200000,
+  "claude-opus-4": 200000,
+  "claude-haiku-4": 200000,
+  "gpt-5": 400000,
+  "gpt-5.1-codex": 400000,
+  "gpt-5.1": 264000,
+  "gpt-4.1": 1047576,
+  "gemini-2.5-pro": 1048576,
+  "gemini-2.5-flash": 1048576,
+  o3: 200000,
+  "o4-mini": 200000,
+  "deepseek-r1": 163840,
+  "deepseek-chat": 163840,
+  "qwen3.5": 131072
+};
+var PROVIDER_CAPS = {
+  copilot: 128000,
+  "github-copilot": 128000
+};
+function extractModelInfo(messages) {
+  if (!messages || messages.length === 0) {
+    return {};
+  }
+  for (let i2 = messages.length - 1;i2 >= 0; i2--) {
+    const message = messages[i2];
+    if (!message?.info)
+      continue;
+    if (message.info.role === "assistant") {
+      const modelID = message.info.modelID;
+      const providerID = message.info.providerID;
+      if (modelID || providerID) {
+        return {
+          ...modelID ? { modelID } : {},
+          ...providerID ? { providerID } : {}
+        };
+      }
+    }
+  }
+  return {};
+}
+var loggedFirstCalls = new Set;
+function resolveModelLimit(modelID, providerID, configOverrides = {}) {
+  const normalizedModelID = modelID ?? "";
+  const normalizedProviderID = providerID ?? "";
+  if (normalizedProviderID && normalizedModelID) {
+    const providerModelKey = `${normalizedProviderID}/${normalizedModelID}`;
+    if (configOverrides[providerModelKey] !== undefined) {
+      logFirstCall(normalizedModelID, normalizedProviderID, "override(provider/model)", configOverrides[providerModelKey]);
+      return configOverrides[providerModelKey];
+    }
+  }
+  if (normalizedModelID && configOverrides[normalizedModelID] !== undefined) {
+    logFirstCall(normalizedModelID, normalizedProviderID, "override(model)", configOverrides[normalizedModelID]);
+    return configOverrides[normalizedModelID];
+  }
+  if (normalizedProviderID && PROVIDER_CAPS[normalizedProviderID] !== undefined) {
+    const cap = PROVIDER_CAPS[normalizedProviderID];
+    logFirstCall(normalizedModelID, normalizedProviderID, "provider_cap", cap);
+    return cap;
+  }
+  if (normalizedModelID) {
+    const matchedLimit = findNativeLimit(normalizedModelID);
+    if (matchedLimit !== undefined) {
+      logFirstCall(normalizedModelID, normalizedProviderID, "native", matchedLimit);
+      return matchedLimit;
+    }
+  }
+  if (configOverrides.default !== undefined) {
+    logFirstCall(normalizedModelID, normalizedProviderID, "default_override", configOverrides.default);
+    return configOverrides.default;
+  }
+  logFirstCall(normalizedModelID, normalizedProviderID, "fallback", 128000);
+  return 128000;
+}
+function findNativeLimit(modelID) {
+  if (NATIVE_MODEL_LIMITS[modelID] !== undefined) {
+    return NATIVE_MODEL_LIMITS[modelID];
+  }
+  let bestMatch;
+  for (const key of Object.keys(NATIVE_MODEL_LIMITS)) {
+    if (modelID.startsWith(key)) {
+      if (!bestMatch || key.length > bestMatch.length) {
+        bestMatch = key;
+      }
+    }
+  }
+  return bestMatch ? NATIVE_MODEL_LIMITS[bestMatch] : undefined;
+}
+function logFirstCall(modelID, providerID, source, limit) {
+  const key = `${modelID || "unknown"}::${providerID || "unknown"}`;
+  if (!loggedFirstCalls.has(key)) {
+    loggedFirstCalls.add(key);
+    warn(`[model-limits] Resolved limit for ${modelID || "(no model)"}@${providerID || "(no provider)"}: ${limit} (source: ${source})`);
+  }
+}
 // src/hooks/context-budget.ts
 init_utils2();
+var lastSeenAgent;
 function createContextBudgetHandler(config3) {
   const enabled = config3.context_budget?.enabled !== false;
   if (!enabled) {
@@ -36707,14 +37204,19 @@ function createContextBudgetHandler(config3) {
   }
   const warnThreshold = config3.context_budget?.warn_threshold ?? 0.7;
   const criticalThreshold = config3.context_budget?.critical_threshold ?? 0.9;
-  const modelLimits = config3.context_budget?.model_limits ?? {
-    default: 128000
-  };
-  const modelLimit = modelLimits.default ?? 128000;
-  return async (_input, output) => {
+  const modelLimitsConfig = config3.context_budget?.model_limits ?? {};
+  const loggedLimits = new Set;
+  const handler = async (_input, output) => {
     const messages = output?.messages;
     if (!messages || messages.length === 0)
       return;
+    const { modelID, providerID } = extractModelInfo(messages);
+    const modelLimit = resolveModelLimit(modelID, providerID, modelLimitsConfig);
+    const cacheKey = `${modelID || "unknown"}::${providerID || "unknown"}`;
+    if (!loggedLimits.has(cacheKey)) {
+      loggedLimits.add(cacheKey);
+      warn(`[swarm] Context budget: model=${modelID || "unknown"} provider=${providerID || "unknown"} limit=${modelLimit}`);
+    }
     let totalTokens = 0;
     for (const message of messages) {
       if (!message?.parts)
@@ -36726,6 +37228,79 @@ function createContextBudgetHandler(config3) {
       }
     }
     const usagePercent = totalTokens / modelLimit;
+    let baseAgent;
+    for (let i2 = messages.length - 1;i2 >= 0; i2--) {
+      const msg = messages[i2];
+      if (msg?.info?.role === "user" && msg?.info?.agent) {
+        baseAgent = stripKnownSwarmPrefix(msg.info.agent);
+        break;
+      }
+    }
+    let ratio = usagePercent;
+    if (lastSeenAgent !== undefined && baseAgent !== undefined && baseAgent !== lastSeenAgent) {
+      const enforceOnSwitch = config3.context_budget?.enforce_on_agent_switch ?? true;
+      if (enforceOnSwitch && usagePercent > (config3.context_budget?.warn_threshold ?? 0.7)) {
+        warn(`[swarm] Agent switch detected: ${lastSeenAgent} \u2192 ${baseAgent}, enforcing context budget`, {
+          from: lastSeenAgent,
+          to: baseAgent
+        });
+        ratio = 1;
+      }
+    }
+    lastSeenAgent = baseAgent;
+    if (ratio >= criticalThreshold) {
+      const enforce = config3.context_budget?.enforce ?? true;
+      if (enforce) {
+        const targetTokens = modelLimit * (config3.context_budget?.prune_target ?? 0.7);
+        const recentWindow = config3.context_budget?.recent_window ?? 10;
+        const priorities = classifyMessages(output.messages || [], recentWindow);
+        const toolMaskThreshold = config3.context_budget?.tool_output_mask_threshold ?? 2000;
+        let toolMaskFreedTokens = 0;
+        const maskedIndices = new Set;
+        for (let i2 = 0;i2 < (output.messages || []).length; i2++) {
+          const msg = (output.messages || [])[i2];
+          if (shouldMaskToolOutput(msg, i2, (output.messages || []).length, recentWindow, toolMaskThreshold)) {
+            toolMaskFreedTokens += maskToolOutput(msg, toolMaskThreshold);
+            maskedIndices.add(i2);
+          }
+        }
+        if (toolMaskFreedTokens > 0) {
+          totalTokens -= toolMaskFreedTokens;
+          warn(`[swarm] Tool output masking: masked ${maskedIndices.size} tool results, freed ~${toolMaskFreedTokens} tokens`, {
+            maskedCount: maskedIndices.size,
+            freedTokens: toolMaskFreedTokens
+          });
+        }
+        const preserveLastNTurns = config3.context_budget?.preserve_last_n_turns ?? 4;
+        const removableMessages = identifyRemovableMessages(output.messages || [], priorities, preserveLastNTurns);
+        let freedTokens = 0;
+        const toRemove = new Set;
+        for (const idx of removableMessages) {
+          if (totalTokens - freedTokens <= targetTokens)
+            break;
+          toRemove.add(idx);
+          freedTokens += estimateTokens(extractMessageText2(output.messages[idx]));
+        }
+        const beforeTokens = totalTokens;
+        if (toRemove.size > 0) {
+          const actualFreedTokens = applyObservationMasking(output.messages || [], toRemove);
+          totalTokens -= actualFreedTokens;
+          warn(`[swarm] Context enforcement: pruned ${toRemove.size} messages, freed ${actualFreedTokens} tokens (${beforeTokens}\u2192${totalTokens} of ${modelLimit})`, {
+            pruned: toRemove.size,
+            freedTokens: actualFreedTokens,
+            before: beforeTokens,
+            after: totalTokens,
+            limit: modelLimit
+          });
+        } else if (removableMessages.length === 0 && totalTokens > targetTokens) {
+          warn(`[swarm] Context enforcement: no removable messages found but still ${totalTokens} tokens (target: ${targetTokens})`, {
+            currentTokens: totalTokens,
+            targetTokens,
+            limit: modelLimit
+          });
+        }
+      }
+    }
     let lastUserMessageIndex = -1;
     for (let i2 = messages.length - 1;i2 >= 0; i2--) {
       if (messages[i2]?.info?.role === "user") {
@@ -36738,8 +37313,10 @@ function createContextBudgetHandler(config3) {
     const lastUserMessage = messages[lastUserMessageIndex];
     if (!lastUserMessage?.parts)
       return;
-    const agent = lastUserMessage.info?.agent;
-    if (agent && agent !== "architect")
+    const trackedAgents = config3.context_budget?.tracked_agents ?? [
+      "architect"
+    ];
+    if (baseAgent && !trackedAgents.includes(baseAgent))
       return;
     const textPartIndex = lastUserMessage.parts.findIndex((p) => p?.type === "text" && p.text !== undefined);
     if (textPartIndex === -1)
@@ -36760,6 +37337,110 @@ function createContextBudgetHandler(config3) {
       lastUserMessage.parts[textPartIndex].text = `${warningText}${originalText}`;
     }
   };
+  return handler;
+}
+function identifyRemovableMessages(messages, priorities, preserveLastNTurns) {
+  let turnCount = 0;
+  const protectedIndices = new Set;
+  for (let i2 = messages.length - 1;i2 >= 0 && turnCount < preserveLastNTurns * 2; i2--) {
+    const role = messages[i2]?.info?.role;
+    if (role === "user" || role === "assistant") {
+      protectedIndices.add(i2);
+      if (role === "user")
+        turnCount++;
+    }
+  }
+  let lastUserIdx = -1;
+  let lastAssistantIdx = -1;
+  for (let i2 = messages.length - 1;i2 >= 0; i2--) {
+    const role = messages[i2]?.info?.role;
+    if (role === "user" && lastUserIdx === -1) {
+      lastUserIdx = i2;
+    }
+    if (role === "assistant" && lastAssistantIdx === -1) {
+      lastAssistantIdx = i2;
+    }
+    if (lastUserIdx !== -1 && lastAssistantIdx !== -1)
+      break;
+  }
+  if (lastUserIdx !== -1)
+    protectedIndices.add(lastUserIdx);
+  if (lastAssistantIdx !== -1)
+    protectedIndices.add(lastAssistantIdx);
+  const HIGH = MessagePriority.HIGH;
+  const MEDIUM = MessagePriority.MEDIUM;
+  const LOW = MessagePriority.LOW;
+  const DISPOSABLE = MessagePriority.DISPOSABLE;
+  const byPriority = [[], [], [], [], []];
+  for (let i2 = 0;i2 < priorities.length; i2++) {
+    const priority = priorities[i2];
+    if (!protectedIndices.has(i2) && priority > HIGH) {
+      byPriority[priority].push(i2);
+    }
+  }
+  return [...byPriority[DISPOSABLE], ...byPriority[LOW], ...byPriority[MEDIUM]];
+}
+function applyObservationMasking(messages, toRemove) {
+  let actualFreedTokens = 0;
+  for (const idx of toRemove) {
+    const msg = messages[idx];
+    if (msg?.parts) {
+      for (const part of msg.parts) {
+        if (part.type === "text" && part.text) {
+          const originalTokens = estimateTokens(part.text);
+          const placeholder = `[Context pruned \u2014 message from turn ${idx}, ~${originalTokens} tokens freed. Use retrieve_summary if needed.]`;
+          const maskedTokens = estimateTokens(placeholder);
+          part.text = placeholder;
+          actualFreedTokens += originalTokens - maskedTokens;
+        }
+      }
+    }
+  }
+  return actualFreedTokens;
+}
+function extractMessageText2(msg) {
+  if (!msg?.parts)
+    return "";
+  return msg.parts.filter((p) => p.type === "text" && p.text).map((p) => p.text).join(`
+`);
+}
+function extractToolName(text) {
+  const match = text.match(/^(read_file|write|edit|apply_patch|task|bun|npm|git|bash|glob|grep|mkdir|cp|mv|rm)\b/i);
+  return match?.[1];
+}
+function shouldMaskToolOutput(msg, index, totalMessages, recentWindowSize, threshold) {
+  if (!isToolResult(msg))
+    return false;
+  const text = extractMessageText2(msg);
+  if (text.includes("[Tool output masked") || text.includes("[Context pruned")) {
+    return false;
+  }
+  const toolName = extractToolName(text);
+  if (toolName && ["retrieve_summary", "task"].includes(toolName.toLowerCase())) {
+    return false;
+  }
+  const age = totalMessages - 1 - index;
+  return age > recentWindowSize || text.length > threshold;
+}
+function maskToolOutput(msg, threshold) {
+  if (!msg?.parts)
+    return 0;
+  let freedTokens = 0;
+  for (const part of msg.parts) {
+    if (part.type === "text" && part.text) {
+      if (part.text.includes("[Tool output masked") || part.text.includes("[Context pruned")) {
+        continue;
+      }
+      const originalTokens = estimateTokens(part.text);
+      const toolName = extractToolName(part.text) || "unknown";
+      const excerpt = part.text.substring(0, 200).replace(/\n/g, " ");
+      const placeholder = `[Tool output masked \u2014 ${toolName} returned ~${originalTokens} tokens. First 200 chars: "${excerpt}..." Use retrieve_summary if needed.]`;
+      const maskedTokens = estimateTokens(placeholder);
+      part.text = placeholder;
+      freedTokens += originalTokens - maskedTokens;
+    }
+  }
+  return freedTokens;
 }
 // src/hooks/delegation-gate.ts
 function extractTaskLine(text) {
@@ -36988,6 +37669,12 @@ function isSourceCodePath(filePath) {
   ];
   return !nonSourcePatterns.some((pattern) => pattern.test(normalized));
 }
+function hasTraversalSegments(filePath) {
+  if (!filePath)
+    return false;
+  const normalized = filePath.replace(/\\/g, "/");
+  return normalized.startsWith("..") || normalized.includes("/../") || normalized.endsWith("/..");
+}
 function isGateTool(toolName) {
   const normalized = toolName.replace(/^[^:]+[:.]/, "");
   const gateTools = [
@@ -37030,10 +37717,43 @@ function createGuardrailsHooks(config3) {
   const inputArgsByCallID = new Map;
   return {
     toolBefore: async (input, output) => {
-      if (isArchitect(input.sessionID) && isWriteTool(input.tool)) {
+      const currentSession = swarmState.agentSessions.get(input.sessionID);
+      if (currentSession?.delegationActive) {} else if (isArchitect(input.sessionID) && isWriteTool(input.tool)) {
         const args2 = output.args;
         const targetPath = args2?.filePath ?? args2?.path ?? args2?.file ?? args2?.target;
-        if (typeof targetPath === "string" && isOutsideSwarmDir(targetPath) && isSourceCodePath(targetPath)) {
+        if (!targetPath && (input.tool === "apply_patch" || input.tool === "patch")) {
+          const patchText = args2?.input ?? args2?.patch ?? (Array.isArray(args2?.cmd) ? args2.cmd[1] : undefined);
+          if (typeof patchText === "string") {
+            const patchPathPattern = /\*\*\*\s+(?:Update|Add|Delete)\s+File:\s*(.+)/gi;
+            const diffPathPattern = /\+\+\+\s+b\/(.+)/gm;
+            const paths = new Set;
+            let match;
+            while ((match = patchPathPattern.exec(patchText)) !== null) {
+              paths.add(match[1].trim());
+            }
+            while ((match = diffPathPattern.exec(patchText)) !== null) {
+              const p = match[1].trim();
+              if (p !== "/dev/null")
+                paths.add(p);
+            }
+            for (const p of paths) {
+              if (isOutsideSwarmDir(p) && (isSourceCodePath(p) || hasTraversalSegments(p))) {
+                const session2 = swarmState.agentSessions.get(input.sessionID);
+                if (session2) {
+                  session2.architectWriteCount++;
+                  warn("Architect direct code edit detected via apply_patch", {
+                    tool: input.tool,
+                    sessionID: input.sessionID,
+                    targetPath: p,
+                    writeCount: session2.architectWriteCount
+                  });
+                }
+                break;
+              }
+            }
+          }
+        }
+        if (typeof targetPath === "string" && isOutsideSwarmDir(targetPath) && (isSourceCodePath(targetPath) || hasTraversalSegments(targetPath))) {
           const session2 = swarmState.agentSessions.get(input.sessionID);
           if (session2) {
             session2.architectWriteCount++;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "opencode-swarm",
-	"version": "6.14.11",
+	"version": "6.15.0",
 	"description": "Architect-centric agentic swarm plugin for OpenCode - hub-and-spoke orchestration with SME consultation, code generation, and QA review",
 	"main": "dist/index.js",
 	"types": "dist/index.d.ts",