npm - opencode-swarm - Versions diffs - 6.20.3 → 6.21.1 - Mend

opencode-swarm 6.20.3 → 6.21.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/README.md +82 -2
package/dist/background/plan-sync-worker.d.ts +5 -0
package/dist/cli/index.js +17 -4
package/dist/config/constants.d.ts +14 -0
package/dist/index.js +984 -373
package/dist/state.d.ts +42 -0
package/dist/tools/declare-scope.d.ts +49 -0
package/dist/tools/index.d.ts +2 -1
package/dist/tools/save-plan.d.ts +1 -0
package/dist/tools/tool-names.d.ts +1 -1
package/dist/tools/update-task-status.d.ts +15 -0
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -370,6 +370,17 @@ Every agent runs inside a circuit breaker that kills runaway behavior before it
 Limits reset per task. A coder working on Task 2.3 is not penalized for tool calls made during Task 2.2.
+#### Architect Self-Coding Block
+If the architect writes files directly instead of delegating to the coder, enforcement escalates with the number of writes, ending in a hard block:
+| Write count | Behavior |
+|:-----------:|----------|
+| 1–2 | Warning injected into next architect message |
+| ≥ 3 | `Error` thrown with `SELF_CODING_BLOCK` — identifies file paths written and count |
+The counter resets only when a coder delegation is dispatched. From the third write onward this is hard enforcement, not advisory.
 Per-agent overrides:
 ```json
@@ -677,6 +688,24 @@ When truncation is active, a footer is appended:
 [output truncated to {maxLines} lines – use `tool_output.per_tool.<tool>` to adjust]
 ```
+## Summarization Settings
+Control how tool outputs are summarized for LLM context.
+```json
+{
+  "summaries": {
+    "threshold_bytes": 102400,
+    "exempt_tools": ["retrieve_summary", "task", "read"]
+  }
+}
+```
+- **threshold_bytes** – Output size threshold in bytes before summarization is triggered (default 102400 = 100 KiB; accepted range 1024–1048576).
+- **exempt_tools** – Tools whose outputs are never summarized. Defaults to `["retrieve_summary", "task", "read"]` to prevent re-summarization loops.
+> **Note:** The `retrieve_summary` tool supports paginated retrieval via `offset` and `limit` parameters to fetch large summarized outputs in chunks.
 ---
 ### Disabling Agents
@@ -706,7 +735,7 @@ When truncation is active, a footer is appended:
 | `/swarm evidence [task]` | Evidence bundles for a task or all tasks |
 | `/swarm archive [--dry-run]` | Archive old evidence with retention policy |
 | `/swarm benchmark` | Performance benchmarks |
-| `/swarm retrieve [id]` | Retrieve auto-summarized tool outputs |
+| `/swarm retrieve [id]` | Retrieve auto-summarized tool outputs (supports offset/limit pagination) |
 | `/swarm reset --confirm` | Clear swarm state files |
 | `/swarm preflight` | Run phase preflight checks |
 | `/swarm config doctor [--fix]` | Config validation with optional auto-fix |
@@ -727,7 +756,7 @@ Swarm limits which tools each agent can access based on their role. This prevent
 | Agent | Tools | Count | Rationale |
 |-------|-------|:---:|-----------|
-| **architect** | All 17 tools | 17 | Orchestrator needs full visibility |
+| **architect** | All 21 tools | 21 | Orchestrator needs full visibility |
 | **reviewer** | diff, imports, lint, pkg_audit, pre_check_batch, secretscan, symbols, complexity_hotspots, retrieve_summary, extract_code_blocks, test_runner | 11 | Security-focused QA |
 | **coder** | diff, imports, lint, symbols, extract_code_blocks, retrieve_summary | 6 | Write-focused, minimal read tools |
 | **test_engineer** | test_runner, diff, symbols, extract_code_blocks, retrieve_summary, imports, complexity_hotspots, pkg_audit | 8 | Testing and verification |
@@ -811,11 +840,62 @@ The following tools can be assigned to agents via overrides:
 | `todo_extract` | Extract TODO/FIXME comments |
 | `write_retro` | Document phase retrospectives via the phase_complete workflow; capture lessons learned |
 | `phase_complete` | Enforces phase completion, verifies required agents, logs events, resets state |
+| `declare_scope` | Pre-declare the file scope for the next coder delegation (architect-only); violations trigger warnings |
 ---
 ## Recent Changes
+### v6.21 — Gate Enforcement Hardening
+This release replaces soft advisory warnings with hard runtime blocks and adds structural compliance tooling for all model tiers.
+#### Phase 1 — P0 Bug Fixes: Hard Blocks Replace Soft Warnings
+- **`qaSkipCount` reset fixed**: The skip-detection counter in `delegation-gate.ts` now resets only when **both** reviewer **and** test_engineer have been seen since the last coder entry — not when either one runs alone.
+- **`update_task_status` reviewer gate check**: Accepting `status='completed'` now validates that the reviewer gate is present in the session's `gateLog` for the given task. Missing reviewer returns a structured error naming the absent gate.
+- **Architect self-coding hard block**: `architectWriteCount ≥ 3` now throws an `Error` with message `SELF_CODING_BLOCK` (previously a warning only). Counts 1–2 remain advisory warnings. Counter resets on coder delegation.
+#### Phase 2 — Per-Task State Machine
+Every task now has a tracked workflow state in the session:
+| State | Meaning |
+|-------|---------|
+| `idle` | Task not started |
+| `coder_delegated` | Coder has received the delegation |
+| `pre_check_passed` | Automated gates (lint, SAST, secrets, quality) passed |
+| `reviewer_run` | Reviewer agent has returned a verdict |
+| `tests_run` | Test engineer has completed (verification + adversarial) |
+| `complete` | `update_task_status` accepted the `completed` transition |
+Transitions are forward-only. `advanceTaskState()` throws `INVALID_TASK_STATE_TRANSITION` if an illegal jump is attempted. `getTaskState()` returns `'idle'` for unknown tasks.
+`session.lastGateOutcome` records the most recent gate result: `{ gate, taskId, passed, timestamp }`.
+#### Phase 3 — State Machine Integration
+- `update_task_status` now uses the state machine (not a raw `gateLog.has()` check): `status='completed'` is rejected unless the task is in `'tests_run'` or `'complete'` state.
+- `delegation-gate.ts` protocol-violation check additionally verifies that the prior task's state has advanced past `'coder_delegated'` before allowing a new coder delegation.
+#### Phase 4 — Context Engineering
+- **Progressive task disclosure**: When >5 tasks are visible in the last user message, `delegation-gate.ts` trims to the current task ± a context window. A `[Task window: showing N of M tasks]` comment marks the trim point.
+- **Deliberation preamble**: Each architect turn is prefixed with `[Last gate: {tool} {result} for task {taskId}]` sourced from `session.lastGateOutcome`, prompting the architect to identify the single next step.
+- **Low-capability model detection**: `LOW_CAPABILITY_MODELS` constant (substrings `mini`, `nano`, `small`, `free`) and `isLowCapabilityModel(modelId)` helper, which performs a case-insensitive substring match against the model ID, added to `constants.ts`.
+- **Behavioral guidance markers**: Three `<!-- BEHAVIORAL_GUIDANCE_START --> … <!-- BEHAVIORAL_GUIDANCE_END -->` pairs wrap the BATCHING DETECTION, ARCHITECT CODING BOUNDARIES, and QA gate behavioral sections in the architect prompt.
+- **Tier-based prompt trimming**: When `session.activeModel` matches `isLowCapabilityModel()`, the behavioral guidance blocks are stripped from the architect prompt and replaced with `[Enforcement: programmatic gates active]`. Programmatic enforcement substitutes for verbose prompt instructions on smaller models.
+#### Phase 5 — Structural Scope Declaration (`declare_scope`)
+New architect-only tool and supporting runtime enforcement:
+- **`declare_scope` tool**: Pre-declares which files the coder is allowed to modify for a given task. Input: `{ taskId, files, whitelist?, working_directory? }`. Validates task ID format, plan membership, and non-`complete` state. On success, sets `session.declaredCoderScope`. Architect-only.
+- **Automatic scope from FILE: directives**: When a coder delegation is detected, `delegation-gate.ts` extracts FILE: directive values and stores them as `session.declaredCoderScope` automatically — no explicit `declare_scope` call required.
+- **Scope containment tracking**: `guardrails.ts` appends every file the architect writes to `session.modifiedFilesThisCoderTask`. On coder delegation start, the list resets to `[]`.
+- **Violation detection**: After a coder task completes, `toolAfter` compares `modifiedFilesThisCoderTask` against `declaredCoderScope`. If >2 files are outside the declared scope, `session.lastScopeViolation` is set. The next architect message receives a scope violation warning.
+- **`isInDeclaredScope(filePath, scopeEntries)`**: Module-level helper using `path.resolve()` + `path.relative()` for proper directory containment (not string matching).
 ### v6.13.2 — Pipeline Enforcement
 This release adds enforcement-layer tooling and self-healing guardrails:

package/dist/background/plan-sync-worker.d.ts CHANGED Viewed

@@ -110,6 +110,11 @@ export declare class PlanSyncWorker {
      * to prevent callback errors from affecting worker stability
      */
     private safeCallback;
+    /**
+     * Advisory: check for unauthorized writes to plan.json outside of save_plan/savePlan
+     * Logs a warning if plan.json appears to have been modified after the write marker
+     */
+    private checkForUnauthorizedWrite;
     /**
      * Wrap a promise with a timeout
      */

package/dist/cli/index.js CHANGED Viewed

@@ -16231,6 +16231,17 @@ ${markdown}`;
       unlinkSync(mdTempPath);
     } catch {}
   }
+  try {
+    const markerPath = path6.join(swarmDir, ".plan-write-marker");
+    const tasksCount = validated.phases.reduce((sum, phase) => sum + phase.tasks.length, 0);
+    const marker = JSON.stringify({
+      source: "plan_manager",
+      timestamp: new Date().toISOString(),
+      phases_count: validated.phases.length,
+      tasks_count: tasksCount
+    });
+    await Bun.write(markerPath, marker);
+  } catch {}
 }
 function derivePlanMarkdown(plan) {
   const statusMap = {
@@ -16603,7 +16614,8 @@ var TOOL_NAMES = [
   "phase_complete",
   "save_plan",
   "update_task_status",
-  "write_retro"
+  "write_retro",
+  "declare_scope"
 ];
 var TOOL_NAME_SET = new Set(TOOL_NAMES);
@@ -16643,7 +16655,8 @@ var AGENT_TOOL_MAP = {
     "test_runner",
     "todo_extract",
     "update_task_status",
-    "write_retro"
+    "write_retro",
+    "declare_scope"
   ],
   explorer: [
     "complexity_hotspots",
@@ -16863,11 +16876,11 @@ var PhaseCompleteConfigSchema = exports_external.object({
 });
 var SummaryConfigSchema = exports_external.object({
   enabled: exports_external.boolean().default(true),
-  threshold_bytes: exports_external.number().min(1024).max(1048576).default(20480),
+  threshold_bytes: exports_external.number().min(1024).max(1048576).default(102400),
   max_summary_chars: exports_external.number().min(100).max(5000).default(1000),
   max_stored_bytes: exports_external.number().min(10240).max(104857600).default(10485760),
   retention_days: exports_external.number().min(1).max(365).default(7),
-  exempt_tools: exports_external.array(exports_external.string()).default(["retrieve_summary", "task"])
+  exempt_tools: exports_external.array(exports_external.string()).default(["retrieve_summary", "task", "read"])
 });
 var ReviewPassesConfigSchema = exports_external.object({
   always_security_review: exports_external.boolean().default(false),

package/dist/config/constants.d.ts CHANGED Viewed

@@ -21,3 +21,17 @@ export declare const DEFAULT_SCORING_CONFIG: ScoringConfig;
  * @returns The effective scoring configuration with all defaults applied
  */
 export declare function resolveScoringConfig(userConfig?: ScoringConfig): ScoringConfig;
+/**
+ * Model ID substrings that identify low-capability models.
+ * If a model's ID contains any of these substrings (case-insensitive),
+ * it is considered a low-capability model.
+ */
+export declare const LOW_CAPABILITY_MODELS: readonly ["mini", "nano", "small", "free"];
+/**
+ * Returns true if the given modelId contains any LOW_CAPABILITY_MODELS substring
+ * (case-insensitive comparison).
+ *
+ * @param modelId - The model ID to check
+ * @returns true if the model is considered low capability, false otherwise
+ */
+export declare function isLowCapabilityModel(modelId: string): boolean;