opencode-swarm 6.20.3 → 6.21.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -370,6 +370,17 @@ Every agent runs inside a circuit breaker that kills runaway behavior before it
370
370
 
371
371
  Limits reset per task. A coder working on Task 2.3 is not penalized for tool calls made during Task 2.2.
372
372
 
373
+ #### Architect Self-Coding Block
374
+
375
+ If the architect writes files directly instead of delegating to the coder, enforcement escalates with the number of writes:
376
+
377
+ | Write count | Behavior |
378
+ |:-----------:|----------|
379
+ | 1–2 | Warning injected into next architect message |
380
+ | ≥ 3 | `Error` thrown with `SELF_CODING_BLOCK` — identifies file paths written and count |
381
+
382
+ The counter resets only when a coder delegation is dispatched. The block at ≥ 3 writes is hard enforcement — not advisory.
383
+
373
384
  Per-agent overrides:
374
385
 
375
386
  ```json
@@ -677,6 +688,24 @@ When truncation is active, a footer is appended:
677
688
  [output truncated to {maxLines} lines – use `tool_output.per_tool.<tool>` to adjust]
678
689
  ```
679
690
 
691
+ ## Summarization Settings
692
+
693
+ Control how tool outputs are summarized for LLM context.
694
+
695
+ ```json
696
+ {
697
+ "summaries": {
698
+ "threshold_bytes": 102400,
699
+ "exempt_tools": ["retrieve_summary", "task", "read"]
700
+ }
701
+ }
702
+ ```
703
+
704
+ - **threshold_bytes** – Output size threshold in bytes before summarization is triggered (default 102400 bytes = 100 KiB).
705
+ - **exempt_tools** – Tools whose outputs are never summarized. Defaults to `["retrieve_summary", "task", "read"]` to prevent re-summarization loops.
706
+
707
+ > **Note:** The `retrieve_summary` tool supports paginated retrieval via `offset` and `limit` parameters to fetch large summarized outputs in chunks.
708
+
680
709
  ---
681
710
 
682
711
  ### Disabling Agents
@@ -706,7 +735,7 @@ When truncation is active, a footer is appended:
706
735
  | `/swarm evidence [task]` | Evidence bundles for a task or all tasks |
707
736
  | `/swarm archive [--dry-run]` | Archive old evidence with retention policy |
708
737
  | `/swarm benchmark` | Performance benchmarks |
709
- | `/swarm retrieve [id]` | Retrieve auto-summarized tool outputs |
738
+ | `/swarm retrieve [id]` | Retrieve auto-summarized tool outputs (supports offset/limit pagination) |
710
739
  | `/swarm reset --confirm` | Clear swarm state files |
711
740
  | `/swarm preflight` | Run phase preflight checks |
712
741
  | `/swarm config doctor [--fix]` | Config validation with optional auto-fix |
@@ -727,7 +756,7 @@ Swarm limits which tools each agent can access based on their role. This prevent
727
756
 
728
757
  | Agent | Tools | Count | Rationale |
729
758
  |-------|-------|:---:|-----------|
730
- | **architect** | All 17 tools | 17 | Orchestrator needs full visibility |
759
+ | **architect** | All 21 tools | 21 | Orchestrator needs full visibility |
731
760
  | **reviewer** | diff, imports, lint, pkg_audit, pre_check_batch, secretscan, symbols, complexity_hotspots, retrieve_summary, extract_code_blocks, test_runner | 11 | Security-focused QA |
732
761
  | **coder** | diff, imports, lint, symbols, extract_code_blocks, retrieve_summary | 6 | Write-focused, minimal read tools |
733
762
  | **test_engineer** | test_runner, diff, symbols, extract_code_blocks, retrieve_summary, imports, complexity_hotspots, pkg_audit | 8 | Testing and verification |
@@ -811,11 +840,62 @@ The following tools can be assigned to agents via overrides:
811
840
  | `todo_extract` | Extract TODO/FIXME comments |
812
841
  | `write_retro` | Document phase retrospectives via the phase_complete workflow; capture lessons learned |
813
842
  | `phase_complete` | Enforces phase completion, verifies required agents, logs events, resets state |
843
+ | `declare_scope` | Pre-declare the file scope for the next coder delegation (architect-only); violations trigger warnings |
814
844
 
815
845
  ---
816
846
 
817
847
  ## Recent Changes
818
848
 
849
+ ### v6.21 — Gate Enforcement Hardening
850
+
851
+ This release replaces soft advisory warnings with hard runtime blocks and adds structural compliance tooling for all model tiers.
852
+
853
+ #### Phase 1 — P0 Bug Fixes: Hard Blocks Replace Soft Warnings
854
+
855
+ - **`qaSkipCount` reset fixed**: The skip-detection counter in `delegation-gate.ts` now resets only when **both** reviewer **and** test_engineer have been seen since the last coder entry — not when either one runs alone.
856
+ - **`update_task_status` reviewer gate check**: Accepting `status='completed'` now validates that the reviewer gate is present in the session's `gateLog` for the given task. Missing reviewer returns a structured error naming the absent gate.
857
+ - **Architect self-coding hard block**: `architectWriteCount ≥ 3` now throws an `Error` with message `SELF_CODING_BLOCK` (previously a warning only). Counts 1–2 remain advisory warnings. Counter resets on coder delegation.
858
+
859
+ #### Phase 2 — Per-Task State Machine
860
+
861
+ Every task now has a tracked workflow state in the session:
862
+
863
+ | State | Meaning |
864
+ |-------|---------|
865
+ | `idle` | Task not started |
866
+ | `coder_delegated` | Coder has received the delegation |
867
+ | `pre_check_passed` | Automated gates (lint, SAST, secrets, quality) passed |
868
+ | `reviewer_run` | Reviewer agent has returned a verdict |
869
+ | `tests_run` | Test engineer has completed (verification + adversarial) |
870
+ | `complete` | `update_task_status` accepted the `completed` transition |
871
+
872
+ Transitions are forward-only. `advanceTaskState()` throws `INVALID_TASK_STATE_TRANSITION` if an illegal jump is attempted. `getTaskState()` returns `'idle'` for unknown tasks.
873
+
874
+ `session.lastGateOutcome` records the most recent gate result: `{ gate, taskId, passed, timestamp }`.
875
+
876
+ #### Phase 3 — State Machine Integration
877
+
878
+ - `update_task_status` now uses the state machine (not a raw `gateLog.has()` check): `status='completed'` is rejected unless the task is in `'tests_run'` or `'complete'` state.
879
+ - `delegation-gate.ts` protocol-violation check additionally verifies that the prior task's state has advanced past `'coder_delegated'` before allowing a new coder delegation.
880
+
881
+ #### Phase 4 — Context Engineering
882
+
883
+ - **Progressive task disclosure**: When >5 tasks are visible in the last user message, `delegation-gate.ts` trims to the current task ± a context window. A `[Task window: showing N of M tasks]` comment marks the trim point.
884
+ - **Deliberation preamble**: Each architect turn is prefixed with `[Last gate: {tool} {result} for task {taskId}]` sourced from `session.lastGateOutcome`, prompting the architect to identify the single next step.
885
+ - **Low-capability model detection**: `LOW_CAPABILITY_MODELS` constant (matches substrings `mini`, `nano`, `small`, `free`, case-insensitively) and `isLowCapabilityModel(modelId)` helper added to `constants.ts`.
886
+ - **Behavioral guidance markers**: Three `<!-- BEHAVIORAL_GUIDANCE_START --> … <!-- BEHAVIORAL_GUIDANCE_END -->` pairs wrap the BATCHING DETECTION, ARCHITECT CODING BOUNDARIES, and QA gate behavioral sections in the architect prompt.
887
+ - **Tier-based prompt trimming**: When `session.activeModel` matches `isLowCapabilityModel()`, the behavioral guidance blocks are stripped from the architect prompt and replaced with `[Enforcement: programmatic gates active]`. Programmatic enforcement substitutes for verbose prompt instructions on smaller models.
888
+
889
+ #### Phase 5 — Structural Scope Declaration (`declare_scope`)
890
+
891
+ New architect-only tool and supporting runtime enforcement:
892
+
893
+ - **`declare_scope` tool**: Pre-declares which files the coder is allowed to modify for a given task. Input: `{ taskId, files, whitelist?, working_directory? }`. Validates task ID format, plan membership, and non-`complete` state. On success, sets `session.declaredCoderScope`. Architect-only.
894
+ - **Automatic scope from FILE: directives**: When a coder delegation is detected, `delegation-gate.ts` extracts FILE: directive values and stores them as `session.declaredCoderScope` automatically — no explicit `declare_scope` call required.
895
+ - **Scope containment tracking**: `guardrails.ts` appends every file the architect writes to `session.modifiedFilesThisCoderTask`. On coder delegation start, the list resets to `[]`.
896
+ - **Violation detection**: After a coder task completes, `toolAfter` compares `modifiedFilesThisCoderTask` against `declaredCoderScope`. If >2 files are outside the declared scope, `session.lastScopeViolation` is set. The next architect message receives a scope violation warning.
897
+ - **`isInDeclaredScope(filePath, scopeEntries)`**: Module-level helper using `path.resolve()` + `path.relative()` for proper directory containment (not string matching).
898
+
819
899
  ### v6.13.2 — Pipeline Enforcement
820
900
 
821
901
  This release adds enforcement-layer tooling and self-healing guardrails:
@@ -110,6 +110,11 @@ export declare class PlanSyncWorker {
110
110
  * to prevent callback errors from affecting worker stability
111
111
  */
112
112
  private safeCallback;
113
+ /**
114
+ * Advisory: check for unauthorized writes to plan.json outside of save_plan/savePlan
115
+ * Logs a warning if plan.json appears to have been modified after the write marker was written.
116
+ */
117
+ private checkForUnauthorizedWrite;
113
118
  /**
114
119
  * Wrap a promise with a timeout
115
120
  */
package/dist/cli/index.js CHANGED
@@ -16231,6 +16231,17 @@ ${markdown}`;
16231
16231
  unlinkSync(mdTempPath);
16232
16232
  } catch {}
16233
16233
  }
16234
+ try {
16235
+ const markerPath = path6.join(swarmDir, ".plan-write-marker");
16236
+ const tasksCount = validated.phases.reduce((sum, phase) => sum + phase.tasks.length, 0);
16237
+ const marker = JSON.stringify({
16238
+ source: "plan_manager",
16239
+ timestamp: new Date().toISOString(),
16240
+ phases_count: validated.phases.length,
16241
+ tasks_count: tasksCount
16242
+ });
16243
+ await Bun.write(markerPath, marker);
16244
+ } catch {}
16234
16245
  }
16235
16246
  function derivePlanMarkdown(plan) {
16236
16247
  const statusMap = {
@@ -16603,7 +16614,8 @@ var TOOL_NAMES = [
16603
16614
  "phase_complete",
16604
16615
  "save_plan",
16605
16616
  "update_task_status",
16606
- "write_retro"
16617
+ "write_retro",
16618
+ "declare_scope"
16607
16619
  ];
16608
16620
  var TOOL_NAME_SET = new Set(TOOL_NAMES);
16609
16621
 
@@ -16643,7 +16655,8 @@ var AGENT_TOOL_MAP = {
16643
16655
  "test_runner",
16644
16656
  "todo_extract",
16645
16657
  "update_task_status",
16646
- "write_retro"
16658
+ "write_retro",
16659
+ "declare_scope"
16647
16660
  ],
16648
16661
  explorer: [
16649
16662
  "complexity_hotspots",
@@ -16863,11 +16876,11 @@ var PhaseCompleteConfigSchema = exports_external.object({
16863
16876
  });
16864
16877
  var SummaryConfigSchema = exports_external.object({
16865
16878
  enabled: exports_external.boolean().default(true),
16866
- threshold_bytes: exports_external.number().min(1024).max(1048576).default(20480),
16879
+ threshold_bytes: exports_external.number().min(1024).max(1048576).default(102400),
16867
16880
  max_summary_chars: exports_external.number().min(100).max(5000).default(1000),
16868
16881
  max_stored_bytes: exports_external.number().min(10240).max(104857600).default(10485760),
16869
16882
  retention_days: exports_external.number().min(1).max(365).default(7),
16870
- exempt_tools: exports_external.array(exports_external.string()).default(["retrieve_summary", "task"])
16883
+ exempt_tools: exports_external.array(exports_external.string()).default(["retrieve_summary", "task", "read"])
16871
16884
  });
16872
16885
  var ReviewPassesConfigSchema = exports_external.object({
16873
16886
  always_security_review: exports_external.boolean().default(false),
@@ -21,3 +21,17 @@ export declare const DEFAULT_SCORING_CONFIG: ScoringConfig;
21
21
  * @returns The effective scoring configuration with all defaults applied
22
22
  */
23
23
  export declare function resolveScoringConfig(userConfig?: ScoringConfig): ScoringConfig;
24
+ /**
25
+ * Model ID substrings that identify low-capability models.
26
+ * If a model's ID contains any of these substrings (case-insensitive),
27
+ * it is considered a low-capability model.
28
+ */
29
+ export declare const LOW_CAPABILITY_MODELS: readonly ["mini", "nano", "small", "free"];
30
+ /**
31
+ * Returns true if the given modelId contains any LOW_CAPABILITY_MODELS substring
32
+ * (case-insensitive comparison).
33
+ *
34
+ * @param modelId - The model ID to check
35
+ * @returns true if the model is considered low capability, false otherwise
36
+ */
37
+ export declare function isLowCapabilityModel(modelId: string): boolean;