npm - @jaguilar87/gaia - Versions diffs - 5.0.8 → 5.0.9 - Mend

@jaguilar87/gaia 5.0.8 → 5.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (89) hide show

package/.claude-plugin/marketplace.json +2 -2
package/.claude-plugin/plugin.json +1 -1
package/CHANGELOG.md +11 -0
package/bin/README.md +6 -1
package/bin/cli/approvals.py +341 -238
package/bin/cli/brief.py +13 -0
package/bin/cli/doctor.py +1 -1
package/dist/gaia-ops/.claude-plugin/plugin.json +1 -1
package/dist/gaia-ops/hooks/adapters/claude_code.py +19 -85
package/dist/gaia-ops/hooks/modules/context/context_injector.py +23 -7
package/dist/gaia-ops/hooks/modules/events/event_writer.py +63 -96
package/dist/gaia-ops/hooks/modules/security/__init__.py +0 -2
package/dist/gaia-ops/hooks/modules/security/approval_cleanup.py +238 -69
package/dist/gaia-ops/hooks/modules/security/approval_grants.py +506 -1103
package/dist/gaia-ops/hooks/modules/security/mutative_verbs.py +24 -1
package/dist/gaia-ops/hooks/modules/session/pending_scanner.py +150 -90
package/dist/gaia-ops/hooks/modules/session/session_manifest.py +257 -28
package/dist/gaia-ops/hooks/post_compact.py +1 -0
package/dist/gaia-ops/hooks/pre_compact.py +1 -0
package/dist/gaia-ops/hooks/user_prompt_submit.py +20 -0
package/dist/gaia-ops/skills/agent-approval-protocol/SKILL.md +27 -7
package/dist/gaia-ops/skills/agent-approval-protocol/reference.md +11 -6
package/dist/gaia-ops/skills/gaia-patterns/reference.md +2 -2
package/dist/gaia-ops/skills/orchestrator-present-approval/SKILL.md +69 -28
package/dist/gaia-ops/skills/orchestrator-present-approval/reference.md +16 -3
package/dist/gaia-ops/skills/orchestrator-present-approval/template.md +10 -5
package/dist/gaia-ops/skills/pending-approvals/SKILL.md +16 -11
package/dist/gaia-ops/skills/subagent-request-approval/SKILL.md +20 -6
package/dist/gaia-ops/skills/subagent-request-approval/reference.md +23 -15
package/dist/gaia-ops/tools/migration/README.md +10 -12
package/dist/gaia-ops/tools/scan/orchestrator.py +194 -10
package/dist/gaia-ops/tools/scan/tests/test_integration.py +1 -2
package/dist/gaia-security/.claude-plugin/plugin.json +1 -1
package/dist/gaia-security/hooks/adapters/claude_code.py +19 -85
package/dist/gaia-security/hooks/modules/context/context_injector.py +23 -7
package/dist/gaia-security/hooks/modules/events/event_writer.py +63 -96
package/dist/gaia-security/hooks/modules/security/__init__.py +0 -2
package/dist/gaia-security/hooks/modules/security/approval_cleanup.py +238 -69
package/dist/gaia-security/hooks/modules/security/approval_grants.py +506 -1103
package/dist/gaia-security/hooks/modules/security/mutative_verbs.py +24 -1
package/dist/gaia-security/hooks/modules/session/pending_scanner.py +150 -90
package/dist/gaia-security/hooks/modules/session/session_manifest.py +257 -28
package/dist/gaia-security/hooks/user_prompt_submit.py +20 -0
package/gaia/approvals/store.py +87 -9
package/gaia/store/schema.sql +38 -1
package/gaia/store/writer.py +400 -0
package/hooks/adapters/claude_code.py +19 -85
package/hooks/elicitation_result.py +20 -75
package/hooks/modules/context/context_injector.py +23 -7
package/hooks/modules/events/event_writer.py +63 -96
package/hooks/modules/security/__init__.py +0 -2
package/hooks/modules/security/approval_cleanup.py +238 -69
package/hooks/modules/security/approval_grants.py +506 -1103
package/hooks/modules/security/mutative_verbs.py +24 -1
package/hooks/modules/session/pending_scanner.py +150 -90
package/hooks/modules/session/session_manifest.py +257 -28
package/hooks/post_compact.py +1 -0
package/hooks/pre_compact.py +1 -0
package/hooks/user_prompt_submit.py +20 -0
package/package.json +1 -1
package/pyproject.toml +1 -1
package/scripts/bootstrap_database.sh +66 -17
package/scripts/migrations/README.md +26 -14
package/scripts/migrations/schema.checksum +2 -2
package/scripts/migrations/v18_to_v19.sql +36 -0
package/scripts/migrations/v19_to_v20.sql +20 -0
package/skills/agent-approval-protocol/SKILL.md +27 -7
package/skills/agent-approval-protocol/reference.md +11 -6
package/skills/gaia-patterns/reference.md +2 -2
package/skills/orchestrator-present-approval/SKILL.md +69 -28
package/skills/orchestrator-present-approval/reference.md +16 -3
package/skills/orchestrator-present-approval/template.md +10 -5
package/skills/pending-approvals/SKILL.md +16 -11
package/skills/subagent-request-approval/SKILL.md +20 -6
package/skills/subagent-request-approval/reference.md +23 -15
package/tools/migration/README.md +10 -12
package/tools/scan/orchestrator.py +194 -10
package/tools/scan/tests/test_integration.py +1 -2
package/bin/cli/plans.py +0 -517
package/dist/gaia-ops/tools/context/deep_merge.py +0 -159
package/dist/gaia-ops/tools/migration/migrate_04_harness_events.py +0 -132
package/dist/gaia-ops/tools/migration/migrate_04_harness_events.sh +0 -23
package/dist/gaia-ops/tools/scan/merge.py +0 -213
package/dist/gaia-ops/tools/scan/tests/test_merge.py +0 -269
package/tools/context/deep_merge.py +0 -159
package/tools/migration/migrate_04_harness_events.py +0 -132
package/tools/migration/migrate_04_harness_events.sh +0 -23
package/tools/scan/merge.py +0 -213
package/tools/scan/tests/test_merge.py +0 -269

package/skills/pending-approvals/SKILL.md CHANGED Viewed

@@ -37,7 +37,7 @@ report "rejected" when nothing actually changed.
 | `gaia approvals list` | DB grants + filesystem pendings | `cmd_list` (mixed) |
 | `gaia approvals reject NONCE` | filesystem only | `reject_pending` in `hooks/modules/security/approval_grants.py` |
 | `gaia approvals reject-all` | filesystem only | loops `reject_pending` |
-| `gaia approvals clean` | filesystem only | `cleanup_expired_grants` |
+| `gaia approvals clean` | DB (cross-session stale pendings) + filesystem | `cmd_clean` in `bin/cli/approvals.py`: calls `store.list_pending(all_sessions=True)`, transitions every pending older than `DEFAULT_PENDING_TTL_MINUTES` (24 h) to `revoked` via `store.revoke()`, then calls `cleanup_expired_grants` for filesystem files |
 The practical consequence: `revoke` is the DB-aware single-id verb; `reject` and
 `reject-all` only touch the legacy filesystem queue. If you need to mark a DB
@@ -105,15 +105,19 @@ Offer bulk cleanup when the user says "limpia todos los pendings", "borra los
 pendientes", or when SessionStart surfaces 5+ orphaned pendings the user has
 not engaged with.
-- `gaia approvals reject-all` -- bulk reject across the **filesystem** queue.
-  Returns "0 rejected" when the queue is empty.
-- `gaia approvals clean` -- removes expired/stale **filesystem** files.
+- `gaia approvals reject-all` -- bulk soft-reject across the **filesystem** queue.
+  Returns "0 rejected" when the queue is empty. Does not touch DB rows.
+- `gaia approvals clean` -- the first-class cross-session bulk drain for stale
+  DB pendings: `cmd_clean` calls `store.list_pending(all_sessions=True)` and
+  transitions every pending older than 24 h (`DEFAULT_PENDING_TTL_MINUTES`) to
+  `revoked` via `store.revoke()`, then runs `cleanup_expired_grants` to clean
+  expired filesystem grant files. Runs without a T3 prompt (consent-reducing,
+  listed in `CONSENT_REDUCING_SUBCOMMAND_EXCEPTIONS`). Use this when
+  `gaia approvals pending --all-sessions` shows a backlog of stale rows.
-There is no first-class bulk-revoke for the DB queue. If `gaia approvals
-pending --all-sessions` shows rows that need clearing, either revoke each by id
-or call `store.revoke()` in a short Python loop. Do not report "bulk cleanup
-done" after `reject-all` if the DB queue still has pending rows -- check
-`gaia approvals pending --all-sessions` to confirm.
+Do not report "bulk cleanup done" after `reject-all` alone -- it only clears
+the filesystem queue. Run `gaia approvals clean` to drain the DB backlog, then
+confirm with `gaia approvals pending --all-sessions`.
 Do not offer `reject-all` when there are active same-session pendings the user
 may still want to approve.
@@ -123,8 +127,9 @@ may still want to approve.
 - Approving without showing the exact COMANDO -- the user consents on the
   verbatim string, not a summary. The full presentation discipline lives in
   `orchestrator-present-approval`; this skill does not restate it.
-- Treating `gaia approvals reject-all` as a DB cleanup -- it operates on the
-  filesystem queue only. DB rows survive the call.
+- Treating `gaia approvals reject-all` as a full cleanup -- it operates on the
+  filesystem queue only; DB rows survive the call. Use `gaia approvals clean`
+  to drain the DB backlog.
 - Reporting "rechazado" without verifying the store -- `revoke` returns
   `not_found` for filesystem-only pendings; the inverse happens for `reject` on
   DB rows. Pick the verb by store, or be ready to fall back.

package/skills/subagent-request-approval/SKILL.md CHANGED Viewed

@@ -44,9 +44,20 @@ Add an `approval_request` to your `agent_contract_handoff`, copying the hook's f
 The `approval_request` schema is canonical in `agent-approval-protocol` — relay the sealed_payload fields verbatim (the hook built them) and add `verification` (your own success criteria) + `approval_id` (the literal token from the denial). See `agent-approval-protocol/SKILL.md` for the full field list and types.
-The `approval_id` is the `P-{...}` token the orchestrator uses to find the
-`REQUESTED` row in the DB and validate the fingerprint. Fields written only in
-prose are invisible to the presentation -- the user would approve blind.
+The `approval_id` is the `P-{...}` token tying this request to its `REQUESTED`
+row in the DB. Fields written only in prose are invisible to the presentation --
+the user would approve blind.
+**What your relay is for: same-turn immediacy.** Your `approval_request` is the
+orchestrator's source only for the CURRENT turn. The orchestrator's primary
+source is the per-turn `[PENDING-APPROVALS-VERIFIED]` block injected at
+`UserPromptSubmit`, which carries every pending that has survived >= 1 turn,
+already DB-read and fingerprint-verified. But that block was built before you
+ran this turn, so a pending you mint now is not in it yet -- the orchestrator
+presents it from your relay until the next turn's block picks it up. You emit the
+same fields either way; nothing on your side changes. The orchestrator never
+dispatches a subagent to verify or derive your request -- integrity is enforced
+at grant activation, not at presentation.
 ## Non-negotiable rules
@@ -105,9 +116,12 @@ your side. What changed underneath: the minted `approval_id` is now
 (`derive_command_set_id` -> `P-<first 32 hex of sha256(canonical commands)>`),
 not a random uuid4. You do not compute or emit it (you cannot hash reliably, and
 you have nothing to attempt yet); the value is purely internal. The reason it
-matters: the orchestrator reproduces that exact id from the `command_set` you
-emitted (via `gaia approvals derive-id`), with no DB search and no cross-session
-miss. Your contract stays the same -- `command_set` of `{command, rationale}`
+matters: the content-derived id is reproducible without a uuid4 that could be
+lost across sessions. Once the minted pending has survived a turn, the
+orchestrator reads it -- with all N commands -- straight from the injected
+`[PENDING-APPROVALS-VERIFIED]` block (no DB search, no derive-dispatch); for the
+turn you mint it in, the orchestrator presents from the `command_set` in your
+relay. Your contract stays the same -- `command_set` of `{command, rationale}`
 items, no `approval_id`.
 On the user's approval, that one pending activates into a single `COMMAND_SET`

package/skills/subagent-request-approval/reference.md CHANGED Viewed

@@ -16,8 +16,12 @@ payload from the intercepted command and calls
 3. writes the `REQUESTED` event to the DB.
 The block message you receive (`[T3_BLOCKED] ...`) ends with `approval_id: P-{...}`.
-You relay that token plus the operation details; the orchestrator re-derives the
-fingerprint from the DB row.
+You relay that token plus the operation details. For the current turn the
+orchestrator presents from your relay; once the pending survives a turn it
+appears in the per-turn `[PENDING-APPROVALS-VERIFIED]` block, already
+fingerprint-verified by the hook. Payload integrity is enforced at grant
+activation (`verify_fingerprint`), so the orchestrator never dispatches to
+verify or derive your request.
 Source: `bash_validator._build_sealed_payload()`, the subagent block path in
 `bash_validator._validate_single_command()`; `gaia/approvals/store.py`
@@ -99,12 +103,14 @@ singular hook-block path (which mints `P-{uuid4hex}`), the intake derives the id
 from the command_set content via `gaia.approvals.store.derive_command_set_id()`:
 `P-<first 32 hex of sha256(canonical(post-filter command strings))>`. It then
 passes that id to `insert_requested(..., approval_id=...)` as the pending row id.
-The point is reproducibility without a DB lookup: the orchestrator holds the
-same `command_set` (you emitted it in the contract) and reproduces the EXACT id
-with `gaia approvals derive-id`, which applies the same mutative filter and the
-same canonicalization (`chain.canonical_payload`). This closes the cross-session
-miss -- a uuid4 minted at SubagentStop could not be recovered by the parent
-(Claude Code #5812), but a content-derived id needs no recovery. The id is
+The point is reproducibility without a fragile uuid4: a uuid4 minted at
+SubagentStop could not be recovered by the parent (Claude Code #5812), but a
+content-derived id needs no recovery -- the same canonicalization
+(`chain.canonical_payload`) and mutative filter always yield the same id. Once
+the minted pending survives a turn, the orchestrator reads that id (and all N
+commands) straight from the injected `[PENDING-APPROVALS-VERIFIED]` block -- no
+DB lookup and no `gaia approvals derive-id` dispatch; for the mint turn it
+presents from the `command_set` in your relay. The id is
 **order-sensitive** (the consume side matches positionally) and **content-only**
 (rationale/session/agent are not folded in, so both sides agree from the command
 list alone). Idempotency follows the existing fingerprint dedup: two identical
@@ -154,15 +160,17 @@ single-use within the 60-minute window.
 Always `plan_status: "APPROVAL_REQUEST"`. The presence of `approval_id` tells the
 orchestrator which path:
-- **With `approval_id`** -- the hook blocked a single command; orchestrator
-  validates the fingerprint and activates the single-use semantic grant on user
-  approval.
+- **With `approval_id`** -- the hook blocked a single command; the orchestrator
+  presents from your relay (current turn) or the injected
+  `[PENDING-APPROVALS-VERIFIED]` block (later turns), and the single-use semantic
+  grant activates on user approval (fingerprint checked at activation).
 - **Without `approval_id`, with a `command_set` of >= 2 items** -- plan-first
   batch. The SubagentStop intake processor mints ONE pending `COMMAND_SET` with a
-  **content-derived** id (`derive_command_set_id`), and the orchestrator
-  reproduces that exact id from the command_set via `gaia approvals derive-id`
-  (no DB search) before presenting the single approval (N commands, one nonce).
-  See "Batch / COMMAND_SET -- wired" above.
+  **content-derived** id (`derive_command_set_id`). The orchestrator reads that
+  id and the N commands from the injected `[PENDING-APPROVALS-VERIFIED]` block
+  (no derive-dispatch), or, for the mint turn, from the `command_set` in your
+  relay, then presents the single approval (N commands, one nonce). See
+  "Batch / COMMAND_SET -- wired" above.
 - **Without `approval_id` and without a multi-item `command_set`** -- plan-first
   single (you are presenting one T3 plan before attempting); the orchestrator
   gates on user consent before any execution.

package/tools/migration/README.md CHANGED Viewed

@@ -19,7 +19,12 @@ desde el filesystem hacia `~/.gaia/gaia.db`.
 | 01 | Episodes | `.claude/project-context/episodic-memory/episodes.jsonl` | `episodes` (+`episodes_fts`) |
 | 02 | Memory | `~/.claude/projects/-home-jorge-ws-me/memory/*.md` | `memory` (+`memory_fts`) |
 | 03 | Context contracts | `.claude/project-context/project-context.json` | `context_contracts` |
-| 04 | Harness events | `.claude/events/events.jsonl` | `harness_events` |
+| 04 | Harness events | ~~`.claude/events/events.jsonl`~~ (ELIMINADO) | `harness_events` |
+> **Dominio 04 completado y eliminado.** `events.jsonl` y su archivo `.lock` fueron
+> retirados. El hook `event_writer` escribe directamente a `harness_events` en la DB.
+> El script `migrate_04_harness_events.py` y su wrapper `.sh` fueron borrados una vez
+> completada la absorción. Los datos vivos se leen desde `harness_events` en `~/.gaia/gaia.db`.
 Cada dominio tiene 2 archivos:
@@ -37,8 +42,8 @@ bootstrap.sh                             # crea/inicializa ~/.gaia/gaia.db con s
 ./migrate_01_episodes.sh                 # ~50-80 MB de SQL, batch 80
 ./migrate_02_memory.sh                   # 28 .md (MEMORY.md excluido)
 ./migrate_03_context_contracts.sh        # 12 secciones
-./migrate_04_harness_events.sh           # ~5-10 MB de SQL, batch 200
-./validate.sh                            # 5 aserciones read-only
+# migrate_04_harness_events.sh ELIMINADO — dominio 04 completado; eventos en DB-canonical
+./validate.sh                            # aserciones read-only (V4 eliminada junto con 04)
 ```
 Cada script imprime `[migrate_NN] OK` al terminar.
@@ -50,14 +55,7 @@ Cada script imprime `[migrate_NN] OK` al terminar.
 | 01 episodes | `INSERT OR IGNORE` (PK = `episode_id`) | sí |
 | 02 memory | `INSERT OR IGNORE` (PK = `(project, name)`) | sí |
 | 03 context_contracts | `INSERT OR IGNORE` (PK = `(project, section_name)`) | sí |
-| 04 harness_events | `INSERT` simple (sin PK natural) | **no — duplica filas** |
-Para re-ejecutar 04 limpiamente:
-```
-sqlite3 ~/.gaia/gaia.db "DELETE FROM harness_events WHERE project='me';"
-./migrate_04_harness_events.sh
-```
+| 04 harness_events | N/A — tool eliminado; escritura vía `event_writer` DB-direct | N/A |
 ## Validación
@@ -68,7 +66,7 @@ sqlite3 ~/.gaia/gaia.db "DELETE FROM harness_events WHERE project='me';"
 | V1 | `COUNT(*) FROM episodes` == líneas no vacías de `episodes.jsonl` |
 | V2 | `COUNT(*) FROM memory` == archivos `.md` (excluyendo `MEMORY.md`) |
 | V3 | `COUNT(*) FROM context_contracts` == 12 |
-| V4 | `COUNT(*) FROM harness_events` == líneas no vacías de `events.jsonl` |
+| ~~V4~~ | ~~`COUNT(*) FROM harness_events` == líneas no vacías de `events.jsonl`~~ — eliminado junto con el dominio 04 |
 | V5 | `COUNT(*) FROM episodes_fts` == `COUNT(*) FROM episodes` (FTS sync) |
 Exit code: 0 si todas pasan, 1 si alguna falla.

package/tools/scan/orchestrator.py CHANGED Viewed

@@ -11,23 +11,26 @@ Pipeline:
   3. Collect and combine scanner sections (handling environment sub-keys)
   4. Cross-populate derived fields
   5. Return ScanOutput
+Section ownership rules (inlined from the retired tools/scan/merge.py):
+  Rule 1: Scanner-owned sections -> full replace
+  Rule 2: Agent-enriched sections -> never touch
+  Rule 3: Mixed sections -> selective update at sub-key level
+  Rule 4: Unknown/user-custom sections -> preserve
+  Rule 5: Metadata -> always update
 """
+import copy
 import logging
 import time
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Set
 from tools.scan import __version__ as scanner_package_version
 from tools.scan.config import ScanConfig
-from tools.scan.merge import (
-    AGENT_ENRICHED_SECTIONS,
-    collect_scanner_sections,
-    merge_context,
-)
 from tools.scan.registry import ScannerRegistry
 from tools.scan.scanners.base import BaseScanner, ScanResult
 from tools.scan.workspace import WorkspaceInfo, detect_workspace_type
@@ -35,6 +38,190 @@ from tools.scan.workspace import WorkspaceInfo, detect_workspace_type
 logger = logging.getLogger(__name__)
+# ---------------------------------------------------------------------------
+# Section ownership constants (Rule 1 / Rule 2 / Rule 3)
+# ---------------------------------------------------------------------------
+# Sections fully owned by scanners -- replaced entirely on each scan (Rule 1)
+# Top-level sections only; sub-key ownership handled separately
+SCANNER_OWNED_TOP_LEVEL: Dict[str, str] = {
+    "project_identity": "stack",
+    "stack": "stack",
+    "git": "git",
+    "infrastructure": "infrastructure",
+    "orchestration": "orchestration",
+    # "environment" is NOT listed here because it has sub-key ownership
+}
+# Sub-key ownership within the `environment` section (sub-section level ownership)
+# Maps environment sub-key -> owning scanner name
+ENVIRONMENT_SUBKEY_OWNERS: Dict[str, str] = {
+    "tools": "tools",
+    "tool_preferences": "tools",
+    "os": "environment",
+    "runtimes": "environment",
+    "env_files": "environment",
+}
+# Agent-enriched sections -- never modified by scanners (Rule 2)
+AGENT_ENRICHED_SECTIONS: frozenset = frozenset([
+    "operational_guidelines",
+    "cluster_details",
+    "infrastructure_topology",
+    "monitoring_observability",
+    "architecture_overview",
+    "gcp_services",
+    "workload_identity",
+])
+# Mixed sections with partial scanner ownership (Rule 3)
+# Maps section_name -> set of scanner-owned field names
+MIXED_SECTION_SCANNER_FIELDS: Dict[str, Set[str]] = {
+    "terraform_infrastructure": {"layout"},
+    "gitops_configuration": {"repository"},
+    "application_services": {"base_path", "services"},
+}
+# ---------------------------------------------------------------------------
+# Section collection and merge helpers
+# ---------------------------------------------------------------------------
+def collect_scanner_sections(
+    scanner_results: Dict[str, Any],
+) -> Dict[str, Any]:
+    """Collect and combine sections from all scanner results.
+    Handles the environment section specially: both `tools` and `environment`
+    scanners produce sub-keys under `environment`, so their outputs are
+    combined into a single `environment` section.
+    Args:
+        scanner_results: Mapping of scanner_name -> ScanResult (must have
+                         a `sections` attribute that is a dict).
+    Returns:
+        Combined sections dict from all scanners.
+    """
+    combined: Dict[str, Any] = {}
+    environment_parts: Dict[str, Any] = {}
+    for _scanner_name, scan_result in scanner_results.items():
+        sections = scan_result.sections if hasattr(scan_result, "sections") else {}
+        for section_name, section_data in sections.items():
+            if section_name == "environment":
+                # Merge environment sub-keys from both scanners
+                if isinstance(section_data, dict):
+                    for key, value in section_data.items():
+                        if key != "_source":
+                            environment_parts[key] = value
+            else:
+                # Non-environment sections: direct assignment (last scanner wins,
+                # but each section should have exactly one owner)
+                combined[section_name] = section_data
+    # Reassemble environment section if we got any parts
+    if environment_parts:
+        combined["environment"] = {
+            "_source": "scanner:environment+tools",
+            **environment_parts,
+        }
+    return combined
+def _merge_environment_section(
+    result: Dict[str, Any],
+    scan_sections: Dict[str, Any],
+) -> None:
+    """Merge the `environment` section with sub-key level ownership.
+    Two scanners contribute to the `environment` section:
+    - `tools` scanner owns: tools, tool_preferences
+    - `environment` scanner owns: os, runtimes, env_files
+    Each scanner's sub-keys replace their owned portion; the other scanner's
+    sub-keys are preserved. The `_source` field gets a combined tag.
+    Args:
+        result: The result dict being built (mutated in place).
+        scan_sections: Combined sections from all scanners.
+    """
+    if "environment" not in scan_sections:
+        return
+    scan_env = scan_sections["environment"]
+    if "environment" not in result:
+        result["environment"] = {}
+    env = result["environment"]
+    # Replace each sub-key based on ownership
+    for subkey in ENVIRONMENT_SUBKEY_OWNERS:
+        if subkey in scan_env:
+            env[subkey] = copy.deepcopy(scan_env[subkey])
+    # Set combined _source tag
+    env["_source"] = "scanner:environment+tools"
+def _merge_sections(
+    existing: Dict[str, Any],
+    scan_sections: Dict[str, Any],
+) -> Dict[str, Any]:
+    """Merge scanner results with existing project-context sections.
+    Applies the ownership rules to produce the final merged sections dict.
+    Called with existing={} in normal scan runs (display-only path).
+    Args:
+        existing: Current sections (may be empty when called from scan).
+        scan_sections: Combined sections from all scanners.
+    Returns:
+        Merged sections dict. The merge is deterministic: same inputs always
+        produce the same output.
+    """
+    result = copy.deepcopy(existing)
+    # --- Rule 1: Scanner-owned top-level sections -> full replace ---
+    for section_name in SCANNER_OWNED_TOP_LEVEL:
+        if section_name in scan_sections:
+            result[section_name] = copy.deepcopy(scan_sections[section_name])
+    # --- Sub-section level ownership for `environment` ---
+    _merge_environment_section(result, scan_sections)
+    # --- Rule 2: Agent-enriched sections -> never touch ---
+    # These are already in `result` from the deepcopy of `existing`.
+    # (No action needed -- they are preserved by the deepcopy.)
+    # --- Rule 3: Mixed sections -> selective update ---
+    for section_name, scanner_fields in MIXED_SECTION_SCANNER_FIELDS.items():
+        if section_name in scan_sections:
+            scan_data = scan_sections[section_name]
+            if section_name not in result:
+                result[section_name] = {}
+            # Only update scanner-owned fields; preserve agent fields
+            for field_name in scanner_fields:
+                if field_name in scan_data:
+                    result[section_name][field_name] = copy.deepcopy(
+                        scan_data[field_name]
+                    )
+    # --- Rule 4: Unknown/user-custom sections -> preserve ---
+    # Any section in `existing` not covered above is preserved by the deepcopy.
+    # We do NOT add new unknown sections from scan_sections.
+    return result
+# ---------------------------------------------------------------------------
+# ScanOutput dataclass
+# ---------------------------------------------------------------------------
 @dataclass(frozen=True)
 class ScanOutput:
     """Aggregated output from all scanners.
@@ -187,11 +374,9 @@ class ScanOrchestrator:
         scan_sections = collect_scanner_sections(scanner_results)
         # Merge with empty existing context (no JSON persistence)
-        section_owners = self.registry.get_section_owners()
-        merged_sections = merge_context(
+        merged_sections = _merge_sections(
             existing={},
             scan_sections=scan_sections,
-            section_owners=section_owners,
         )
         # Determine which sections were updated vs preserved
@@ -210,7 +395,6 @@ class ScanOrchestrator:
         self._cross_populate_monorepo(merged_sections)
         # Remove empty {} placeholders for agent-enriched and mixed sections
-        from tools.scan.merge import MIXED_SECTION_SCANNER_FIELDS
         remove_if_empty = (
             AGENT_ENRICHED_SECTIONS
             | frozenset(MIXED_SECTION_SCANNER_FIELDS.keys())

package/tools/scan/tests/test_integration.py CHANGED Viewed

@@ -23,8 +23,7 @@ from unittest.mock import patch
 import pytest
 from tools.scan.config import ScanConfig
-from tools.scan.merge import AGENT_ENRICHED_SECTIONS
-from tools.scan.orchestrator import ScanOrchestrator, ScanOutput
+from tools.scan.orchestrator import AGENT_ENRICHED_SECTIONS, ScanOrchestrator, ScanOutput
 from tools.scan.registry import ScannerRegistry
 from tools.scan.scanners.base import BaseScanner, ScanResult
 from tools.scan.tests.conftest import create_git_dir