@jaguilar87/gaia 5.0.8 → 5.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/CHANGELOG.md +11 -0
  4. package/bin/README.md +6 -1
  5. package/bin/cli/approvals.py +341 -238
  6. package/bin/cli/brief.py +13 -0
  7. package/bin/cli/doctor.py +1 -1
  8. package/dist/gaia-ops/.claude-plugin/plugin.json +1 -1
  9. package/dist/gaia-ops/hooks/adapters/claude_code.py +19 -85
  10. package/dist/gaia-ops/hooks/modules/context/context_injector.py +23 -7
  11. package/dist/gaia-ops/hooks/modules/events/event_writer.py +63 -96
  12. package/dist/gaia-ops/hooks/modules/security/__init__.py +0 -2
  13. package/dist/gaia-ops/hooks/modules/security/approval_cleanup.py +238 -69
  14. package/dist/gaia-ops/hooks/modules/security/approval_grants.py +506 -1103
  15. package/dist/gaia-ops/hooks/modules/security/mutative_verbs.py +24 -1
  16. package/dist/gaia-ops/hooks/modules/session/pending_scanner.py +150 -90
  17. package/dist/gaia-ops/hooks/modules/session/session_manifest.py +257 -28
  18. package/dist/gaia-ops/hooks/post_compact.py +1 -0
  19. package/dist/gaia-ops/hooks/pre_compact.py +1 -0
  20. package/dist/gaia-ops/hooks/user_prompt_submit.py +20 -0
  21. package/dist/gaia-ops/skills/agent-approval-protocol/SKILL.md +27 -7
  22. package/dist/gaia-ops/skills/agent-approval-protocol/reference.md +11 -6
  23. package/dist/gaia-ops/skills/gaia-patterns/reference.md +2 -2
  24. package/dist/gaia-ops/skills/orchestrator-present-approval/SKILL.md +69 -28
  25. package/dist/gaia-ops/skills/orchestrator-present-approval/reference.md +16 -3
  26. package/dist/gaia-ops/skills/orchestrator-present-approval/template.md +10 -5
  27. package/dist/gaia-ops/skills/pending-approvals/SKILL.md +16 -11
  28. package/dist/gaia-ops/skills/subagent-request-approval/SKILL.md +20 -6
  29. package/dist/gaia-ops/skills/subagent-request-approval/reference.md +23 -15
  30. package/dist/gaia-ops/tools/migration/README.md +10 -12
  31. package/dist/gaia-ops/tools/scan/orchestrator.py +194 -10
  32. package/dist/gaia-ops/tools/scan/tests/test_integration.py +1 -2
  33. package/dist/gaia-security/.claude-plugin/plugin.json +1 -1
  34. package/dist/gaia-security/hooks/adapters/claude_code.py +19 -85
  35. package/dist/gaia-security/hooks/modules/context/context_injector.py +23 -7
  36. package/dist/gaia-security/hooks/modules/events/event_writer.py +63 -96
  37. package/dist/gaia-security/hooks/modules/security/__init__.py +0 -2
  38. package/dist/gaia-security/hooks/modules/security/approval_cleanup.py +238 -69
  39. package/dist/gaia-security/hooks/modules/security/approval_grants.py +506 -1103
  40. package/dist/gaia-security/hooks/modules/security/mutative_verbs.py +24 -1
  41. package/dist/gaia-security/hooks/modules/session/pending_scanner.py +150 -90
  42. package/dist/gaia-security/hooks/modules/session/session_manifest.py +257 -28
  43. package/dist/gaia-security/hooks/user_prompt_submit.py +20 -0
  44. package/gaia/approvals/store.py +87 -9
  45. package/gaia/store/schema.sql +38 -1
  46. package/gaia/store/writer.py +400 -0
  47. package/hooks/adapters/claude_code.py +19 -85
  48. package/hooks/elicitation_result.py +20 -75
  49. package/hooks/modules/context/context_injector.py +23 -7
  50. package/hooks/modules/events/event_writer.py +63 -96
  51. package/hooks/modules/security/__init__.py +0 -2
  52. package/hooks/modules/security/approval_cleanup.py +238 -69
  53. package/hooks/modules/security/approval_grants.py +506 -1103
  54. package/hooks/modules/security/mutative_verbs.py +24 -1
  55. package/hooks/modules/session/pending_scanner.py +150 -90
  56. package/hooks/modules/session/session_manifest.py +257 -28
  57. package/hooks/post_compact.py +1 -0
  58. package/hooks/pre_compact.py +1 -0
  59. package/hooks/user_prompt_submit.py +20 -0
  60. package/package.json +1 -1
  61. package/pyproject.toml +1 -1
  62. package/scripts/bootstrap_database.sh +66 -17
  63. package/scripts/migrations/README.md +26 -14
  64. package/scripts/migrations/schema.checksum +2 -2
  65. package/scripts/migrations/v18_to_v19.sql +36 -0
  66. package/scripts/migrations/v19_to_v20.sql +20 -0
  67. package/skills/agent-approval-protocol/SKILL.md +27 -7
  68. package/skills/agent-approval-protocol/reference.md +11 -6
  69. package/skills/gaia-patterns/reference.md +2 -2
  70. package/skills/orchestrator-present-approval/SKILL.md +69 -28
  71. package/skills/orchestrator-present-approval/reference.md +16 -3
  72. package/skills/orchestrator-present-approval/template.md +10 -5
  73. package/skills/pending-approvals/SKILL.md +16 -11
  74. package/skills/subagent-request-approval/SKILL.md +20 -6
  75. package/skills/subagent-request-approval/reference.md +23 -15
  76. package/tools/migration/README.md +10 -12
  77. package/tools/scan/orchestrator.py +194 -10
  78. package/tools/scan/tests/test_integration.py +1 -2
  79. package/bin/cli/plans.py +0 -517
  80. package/dist/gaia-ops/tools/context/deep_merge.py +0 -159
  81. package/dist/gaia-ops/tools/migration/migrate_04_harness_events.py +0 -132
  82. package/dist/gaia-ops/tools/migration/migrate_04_harness_events.sh +0 -23
  83. package/dist/gaia-ops/tools/scan/merge.py +0 -213
  84. package/dist/gaia-ops/tools/scan/tests/test_merge.py +0 -269
  85. package/tools/context/deep_merge.py +0 -159
  86. package/tools/migration/migrate_04_harness_events.py +0 -132
  87. package/tools/migration/migrate_04_harness_events.sh +0 -23
  88. package/tools/scan/merge.py +0 -213
  89. package/tools/scan/tests/test_merge.py +0 -269
@@ -37,7 +37,7 @@ report "rejected" when nothing actually changed.
37
37
  | `gaia approvals list` | DB grants + filesystem pendings | `cmd_list` (mixed) |
38
38
  | `gaia approvals reject NONCE` | filesystem only | `reject_pending` in `hooks/modules/security/approval_grants.py` |
39
39
  | `gaia approvals reject-all` | filesystem only | loops `reject_pending` |
40
- | `gaia approvals clean` | filesystem only | `cleanup_expired_grants` |
40
+ | `gaia approvals clean` | DB (cross-session stale pendings) + filesystem | `cmd_clean` in `bin/cli/approvals.py`: calls `store.list_pending(all_sessions=True)`, transitions every pending older than `DEFAULT_PENDING_TTL_MINUTES` (24 h) to `revoked` via `store.revoke()`, then calls `cleanup_expired_grants` for filesystem files |
41
41
 
42
42
  The practical consequence: `revoke` is the DB-aware single-id verb; `reject` and
43
43
  `reject-all` only touch the legacy filesystem queue. If you need to mark a DB
@@ -105,15 +105,19 @@ Offer bulk cleanup when the user says "limpia todos los pendings", "borra los
105
105
  pendientes", or when SessionStart surfaces 5+ orphaned pendings the user has
106
106
  not engaged with.
107
107
 
108
- - `gaia approvals reject-all` -- bulk reject across the **filesystem** queue.
109
- Returns "0 rejected" when the queue is empty.
110
- - `gaia approvals clean` -- removes expired/stale **filesystem** files.
108
+ - `gaia approvals reject-all` -- bulk soft-reject across the **filesystem** queue.
109
+ Returns "0 rejected" when the queue is empty. Does not touch DB rows.
110
+ - `gaia approvals clean` -- the first-class cross-session bulk drain for stale
111
+ DB pendings: `cmd_clean` calls `store.list_pending(all_sessions=True)` and
112
+ transitions every pending older than 24 h (`DEFAULT_PENDING_TTL_MINUTES`) to
113
+ `revoked` via `store.revoke()`, then runs `cleanup_expired_grants` to clean
114
+ expired filesystem grant files. Runs without a T3 prompt (consent-reducing,
115
+ listed in `CONSENT_REDUCING_SUBCOMMAND_EXCEPTIONS`). Use this when
116
+ `gaia approvals pending --all-sessions` shows a backlog of stale rows.
111
117
 
112
- There is no first-class bulk-revoke for the DB queue. If `gaia approvals
113
- pending --all-sessions` shows rows that need clearing, either revoke each by id
114
- or call `store.revoke()` in a short Python loop. Do not report "bulk cleanup
115
- done" after `reject-all` if the DB queue still has pending rows -- check
116
- `gaia approvals pending --all-sessions` to confirm.
118
+ Do not report "bulk cleanup done" after `reject-all` alone -- it only clears
119
+ the filesystem queue. Run `gaia approvals clean` to drain the DB backlog, then
120
+ confirm with `gaia approvals pending --all-sessions`.
117
121
 
118
122
  Do not offer `reject-all` when there are active same-session pendings the user
119
123
  may still want to approve.
@@ -123,8 +127,9 @@ may still want to approve.
123
127
  - Approving without showing the exact COMANDO -- the user consents on the
124
128
  verbatim string, not a summary. The full presentation discipline lives in
125
129
  `orchestrator-present-approval`; this skill does not restate it.
126
- - Treating `gaia approvals reject-all` as a DB cleanup -- it operates on the
127
- filesystem queue only. DB rows survive the call.
130
+ - Treating `gaia approvals reject-all` as a full cleanup -- it operates on the
131
+ filesystem queue only; DB rows survive the call. Use `gaia approvals clean`
132
+ to drain the DB backlog.
128
133
  - Reporting "rechazado" without verifying the store -- `revoke` returns
129
134
  `not_found` for filesystem-only pendings; the inverse happens for `reject` on
130
135
  DB rows. Pick the verb by store, or be ready to fall back.
@@ -44,9 +44,20 @@ Add an `approval_request` to your `agent_contract_handoff`, copying the hook's f
44
44
 
45
45
  The `approval_request` schema is canonical in `agent-approval-protocol` — relay the sealed_payload fields verbatim (the hook built them) and add `verification` (your own success criteria) + `approval_id` (the literal token from the denial). See `agent-approval-protocol/SKILL.md` for the full field list and types.
46
46
 
47
- The `approval_id` is the `P-{...}` token the orchestrator uses to find the
48
- `REQUESTED` row in the DB and validate the fingerprint. Fields written only in
49
- prose are invisible to the presentation -- the user would approve blind.
47
+ The `approval_id` is the `P-{...}` token tying this request to its `REQUESTED`
48
+ row in the DB. Fields written only in prose are invisible to the presentation --
49
+ the user would approve blind.
50
+
51
+ **What your relay is for: same-turn immediacy.** Your `approval_request` is the
52
+ orchestrator's source only for the CURRENT turn. The orchestrator's primary
53
+ source is the per-turn `[PENDING-APPROVALS-VERIFIED]` block injected at
54
+ `UserPromptSubmit`, which carries every pending that has survived >= 1 turn,
55
+ already DB-read and fingerprint-verified. But that block was built before you
56
+ ran this turn, so a pending you mint now is not in it yet -- the orchestrator
57
+ presents it from your relay until the next turn's block picks it up. You emit the
58
+ same fields either way; nothing on your side changes. The orchestrator never
59
+ dispatches a subagent to verify or derive your request -- integrity is enforced
60
+ at grant activation, not at presentation.
50
61
 
51
62
  ## Non-negotiable rules
52
63
 
@@ -105,9 +116,12 @@ your side. What changed underneath: the minted `approval_id` is now
105
116
  (`derive_command_set_id` -> `P-<first 32 hex of sha256(canonical commands)>`),
106
117
  not a random uuid4. You do not compute or emit it (you cannot hash reliably, and
107
118
  you have nothing to attempt yet); the value is purely internal. The reason it
108
- matters: the orchestrator reproduces that exact id from the `command_set` you
109
- emitted (via `gaia approvals derive-id`), with no DB search and no cross-session
110
- miss. Your contract stays the same -- `command_set` of `{command, rationale}`
119
+ matters: the content-derived id is reproducible without a uuid4 that could be
120
+ lost across sessions. Once the minted pending has survived a turn, the
121
+ orchestrator reads it -- with all N commands -- straight from the injected
122
+ `[PENDING-APPROVALS-VERIFIED]` block (no DB search, no derive-dispatch); for the
123
+ turn you mint it in, the orchestrator presents from the `command_set` in your
124
+ relay. Your contract stays the same -- `command_set` of `{command, rationale}`
111
125
  items, no `approval_id`.
112
126
 
113
127
  On the user's approval, that one pending activates into a single `COMMAND_SET`
@@ -16,8 +16,12 @@ payload from the intercepted command and calls
16
16
  3. writes the `REQUESTED` event to the DB.
17
17
 
18
18
  The block message you receive (`[T3_BLOCKED] ...`) ends with `approval_id: P-{...}`.
19
- You relay that token plus the operation details; the orchestrator re-derives the
20
- fingerprint from the DB row.
19
+ You relay that token plus the operation details. For the current turn the
20
+ orchestrator presents from your relay; once the pending survives a turn it
21
+ appears in the per-turn `[PENDING-APPROVALS-VERIFIED]` block, already
22
+ fingerprint-verified by the hook. Payload integrity is enforced at grant
23
+ activation (`verify_fingerprint`), so the orchestrator never dispatches to
24
+ verify or derive your request.
21
25
 
22
26
  Source: `bash_validator._build_sealed_payload()`, the subagent block path in
23
27
  `bash_validator._validate_single_command()`; `gaia/approvals/store.py`
@@ -99,12 +103,14 @@ singular hook-block path (which mints `P-{uuid4hex}`), the intake derives the id
99
103
  from the command_set content via `gaia.approvals.store.derive_command_set_id()`:
100
104
  `P-<first 32 hex of sha256(canonical(post-filter command strings))>`. It then
101
105
  passes that id to `insert_requested(..., approval_id=...)` as the pending row id.
102
- The point is reproducibility without a DB lookup: the orchestrator holds the
103
- same `command_set` (you emitted it in the contract) and reproduces the EXACT id
104
- with `gaia approvals derive-id`, which applies the same mutative filter and the
105
- same canonicalization (`chain.canonical_payload`). This closes the cross-session
106
- miss -- a uuid4 minted at SubagentStop could not be recovered by the parent
107
- (Claude Code #5812), but a content-derived id needs no recovery. The id is
106
+ The point is reproducibility without a fragile uuid4: a uuid4 minted at
107
+ SubagentStop could not be recovered by the parent (Claude Code #5812), but a
108
+ content-derived id needs no recovery -- the same canonicalization
109
+ (`chain.canonical_payload`) and mutative filter always yield the same id. Once
110
+ the minted pending survives a turn, the orchestrator reads that id (and all N
111
+ commands) straight from the injected `[PENDING-APPROVALS-VERIFIED]` block -- no
112
+ DB lookup and no `gaia approvals derive-id` dispatch; for the mint turn it
113
+ presents from the `command_set` in your relay. The id is
108
114
  **order-sensitive** (the consume side matches positionally) and **content-only**
109
115
  (rationale/session/agent are not folded in, so both sides agree from the command
110
116
  list alone). Idempotency follows the existing fingerprint dedup: two identical
@@ -154,15 +160,17 @@ single-use within the 60-minute window.
154
160
  Always `plan_status: "APPROVAL_REQUEST"`. The presence of `approval_id` tells the
155
161
  orchestrator which path:
156
162
 
157
- - **With `approval_id`** -- the hook blocked a single command; orchestrator
158
- validates the fingerprint and activates the single-use semantic grant on user
159
- approval.
163
+ - **With `approval_id`** -- the hook blocked a single command; the orchestrator
164
+ presents from your relay (current turn) or the injected
165
+ `[PENDING-APPROVALS-VERIFIED]` block (later turns), and the single-use semantic
166
+ grant activates on user approval (fingerprint checked at activation).
160
167
  - **Without `approval_id`, with a `command_set` of >= 2 items** -- plan-first
161
168
  batch. The SubagentStop intake processor mints ONE pending `COMMAND_SET` with a
162
- **content-derived** id (`derive_command_set_id`), and the orchestrator
163
- reproduces that exact id from the command_set via `gaia approvals derive-id`
164
- (no DB search) before presenting the single approval (N commands, one nonce).
165
- See "Batch / COMMAND_SET -- wired" above.
169
+ **content-derived** id (`derive_command_set_id`). The orchestrator reads that
170
+ id and the N commands from the injected `[PENDING-APPROVALS-VERIFIED]` block
171
+ (no derive-dispatch), or, for the mint turn, from the `command_set` in your
172
+ relay, then presents the single approval (N commands, one nonce). See
173
+ "Batch / COMMAND_SET -- wired" above.
166
174
  - **Without `approval_id` and without a multi-item `command_set`** -- plan-first
167
175
  single (you are presenting one T3 plan before attempting); the orchestrator
168
176
  gates on user consent before any execution.
@@ -19,7 +19,12 @@ desde el filesystem hacia `~/.gaia/gaia.db`.
19
19
  | 01 | Episodes | `.claude/project-context/episodic-memory/episodes.jsonl` | `episodes` (+`episodes_fts`) |
20
20
  | 02 | Memory | `~/.claude/projects/-home-jorge-ws-me/memory/*.md` | `memory` (+`memory_fts`) |
21
21
  | 03 | Context contracts | `.claude/project-context/project-context.json` | `context_contracts` |
22
- | 04 | Harness events | `.claude/events/events.jsonl` | `harness_events` |
22
+ | 04 | Harness events | ~~`.claude/events/events.jsonl`~~ (ELIMINADO) | `harness_events` |
23
+
24
+ > **Dominio 04 completado y eliminado.** `events.jsonl` y su archivo `.lock` fueron
25
+ > retirados. El hook `event_writer` escribe directamente a `harness_events` en la DB.
26
+ > El script `migrate_04_harness_events.py` y su wrapper `.sh` fueron borrados una vez
27
+ > completada la absorción. Los datos vivos se leen desde `harness_events` en `~/.gaia/gaia.db`.
23
28
 
24
29
  Cada dominio tiene 2 archivos:
25
30
 
@@ -37,8 +42,8 @@ bootstrap.sh # crea/inicializa ~/.gaia/gaia.db con s
37
42
  ./migrate_01_episodes.sh # ~50-80 MB de SQL, batch 80
38
43
  ./migrate_02_memory.sh # 28 .md (MEMORY.md excluido)
39
44
  ./migrate_03_context_contracts.sh # 12 secciones
40
- ./migrate_04_harness_events.sh # ~5-10 MB de SQL, batch 200
41
- ./validate.sh # 5 aserciones read-only
45
+ # migrate_04_harness_events.sh ELIMINADO -- dominio 04 completado; eventos en DB-canonical
46
+ ./validate.sh # aserciones read-only (V4 eliminada junto con 04)
42
47
  ```
43
48
 
44
49
  Cada script imprime `[migrate_NN] OK` al terminar.
@@ -50,14 +55,7 @@ Cada script imprime `[migrate_NN] OK` al terminar.
50
55
  | 01 episodes | `INSERT OR IGNORE` (PK = `episode_id`) | sí |
51
56
  | 02 memory | `INSERT OR IGNORE` (PK = `(project, name)`) | sí |
52
57
  | 03 context_contracts | `INSERT OR IGNORE` (PK = `(project, section_name)`) | sí |
53
- | 04 harness_events | `INSERT` simple (sin PK natural) | **no duplica filas** |
54
-
55
- Para re-ejecutar 04 limpiamente:
56
-
57
- ```
58
- sqlite3 ~/.gaia/gaia.db "DELETE FROM harness_events WHERE project='me';"
59
- ./migrate_04_harness_events.sh
60
- ```
58
+ | 04 harness_events | N/A -- tool eliminado; escritura vía `event_writer` DB-direct | N/A |
61
59
 
62
60
  ## Validación
63
61
 
@@ -68,7 +66,7 @@ sqlite3 ~/.gaia/gaia.db "DELETE FROM harness_events WHERE project='me';"
68
66
  | V1 | `COUNT(*) FROM episodes` == líneas no vacías de `episodes.jsonl` |
69
67
  | V2 | `COUNT(*) FROM memory` == archivos `.md` (excluyendo `MEMORY.md`) |
70
68
  | V3 | `COUNT(*) FROM context_contracts` == 12 |
71
- | V4 | `COUNT(*) FROM harness_events` == líneas no vacías de `events.jsonl` |
69
+ | ~~V4~~ | ~~`COUNT(*) FROM harness_events` == líneas no vacías de `events.jsonl`~~ — eliminado junto con el dominio 04 |
72
70
  | V5 | `COUNT(*) FROM episodes_fts` == `COUNT(*) FROM episodes` (FTS sync) |
73
71
 
74
72
  Exit code: 0 si todas pasan, 1 si alguna falla.
@@ -11,23 +11,26 @@ Pipeline:
11
11
  3. Collect and combine scanner sections (handling environment sub-keys)
12
12
  4. Cross-populate derived fields
13
13
  5. Return ScanOutput
14
+
15
+ Section ownership rules (inlined from the retired tools/scan/merge.py):
16
+ Rule 1: Scanner-owned sections -> full replace
17
+ Rule 2: Agent-enriched sections -> never touch
18
+ Rule 3: Mixed sections -> selective update at sub-key level
19
+ Rule 4: Unknown/user-custom sections -> preserve
20
+ Rule 5: Metadata -> always update
14
21
  """
15
22
 
23
+ import copy
16
24
  import logging
17
25
  import time
18
26
  from concurrent.futures import ThreadPoolExecutor, as_completed
19
27
  from dataclasses import dataclass, field
20
28
  from datetime import datetime, timezone
21
29
  from pathlib import Path
22
- from typing import Any, Dict, List, Optional
30
+ from typing import Any, Dict, List, Optional, Set
23
31
 
24
32
  from tools.scan import __version__ as scanner_package_version
25
33
  from tools.scan.config import ScanConfig
26
- from tools.scan.merge import (
27
- AGENT_ENRICHED_SECTIONS,
28
- collect_scanner_sections,
29
- merge_context,
30
- )
31
34
  from tools.scan.registry import ScannerRegistry
32
35
  from tools.scan.scanners.base import BaseScanner, ScanResult
33
36
  from tools.scan.workspace import WorkspaceInfo, detect_workspace_type
@@ -35,6 +38,190 @@ from tools.scan.workspace import WorkspaceInfo, detect_workspace_type
35
38
  logger = logging.getLogger(__name__)
36
39
 
37
40
 
41
+ # ---------------------------------------------------------------------------
42
+ # Section ownership constants (Rule 1 / Rule 2 / Rule 3)
43
+ # ---------------------------------------------------------------------------
44
+
45
+ # Sections fully owned by scanners -- replaced entirely on each scan (Rule 1)
46
+ # Top-level sections only; sub-key ownership handled separately
47
+ SCANNER_OWNED_TOP_LEVEL: Dict[str, str] = {
48
+ "project_identity": "stack",
49
+ "stack": "stack",
50
+ "git": "git",
51
+ "infrastructure": "infrastructure",
52
+ "orchestration": "orchestration",
53
+ # "environment" is NOT listed here because it has sub-key ownership
54
+ }
55
+
56
+ # Sub-key ownership within the `environment` section (sub-section level; cf. Rule 3)
57
+ # Maps environment sub-key -> owning scanner name
58
+ ENVIRONMENT_SUBKEY_OWNERS: Dict[str, str] = {
59
+ "tools": "tools",
60
+ "tool_preferences": "tools",
61
+ "os": "environment",
62
+ "runtimes": "environment",
63
+ "env_files": "environment",
64
+ }
65
+
66
+ # Agent-enriched sections -- never modified by scanners (Rule 2)
67
+ AGENT_ENRICHED_SECTIONS: frozenset = frozenset([
68
+ "operational_guidelines",
69
+ "cluster_details",
70
+ "infrastructure_topology",
71
+ "monitoring_observability",
72
+ "architecture_overview",
73
+ "gcp_services",
74
+ "workload_identity",
75
+ ])
76
+
77
+ # Mixed sections with partial scanner ownership (Rule 3)
78
+ # Maps section_name -> set of scanner-owned field names
79
+ MIXED_SECTION_SCANNER_FIELDS: Dict[str, Set[str]] = {
80
+ "terraform_infrastructure": {"layout"},
81
+ "gitops_configuration": {"repository"},
82
+ "application_services": {"base_path", "services"},
83
+ }
84
+
85
+
86
+ # ---------------------------------------------------------------------------
87
+ # Section collection and merge helpers
88
+ # ---------------------------------------------------------------------------
89
+
90
+ def collect_scanner_sections(
91
+ scanner_results: Dict[str, Any],
92
+ ) -> Dict[str, Any]:
93
+ """Collect and combine sections from all scanner results.
94
+
95
+ Handles the environment section specially: both `tools` and `environment`
96
+ scanners produce sub-keys under `environment`, so their outputs are
97
+ combined into a single `environment` section.
98
+
99
+ Args:
100
+ scanner_results: Mapping of scanner_name -> ScanResult (must have
101
+ a `sections` attribute that is a dict).
102
+
103
+ Returns:
104
+ Combined sections dict from all scanners.
105
+ """
106
+ combined: Dict[str, Any] = {}
107
+ environment_parts: Dict[str, Any] = {}
108
+
109
+ for _scanner_name, scan_result in scanner_results.items():
110
+ sections = scan_result.sections if hasattr(scan_result, "sections") else {}
111
+
112
+ for section_name, section_data in sections.items():
113
+ if section_name == "environment":
114
+ # Merge environment sub-keys from both scanners
115
+ if isinstance(section_data, dict):
116
+ for key, value in section_data.items():
117
+ if key != "_source":
118
+ environment_parts[key] = value
119
+ else:
120
+ # Non-environment sections: direct assignment (last scanner wins,
121
+ # but each section should have exactly one owner)
122
+ combined[section_name] = section_data
123
+
124
+ # Reassemble environment section if we got any parts
125
+ if environment_parts:
126
+ combined["environment"] = {
127
+ "_source": "scanner:environment+tools",
128
+ **environment_parts,
129
+ }
130
+
131
+ return combined
132
+
133
+
134
+ def _merge_environment_section(
135
+ result: Dict[str, Any],
136
+ scan_sections: Dict[str, Any],
137
+ ) -> None:
138
+ """Merge the `environment` section with sub-key level ownership.
139
+
140
+ Two scanners contribute to the `environment` section:
141
+ - `tools` scanner owns: tools, tool_preferences
142
+ - `environment` scanner owns: os, runtimes, env_files
143
+
144
+ Each scanner's sub-keys replace their owned portion; the other scanner's
145
+ sub-keys are preserved. The `_source` field gets a combined tag.
146
+
147
+ Args:
148
+ result: The result dict being built (mutated in place).
149
+ scan_sections: Combined sections from all scanners.
150
+ """
151
+ if "environment" not in scan_sections:
152
+ return
153
+
154
+ scan_env = scan_sections["environment"]
155
+
156
+ if "environment" not in result:
157
+ result["environment"] = {}
158
+
159
+ env = result["environment"]
160
+
161
+ # Replace each sub-key based on ownership
162
+ for subkey in ENVIRONMENT_SUBKEY_OWNERS:
163
+ if subkey in scan_env:
164
+ env[subkey] = copy.deepcopy(scan_env[subkey])
165
+
166
+ # Set combined _source tag
167
+ env["_source"] = "scanner:environment+tools"
168
+
169
+
170
+ def _merge_sections(
171
+ existing: Dict[str, Any],
172
+ scan_sections: Dict[str, Any],
173
+ ) -> Dict[str, Any]:
174
+ """Merge scanner results with existing project-context sections.
175
+
176
+ Applies the ownership rules to produce the final merged sections dict.
177
+ Called with existing={} in normal scan runs (display-only path).
178
+
179
+ Args:
180
+ existing: Current sections (may be empty when called from scan).
181
+ scan_sections: Combined sections from all scanners.
182
+
183
+ Returns:
184
+ Merged sections dict. The merge is deterministic: same inputs always
185
+ produce the same output.
186
+ """
187
+ result = copy.deepcopy(existing)
188
+
189
+ # --- Rule 1: Scanner-owned top-level sections -> full replace ---
190
+ for section_name in SCANNER_OWNED_TOP_LEVEL:
191
+ if section_name in scan_sections:
192
+ result[section_name] = copy.deepcopy(scan_sections[section_name])
193
+
194
+ # --- Sub-section level ownership for `environment` ---
195
+ _merge_environment_section(result, scan_sections)
196
+
197
+ # --- Rule 2: Agent-enriched sections -> never touch ---
198
+ # These are already in `result` from the deepcopy of `existing`.
199
+ # (No action needed -- they are preserved by the deepcopy.)
200
+
201
+ # --- Rule 3: Mixed sections -> selective update ---
202
+ for section_name, scanner_fields in MIXED_SECTION_SCANNER_FIELDS.items():
203
+ if section_name in scan_sections:
204
+ scan_data = scan_sections[section_name]
205
+ if section_name not in result:
206
+ result[section_name] = {}
207
+ # Only update scanner-owned fields; preserve agent fields
208
+ for field_name in scanner_fields:
209
+ if field_name in scan_data:
210
+ result[section_name][field_name] = copy.deepcopy(
211
+ scan_data[field_name]
212
+ )
213
+
214
+ # --- Rule 4: Unknown/user-custom sections -> preserve ---
215
+ # Any section in `existing` not covered above is preserved by the deepcopy.
216
+ # We do NOT add new unknown sections from scan_sections.
217
+
218
+ return result
219
+
220
+
221
+ # ---------------------------------------------------------------------------
222
+ # ScanOutput dataclass
223
+ # ---------------------------------------------------------------------------
224
+
38
225
  @dataclass(frozen=True)
39
226
  class ScanOutput:
40
227
  """Aggregated output from all scanners.
@@ -187,11 +374,9 @@ class ScanOrchestrator:
187
374
  scan_sections = collect_scanner_sections(scanner_results)
188
375
 
189
376
  # Merge with empty existing context (no JSON persistence)
190
- section_owners = self.registry.get_section_owners()
191
- merged_sections = merge_context(
377
+ merged_sections = _merge_sections(
192
378
  existing={},
193
379
  scan_sections=scan_sections,
194
- section_owners=section_owners,
195
380
  )
196
381
 
197
382
  # Determine which sections were updated vs preserved
@@ -210,7 +395,6 @@ class ScanOrchestrator:
210
395
  self._cross_populate_monorepo(merged_sections)
211
396
 
212
397
  # Remove empty {} placeholders for agent-enriched and mixed sections
213
- from tools.scan.merge import MIXED_SECTION_SCANNER_FIELDS
214
398
  remove_if_empty = (
215
399
  AGENT_ENRICHED_SECTIONS
216
400
  | frozenset(MIXED_SECTION_SCANNER_FIELDS.keys())
@@ -23,8 +23,7 @@ from unittest.mock import patch
23
23
  import pytest
24
24
 
25
25
  from tools.scan.config import ScanConfig
26
- from tools.scan.merge import AGENT_ENRICHED_SECTIONS
27
- from tools.scan.orchestrator import ScanOrchestrator, ScanOutput
26
+ from tools.scan.orchestrator import AGENT_ENRICHED_SECTIONS, ScanOrchestrator, ScanOutput
28
27
  from tools.scan.registry import ScannerRegistry
29
28
  from tools.scan.scanners.base import BaseScanner, ScanResult
30
29
  from tools.scan.tests.conftest import create_git_dir