@jaguilar87/gaia 5.0.8 → 5.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/CHANGELOG.md +11 -0
- package/bin/README.md +6 -1
- package/bin/cli/approvals.py +341 -238
- package/bin/cli/brief.py +13 -0
- package/bin/cli/doctor.py +1 -1
- package/dist/gaia-ops/.claude-plugin/plugin.json +1 -1
- package/dist/gaia-ops/hooks/adapters/claude_code.py +19 -85
- package/dist/gaia-ops/hooks/modules/context/context_injector.py +23 -7
- package/dist/gaia-ops/hooks/modules/events/event_writer.py +63 -96
- package/dist/gaia-ops/hooks/modules/security/__init__.py +0 -2
- package/dist/gaia-ops/hooks/modules/security/approval_cleanup.py +238 -69
- package/dist/gaia-ops/hooks/modules/security/approval_grants.py +506 -1103
- package/dist/gaia-ops/hooks/modules/security/mutative_verbs.py +24 -1
- package/dist/gaia-ops/hooks/modules/session/pending_scanner.py +150 -90
- package/dist/gaia-ops/hooks/modules/session/session_manifest.py +257 -28
- package/dist/gaia-ops/hooks/post_compact.py +1 -0
- package/dist/gaia-ops/hooks/pre_compact.py +1 -0
- package/dist/gaia-ops/hooks/user_prompt_submit.py +20 -0
- package/dist/gaia-ops/skills/agent-approval-protocol/SKILL.md +27 -7
- package/dist/gaia-ops/skills/agent-approval-protocol/reference.md +11 -6
- package/dist/gaia-ops/skills/gaia-patterns/reference.md +2 -2
- package/dist/gaia-ops/skills/orchestrator-present-approval/SKILL.md +69 -28
- package/dist/gaia-ops/skills/orchestrator-present-approval/reference.md +16 -3
- package/dist/gaia-ops/skills/orchestrator-present-approval/template.md +10 -5
- package/dist/gaia-ops/skills/pending-approvals/SKILL.md +16 -11
- package/dist/gaia-ops/skills/subagent-request-approval/SKILL.md +20 -6
- package/dist/gaia-ops/skills/subagent-request-approval/reference.md +23 -15
- package/dist/gaia-ops/tools/migration/README.md +10 -12
- package/dist/gaia-ops/tools/scan/orchestrator.py +194 -10
- package/dist/gaia-ops/tools/scan/tests/test_integration.py +1 -2
- package/dist/gaia-security/.claude-plugin/plugin.json +1 -1
- package/dist/gaia-security/hooks/adapters/claude_code.py +19 -85
- package/dist/gaia-security/hooks/modules/context/context_injector.py +23 -7
- package/dist/gaia-security/hooks/modules/events/event_writer.py +63 -96
- package/dist/gaia-security/hooks/modules/security/__init__.py +0 -2
- package/dist/gaia-security/hooks/modules/security/approval_cleanup.py +238 -69
- package/dist/gaia-security/hooks/modules/security/approval_grants.py +506 -1103
- package/dist/gaia-security/hooks/modules/security/mutative_verbs.py +24 -1
- package/dist/gaia-security/hooks/modules/session/pending_scanner.py +150 -90
- package/dist/gaia-security/hooks/modules/session/session_manifest.py +257 -28
- package/dist/gaia-security/hooks/user_prompt_submit.py +20 -0
- package/gaia/approvals/store.py +87 -9
- package/gaia/store/schema.sql +38 -1
- package/gaia/store/writer.py +400 -0
- package/hooks/adapters/claude_code.py +19 -85
- package/hooks/elicitation_result.py +20 -75
- package/hooks/modules/context/context_injector.py +23 -7
- package/hooks/modules/events/event_writer.py +63 -96
- package/hooks/modules/security/__init__.py +0 -2
- package/hooks/modules/security/approval_cleanup.py +238 -69
- package/hooks/modules/security/approval_grants.py +506 -1103
- package/hooks/modules/security/mutative_verbs.py +24 -1
- package/hooks/modules/session/pending_scanner.py +150 -90
- package/hooks/modules/session/session_manifest.py +257 -28
- package/hooks/post_compact.py +1 -0
- package/hooks/pre_compact.py +1 -0
- package/hooks/user_prompt_submit.py +20 -0
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/scripts/bootstrap_database.sh +66 -17
- package/scripts/migrations/README.md +26 -14
- package/scripts/migrations/schema.checksum +2 -2
- package/scripts/migrations/v18_to_v19.sql +36 -0
- package/scripts/migrations/v19_to_v20.sql +20 -0
- package/skills/agent-approval-protocol/SKILL.md +27 -7
- package/skills/agent-approval-protocol/reference.md +11 -6
- package/skills/gaia-patterns/reference.md +2 -2
- package/skills/orchestrator-present-approval/SKILL.md +69 -28
- package/skills/orchestrator-present-approval/reference.md +16 -3
- package/skills/orchestrator-present-approval/template.md +10 -5
- package/skills/pending-approvals/SKILL.md +16 -11
- package/skills/subagent-request-approval/SKILL.md +20 -6
- package/skills/subagent-request-approval/reference.md +23 -15
- package/tools/migration/README.md +10 -12
- package/tools/scan/orchestrator.py +194 -10
- package/tools/scan/tests/test_integration.py +1 -2
- package/bin/cli/plans.py +0 -517
- package/dist/gaia-ops/tools/context/deep_merge.py +0 -159
- package/dist/gaia-ops/tools/migration/migrate_04_harness_events.py +0 -132
- package/dist/gaia-ops/tools/migration/migrate_04_harness_events.sh +0 -23
- package/dist/gaia-ops/tools/scan/merge.py +0 -213
- package/dist/gaia-ops/tools/scan/tests/test_merge.py +0 -269
- package/tools/context/deep_merge.py +0 -159
- package/tools/migration/migrate_04_harness_events.py +0 -132
- package/tools/migration/migrate_04_harness_events.sh +0 -23
- package/tools/scan/merge.py +0 -213
- package/tools/scan/tests/test_merge.py +0 -269
|
@@ -37,7 +37,7 @@ report "rejected" when nothing actually changed.
|
|
|
37
37
|
| `gaia approvals list` | DB grants + filesystem pendings | `cmd_list` (mixed) |
|
|
38
38
|
| `gaia approvals reject NONCE` | filesystem only | `reject_pending` in `hooks/modules/security/approval_grants.py` |
|
|
39
39
|
| `gaia approvals reject-all` | filesystem only | loops `reject_pending` |
|
|
40
|
-
| `gaia approvals clean` | filesystem
|
|
40
|
+
| `gaia approvals clean` | DB (cross-session stale pendings) + filesystem | `cmd_clean` in `bin/cli/approvals.py`: calls `store.list_pending(all_sessions=True)`, transitions every pending older than `DEFAULT_PENDING_TTL_MINUTES` (24 h) to `revoked` via `store.revoke()`, then calls `cleanup_expired_grants` for filesystem files |
|
|
41
41
|
|
|
42
42
|
The practical consequence: `revoke` is the DB-aware single-id verb; `reject` and
|
|
43
43
|
`reject-all` only touch the legacy filesystem queue. If you need to mark a DB
|
|
@@ -105,15 +105,19 @@ Offer bulk cleanup when the user says "limpia todos los pendings", "borra los
|
|
|
105
105
|
pendientes", or when SessionStart surfaces 5+ orphaned pendings the user has
|
|
106
106
|
not engaged with.
|
|
107
107
|
|
|
108
|
-
- `gaia approvals reject-all` -- bulk reject across the **filesystem** queue.
|
|
109
|
-
Returns "0 rejected" when the queue is empty.
|
|
110
|
-
- `gaia approvals clean` --
|
|
108
|
+
- `gaia approvals reject-all` -- bulk soft-reject across the **filesystem** queue.
|
|
109
|
+
Returns "0 rejected" when the queue is empty. Does not touch DB rows.
|
|
110
|
+
- `gaia approvals clean` -- the first-class cross-session bulk drain for stale
|
|
111
|
+
DB pendings: `cmd_clean` calls `store.list_pending(all_sessions=True)` and
|
|
112
|
+
transitions every pending older than 24 h (`DEFAULT_PENDING_TTL_MINUTES`) to
|
|
113
|
+
`revoked` via `store.revoke()`, then runs `cleanup_expired_grants` to clean
|
|
114
|
+
expired filesystem grant files. Runs without a T3 prompt (consent-reducing,
|
|
115
|
+
listed in `CONSENT_REDUCING_SUBCOMMAND_EXCEPTIONS`). Use this when
|
|
116
|
+
`gaia approvals pending --all-sessions` shows a backlog of stale rows.
|
|
111
117
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
done" after `reject-all` if the DB queue still has pending rows -- check
|
|
116
|
-
`gaia approvals pending --all-sessions` to confirm.
|
|
118
|
+
Do not report "bulk cleanup done" after `reject-all` alone -- it only clears
|
|
119
|
+
the filesystem queue. Run `gaia approvals clean` to drain the DB backlog, then
|
|
120
|
+
confirm with `gaia approvals pending --all-sessions`.
|
|
117
121
|
|
|
118
122
|
Do not offer `reject-all` when there are active same-session pendings the user
|
|
119
123
|
may still want to approve.
|
|
@@ -123,8 +127,9 @@ may still want to approve.
|
|
|
123
127
|
- Approving without showing the exact COMANDO -- the user consents on the
|
|
124
128
|
verbatim string, not a summary. The full presentation discipline lives in
|
|
125
129
|
`orchestrator-present-approval`; this skill does not restate it.
|
|
126
|
-
- Treating `gaia approvals reject-all` as a
|
|
127
|
-
filesystem queue only
|
|
130
|
+
- Treating `gaia approvals reject-all` as a full cleanup -- it operates on the
|
|
131
|
+
filesystem queue only; DB rows survive the call. Use `gaia approvals clean`
|
|
132
|
+
to drain the DB backlog.
|
|
128
133
|
- Reporting "rechazado" without verifying the store -- `revoke` returns
|
|
129
134
|
`not_found` for filesystem-only pendings; the inverse happens for `reject` on
|
|
130
135
|
DB rows. Pick the verb by store, or be ready to fall back.
|
|
@@ -44,9 +44,20 @@ Add an `approval_request` to your `agent_contract_handoff`, copying the hook's f
|
|
|
44
44
|
|
|
45
45
|
The `approval_request` schema is canonical in `agent-approval-protocol` — relay the sealed_payload fields verbatim (the hook built them) and add `verification` (your own success criteria) + `approval_id` (the literal token from the denial). See `agent-approval-protocol/SKILL.md` for the full field list and types.
|
|
46
46
|
|
|
47
|
-
The `approval_id` is the `P-{...}` token
|
|
48
|
-
|
|
49
|
-
|
|
47
|
+
The `approval_id` is the `P-{...}` token tying this request to its `REQUESTED`
|
|
48
|
+
row in the DB. Fields written only in prose are invisible to the presentation --
|
|
49
|
+
the user would approve blind.
|
|
50
|
+
|
|
51
|
+
**What your relay is for: same-turn immediacy.** Your `approval_request` is the
|
|
52
|
+
orchestrator's source only for the CURRENT turn. The orchestrator's primary
|
|
53
|
+
source is the per-turn `[PENDING-APPROVALS-VERIFIED]` block injected at
|
|
54
|
+
`UserPromptSubmit`, which carries every pending that has survived >= 1 turn,
|
|
55
|
+
already DB-read and fingerprint-verified. But that block was built before you
|
|
56
|
+
ran this turn, so a pending you mint now is not in it yet -- the orchestrator
|
|
57
|
+
presents it from your relay until the next turn's block picks it up. You emit the
|
|
58
|
+
same fields either way; nothing on your side changes. The orchestrator never
|
|
59
|
+
dispatches a subagent to verify or derive your request -- integrity is enforced
|
|
60
|
+
at grant activation, not at presentation.
|
|
50
61
|
|
|
51
62
|
## Non-negotiable rules
|
|
52
63
|
|
|
@@ -105,9 +116,12 @@ your side. What changed underneath: the minted `approval_id` is now
|
|
|
105
116
|
(`derive_command_set_id` -> `P-<first 32 hex of sha256(canonical commands)>`),
|
|
106
117
|
not a random uuid4. You do not compute or emit it (you cannot hash reliably, and
|
|
107
118
|
you have nothing to attempt yet); the value is purely internal. The reason it
|
|
108
|
-
matters: the
|
|
109
|
-
|
|
110
|
-
|
|
119
|
+
matters: the content-derived id is reproducible without a uuid4 that could be
|
|
120
|
+
lost across sessions. Once the minted pending has survived a turn, the
|
|
121
|
+
orchestrator reads it -- with all N commands -- straight from the injected
|
|
122
|
+
`[PENDING-APPROVALS-VERIFIED]` block (no DB search, no derive-dispatch); for the
|
|
123
|
+
turn you mint it in, the orchestrator presents from the `command_set` in your
|
|
124
|
+
relay. Your contract stays the same -- `command_set` of `{command, rationale}`
|
|
111
125
|
items, no `approval_id`.
|
|
112
126
|
|
|
113
127
|
On the user's approval, that one pending activates into a single `COMMAND_SET`
|
|
@@ -16,8 +16,12 @@ payload from the intercepted command and calls
|
|
|
16
16
|
3. writes the `REQUESTED` event to the DB.
|
|
17
17
|
|
|
18
18
|
The block message you receive (`[T3_BLOCKED] ...`) ends with `approval_id: P-{...}`.
|
|
19
|
-
You relay that token plus the operation details
|
|
20
|
-
|
|
19
|
+
You relay that token plus the operation details. For the current turn the
|
|
20
|
+
orchestrator presents from your relay; once the pending survives a turn it
|
|
21
|
+
appears in the per-turn `[PENDING-APPROVALS-VERIFIED]` block, already
|
|
22
|
+
fingerprint-verified by the hook. Payload integrity is enforced at grant
|
|
23
|
+
activation (`verify_fingerprint`), so the orchestrator never dispatches to
|
|
24
|
+
verify or derive your request.
|
|
21
25
|
|
|
22
26
|
Source: `bash_validator._build_sealed_payload()`, the subagent block path in
|
|
23
27
|
`bash_validator._validate_single_command()`; `gaia/approvals/store.py`
|
|
@@ -99,12 +103,14 @@ singular hook-block path (which mints `P-{uuid4hex}`), the intake derives the id
|
|
|
99
103
|
from the command_set content via `gaia.approvals.store.derive_command_set_id()`:
|
|
100
104
|
`P-<first 32 hex of sha256(canonical(post-filter command strings))>`. It then
|
|
101
105
|
passes that id to `insert_requested(..., approval_id=...)` as the pending row id.
|
|
102
|
-
The point is reproducibility without a
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
106
|
+
The point is reproducibility without a fragile uuid4: a uuid4 minted at
|
|
107
|
+
SubagentStop could not be recovered by the parent (Claude Code #5812), but a
|
|
108
|
+
content-derived id needs no recovery -- the same canonicalization
|
|
109
|
+
(`chain.canonical_payload`) and mutative filter always yield the same id. Once
|
|
110
|
+
the minted pending survives a turn, the orchestrator reads that id (and all N
|
|
111
|
+
commands) straight from the injected `[PENDING-APPROVALS-VERIFIED]` block -- no
|
|
112
|
+
DB lookup and no `gaia approvals derive-id` dispatch; for the mint turn it
|
|
113
|
+
presents from the `command_set` in your relay. The id is
|
|
108
114
|
**order-sensitive** (the consume side matches positionally) and **content-only**
|
|
109
115
|
(rationale/session/agent are not folded in, so both sides agree from the command
|
|
110
116
|
list alone). Idempotency follows the existing fingerprint dedup: two identical
|
|
@@ -154,15 +160,17 @@ single-use within the 60-minute window.
|
|
|
154
160
|
Always `plan_status: "APPROVAL_REQUEST"`. The presence of `approval_id` tells the
|
|
155
161
|
orchestrator which path:
|
|
156
162
|
|
|
157
|
-
- **With `approval_id`** -- the hook blocked a single command; orchestrator
|
|
158
|
-
|
|
159
|
-
|
|
163
|
+
- **With `approval_id`** -- the hook blocked a single command; the orchestrator
|
|
164
|
+
presents from your relay (current turn) or the injected
|
|
165
|
+
`[PENDING-APPROVALS-VERIFIED]` block (later turns), and the single-use semantic
|
|
166
|
+
grant activates on user approval (fingerprint checked at activation).
|
|
160
167
|
- **Without `approval_id`, with a `command_set` of >= 2 items** -- plan-first
|
|
161
168
|
batch. The SubagentStop intake processor mints ONE pending `COMMAND_SET` with a
|
|
162
|
-
**content-derived** id (`derive_command_set_id`)
|
|
163
|
-
|
|
164
|
-
(no
|
|
165
|
-
|
|
169
|
+
**content-derived** id (`derive_command_set_id`). The orchestrator reads that
|
|
170
|
+
id and the N commands from the injected `[PENDING-APPROVALS-VERIFIED]` block
|
|
171
|
+
(no derive-dispatch), or, for the mint turn, from the `command_set` in your
|
|
172
|
+
relay, then presents the single approval (N commands, one nonce). See
|
|
173
|
+
"Batch / COMMAND_SET -- wired" above.
|
|
166
174
|
- **Without `approval_id` and without a multi-item `command_set`** -- plan-first
|
|
167
175
|
single (you are presenting one T3 plan before attempting); the orchestrator
|
|
168
176
|
gates on user consent before any execution.
|
|
@@ -19,7 +19,12 @@ desde el filesystem hacia `~/.gaia/gaia.db`.
|
|
|
19
19
|
| 01 | Episodes | `.claude/project-context/episodic-memory/episodes.jsonl` | `episodes` (+`episodes_fts`) |
|
|
20
20
|
| 02 | Memory | `~/.claude/projects/-home-jorge-ws-me/memory/*.md` | `memory` (+`memory_fts`) |
|
|
21
21
|
| 03 | Context contracts | `.claude/project-context/project-context.json` | `context_contracts` |
|
|
22
|
-
| 04 | Harness events | `.claude/events/events.jsonl` | `harness_events` |
|
|
22
|
+
| 04 | Harness events | ~~`.claude/events/events.jsonl`~~ (ELIMINADO) | `harness_events` |
|
|
23
|
+
|
|
24
|
+
> **Dominio 04 completado y eliminado.** `events.jsonl` y su archivo `.lock` fueron
|
|
25
|
+
> retirados. El hook `event_writer` escribe directamente a `harness_events` en la DB.
|
|
26
|
+
> El script `migrate_04_harness_events.py` y su wrapper `.sh` fueron borrados una vez
|
|
27
|
+
> completada la absorción. Los datos vivos se leen desde `harness_events` en `~/.gaia/gaia.db`.
|
|
23
28
|
|
|
24
29
|
Cada dominio tiene 2 archivos:
|
|
25
30
|
|
|
@@ -37,8 +42,8 @@ bootstrap.sh # crea/inicializa ~/.gaia/gaia.db con s
|
|
|
37
42
|
./migrate_01_episodes.sh # ~50-80 MB de SQL, batch 80
|
|
38
43
|
./migrate_02_memory.sh # 28 .md (MEMORY.md excluido)
|
|
39
44
|
./migrate_03_context_contracts.sh # 12 secciones
|
|
40
|
-
|
|
41
|
-
./validate.sh #
|
|
45
|
+
# migrate_04_harness_events.sh ELIMINADO — dominio 04 completado; eventos en DB-canonical
|
|
46
|
+
./validate.sh # aserciones read-only (V4 eliminada junto con 04)
|
|
42
47
|
```
|
|
43
48
|
|
|
44
49
|
Cada script imprime `[migrate_NN] OK` al terminar.
|
|
@@ -50,14 +55,7 @@ Cada script imprime `[migrate_NN] OK` al terminar.
|
|
|
50
55
|
| 01 episodes | `INSERT OR IGNORE` (PK = `episode_id`) | sí |
|
|
51
56
|
| 02 memory | `INSERT OR IGNORE` (PK = `(project, name)`) | sí |
|
|
52
57
|
| 03 context_contracts | `INSERT OR IGNORE` (PK = `(project, section_name)`) | sí |
|
|
53
|
-
| 04 harness_events |
|
|
54
|
-
|
|
55
|
-
Para re-ejecutar 04 limpiamente:
|
|
56
|
-
|
|
57
|
-
```
|
|
58
|
-
sqlite3 ~/.gaia/gaia.db "DELETE FROM harness_events WHERE project='me';"
|
|
59
|
-
./migrate_04_harness_events.sh
|
|
60
|
-
```
|
|
58
|
+
| 04 harness_events | N/A — tool eliminado; escritura vía `event_writer` DB-direct | N/A |
|
|
61
59
|
|
|
62
60
|
## Validación
|
|
63
61
|
|
|
@@ -68,7 +66,7 @@ sqlite3 ~/.gaia/gaia.db "DELETE FROM harness_events WHERE project='me';"
|
|
|
68
66
|
| V1 | `COUNT(*) FROM episodes` == líneas no vacías de `episodes.jsonl` |
|
|
69
67
|
| V2 | `COUNT(*) FROM memory` == archivos `.md` (excluyendo `MEMORY.md`) |
|
|
70
68
|
| V3 | `COUNT(*) FROM context_contracts` == 12 |
|
|
71
|
-
| V4 | `COUNT(*) FROM harness_events` == líneas no vacías de `events.jsonl` |
|
|
69
|
+
| ~~V4~~ | ~~`COUNT(*) FROM harness_events` == líneas no vacías de `events.jsonl`~~ — eliminado junto con el dominio 04 |
|
|
72
70
|
| V5 | `COUNT(*) FROM episodes_fts` == `COUNT(*) FROM episodes` (FTS sync) |
|
|
73
71
|
|
|
74
72
|
Exit code: 0 si todas pasan, 1 si alguna falla.
|
|
@@ -11,23 +11,26 @@ Pipeline:
|
|
|
11
11
|
3. Collect and combine scanner sections (handling environment sub-keys)
|
|
12
12
|
4. Cross-populate derived fields
|
|
13
13
|
5. Return ScanOutput
|
|
14
|
+
|
|
15
|
+
Section ownership rules (inlined from the retired tools/scan/merge.py):
|
|
16
|
+
Rule 1: Scanner-owned sections -> full replace
|
|
17
|
+
Rule 2: Agent-enriched sections -> never touch
|
|
18
|
+
Rule 3: Mixed sections -> selective update at sub-key level
|
|
19
|
+
Rule 4: Unknown/user-custom sections -> preserve
|
|
20
|
+
Rule 5: Metadata -> always update
|
|
14
21
|
"""
|
|
15
22
|
|
|
23
|
+
import copy
|
|
16
24
|
import logging
|
|
17
25
|
import time
|
|
18
26
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
19
27
|
from dataclasses import dataclass, field
|
|
20
28
|
from datetime import datetime, timezone
|
|
21
29
|
from pathlib import Path
|
|
22
|
-
from typing import Any, Dict, List, Optional
|
|
30
|
+
from typing import Any, Dict, List, Optional, Set
|
|
23
31
|
|
|
24
32
|
from tools.scan import __version__ as scanner_package_version
|
|
25
33
|
from tools.scan.config import ScanConfig
|
|
26
|
-
from tools.scan.merge import (
|
|
27
|
-
AGENT_ENRICHED_SECTIONS,
|
|
28
|
-
collect_scanner_sections,
|
|
29
|
-
merge_context,
|
|
30
|
-
)
|
|
31
34
|
from tools.scan.registry import ScannerRegistry
|
|
32
35
|
from tools.scan.scanners.base import BaseScanner, ScanResult
|
|
33
36
|
from tools.scan.workspace import WorkspaceInfo, detect_workspace_type
|
|
@@ -35,6 +38,190 @@ from tools.scan.workspace import WorkspaceInfo, detect_workspace_type
|
|
|
35
38
|
logger = logging.getLogger(__name__)
|
|
36
39
|
|
|
37
40
|
|
|
41
|
+
# ---------------------------------------------------------------------------
|
|
42
|
+
# Section ownership constants (Rule 1 / Rule 2 / Rule 3)
|
|
43
|
+
# ---------------------------------------------------------------------------
|
|
44
|
+
|
|
45
|
+
# Rule 1 -- top-level sections that belong wholesale to one scanner and are
# replaced in full on every scan.  Maps section name -> owning scanner name.
# `environment` is intentionally absent: its ownership is split per sub-key
# (see ENVIRONMENT_SUBKEY_OWNERS below).
SCANNER_OWNED_TOP_LEVEL: Dict[str, str] = dict(
    project_identity="stack",
    stack="stack",
    git="git",
    infrastructure="infrastructure",
    orchestration="orchestration",
)

# Sub-key level ownership inside the `environment` section.
# Maps environment sub-key -> owning scanner name.
ENVIRONMENT_SUBKEY_OWNERS: Dict[str, str] = dict(
    tools="tools",
    tool_preferences="tools",
    os="environment",
    runtimes="environment",
    env_files="environment",
)

# Rule 2 -- agent-enriched sections; scanners never modify these.
AGENT_ENRICHED_SECTIONS: frozenset = frozenset({
    "operational_guidelines",
    "cluster_details",
    "infrastructure_topology",
    "monitoring_observability",
    "architecture_overview",
    "gcp_services",
    "workload_identity",
})

# Rule 3 -- mixed sections where scanners own only some fields.
# Maps section name -> set of scanner-owned field names.
MIXED_SECTION_SCANNER_FIELDS: Dict[str, Set[str]] = dict(
    terraform_infrastructure={"layout"},
    gitops_configuration={"repository"},
    application_services={"base_path", "services"},
)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# ---------------------------------------------------------------------------
|
|
87
|
+
# Section collection and merge helpers
|
|
88
|
+
# ---------------------------------------------------------------------------
|
|
89
|
+
|
|
90
|
+
def collect_scanner_sections(
    scanner_results: Dict[str, Any],
) -> Dict[str, Any]:
    """Collect and combine the sections produced by every scanner.

    The `environment` section is assembled from several results: every
    sub-key found under an `environment` dict (except `_source`) is pooled,
    and a single `environment` section carrying a combined `_source` tag is
    emitted at the end. All other sections are copied across by reference;
    if two results emit the same section name, the later one wins.

    Args:
        scanner_results: Mapping of scanner_name -> result object. Each
            result is expected to expose a `sections` dict; a result with no
            `sections` attribute, or with `sections` set to None/empty,
            contributes nothing.

    Returns:
        Combined sections dict from all scanners. `environment` is present
        only when at least one non-`_source` sub-key was collected.
    """
    combined: Dict[str, Any] = {}
    environment_parts: Dict[str, Any] = {}

    # The scanner name is not needed here, so iterate over the values only.
    for scan_result in scanner_results.values():
        # Missing attribute and an explicit None are both "no sections";
        # previously None raised AttributeError on `.items()`.
        sections = getattr(scan_result, "sections", None) or {}

        for section_name, section_data in sections.items():
            if section_name != "environment":
                # Each section should have exactly one owner; on a clash the
                # last scanner wins.
                combined[section_name] = section_data
                continue

            # Non-dict `environment` payloads are ignored.
            if isinstance(section_data, dict):
                environment_parts.update(
                    (key, value)
                    for key, value in section_data.items()
                    if key != "_source"
                )

    # Reassemble the environment section only if some sub-key was collected.
    if environment_parts:
        combined["environment"] = {
            "_source": "scanner:environment+tools",
            **environment_parts,
        }

    return combined
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _merge_environment_section(
    result: Dict[str, Any],
    scan_sections: Dict[str, Any],
) -> None:
    """Fold the scanned `environment` section into `result`, sub-key by sub-key.

    Only the sub-keys listed in ENVIRONMENT_SUBKEY_OWNERS are taken from the
    scan; each one present in the scanned section overwrites the matching
    entry in `result["environment"]` with a deep copy. Every other key
    already in `result["environment"]` is left alone. The `_source` field is
    then set to the combined tag.

    Does nothing when `scan_sections` has no `environment` entry.

    Args:
        result: The result dict being built (mutated in place).
        scan_sections: Combined sections from all scanners.
    """
    if "environment" not in scan_sections:
        return

    scan_env = scan_sections["environment"]

    # Create the target section on first use.
    env = result.setdefault("environment", {})

    # Overwrite only the owned sub-keys the scan actually produced.
    env.update(
        (subkey, copy.deepcopy(scan_env[subkey]))
        for subkey in ENVIRONMENT_SUBKEY_OWNERS
        if subkey in scan_env
    )

    # Tag the section with the combined source marker.
    env["_source"] = "scanner:environment+tools"
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _merge_sections(
    existing: Dict[str, Any],
    scan_sections: Dict[str, Any],
) -> Dict[str, Any]:
    """Merge scanner output into a copy of the existing sections.

    Starts from a deep copy of `existing` and applies the section ownership
    rules on top of it; neither argument is mutated. Per the original note,
    normal scan runs call this with existing={} (display-only path).

    Args:
        existing: Current sections (may be empty when called from scan).
        scan_sections: Combined sections from all scanners.

    Returns:
        Merged sections dict. The merge is deterministic: same inputs always
        produce the same output.
    """
    merged = copy.deepcopy(existing)

    # Rule 1: scanner-owned top-level sections are replaced wholesale.
    for owned_name in SCANNER_OWNED_TOP_LEVEL:
        if owned_name in scan_sections:
            merged[owned_name] = copy.deepcopy(scan_sections[owned_name])

    # `environment` is merged at sub-key level rather than replaced.
    _merge_environment_section(merged, scan_sections)

    # Rule 2: agent-enriched sections need no action -- whatever `existing`
    # held is already in `merged` via the deep copy.

    # Rule 3: mixed sections -- refresh only the scanner-owned fields and
    # leave every other field of the section untouched.
    for mixed_name, owned_fields in MIXED_SECTION_SCANNER_FIELDS.items():
        if mixed_name not in scan_sections:
            continue
        scanned = scan_sections[mixed_name]
        target = merged.setdefault(mixed_name, {})
        for field_name in owned_fields:
            if field_name in scanned:
                target[field_name] = copy.deepcopy(scanned[field_name])

    # Rule 4: unknown / user-custom sections in `existing` survive via the
    # deep copy; sections that appear only in `scan_sections` and are not
    # covered above are NOT added.

    return merged
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
# ---------------------------------------------------------------------------
|
|
222
|
+
# ScanOutput dataclass
|
|
223
|
+
# ---------------------------------------------------------------------------
|
|
224
|
+
|
|
38
225
|
@dataclass(frozen=True)
|
|
39
226
|
class ScanOutput:
|
|
40
227
|
"""Aggregated output from all scanners.
|
|
@@ -187,11 +374,9 @@ class ScanOrchestrator:
|
|
|
187
374
|
scan_sections = collect_scanner_sections(scanner_results)
|
|
188
375
|
|
|
189
376
|
# Merge with empty existing context (no JSON persistence)
|
|
190
|
-
|
|
191
|
-
merged_sections = merge_context(
|
|
377
|
+
merged_sections = _merge_sections(
|
|
192
378
|
existing={},
|
|
193
379
|
scan_sections=scan_sections,
|
|
194
|
-
section_owners=section_owners,
|
|
195
380
|
)
|
|
196
381
|
|
|
197
382
|
# Determine which sections were updated vs preserved
|
|
@@ -210,7 +395,6 @@ class ScanOrchestrator:
|
|
|
210
395
|
self._cross_populate_monorepo(merged_sections)
|
|
211
396
|
|
|
212
397
|
# Remove empty {} placeholders for agent-enriched and mixed sections
|
|
213
|
-
from tools.scan.merge import MIXED_SECTION_SCANNER_FIELDS
|
|
214
398
|
remove_if_empty = (
|
|
215
399
|
AGENT_ENRICHED_SECTIONS
|
|
216
400
|
| frozenset(MIXED_SECTION_SCANNER_FIELDS.keys())
|
|
@@ -23,8 +23,7 @@ from unittest.mock import patch
|
|
|
23
23
|
import pytest
|
|
24
24
|
|
|
25
25
|
from tools.scan.config import ScanConfig
|
|
26
|
-
from tools.scan.merge import AGENT_ENRICHED_SECTIONS
|
|
27
|
-
from tools.scan.orchestrator import ScanOrchestrator, ScanOutput
|
|
26
|
+
from tools.scan.orchestrator import AGENT_ENRICHED_SECTIONS, ScanOrchestrator, ScanOutput
|
|
28
27
|
from tools.scan.registry import ScannerRegistry
|
|
29
28
|
from tools.scan.scanners.base import BaseScanner, ScanResult
|
|
30
29
|
from tools.scan.tests.conftest import create_git_dir
|