@jaguilar87/gaia 5.0.2 → 5.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/ARCHITECTURE.md +0 -1
- package/CHANGELOG.md +54 -0
- package/bin/cli/approvals.py +23 -21
- package/config/surface-routing.json +0 -1
- package/dist/gaia-ops/.claude-plugin/plugin.json +1 -1
- package/dist/gaia-ops/config/surface-routing.json +0 -1
- package/dist/gaia-ops/hooks/modules/agents/contract_validator.py +18 -0
- package/dist/gaia-ops/hooks/modules/agents/handoff_persister.py +212 -2
- package/dist/gaia-ops/hooks/modules/agents/response_contract.py +26 -0
- package/dist/gaia-ops/hooks/modules/agents/transcript_reader.py +15 -0
- package/dist/gaia-ops/hooks/modules/security/__init__.py +0 -5
- package/dist/gaia-ops/hooks/modules/security/approval_grants.py +122 -19
- package/dist/gaia-ops/hooks/modules/security/mutative_verbs.py +99 -7
- package/dist/gaia-ops/hooks/modules/tools/bash_validator.py +125 -24
- package/dist/gaia-ops/skills/agent-contract-handoff/SKILL.md +3 -0
- package/dist/gaia-ops/skills/agent-response/SKILL.md +4 -2
- package/dist/gaia-ops/skills/gaia-patterns/reference.md +2 -2
- package/dist/gaia-ops/skills/orchestrator-present-approval/SKILL.md +20 -5
- package/dist/gaia-ops/skills/orchestrator-present-approval/reference.md +32 -15
- package/dist/gaia-ops/skills/security-tiers/SKILL.md +5 -1
- package/dist/gaia-ops/skills/security-tiers/reference.md +3 -1
- package/dist/gaia-ops/skills/subagent-request-approval/SKILL.md +43 -6
- package/dist/gaia-ops/skills/subagent-request-approval/reference.md +66 -16
- package/dist/gaia-ops/tools/context/README.md +1 -1
- package/dist/gaia-ops/tools/gaia_simulator/extractor.py +0 -1
- package/dist/gaia-security/.claude-plugin/plugin.json +1 -1
- package/dist/gaia-security/hooks/modules/agents/contract_validator.py +18 -0
- package/dist/gaia-security/hooks/modules/agents/handoff_persister.py +212 -2
- package/dist/gaia-security/hooks/modules/agents/response_contract.py +26 -0
- package/dist/gaia-security/hooks/modules/agents/transcript_reader.py +15 -0
- package/dist/gaia-security/hooks/modules/security/__init__.py +0 -5
- package/dist/gaia-security/hooks/modules/security/approval_grants.py +122 -19
- package/dist/gaia-security/hooks/modules/security/mutative_verbs.py +99 -7
- package/dist/gaia-security/hooks/modules/tools/bash_validator.py +125 -24
- package/gaia/state/transitions.py +4 -4
- package/gaia/store/writer.py +56 -0
- package/hooks/modules/README.md +2 -4
- package/hooks/modules/agents/contract_validator.py +18 -0
- package/hooks/modules/agents/handoff_persister.py +212 -2
- package/hooks/modules/agents/response_contract.py +26 -0
- package/hooks/modules/agents/transcript_reader.py +15 -0
- package/hooks/modules/security/__init__.py +0 -5
- package/hooks/modules/security/approval_grants.py +122 -19
- package/hooks/modules/security/mutative_verbs.py +99 -7
- package/hooks/modules/tools/bash_validator.py +125 -24
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/skills/agent-contract-handoff/SKILL.md +3 -0
- package/skills/agent-response/SKILL.md +4 -2
- package/skills/gaia-patterns/reference.md +2 -2
- package/skills/orchestrator-present-approval/SKILL.md +20 -5
- package/skills/orchestrator-present-approval/reference.md +32 -15
- package/skills/security-tiers/SKILL.md +5 -1
- package/skills/security-tiers/reference.md +3 -1
- package/skills/subagent-request-approval/SKILL.md +43 -6
- package/skills/subagent-request-approval/reference.md +66 -16
- package/tools/context/README.md +1 -1
- package/tools/gaia_simulator/extractor.py +0 -1
- package/dist/gaia-ops/hooks/modules/security/gitops_validator.py +0 -179
- package/dist/gaia-security/hooks/modules/security/gitops_validator.py +0 -179
- package/hooks/modules/security/gitops_validator.py +0 -179
|
@@ -16,7 +16,7 @@ The orchestrator loads this to interpret a returned `agent_contract_handoff` and
|
|
|
16
16
|
|
|
17
17
|
```
|
|
18
18
|
parse_contract(agent_output) -> read agent_status.plan_status
|
|
19
|
-
|- COMPLETE -> summarize key_outputs
|
|
19
|
+
|- COMPLETE -> relay user_facing_summary if present & N=1, else summarize key_outputs; surface verification, then close
|
|
20
20
|
|- APPROVAL_REQUEST -> split on approval_id (present: present-approval; absent: plan options)
|
|
21
21
|
|- NEEDS_INPUT -> AskUserQuestion, then SendMessage the answer
|
|
22
22
|
|- BLOCKED -> present open_gaps; new dispatch or accept the limitation
|
|
@@ -29,7 +29,7 @@ Before any branch runs, the contract must parse. A block that fails `parse_contr
|
|
|
29
29
|
|
|
30
30
|
| `plan_status` | Action |
|
|
31
31
|
|---|---|
|
|
32
|
-
| `COMPLETE` |
|
|
32
|
+
| `COMPLETE` | If `user_facing_summary` is present AND this is a single-agent turn (N=1), relay it near-verbatim -- adapt only to the user's language, do not re-synthesize -- because the subagent already wrote the human-shaped summary and re-summarizing its `key_outputs` only spends tokens to restate what it said. If the field is absent, or N>1 (multiple agents being consolidated), summarize `key_outputs` in 3-5 bullets as before. Either way, surface `verification.result` / `verification.details` -- that block is the proof the work landed, and relaying it is what lets the user trust the increment rather than take "done" on faith. Mention `cross_layer_impacts` and `open_gaps` when non-empty. |
|
|
33
33
|
| `APPROVAL_REQUEST` | Split on `approval_request.approval_id`: present -> load `Skill('orchestrator-present-approval')`; absent -> present the plan with options (execute / modify / cancel) and on execute/modify resume the SAME agent via `SendMessage`. It splits because a hook-issued `approval_id` carries a pending T3 grant that needs the structured consent flow, while a plan-first request only needs direction (`agent-approval-protocol`, combo decision 2). |
|
|
34
34
|
| `NEEDS_INPUT` | `AskUserQuestion` with the options in `next_action`, then `SendMessage` the answer back to resume. |
|
|
35
35
|
| `BLOCKED` | Present `open_gaps` to the user. If they give direction, dispatch a NEW agent addressing the blocker; if they accept the limitation, close the task as incomplete and move on. |
|
|
@@ -41,6 +41,8 @@ These ride alongside `plan_status` and carry signal the orchestrator loses if it
|
|
|
41
41
|
|
|
42
42
|
**`verification`** -- covered in COMPLETE above. It is required only on `COMPLETE` and its `result` must equal `"pass"` (`VERIFICATION_RESULT_MUST_BE_PASS`, `contract_validator.py`); surface `result` and `details` so the user sees the proof, never just the word "done."
|
|
43
43
|
|
|
44
|
+
**`user_facing_summary`** -- the one human-audience field (every other field is machine-audience for the orchestrator). On a single-agent `COMPLETE` it is what you relay to the user, near-verbatim and language-adapted, *instead of* re-synthesizing `key_outputs`; that is the whole point -- the subagent wrote the summary once, so re-summarizing duplicates work that adds no value for the user. It is optional and additive: when absent, fall back to `key_outputs`; when multiple agents are in flight (N>1), ignore it and synthesize across them, because no single agent's summary speaks for the consolidated result.
|
|
45
|
+
|
|
44
46
|
**`memorialize_suggestions` / `memory_suggestions`** -- present each entry to the user before closing the turn and persist ONLY on consent. The orchestrator is the sole memory writer; subagents are blocked from curated writes by design so each entry enters the substrate as a named choice. For the curation mechanics -- how to triage, slug, and persist -- load `Skill('memory')` (combo decision 1: the HOW lives in `memory`).
|
|
45
47
|
|
|
46
48
|
**`ownership_assessment`** (in `consolidation_report`, enum `VALID_OWNERSHIP_ASSESSMENTS`) -- a ROUTING INPUT the orchestrator acts on silently, not a user-facing field. `owned_here` means the output is authoritative; `cross_surface_dependency` or `not_my_surface` means another dispatch may be needed to close the gap. Route on it; do not narrate it (combo decision 4).
|
|
@@ -29,7 +29,7 @@ SessionStart emits a one-shot `hookSpecificOutput.additionalContext` manifest (E
|
|
|
29
29
|
| Package | Files | Purpose |
|
|
30
30
|
|---------|-------|---------|
|
|
31
31
|
| `core/` | `hook_entry`, `paths`, `plugin_mode`, `plugin_setup`, `state`, `stdin` | Entry dispatch, path resolution, mode detection, shared state |
|
|
32
|
-
| `security/` | `blocked_commands`, `mutative_verbs`, `tiers`, `
|
|
32
|
+
| `security/` | `blocked_commands`, `mutative_verbs`, `tiers`, `command_semantics`, `approval_grants`, `approval_scopes`, `approval_cleanup`, `approval_constants`, `approval_messages`, `blocked_message_formatter`, `prompt_validator` | T3 gate, blocked commands, approval nonce lifecycle |
|
|
33
33
|
| `audit/` | `logger`, `metrics`, `event_detector`, `workflow_auditor`, `workflow_recorder` | Structured logging, metrics collection, workflow audit trail |
|
|
34
34
|
| `tools/` | `bash_validator`, `cloud_pipe_validator`, `shell_parser`, `task_validator`, `hook_response` | Command validation, pipe detection, shell parsing |
|
|
35
35
|
| `context/` | `context_injector`, `context_writer`, `context_freshness`, `contracts_loader`, `compact_context_builder`, `anchor_tracker` | Project-context injection, freshness checks, contract loading |
|
|
@@ -254,7 +254,7 @@ The hook invoker is `python3 <script>` rather than executing the script directly
|
|
|
254
254
|
| Category | Directory | What it tests |
|
|
255
255
|
|----------|-----------|---------------|
|
|
256
256
|
| Prompt regression | `tests/layer1_prompt_regression/` | Routing table, skill content rules, agent frontmatter, agent prompts, security tier consistency, skills cross-reference, context contracts |
|
|
257
|
-
| Hooks | `tests/hooks/modules/` | Security modules (mutative_verbs, blocked_commands, tiers,
|
|
257
|
+
| Hooks | `tests/hooks/modules/` | Security modules (mutative_verbs, blocked_commands, tiers, approval_grants, approval_scopes, command_semantics), tools (bash_validator, shell_parser, cloud_pipe_validator, task_validator), core (paths, state), context (context_writer) |
|
|
258
258
|
| System | `tests/system/` | Directory structure, permissions, agent definitions, configuration, schema compatibility |
|
|
259
259
|
| Tools | `tests/tools/` | context_provider, episodic, pending_updates, deep_merge, review_engine, surface_router |
|
|
260
260
|
| Integration | `tests/integration/` | Context enrichment, subagent lifecycle, subagent stop, nonce approval relay |
|
|
@@ -71,12 +71,27 @@ Fields above are extracted from the DB-stored canonical payload (`payload_json`
|
|
|
71
71
|
grant consumed by the first retry (`consume_db_semantic_grant` in
|
|
72
72
|
`gaia/store/writer.py`). A second invocation is a new APPROVAL_REQUEST.
|
|
73
73
|
|
|
74
|
-
3. **
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
74
|
+
3. **Batch grant is `COMMAND_SET` -- one consent, N commands.** Legacy
|
|
75
|
+
`verb_family` was removed; its replacement, `COMMAND_SET`, is now wired
|
|
76
|
+
end-to-end (intake, activation, consume). When a subagent emits a plan-first
|
|
77
|
+
`APPROVAL_REQUEST` carrying a `command_set` of >= 2 `{command, rationale}`
|
|
78
|
+
items and **no** `approval_id`, the SubagentStop processor
|
|
79
|
+
(`handoff_persister._intake_command_set_pending`) mints ONE pending
|
|
80
|
+
`COMMAND_SET` with one `approval_id`. You present that single approval: list
|
|
81
|
+
**all N commands** in the question body, but use **one** Approve label with
|
|
82
|
+
**one** `[P-{nonce8}]` suffix -- one consent covers the whole batch. On
|
|
83
|
+
approval, `activate_db_pending_by_prefix` Step 3b creates a single
|
|
84
|
+
`COMMAND_SET` grant (60-min TTL); each command is consumed byte-for-byte on
|
|
85
|
+
its own retry. `batch_scope` is still ignored (the signal is `command_set`).
|
|
78
86
|
See `reference.md` -> "On batch intents".
|
|
79
87
|
|
|
88
|
+
You present the batch the subagent chose to send; you do not steer it toward
|
|
89
|
+
batching. Whether grouping is warranted is the subagent's judgment (known
|
|
90
|
+
batch, >= 2, friction reduced -- see `subagent-request-approval`). A singular
|
|
91
|
+
approval arriving where you imagined a batch is not a defect to correct: the
|
|
92
|
+
default is just-in-time, and a batch you would have manufactured asks the
|
|
93
|
+
user to consent to commands that may never run.
|
|
94
|
+
|
|
80
95
|
4. **Re-dispatch, do not resume.** `mode` does not survive a SendMessage resume:
|
|
81
96
|
the resume runs in `default` and re-blocks the next protected operation even
|
|
82
97
|
after the Gaia grant activated. Prefer a fresh re-dispatch with the same
|
|
@@ -97,5 +112,5 @@ wording, see `reference.md` -> "GOOD vs BAD Examples", "Option Label Patterns",
|
|
|
97
112
|
| "I'll skip the [P-...] suffix, it's cosmetic" | The hook extracts the nonce from the label to find the right pending row; without it, targeted activation fails and no grant is created. |
|
|
98
113
|
| "Similar command, slightly different path -- I'll reuse / wrap it" | Grants match the statement signature byte-for-byte. Any wrapper, redirect, flag, or path drift is a different signature and a fresh re-block. |
|
|
99
114
|
| "The same command emitted a new approval_id" | Grants are single-use and consumed on the first retry. A second run is a new APPROVAL_REQUEST -- approve again. |
|
|
100
|
-
| "I'll set batch_scope to approve many at once" |
|
|
115
|
+
| "I'll set batch_scope to approve many at once" | `batch_scope` is ignored -- but a real batch path exists: a plan-first `command_set` (>= 2 items, no `approval_id`) is intaken into ONE pending `COMMAND_SET`. Present that single approval (N commands shown, one `[P-...]` nonce, one consent), not N separate approvals. |
|
|
101
116
|
| "I can paraphrase a field before relaying" | The fingerprint covers all 7 sealed fields; any modification raises `ChainTamperError` in Step 0 and the presentation is refused. |
|
|
@@ -107,32 +107,49 @@ contain `[P-<hex>]`. Reject labels never carry a nonce. The captured hex is the
|
|
|
107
107
|
`get_pending(all_sessions=True)` and selects the one whose `id` starts with
|
|
108
108
|
`P-{prefix}`.
|
|
109
109
|
|
|
110
|
-
## On batch intents --
|
|
110
|
+
## On batch intents -- the COMMAND_SET grant (one consent, N commands)
|
|
111
111
|
|
|
112
112
|
The old `verb_family` design (one approval covering many commands of the same
|
|
113
113
|
`base_cmd + verb`) **was removed**. The module docstring in
|
|
114
114
|
`hooks/modules/security/approval_grants.py` is explicit: "The legacy verb_family
|
|
115
115
|
path has been removed."
|
|
116
116
|
|
|
117
|
-
|
|
117
|
+
Its replacement is the `COMMAND_SET` grant: an explicit list of
|
|
118
118
|
`{command, rationale}` items, each matched **byte-for-byte** (D10: no whitespace
|
|
119
119
|
normalization, no quote canonicalization, no shell expansion) and consumed
|
|
120
120
|
individually (`create_command_set_grant` and `match_command_set_grant` in
|
|
121
121
|
`approval_grants.py`).
|
|
122
122
|
|
|
123
|
-
**Current state of the code
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
`
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
123
|
+
**Current state of the code: all three sides are wired -- intake, activation,
|
|
124
|
+
consume.** It is a **plan-first** flow: the subagent declares the batch up-front
|
|
125
|
+
by emitting an `APPROVAL_REQUEST` whose `approval_request` carries a
|
|
126
|
+
`command_set` list and **no** `approval_id`.
|
|
127
|
+
|
|
128
|
+
- **Intake.** The SubagentStop processor
|
|
129
|
+
`hooks/modules/agents/handoff_persister.py` ->
|
|
130
|
+
`_intake_command_set_pending()` reads the `command_set`; when it holds **>= 2**
|
|
131
|
+
items it calls `gaia.approvals.store.insert_requested()` with a payload that
|
|
132
|
+
contains the `command_set` key, minting **exactly ONE** pending `COMMAND_SET`
|
|
133
|
+
approval with one `approval_id`. A set of `<= 1` item is declined (no
|
|
134
|
+
COMMAND_SET is minted for one command).
|
|
135
|
+
- **Activation.** When the user approves, `activate_db_pending_by_prefix()`
|
|
136
|
+
(`hooks/modules/security/approval_grants.py`) reads `payload["command_set"]`,
|
|
137
|
+
and, because it has > 1 item, branches at **Step 3b** into
|
|
138
|
+
`create_command_set_grant()`, inserting ONE `COMMAND_SET` grant row (status
|
|
139
|
+
`PENDING`, `command_set_json` holding the whole set, 60-min TTL via
|
|
140
|
+
`DEFAULT_COMMAND_SET_TTL_MINUTES`) instead of a singular
|
|
141
|
+
`SCOPE_SEMANTIC_SIGNATURE` grant.
|
|
142
|
+
- **Consume.** On each retry, `bash_validator` calls `match_command_set_grant()`
|
|
143
|
+
(byte-for-byte index match), then `mark_command_set_item_consumed()`; a
|
|
144
|
+
consumed index never matches again (replay protection), and when every index
|
|
145
|
+
is consumed the grant flips to `CONSUMED`.
|
|
146
|
+
|
|
147
|
+
**Practical consequence:** a `batch_scope` field still does nothing -- the signal
|
|
148
|
+
is `command_set`. To approve a sweep of N related commands under one consent,
|
|
149
|
+
present the single `COMMAND_SET` approval the intake minted: show **all N
|
|
150
|
+
commands** in the question body, with **one** Approve label carrying **one**
|
|
151
|
+
`[P-{nonce8}]` suffix. The user gives one consent; each command then runs on its
|
|
152
|
+
own retry within the 60-minute window. You do NOT issue N separate approvals.
|
|
136
153
|
|
|
137
154
|
## Grant Activation Mechanics
|
|
138
155
|
|
|
@@ -17,7 +17,11 @@ security-tiers classifies every operation into four tiers so an agent knows whet
|
|
|
17
17
|
| **T0** | Read-only; observes state, changes nothing | No | get, list, describe, show, logs, status |
|
|
18
18
|
| **T1** | Local validation; no remote calls, no state | No | validate, lint, fmt, check |
|
|
19
19
|
| **T2** | Simulation / dry-run; may read remote, never writes | No | plan, diff, dry-run, template |
|
|
20
|
-
| **T3** | State-mutating; creates, updates, or destroys | **Yes** | apply, create, delete,
|
|
20
|
+
| **T3** | State-mutating; creates, updates, or destroys | **Yes** | apply, create, delete, push, deploy |
|
|
21
|
+
|
|
22
|
+
`git commit` and `git add` are **not** T3 -- they are local-only operations (they touch the working tree and local refs, never remote state), so they classify as safe by elimination. Only `git push` mutates remote state and is T3. This matches `GIT_LOCAL_SAFE_SUBCOMMANDS` in `mutative_verbs.py`, where `commit` and `add` are listed as local-safe.
|
|
23
|
+
|
|
24
|
+
**T3 gates a direction, not a category of verb.** An operation needs consent because it moves the system toward *more* capability (it grants) or *less* recoverability (it destroys). An operation that only moves the other way -- that *reduces* capability already granted -- does not need consent, because the worst it can do is take back power that was given. So within Gaia's own consent layer, `gaia approvals revoke|reject|reject-all|clean` are **not** T3: they only revoke or discard grants Gaia itself issued, never reaching outside the local approval store. The asymmetry is deliberate -- `gaia approvals approve` *grants* capability without the AskUserQuestion flow, so it stays T3. This is anchored to the `gaia approvals` group in `CONSENT_REDUCING_SUBCOMMAND_EXCEPTIONS` (`mutative_verbs.py`), not generalized to every CLI's "revoke" -- a cloud IAM revoke is a real remote mutation and remains T3.
|
|
21
25
|
|
|
22
26
|
## Classification heuristic
|
|
23
27
|
|
|
@@ -36,7 +36,9 @@ Read on-demand by infrastructure agents. Not injected automatically.
|
|
|
36
36
|
- `kubectl apply -f manifest.yaml`
|
|
37
37
|
- `helm upgrade` (without `--dry-run`)
|
|
38
38
|
- `flux reconcile` (write operations)
|
|
39
|
-
- `git
|
|
39
|
+
- `git push` (any branch) -- mutates remote state
|
|
40
|
+
|
|
41
|
+
Note: `git commit` and `git add` are **not** T3. They are local-only (working tree + local refs, never remote), classified safe by elimination via `GIT_LOCAL_SAFE_SUBCOMMANDS` in `mutative_verbs.py`. Only `git push` reaches remote state.
|
|
40
42
|
|
|
41
43
|
## Edge Cases
|
|
42
44
|
|
|
@@ -63,12 +63,47 @@ prose are invisible to the presentation -- the user would approve blind.
|
|
|
63
63
|
- **The grant is single-use.** It is consumed on your first matching retry. A
|
|
64
64
|
second run within the TTL will not match -- it needs a fresh approval.
|
|
65
65
|
|
|
66
|
-
## Batch / many-command intents
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
66
|
+
## Batch / many-command intents -- COMMAND_SET as a judgment, not a default
|
|
67
|
+
|
|
68
|
+
Grouping commands under one consent is a **judgment call you earn, not the
|
|
69
|
+
reflex you reach for**. The default is singular, just-in-time approval: attempt
|
|
70
|
+
the command, let the hook block it, request that one. Reach for `COMMAND_SET`
|
|
71
|
+
**only when all three hold** -- the batch is already **known** (the commands are
|
|
72
|
+
determined, not predicted), there are **>= 2** of them, and grouping **actually
|
|
73
|
+
reduces friction** versus approving each as it arrives. If any fails (a single
|
|
74
|
+
command, a sequential flow where the next depends on the last's output, or a
|
|
75
|
+
set you cannot yet name), use the singular path. The principle with its
|
|
76
|
+
consequence: **grouping trades the user's per-command visibility for fewer
|
|
77
|
+
prompts; make that trade only when the batch is real and known, because a batch
|
|
78
|
+
you guessed at asks the user to approve commands that may never run.**
|
|
79
|
+
|
|
80
|
+
The hard prohibition this rules out: **never invent or predict commands just to
|
|
81
|
+
have something to group.** Speculatively enumerating a `command_set` to "save
|
|
82
|
+
turns" inverts the cost -- it manufactures ceremony (a multi-command consent
|
|
83
|
+
surface) around work that was never determined, which is more overhead than the
|
|
84
|
+
just-in-time blocks it was meant to avoid. If you do not already know the
|
|
85
|
+
commands, you do not have a batch.
|
|
86
|
+
|
|
87
|
+
When the three conditions do hold, emit an `APPROVAL_REQUEST` whose
|
|
88
|
+
`approval_request` carries a `command_set` -- a list of `{command, rationale}`
|
|
89
|
+
items -- and **no `approval_id`** (nothing has been attempted yet). The
|
|
90
|
+
per-command rationale is what makes the grouped consent honest: the user sees
|
|
91
|
+
why each *known* command is in the batch before approving (D10).
|
|
92
|
+
|
|
93
|
+
What happens to that envelope: the SubagentStop processor
|
|
94
|
+
(`hooks/modules/agents/handoff_persister.py` -> `_intake_command_set_pending`)
|
|
95
|
+
reads the `command_set`, and when it holds **>= 2** items it calls
|
|
96
|
+
`gaia.approvals.store.insert_requested` with a payload containing the
|
|
97
|
+
`command_set` key. That mints **exactly ONE pending `COMMAND_SET` approval**
|
|
98
|
+
with one `approval_id` -- so a batch of N commands is **one consent, N
|
|
99
|
+
commands**, not N approvals. A set of `<= 1` item is not a batch: it does not
|
|
100
|
+
mint a COMMAND_SET (use the normal singular block path for a single command).
|
|
101
|
+
|
|
102
|
+
On the user's approval, that one pending activates into a single `COMMAND_SET`
|
|
103
|
+
grant (60-minute TTL); each item is then consumed byte-for-byte on its own
|
|
104
|
+
retry, with replay protection, until the whole set is `CONSUMED`. See
|
|
105
|
+
`reference.md` for the envelope shape, the intake processor, the grant TTL, and
|
|
106
|
+
the consume path.
|
|
72
107
|
|
|
73
108
|
## Pointers
|
|
74
109
|
|
|
@@ -84,3 +119,5 @@ approval, so emitting `batch_scope` does nothing. See `reference.md` for why.
|
|
|
84
119
|
- **Fabricating `approval_id`, fingerprint, or `sealed_payload`** -- the orchestrator validates against the DB; invented values never match.
|
|
85
120
|
- **Reusing a prior approval** -- single-use, consumed on first retry.
|
|
86
121
|
- **Emitting `batch_scope`** -- the field is not recognized; if emitted, it is ignored.
|
|
122
|
+
- **Grouping by reflex** -- reaching for `COMMAND_SET` because a batch *might* form, instead of because a known batch of >= 2 already exists that grouping makes cheaper. The default is singular just-in-time; grouping is the exception you justify.
|
|
123
|
+
- **Predicting commands to fill a batch** -- inventing commands you have not determined so a `command_set` has >= 2 items. You cannot ask consent for work that does not yet exist; the speculative batch is pure overhead.
|
|
@@ -69,35 +69,85 @@ On your retry, `check_approval_grant()` matches it and immediately consumes it
|
|
|
69
69
|
TTL will NOT match -- the grant is gone. This is replay protection by design;
|
|
70
70
|
re-approve if you need to run the command again.
|
|
71
71
|
|
|
72
|
-
## Batch / COMMAND_SET --
|
|
72
|
+
## Batch / COMMAND_SET -- wired
|
|
73
73
|
|
|
74
74
|
The legacy `verb_family` multi-use grant was removed (see module docstring in
|
|
75
|
-
`approval_grants.py`: "The legacy verb_family path has been removed"). Its
|
|
75
|
+
`approval_grants.py`: "The legacy verb_family path has been removed"). Its
|
|
76
76
|
replacement is the `COMMAND_SET` grant -- an explicit list of `{command, rationale}`
|
|
77
77
|
items, each matched byte-for-byte and consumed individually
|
|
78
78
|
(`approval_grants.create_command_set_grant()`; `approval_grants.match_command_set_grant()`).
|
|
79
|
+
All three sides are now wired end-to-end -- **intake**, **activation**, and
|
|
80
|
+
**consume** -- so one consent covers N commands.
|
|
81
|
+
|
|
82
|
+
**Intake -- plan-first, one pending.** The batch is declared up-front: you emit
|
|
83
|
+
an `APPROVAL_REQUEST` whose `approval_request` carries a `command_set` list and
|
|
84
|
+
**no `approval_id`** (you have attempted nothing). The production intake caller
|
|
85
|
+
is the SubagentStop processor `handoff_persister.persist_handoff()`, which calls
|
|
86
|
+
`_intake_command_set_pending()`. That helper normalizes the `command_set` and,
|
|
87
|
+
when it holds **>= 2** `{command, rationale}` items, builds a sealed_payload
|
|
88
|
+
carrying the `command_set` key (mirroring the shape
|
|
89
|
+
`bash_validator._build_sealed_payload()` emits) and calls
|
|
90
|
+
`gaia.approvals.store.insert_requested()` -- minting **exactly ONE** pending
|
|
91
|
+
`COMMAND_SET` approval with one `approval_id`. A set of length `<= 1` is not a
|
|
92
|
+
batch: the intake declines and the singular semantic-signature path owns it (no
|
|
93
|
+
COMMAND_SET is ever minted for one command). The intake runs independently of
|
|
94
|
+
the audit handoff-row write, so a batch consent is never lost to an unrelated
|
|
95
|
+
DB failure.
|
|
96
|
+
|
|
97
|
+
**Envelope shape.** The sealed_payload the intake writes carries a `command_set`
|
|
98
|
+
key holding the verbatim list of `{command, rationale}` items, and `commands`
|
|
99
|
+
listing every command string in the set:
|
|
79
100
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
101
|
+
```json
|
|
102
|
+
{
|
|
103
|
+
"operation": "MUTATIVE command intercepted: push",
|
|
104
|
+
"exact_content": "git add -A",
|
|
105
|
+
"commands": ["git add -A", "git commit -m 'v1.2.0'", "git push origin main"],
|
|
106
|
+
"command_set": [
|
|
107
|
+
{"command": "git add -A", "rationale": "stage release files"},
|
|
108
|
+
{"command": "git commit -m 'v1.2.0'", "rationale": "record the release commit"},
|
|
109
|
+
{"command": "git push origin main", "rationale": "publish to the remote"}
|
|
110
|
+
]
|
|
111
|
+
}
|
|
112
|
+
```
|
|
86
113
|
|
|
87
|
-
**
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
114
|
+
**Activation -- one consent, one grant.** When the user approves, the
|
|
115
|
+
ElicitationResult hook (`approval_grants.activate_db_pending_by_prefix()`)
|
|
116
|
+
detects the `command_set` and branches to `approval_grants.create_command_set_grant()`,
|
|
117
|
+
which inserts a single `COMMAND_SET` grant row into `approval_grants`
|
|
118
|
+
(status `PENDING`, `command_set_json` holding the whole set). The grant TTL is
|
|
119
|
+
**60 minutes** (`DEFAULT_COMMAND_SET_TTL_MINUTES`), aligned to the singular
|
|
120
|
+
active-grant TTL so the batch does not expire mid-consume across sessions.
|
|
121
|
+
|
|
122
|
+
**Consume -- item by item, replay-protected.** On each retry,
|
|
123
|
+
`bash_validator._validate_single_command()` calls `match_command_set_grant()`,
|
|
124
|
+
which finds the matching command's index byte-for-byte and returns it; the
|
|
125
|
+
validator then calls `mark_command_set_item_consumed()`, appending that index to
|
|
126
|
+
`consumed_indexes_json`. A consumed index never matches again (replay
|
|
127
|
+
protection), and when every index is consumed the grant flips to `CONSUMED`.
|
|
128
|
+
Wrapping an approved command -- adding `cd`, a redirect, a pipe, or a flag --
|
|
129
|
+
produces a different string and matches nothing in the set; it requires fresh
|
|
130
|
+
approval.
|
|
131
|
+
|
|
132
|
+
**Consequence:** for a set of N related T3 commands, emit the `command_set`
|
|
133
|
+
envelope and the user approves once. Each command runs on its own retry,
|
|
134
|
+
single-use within the 60-minute window.
|
|
91
135
|
|
|
92
136
|
## Status to emit -- with vs without approval_id
|
|
93
137
|
|
|
94
138
|
Always `plan_status: "APPROVAL_REQUEST"`. The presence of `approval_id` tells the
|
|
95
139
|
orchestrator which path:
|
|
96
140
|
|
|
97
|
-
- **With `approval_id`** -- the hook blocked; orchestrator
|
|
98
|
-
fingerprint and activates the grant on user
|
|
99
|
-
|
|
100
|
-
|
|
141
|
+
- **With `approval_id`** -- the hook blocked a single command; orchestrator
|
|
142
|
+
validates the fingerprint and activates the single-use semantic grant on user
|
|
143
|
+
approval.
|
|
144
|
+
- **Without `approval_id`, with a `command_set` of >= 2 items** -- plan-first
|
|
145
|
+
batch. The SubagentStop intake processor mints ONE pending `COMMAND_SET` and
|
|
146
|
+
the orchestrator presents that single approval (N commands, one nonce) before
|
|
147
|
+
any execution. See "Batch / COMMAND_SET -- wired" above.
|
|
148
|
+
- **Without `approval_id` and without a multi-item `command_set`** -- plan-first
|
|
149
|
+
single (you are presenting one T3 plan before attempting); the orchestrator
|
|
150
|
+
gates on user consent before any execution.
|
|
101
151
|
|
|
102
152
|
## Examples
|
|
103
153
|
|
|
@@ -77,7 +77,7 @@ Agent contracts live in `~/.gaia/gaia.db` (`project_context_contracts` + `agent_
|
|
|
77
77
|
**cloud-troubleshooter:**
|
|
78
78
|
- project_identity, stack, git, environment, infrastructure, orchestration
|
|
79
79
|
- cluster_details, infrastructure_topology, terraform_infrastructure
|
|
80
|
-
- gitops_configuration, application_services,
|
|
80
|
+
- gitops_configuration, application_services, architecture_overview
|
|
81
81
|
|
|
82
82
|
The same contracts are exposed under `write_permissions`:
|
|
83
83
|
- `readable_sections`
|
|
@@ -221,7 +221,6 @@ class LogExtractor:
|
|
|
221
221
|
# Exit 2 BLOCK (block_response is None):
|
|
222
222
|
# - "Command blocked by security policy ..." -- permanent deny list
|
|
223
223
|
# - "Commit message validation failed ..." -- validation error
|
|
224
|
-
# - "GitOps policy violation ..." -- GitOps validation
|
|
225
224
|
# - "Empty command not allowed"
|
|
226
225
|
if (
|
|
227
226
|
reason.startswith("Dangerous")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "gaia-security",
|
|
3
|
-
"version": "5.0.
|
|
3
|
+
"version": "5.0.4",
|
|
4
4
|
"description": "Keeps you in the loop only when it matters. Gaia Security analyzes every command and classifies it into risk tiers: read-only queries run freely, simulations and validations pass through, and state-changing operations (create, delete, apply, push) pause for your explicit approval before executing. Irreversible commands like dropping databases or deleting cloud infrastructure are permanently blocked.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "jaguilar87",
|
|
@@ -19,6 +19,7 @@ Provides:
|
|
|
19
19
|
- parse_rollback_executed(): Parse rollback_executed clause (advisory)
|
|
20
20
|
- parse_context_consumption(): Parse context_consumption clause (advisory)
|
|
21
21
|
- parse_memory_suggestions(): Parse memory_suggestions clause (advisory)
|
|
22
|
+
- parse_user_facing_summary(): Parse user_facing_summary clause (advisory)
|
|
22
23
|
"""
|
|
23
24
|
|
|
24
25
|
import json
|
|
@@ -655,6 +656,23 @@ def parse_memory_suggestions(contract: dict) -> List[str]:
|
|
|
655
656
|
return [str(item) for item in raw if item is not None]
|
|
656
657
|
|
|
657
658
|
|
|
659
|
+
def parse_user_facing_summary(contract: dict) -> Optional[str]:
|
|
660
|
+
"""Parse the optional top-level ``user_facing_summary`` clause (advisory).
|
|
661
|
+
|
|
662
|
+
The single human-audience field in the contract: a short prose summary the
|
|
663
|
+
subagent writes once for the user. The orchestrator relays it near-verbatim
|
|
664
|
+
on a single-agent COMPLETE (N=1) instead of re-synthesizing ``key_outputs``.
|
|
665
|
+
|
|
666
|
+
Strictly additive and advisory -- the validator never rejects based on this
|
|
667
|
+
field. Returns the trimmed string when present and non-empty, else None.
|
|
668
|
+
"""
|
|
669
|
+
raw = contract.get("user_facing_summary")
|
|
670
|
+
if not isinstance(raw, str):
|
|
671
|
+
return None
|
|
672
|
+
text = raw.strip()
|
|
673
|
+
return text or None
|
|
674
|
+
|
|
675
|
+
|
|
658
676
|
def extract_plan_status_from_output(agent_output: str) -> str:
|
|
659
677
|
"""Extract the effective plan_status string from agent output.
|
|
660
678
|
|