@jaguilar87/gaia 5.0.8 → 5.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/CHANGELOG.md +11 -0
- package/bin/README.md +6 -1
- package/bin/cli/approvals.py +341 -238
- package/bin/cli/brief.py +13 -0
- package/bin/cli/doctor.py +1 -1
- package/dist/gaia-ops/.claude-plugin/plugin.json +1 -1
- package/dist/gaia-ops/hooks/adapters/claude_code.py +19 -85
- package/dist/gaia-ops/hooks/modules/context/context_injector.py +23 -7
- package/dist/gaia-ops/hooks/modules/events/event_writer.py +63 -96
- package/dist/gaia-ops/hooks/modules/security/__init__.py +0 -2
- package/dist/gaia-ops/hooks/modules/security/approval_cleanup.py +238 -69
- package/dist/gaia-ops/hooks/modules/security/approval_grants.py +506 -1103
- package/dist/gaia-ops/hooks/modules/security/mutative_verbs.py +24 -1
- package/dist/gaia-ops/hooks/modules/session/pending_scanner.py +150 -90
- package/dist/gaia-ops/hooks/modules/session/session_manifest.py +257 -28
- package/dist/gaia-ops/hooks/post_compact.py +1 -0
- package/dist/gaia-ops/hooks/pre_compact.py +1 -0
- package/dist/gaia-ops/hooks/user_prompt_submit.py +20 -0
- package/dist/gaia-ops/skills/agent-approval-protocol/SKILL.md +27 -7
- package/dist/gaia-ops/skills/agent-approval-protocol/reference.md +11 -6
- package/dist/gaia-ops/skills/gaia-patterns/reference.md +2 -2
- package/dist/gaia-ops/skills/orchestrator-present-approval/SKILL.md +69 -28
- package/dist/gaia-ops/skills/orchestrator-present-approval/reference.md +16 -3
- package/dist/gaia-ops/skills/orchestrator-present-approval/template.md +10 -5
- package/dist/gaia-ops/skills/pending-approvals/SKILL.md +16 -11
- package/dist/gaia-ops/skills/subagent-request-approval/SKILL.md +20 -6
- package/dist/gaia-ops/skills/subagent-request-approval/reference.md +23 -15
- package/dist/gaia-ops/tools/migration/README.md +10 -12
- package/dist/gaia-ops/tools/scan/orchestrator.py +194 -10
- package/dist/gaia-ops/tools/scan/tests/test_integration.py +1 -2
- package/dist/gaia-security/.claude-plugin/plugin.json +1 -1
- package/dist/gaia-security/hooks/adapters/claude_code.py +19 -85
- package/dist/gaia-security/hooks/modules/context/context_injector.py +23 -7
- package/dist/gaia-security/hooks/modules/events/event_writer.py +63 -96
- package/dist/gaia-security/hooks/modules/security/__init__.py +0 -2
- package/dist/gaia-security/hooks/modules/security/approval_cleanup.py +238 -69
- package/dist/gaia-security/hooks/modules/security/approval_grants.py +506 -1103
- package/dist/gaia-security/hooks/modules/security/mutative_verbs.py +24 -1
- package/dist/gaia-security/hooks/modules/session/pending_scanner.py +150 -90
- package/dist/gaia-security/hooks/modules/session/session_manifest.py +257 -28
- package/dist/gaia-security/hooks/user_prompt_submit.py +20 -0
- package/gaia/approvals/store.py +87 -9
- package/gaia/store/schema.sql +38 -1
- package/gaia/store/writer.py +400 -0
- package/hooks/adapters/claude_code.py +19 -85
- package/hooks/elicitation_result.py +20 -75
- package/hooks/modules/context/context_injector.py +23 -7
- package/hooks/modules/events/event_writer.py +63 -96
- package/hooks/modules/security/__init__.py +0 -2
- package/hooks/modules/security/approval_cleanup.py +238 -69
- package/hooks/modules/security/approval_grants.py +506 -1103
- package/hooks/modules/security/mutative_verbs.py +24 -1
- package/hooks/modules/session/pending_scanner.py +150 -90
- package/hooks/modules/session/session_manifest.py +257 -28
- package/hooks/post_compact.py +1 -0
- package/hooks/pre_compact.py +1 -0
- package/hooks/user_prompt_submit.py +20 -0
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/scripts/bootstrap_database.sh +66 -17
- package/scripts/migrations/README.md +26 -14
- package/scripts/migrations/schema.checksum +2 -2
- package/scripts/migrations/v18_to_v19.sql +36 -0
- package/scripts/migrations/v19_to_v20.sql +20 -0
- package/skills/agent-approval-protocol/SKILL.md +27 -7
- package/skills/agent-approval-protocol/reference.md +11 -6
- package/skills/gaia-patterns/reference.md +2 -2
- package/skills/orchestrator-present-approval/SKILL.md +69 -28
- package/skills/orchestrator-present-approval/reference.md +16 -3
- package/skills/orchestrator-present-approval/template.md +10 -5
- package/skills/pending-approvals/SKILL.md +16 -11
- package/skills/subagent-request-approval/SKILL.md +20 -6
- package/skills/subagent-request-approval/reference.md +23 -15
- package/tools/migration/README.md +10 -12
- package/tools/scan/orchestrator.py +194 -10
- package/tools/scan/tests/test_integration.py +1 -2
- package/bin/cli/plans.py +0 -517
- package/dist/gaia-ops/tools/context/deep_merge.py +0 -159
- package/dist/gaia-ops/tools/migration/migrate_04_harness_events.py +0 -132
- package/dist/gaia-ops/tools/migration/migrate_04_harness_events.sh +0 -23
- package/dist/gaia-ops/tools/scan/merge.py +0 -213
- package/dist/gaia-ops/tools/scan/tests/test_merge.py +0 -269
- package/tools/context/deep_merge.py +0 -159
- package/tools/migration/migrate_04_harness_events.py +0 -132
- package/tools/migration/migrate_04_harness_events.sh +0 -23
- package/tools/scan/merge.py +0 -213
- package/tools/scan/tests/test_merge.py +0 -269
|
@@ -1,159 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Deep merge utility for project-context.json updates.
|
|
3
|
-
|
|
4
|
-
Merges two dicts recursively following the gaia-ops merge decision tree:
|
|
5
|
-
1. Key missing in current -> ADD
|
|
6
|
-
2. Both values are dicts -> RECURSE (deep merge)
|
|
7
|
-
3. Both values are lists -> UNION (primitives: sorted set union;
|
|
8
|
-
dicts with "name": merge by name;
|
|
9
|
-
other dicts: concatenate + deduplicate)
|
|
10
|
-
4. Both values are scalars -> OVERWRITE (new replaces old)
|
|
11
|
-
5. Type mismatch -> OVERWRITE with warning
|
|
12
|
-
|
|
13
|
-
No-Delete Policy: keys in current but NOT in update are always preserved.
|
|
14
|
-
"""
|
|
15
|
-
|
|
16
|
-
import copy
|
|
17
|
-
import json
|
|
18
|
-
import logging
|
|
19
|
-
|
|
20
|
-
logger = logging.getLogger(__name__)
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def deep_merge(current: dict, update: dict) -> tuple[dict, dict]:
    """Merge *update* into *current* returning ``(merged, diff)``.

    Rules applied to every key of *update*:

    1. Key missing in *current*     -> added (not recorded in ``diff``).
    2. Both values are dicts        -> merged recursively.
    3. Both values are lists        -> combined by ``_merge_lists``.
    4. Same type, any other value   -> new value replaces old when different.
    5. Different types              -> new value replaces old, with a warning.

    Keys that exist only in *current* are always preserved.

    Parameters
    ----------
    current:
        The existing data (will NOT be mutated).
    update:
        New data to merge on top of *current* (will NOT be mutated).

    Returns
    -------
    tuple[dict, dict]
        ``merged`` – the result of the merge.
        ``diff``   – audit trail of replaced values, nested like the data:
        ``{key: {"old": ..., "new": ...}}``. Recorded values are independent
        copies, so mutating ``merged`` or *update* afterwards cannot alter
        the audit trail.
    """
    merged = copy.deepcopy(current)
    diff: dict = {}

    for key, new_value in update.items():
        if key not in merged:
            # Rule 1: ADD missing key
            merged[key] = copy.deepcopy(new_value)
            continue

        old_value = merged[key]

        # Rule 2: Both dicts -> recurse
        if isinstance(old_value, dict) and isinstance(new_value, dict):
            sub_merged, sub_diff = deep_merge(old_value, new_value)
            merged[key] = sub_merged
            if sub_diff:
                diff[key] = sub_diff
            continue

        # Rule 3: Both lists -> union strategy
        if isinstance(old_value, list) and isinstance(new_value, list):
            merged_list = _merge_lists(old_value, new_value)
            if merged_list != old_value:
                # Copy for the audit trail: ``merged_list`` itself becomes
                # part of ``merged`` and may be mutated by the caller.
                diff[key] = {"old": old_value, "new": copy.deepcopy(merged_list)}
                merged[key] = merged_list
            continue

        if type(old_value) is not type(new_value):
            # Rule 5: Type mismatch -> overwrite with warning (always recorded,
            # even when the two values compare equal, e.g. 1 and 1.0).
            logger.warning(
                "Type mismatch for key '%s': %s -> %s. New value wins.",
                key,
                type(old_value).__name__,
                type(new_value).__name__,
            )
        elif old_value == new_value:
            # Rule 4 with identical values: nothing to change or record.
            continue

        # Rules 4 and 5 share the overwrite; each side gets its own copy so
        # the audit trail never aliases the caller's ``update`` object.
        diff[key] = {"old": old_value, "new": copy.deepcopy(new_value)}
        merged[key] = copy.deepcopy(new_value)

    return merged, diff
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
# ---------------------------------------------------------------------------
|
|
87
|
-
# List merge helpers
|
|
88
|
-
# ---------------------------------------------------------------------------
|
|
89
|
-
|
|
90
|
-
def _merge_lists(current: list, update: list) -> list:
    """Merge two lists following the union strategy.

    a) All items are primitives (str, int, float, bool) -> sorted set union.
       When the union mixes types that cannot be ordered against each other
       (for example ``str`` and ``int``), items are grouped by type name and
       sorted within each group instead of raising ``TypeError``.
    b) Items are dicts with a ``"name"`` key -> merge by name, preserve missing.
    c) Otherwise -> concatenate, deduplicate by JSON equality.
    """
    if _all_primitives(current) and _all_primitives(update):
        union = set(current) | set(update)
        try:
            return sorted(union)
        except TypeError:
            # Unorderable mix such as {"a", 1}: fall back to a deterministic
            # order. Values sharing a type name are always comparable.
            return sorted(union, key=lambda value: (type(value).__name__, value))

    if _all_dicts_with_name(current) and _all_dicts_with_name(update):
        return _merge_named_dicts(current, update)

    # Fallback: concatenate + deduplicate by JSON equality
    return _concat_deduplicate(current, update)
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
def _all_primitives(items: list) -> bool:
|
|
108
|
-
"""Return True if every item is a primitive (str, int, float, bool)."""
|
|
109
|
-
return all(isinstance(i, (str, int, float, bool)) for i in items)
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
def _all_dicts_with_name(items: list) -> bool:
|
|
113
|
-
"""Return True if every item is a dict containing a ``"name"`` key."""
|
|
114
|
-
return bool(items) and all(
|
|
115
|
-
isinstance(i, dict) and "name" in i for i in items
|
|
116
|
-
)
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
def _merge_named_dicts(current: list[dict], update: list[dict]) -> list[dict]:
|
|
120
|
-
"""Merge lists of dicts by their ``"name"`` field.
|
|
121
|
-
|
|
122
|
-
- Matching names: deep-merge the dict fields.
|
|
123
|
-
- Names only in current: preserved (no-delete).
|
|
124
|
-
- Names only in update: appended.
|
|
125
|
-
"""
|
|
126
|
-
result_by_name: dict[str, dict] = {}
|
|
127
|
-
order: list[str] = []
|
|
128
|
-
|
|
129
|
-
# Seed with current entries (preserves order + no-delete)
|
|
130
|
-
for item in current:
|
|
131
|
-
name = item["name"]
|
|
132
|
-
result_by_name[name] = copy.deepcopy(item)
|
|
133
|
-
order.append(name)
|
|
134
|
-
|
|
135
|
-
# Merge / add from update
|
|
136
|
-
for item in update:
|
|
137
|
-
name = item["name"]
|
|
138
|
-
if name in result_by_name:
|
|
139
|
-
merged_item, _ = deep_merge(result_by_name[name], item)
|
|
140
|
-
result_by_name[name] = merged_item
|
|
141
|
-
else:
|
|
142
|
-
result_by_name[name] = copy.deepcopy(item)
|
|
143
|
-
order.append(name)
|
|
144
|
-
|
|
145
|
-
return [result_by_name[n] for n in order]
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
def _concat_deduplicate(current: list, update: list) -> list:
|
|
149
|
-
"""Concatenate two lists, deduplicating by JSON equality."""
|
|
150
|
-
seen: list[str] = []
|
|
151
|
-
result: list = []
|
|
152
|
-
|
|
153
|
-
for item in current + update:
|
|
154
|
-
serialized = json.dumps(item, sort_keys=True)
|
|
155
|
-
if serialized not in seen:
|
|
156
|
-
seen.append(serialized)
|
|
157
|
-
result.append(copy.deepcopy(item))
|
|
158
|
-
|
|
159
|
-
return result
|
|
@@ -1,132 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
migrate_04_harness_events.py
|
|
4
|
-
|
|
5
|
-
Convierte events.jsonl -> archivo SQL con INSERT batched.
|
|
6
|
-
|
|
7
|
-
Reglas:
|
|
8
|
-
- Solo I/O sobre filesystem.
|
|
9
|
-
- NO importa sqlite3.
|
|
10
|
-
- `id` es AUTOINCREMENT en la tabla; NO lo insertamos.
|
|
11
|
-
- `payload` = json.dumps(record) entero -- preserva todos los campos.
|
|
12
|
-
- Idempotencia: harness_events NO tiene UNIQUE constraint útil.
|
|
13
|
-
Aplicar una sola vez. Re-ejecuciones requieren DELETE WHERE project=...
|
|
14
|
-
|
|
15
|
-
CLI args (parametrización cross-workspace):
|
|
16
|
-
--project workspace name (default: 'me')
|
|
17
|
-
--src path al events.jsonl (default: ws/me)
|
|
18
|
-
--out path al SQL de salida (default: /tmp/migrate_04_harness_events.sql)
|
|
19
|
-
--fragment emite solo INSERTs (sin BEGIN/COMMIT)
|
|
20
|
-
"""
|
|
21
|
-
from __future__ import annotations
|
|
22
|
-
|
|
23
|
-
import argparse
|
|
24
|
-
import json
|
|
25
|
-
import sys
|
|
26
|
-
from pathlib import Path
|
|
27
|
-
|
|
28
|
-
# Defaults for the command-line options parsed in main().
DEFAULT_PROJECT = "me"
# NOTE(review): absolute path under one developer's home directory; on any
# other machine --src has to be passed explicitly.
DEFAULT_SRC = Path("/home/jorge/ws/me/.claude/events/events.jsonl")
DEFAULT_OUT = Path("/tmp/migrate_04_harness_events.sql")

# Maximum number of rows emitted per INSERT statement.
BATCH_SIZE = 200

# Column order shared by the INSERT column list and every VALUES tuple.
COLUMNS = [
    "project",
    "ts",
    "type",
    "source",
    "agent",
    "result",
    "severity",
    "payload",
]
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
def sql_quote(value) -> str:
    """Render *value* as a SQL literal.

    - ``None`` and non-finite floats (NaN, +/-inf) become ``NULL``.
    - ``bool`` becomes ``1`` / ``0``; other ints and floats use ``str()``.
    - ``dict`` and ``list`` values are serialized as compact JSON, so the
      stored text can be parsed back (``str()`` would store a Python repr).
    - Anything else is converted with ``str()``.

    Text results are wrapped in single quotes with embedded single quotes
    doubled.
    """
    import math  # local import: math is not imported at file level

    if value is None:
        return "NULL"
    # bool must be tested before int because bool is a subclass of int.
    if isinstance(value, bool):
        return "1" if value else "0"
    if isinstance(value, (int, float)):
        if isinstance(value, float) and not math.isfinite(value):
            return "NULL"
        return str(value)
    if isinstance(value, (dict, list)):
        text = json.dumps(value, ensure_ascii=False, separators=(",", ":"))
    else:
        text = str(value)
    return "'" + text.replace("'", "''") + "'"
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
def extract_row(record: dict, project: str) -> dict:
    """Build the column -> value mapping for one event record.

    ``project`` comes from the argument, the scalar columns are looked up in
    *record* (``None`` when absent), and ``payload`` holds the whole record
    as compact JSON so no field is lost.
    """
    row = {"project": project}
    for field in ("ts", "type", "source", "agent", "result", "severity"):
        row[field] = record.get(field)
    row["payload"] = json.dumps(record, ensure_ascii=False, separators=(",", ":"))
    return row
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
def row_values_sql(row: dict) -> str:
    """Render *row* as one parenthesised VALUES tuple, in ``COLUMNS`` order.

    Columns missing from *row* are passed to ``sql_quote`` as ``None``.
    """
    quoted = [sql_quote(row.get(column)) for column in COLUMNS]
    return "(" + ",".join(quoted) + ")"
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
def main() -> int:
    """Convert an events.jsonl file into a SQL file of batched INSERTs.

    Command-line options: ``--project``, ``--src``, ``--out`` and
    ``--fragment`` (emit only the INSERT statements, without the surrounding
    BEGIN/COMMIT).

    Lines are skipped, and counted as skipped, when they are not valid JSON,
    when they are not a JSON object, or when they lack a truthy ``ts`` or
    ``type``. Blank lines are ignored without being counted as skipped.

    Returns:
        0 on success, 1 when the source file does not exist.
    """
    parser = argparse.ArgumentParser(description="Generate INSERT SQL for harness_events table.")
    parser.add_argument("--project", default=DEFAULT_PROJECT)
    parser.add_argument("--src", default=str(DEFAULT_SRC), help="path to events.jsonl")
    parser.add_argument("--out", default=str(DEFAULT_OUT))
    parser.add_argument("--fragment", action="store_true")
    args = parser.parse_args()

    project = args.project
    src = Path(args.src)
    out = Path(args.out)
    fragment = args.fragment

    if not src.exists():
        print(f"[migrate_04:{project}] ERROR: source not found: {src}", file=sys.stderr)
        return 1

    rows = []
    skipped = 0
    total_lines = 0

    with src.open("r", encoding="utf-8") as f:
        for line in f:
            total_lines += 1
            s = line.strip()
            if not s:
                continue
            try:
                rec = json.loads(s)
            except json.JSONDecodeError:
                skipped += 1
                continue
            # A line can be valid JSON without being an object (e.g. "[1]");
            # such a value has no .get() and cannot be mapped to columns.
            if not isinstance(rec, dict) or not rec.get("ts") or not rec.get("type"):
                skipped += 1
                continue
            rows.append(extract_row(rec, project))

    cols_csv = ",".join(COLUMNS)
    insert_prefix = f"INSERT INTO harness_events ({cols_csv}) VALUES\n"

    with out.open("w", encoding="utf-8") as fh:
        fh.write("-- Generated by migrate_04_harness_events.py\n")
        fh.write(f"-- Project: {project}\n")
        fh.write(f"-- Source: {src}\n")
        fh.write(f"-- Total source lines: {total_lines}\n")
        fh.write(f"-- Records to insert: {len(rows)}\n")
        fh.write(f"-- Skipped: {skipped}\n")
        fh.write("--\n")
        fh.write("-- WARNING: harness_events sin PK natural; aplicar 2 veces duplica filas.\n")
        if not fragment:
            fh.write("BEGIN TRANSACTION;\n")

        # One multi-row INSERT per batch of at most BATCH_SIZE rows.
        for i in range(0, len(rows), BATCH_SIZE):
            batch = rows[i : i + BATCH_SIZE]
            fh.write(insert_prefix)
            fh.write(",\n".join(row_values_sql(r) for r in batch))
            fh.write(";\n")

        if not fragment:
            fh.write("COMMIT;\n")

    print(f"[migrate_04:{project}] wrote {out} ({len(rows)} rows, {skipped} skipped)")
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env bash
# migrate_04_harness_events.sh
# Wrapper: regenerates the .sql file from events.jsonl and loads it into
# ~/.gaia/gaia.db.
#
# WARNING: harness_events has no natural primary key, so re-running this
# wrapper duplicates rows. To re-run cleanly, first delete the rows with:
#   sqlite3 ~/.gaia/gaia.db "DELETE FROM harness_events WHERE project='me';"
set -euo pipefail

HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PY_SCRIPT="${HERE}/migrate_04_harness_events.py"
SQL_FILE="/tmp/migrate_04_harness_events.sql"
DB_PATH="${HOME}/.gaia/gaia.db"

# Step 1: regenerate the .sql file. --out is passed explicitly so the file
# written here is always the one loaded in step 2, whatever the generator's
# own default is.
echo "[migrate_04] regenerando ${SQL_FILE} ..."
python3 "${PY_SCRIPT}" --out "${SQL_FILE}"

# Step 2: apply the SQL (intercepted by the hook).
echo "[migrate_04] aplicando ${SQL_FILE} en ${DB_PATH} ..."
sqlite3 "${DB_PATH}" < "${SQL_FILE}"

echo "[migrate_04] OK"
|
package/tools/scan/merge.py
DELETED
|
@@ -1,213 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Section-Aware Context Combining Logic
|
|
3
|
-
|
|
4
|
-
Merges scanner sections in-memory following the merge rules from
|
|
5
|
-
contracts/merge-behavior.md. No file I/O -- gaia.db is the sole
|
|
6
|
-
persistence layer.
|
|
7
|
-
|
|
8
|
-
Rule 1: Scanner-owned sections -> full replace
|
|
9
|
-
Rule 2: Agent-enriched sections -> never touch
|
|
10
|
-
Rule 3: Mixed sections -> selective update at sub-key level
|
|
11
|
-
Rule 4: Unknown/user-custom sections -> preserve
|
|
12
|
-
Rule 5: Metadata -> always update
|
|
13
|
-
|
|
14
|
-
Note: Backward-compatible sections (project_details, application_architecture,
|
|
15
|
-
development_standards) were removed in v3. Consumers read v2 scanner sections
|
|
16
|
-
directly (project_identity, stack, git, environment, infrastructure).
|
|
17
|
-
|
|
18
|
-
Special handling for sub-section level ownership: the `environment` section
|
|
19
|
-
is jointly owned by the `tools` scanner (tools, tool_preferences) and the
|
|
20
|
-
`environment` scanner (os, runtimes, env_files). Each scanner replaces only
|
|
21
|
-
its owned sub-keys without clobbering the other scanner's sub-keys.
|
|
22
|
-
|
|
23
|
-
Contract: specs/002-gaia-scan/contracts/merge-behavior.md
|
|
24
|
-
"""
|
|
25
|
-
|
|
26
|
-
import copy
|
|
27
|
-
import logging
|
|
28
|
-
from typing import Any, Dict, Optional, Set
|
|
29
|
-
|
|
30
|
-
from tools.context.deep_merge import deep_merge
|
|
31
|
-
|
|
32
|
-
logger = logging.getLogger(__name__)
|
|
33
|
-
|
|
34
|
-
# Rule 1 -- top-level sections replaced wholesale on every scan.
# Maps section name -> owning scanner name. "environment" is deliberately
# absent: its ownership is split per sub-key (see ENVIRONMENT_SUBKEY_OWNERS).
SCANNER_OWNED_TOP_LEVEL: Dict[str, str] = {
    "project_identity": "stack",
    "stack": "stack",
    "git": "git",
    "infrastructure": "infrastructure",
    "orchestration": "orchestration",
}

# Sub-key ownership inside the `environment` section.
# Maps environment sub-key -> owning scanner name.
ENVIRONMENT_SUBKEY_OWNERS: Dict[str, str] = {
    "tools": "tools",
    "tool_preferences": "tools",
    "os": "environment",
    "runtimes": "environment",
    "env_files": "environment",
}

# Rule 2 -- agent-enriched sections that scanners must never modify.
AGENT_ENRICHED_SECTIONS: frozenset = frozenset(
    {
        "operational_guidelines",
        "cluster_details",
        "infrastructure_topology",
        "monitoring_observability",
        "architecture_overview",
        "gcp_services",
        "workload_identity",
    }
)

# Rule 3 -- mixed sections where scanners own only some fields.
# Maps section name -> set of scanner-owned field names.
MIXED_SECTION_SCANNER_FIELDS: Dict[str, Set[str]] = {
    "terraform_infrastructure": {"layout"},
    "gitops_configuration": {"repository"},
    "application_services": {"base_path", "services"},
}
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
def merge_context(
    existing: Dict[str, Any],
    scan_sections: Dict[str, Any],
    section_owners: Dict[str, str],
) -> Dict[str, Any]:
    """Merge scanner results with existing project-context sections.

    Applies the merge rules from contracts/merge-behavior.md to produce
    the final merged sections dict. Neither input is mutated.

    Args:
        existing: Current sections (may be empty).
        scan_sections: Combined sections from all scanners.
        section_owners: Mapping of section/sub-section name to scanner name.
            Kept for interface compatibility; this function does not read
            it -- ownership comes from the module-level tables.

    Returns:
        Merged sections dict. Fields of mixed sections are applied in
        sorted order, so key insertion order is the same on every run.
    """
    result = copy.deepcopy(existing)

    # --- Rule 1: Scanner-owned top-level sections -> full replace ---
    for section_name in SCANNER_OWNED_TOP_LEVEL:
        if section_name in scan_sections:
            result[section_name] = copy.deepcopy(scan_sections[section_name])

    # --- Sub-section level ownership for `environment` ---
    # The scanners' owned sub-keys are replaced one by one; any other
    # sub-key already present in the section is left alone.
    _merge_environment_section(result, scan_sections)

    # --- Rule 2: Agent-enriched sections -> never touch ---
    # No action needed: they came along with the deepcopy of `existing`, and
    # nothing below writes to a section outside the ownership tables.

    # --- Rule 3: Mixed sections -> selective update ---
    for section_name, scanner_fields in MIXED_SECTION_SCANNER_FIELDS.items():
        if section_name not in scan_sections:
            continue

        scan_data = scan_sections[section_name]
        if not isinstance(scan_data, dict):
            # Nothing field-addressable to apply; keep what exists.
            logger.warning(
                "Ignoring scanner data for mixed section '%s': expected dict, got %s.",
                section_name,
                type(scan_data).__name__,
            )
            continue

        section = result.setdefault(section_name, {})
        if not isinstance(section, dict):
            # Existing content cannot take per-field updates; leave it
            # untouched rather than overwrite non-scanner data.
            logger.warning(
                "Cannot update mixed section '%s': existing value is %s, not dict.",
                section_name,
                type(section).__name__,
            )
            continue

        # Only update scanner-owned fields; preserve agent fields.
        # sorted(): set iteration order is not stable across runs.
        for field_name in sorted(scanner_fields):
            if field_name in scan_data:
                section[field_name] = copy.deepcopy(scan_data[field_name])

    # --- Rule 4: Unknown/user-custom sections -> preserve ---
    # Sections of `existing` that no table above mentions are already in
    # `result` via the initial deepcopy. New unknown sections found only in
    # `scan_sections` are NOT added.

    return result
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
def _merge_environment_section(
    result: Dict[str, Any],
    scan_sections: Dict[str, Any],
) -> None:
    """Apply scanned `environment` sub-keys onto ``result["environment"]``.

    Only sub-keys listed in ``ENVIRONMENT_SUBKEY_OWNERS`` are copied over;
    every other key already present in the section is kept. The section is
    created when missing, and its `_source` field is always set to the
    combined tag. Does nothing when the scan produced no `environment`.

    Args:
        result: The result dict being built (mutated in place).
        scan_sections: Combined sections from all scanners.
    """
    if "environment" not in scan_sections:
        return

    scanned = scan_sections["environment"]
    target = result.setdefault("environment", {})

    # Copy each owned sub-key that the scan actually produced.
    owned_updates = {
        subkey: copy.deepcopy(scanned[subkey])
        for subkey in ENVIRONMENT_SUBKEY_OWNERS
        if subkey in scanned
    }
    target.update(owned_updates)

    # Tag the section as the joint output of both scanners.
    target["_source"] = "scanner:environment+tools"
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
def collect_scanner_sections(
    scanner_results: Dict[str, Any],
) -> Dict[str, Any]:
    """Gather the sections of every scanner result into one dict.

    `environment` is treated specially: the sub-keys contributed by each
    result are pooled (a later result overrides an earlier one on the same
    sub-key), each result's own `_source` is dropped, and a single
    `environment` section with the combined `_source` tag is added at the
    end. A non-dict `environment` value is ignored.

    Args:
        scanner_results: Mapping of scanner_name -> result object. Objects
            without a `sections` attribute contribute nothing.

    Returns:
        Combined sections dict. For any other section name the last result
        providing it wins; values are not copied.
    """
    merged_sections: Dict[str, Any] = {}
    env_subkeys: Dict[str, Any] = {}

    for outcome in scanner_results.values():
        produced = getattr(outcome, "sections", {})

        for name, payload in produced.items():
            if name != "environment":
                # Each section should have exactly one owner; if not, the
                # last scanner wins.
                merged_sections[name] = payload
                continue
            if not isinstance(payload, dict):
                continue
            env_subkeys.update(
                (key, value) for key, value in payload.items() if key != "_source"
            )

    # Reassemble the environment section only if some sub-key was collected.
    if env_subkeys:
        env_section: Dict[str, Any] = {"_source": "scanner:environment+tools"}
        env_section.update(env_subkeys)
        merged_sections["environment"] = env_section

    return merged_sections
|