@rubytech/create-realagent-code 0.1.248 → 0.1.250

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25) hide show
  1. package/package.json +1 -1
  2. package/payload/platform/plugins/admin/PLUGIN.md +1 -1
  3. package/payload/platform/plugins/admin/hooks/__tests__/session-end-retrospective.test.sh +3 -3
  4. package/payload/platform/plugins/admin/skills/platform-architecture/SKILL.md +20 -10
  5. package/payload/platform/plugins/docs/PLUGIN.md +2 -2
  6. package/payload/platform/plugins/docs/references/admin-session.md +7 -67
  7. package/payload/platform/plugins/docs/references/admin-ui.md +7 -3
  8. package/payload/platform/plugins/docs/references/deployment.md +1 -1
  9. package/payload/platform/plugins/docs/references/internals.md +8 -2
  10. package/payload/platform/plugins/docs/references/platform.md +3 -3
  11. package/payload/platform/scripts/check-no-legacy-spawn-route.mjs +37 -0
  12. package/payload/platform/services/claude-session-manager/dist/http-server.d.ts.map +1 -1
  13. package/payload/platform/services/claude-session-manager/dist/http-server.js +87 -21
  14. package/payload/platform/services/claude-session-manager/dist/http-server.js.map +1 -1
  15. package/payload/platform/services/claude-session-manager/dist/index.js +1 -0
  16. package/payload/platform/services/claude-session-manager/dist/index.js.map +1 -1
  17. package/payload/platform/services/claude-session-manager/dist/rc-daemon.d.ts +8 -0
  18. package/payload/platform/services/claude-session-manager/dist/rc-daemon.d.ts.map +1 -1
  19. package/payload/platform/services/claude-session-manager/dist/rc-daemon.js +14 -4
  20. package/payload/platform/services/claude-session-manager/dist/rc-daemon.js.map +1 -1
  21. package/payload/server/{chunk-UEIA5YUG.js → chunk-BE7O7KZ5.js} +4 -0
  22. package/payload/server/maxy-edge.js +1 -1
  23. package/payload/server/server.js +160 -126
  24. package/payload/platform/plugins/admin/hooks/__tests__/turn-completed-graph-write.test.sh +0 -601
  25. package/payload/platform/plugins/admin/hooks/turn-completed-graph-write.sh +0 -441
@@ -1,601 +0,0 @@
1
- #!/usr/bin/env bash
2
- # Task 129/131/165/175/177/195/214/216 regression test for the Stop hook.
3
- #
4
- # Dormant since Task 214 — the Stop-hook registration was removed from
5
- # account `settings.json`; the script + this test remain as preserved
6
- # infrastructure (see PLUGIN.md). When the script does run (e.g. via this
7
- # test invoking it directly), the post-Task-214 `database-operator.md`
8
- # template carries no `{schema}` / `{conversation}` / `{accountId}`
9
- # placeholders, so the substitution layer is a no-op pass-through and
10
- # `initialMessage` is the literal post-frontmatter body of the agent file.
11
- #
12
- # Contract (post-214):
13
- # - Every log emission goes through `POST /api/admin/log-ingest`. Hook
14
- # stderr is silent on every path.
15
- # - Every gated-off path emits one `trigger-skipped` line carrying a
16
- # distinct `reason=` (role-not-admin | is-recorder | empty-stdin |
17
- # missing-transcript | conversation-empty).
18
- # - Happy path emits exactly ONE `trigger` line carrying
19
- # `conversationBytes=<n>` and exactly ONE POST to
20
- # `/api/admin/claude-sessions`. The body carries the operator
21
- # session id as `adminSessionId` and an `initialMessage` whose value
22
- # is the literal database-operator agent body (post-frontmatter).
23
- # No placeholders are substituted because the post-Task-214 template
24
- # has none. The walker still parses the JSONL, still emits the
25
- # `envelope … turnsCount=N userTurns=U assistantTurns=A toolCallTurns=T`
26
- # log line, and the schema file is still read and reported in the
27
- # `substitution … schemaBytes=N` line — but neither the transcript
28
- # nor the schema content reaches `initialMessage`.
29
- # - The hook emits exactly one envelope line and one substitution line
30
- # before `spawn-request`.
31
- # - The hook never writes the legacy `spawn-with-input` / `fired` stderr
32
- # lines.
33
- #
34
- # The mock listener accepts both routes:
35
- # POST /api/admin/log-ingest — log-ingest
36
- # POST /api/admin/claude-sessions — wrapper
37
- # Both record their bodies (with route path) into REQ_LOG.
38
-
39
- set -u
40
-
41
- HOOK="$(cd "$(dirname "$0")/.." && pwd)/turn-completed-graph-write.sh"
42
- if [[ ! -x "$HOOK" ]]; then
43
- echo "FAIL: $HOOK not executable" >&2
44
- exit 1
45
- fi
46
-
47
- OP_ID='aaaaaaaa-1111-2222-3333-bbbbbbbbbbbb'
48
- ACCT_ID='acct-test-0001'
49
-
50
- TMPFILES=()
51
- LISTENER_PIDS=()
52
#######################################
# Best-effort teardown for the whole test run: stop every mock listener
# that was started and delete every temp file that was registered.
# Installed as the EXIT trap, so it must never fail or abort part-way.
# Globals:   LISTENER_PIDS (read) - PIDs of background listener processes
#            TMPFILES      (read) - paths of temp files to delete
# Arguments: none
# Returns:   0 always
#######################################
cleanup_test_state() {
  # Keep the loop variables local so teardown cannot clobber same-named
  # globals in the test driver.
  local pid f

  # `${arr[@]:-}` yields a single empty word for an empty/unset array so the
  # loops stay safe under `set -u`; the empty word is skipped explicitly.
  for pid in "${LISTENER_PIDS[@]:-}"; do
    [[ -n "$pid" ]] || continue
    # The listener may already be gone; both failures are expected and ignored.
    kill "$pid" 2>/dev/null || true
    # Reap the child so no zombie (or job-control noise) is left behind.
    wait "$pid" 2>/dev/null || true
  done

  for f in "${TMPFILES[@]:-}"; do
    [[ -n "$f" ]] || continue
    # `--` protects against paths that begin with a dash.
    rm -f -- "$f" 2>/dev/null || true
  done

  return 0
}
63
- trap cleanup_test_state EXIT
64
-
65
- PASS=0
66
- FAIL=0
67
# Report one passing assertion on stdout and bump the PASS tally.
# Arguments: $1 - human-readable description of the assertion
pass() {
  printf 'PASS: %s\n' "$1"
  PASS=$((PASS + 1))
}

# Report one failing assertion on stderr and bump the FAIL tally.
# Arguments: $1 - human-readable description of the failure
fail() {
  printf 'FAIL: %s\n' "$1" >&2
  FAIL=$((FAIL + 1))
}
69
-
70
#######################################
# Start a background mock HTTP listener on 127.0.0.1 that records every
# POST as one `<path>\t<body>` line in a fresh request log, then wait
# until it answers.
# Globals:   REQ_LOG        (written) - path of the request log
#            LISTENER_PORT  (written) - randomly chosen port, 39400-39499
#            TMPFILES       (appended) - REQ_LOG is registered for cleanup
#            LISTENER_PIDS  (appended) - listener PID is registered for cleanup
# Arguments: none
# Outputs:   a diagnostic on stderr when the listener never becomes ready
# Returns:   0 when the listener answered a probe, 1 otherwise
#######################################
start_listener() {
  local listener_pid attempt
  local ready=0

  REQ_LOG=$(mktemp); TMPFILES+=("$REQ_LOG")
  LISTENER_PORT=$((39400 + RANDOM % 100))

  # The handler answers 200 to every POST. `/api/admin/claude-sessions`
  # gets a fixed spawn-style payload; every other path gets {"ok": true}.
  python3 - "$LISTENER_PORT" "$REQ_LOG" <<'PY' &
import sys, http.server, json
port = int(sys.argv[1])
log_path = sys.argv[2]
class H(http.server.BaseHTTPRequestHandler):
    def log_message(self, *a, **k): pass
    def do_POST(self):
        n = int(self.headers.get('Content-Length','0') or 0)
        body = self.rfile.read(n).decode('utf-8','replace')
        with open(log_path, 'a', encoding='utf-8') as f:
            f.write(self.path + '\t' + body + '\n')
        self.send_response(200)
        self.send_header('Content-Type','application/json')
        self.end_headers()
        if self.path == '/api/admin/claude-sessions':
            self.wfile.write(json.dumps({"sessionId":"rec00001-feedfeed","pid":99999}).encode('utf-8'))
        else:
            self.wfile.write(json.dumps({"ok": True}).encode('utf-8'))
http.server.HTTPServer(('127.0.0.1', port), H).serve_forever()
PY
  listener_pid=$!
  LISTENER_PIDS+=("$listener_pid")

  # Poll for up to ~2s (20 probes, 0.1s apart).
  for ((attempt = 0; attempt < 20; attempt++)); do
    # If the listener process has already exited (e.g. the port was taken),
    # further probing is pointless — and a probe could be answered by
    # whatever else owns the port.
    kill -0 "$listener_pid" 2>/dev/null || break
    if curl -sS --max-time 1 -X POST "http://127.0.0.1:${LISTENER_PORT}/ping" -d '{}' >/dev/null 2>&1; then
      ready=1
      break
    fi
    sleep 0.1
  done

  # Drop the readiness probe(s) so the log starts empty for the first case.
  : > "$REQ_LOG"

  if ((ready == 0)); then
    echo "FAIL: mock listener did not become ready on port ${LISTENER_PORT}" >&2
    return 1
  fi
  return 0
}
102
-
103
#######################################
# Invoke the hook script once with a controlled environment and stdin,
# capturing its exit status and both output streams.
# Globals:   HOOK, LISTENER_PORT, ACCT_ID (read)
#            TMPFILES (appended) - capture files are registered for cleanup
#            HOOK_RC, HOOK_STDERR, HOOK_STDOUT (written)
# Arguments: $1 - value for MAXY_SESSION_ROLE
#            $2 - value for MAXY_SPECIALIST
#            $3 - text fed to the hook on stdin (no trailing newline added)
#######################################
run_hook() {
  local session_role="$1"
  local specialist_name="$2"
  local payload="$3"
  local err_capture out_capture

  err_capture=$(mktemp)
  TMPFILES+=("$err_capture")
  out_capture=$(mktemp)
  TMPFILES+=("$out_capture")

  # The environment assignments apply to the hook process only.
  printf '%s' "$payload" \
    | MAXY_SESSION_ROLE="$session_role" \
      MAXY_SPECIALIST="$specialist_name" \
      MAXY_UI_INTERNAL_PORT="$LISTENER_PORT" \
      ACCOUNT_ID="$ACCT_ID" \
      bash "$HOOK" >"$out_capture" 2>"$err_capture"
  # Without pipefail the pipeline status is the hook's own exit status.
  HOOK_RC=$?

  HOOK_STDERR=$(cat "$err_capture")
  HOOK_STDOUT=$(cat "$out_capture")

  sleep 0.1 # let listener flush
}
118
-
119
#######################################
# Print the `line` field of every log-ingest POST recorded in REQ_LOG,
# one per output line, in the order they were recorded.
# Each REQ_LOG record has the form `<path>\t<json body>`.
# Globals:   REQ_LOG (read) - request log; a missing file yields no output
# Arguments: none
# Outputs:   the extracted `line` strings on stdout
# Returns:   0 always (an empty or unreadable log is not an error)
#######################################
ingest_lines() {
  # The path and body are separated by a TAB. Spell the tab with ANSI-C
  # quoting so the pattern matches the real delimiter and cannot silently
  # degrade to a space through editors or copy/paste.
  # Records whose body is not a JSON object with a string `line` are skipped.
  grep -E $'^/api/admin/log-ingest\t' "$REQ_LOG" 2>/dev/null | python3 -c '
import sys, json
for raw in sys.stdin:
    try:
        _, body = raw.rstrip("\n").split("\t", 1)
        d = json.loads(body)
        if isinstance(d, dict) and isinstance(d.get("line"), str):
            print(d["line"])
    except Exception:
        pass
' || true
}
133
-
134
- start_listener
135
- # Simple two-turn transcript with timestamps. One user, one assistant text.
136
- TRANSCRIPT=$(mktemp); TMPFILES+=("$TRANSCRIPT")
137
- {
138
- printf '{"type":"user","timestamp":"2026-05-19T21:27:00.000Z","message":{"role":"user","content":"hello operator"}}\n'
139
- printf '{"type":"assistant","timestamp":"2026-05-19T21:27:01.000Z","message":{"id":"msg_001","role":"assistant","content":[{"type":"text","text":"hello, assistant reply"}]}}\n'
140
- } > "$TRANSCRIPT"
141
- ENVELOPE=$(python3 -c '
142
- import json, sys
143
- print(json.dumps({"session_id": sys.argv[1], "transcript_path": sys.argv[2]}))
144
- ' "$OP_ID" "$TRANSCRIPT")
145
-
146
- # --- Case 1: role != admin → trigger-skipped reason=role-not-admin -----
147
- : > "$REQ_LOG"
148
- run_hook "public" "" "$ENVELOPE"
149
- [[ "$HOOK_RC" -eq 0 ]] || fail "case-1 rc=$HOOK_RC"
150
- [[ -z "$HOOK_STDERR" ]] || fail "case-1 stderr must be empty, got: $HOOK_STDERR"
151
- if ingest_lines | grep -qE '^trigger-skipped sessionId=.* reason=role-not-admin$'; then
152
- pass "case-1 role=public → trigger-skipped reason=role-not-admin emitted"
153
- else
154
- fail "case-1 expected trigger-skipped role-not-admin, got log-ingest lines: $(ingest_lines)"
155
- fi
156
- if grep -qE '^/api/admin/claude-sessions ' "$REQ_LOG"; then
157
- fail "case-1 recorder-spawn must NOT be called when role!=admin"
158
- else
159
- pass "case-1 no recorder-spawn POST"
160
- fi
161
-
162
- # --- Case 2: MAXY_SPECIALIST=database-operator → trigger-skipped is-recorder ----
163
- : > "$REQ_LOG"
164
- run_hook "admin" "database-operator" "$ENVELOPE"
165
- [[ "$HOOK_RC" -eq 0 ]] || fail "case-2 rc=$HOOK_RC"
166
- [[ -z "$HOOK_STDERR" ]] || fail "case-2 stderr must be empty, got: $HOOK_STDERR"
167
- if ingest_lines | grep -qE '^trigger-skipped sessionId=.* reason=is-recorder$'; then
168
- pass "case-2 MAXY_SPECIALIST=database-operator → trigger-skipped reason=is-recorder emitted"
169
- else
170
- fail "case-2 expected trigger-skipped is-recorder, got: $(ingest_lines)"
171
- fi
172
-
173
- # --- Case 3: empty stdin → trigger-skipped reason=empty-stdin ------------
174
- : > "$REQ_LOG"
175
- run_hook "admin" "" ""
176
- [[ "$HOOK_RC" -eq 0 ]] || fail "case-3 rc=$HOOK_RC"
177
- if ingest_lines | grep -qE '^trigger-skipped sessionId=.* reason=empty-stdin$'; then
178
- pass "case-3 empty stdin → trigger-skipped reason=empty-stdin emitted"
179
- else
180
- fail "case-3 expected trigger-skipped empty-stdin, got: $(ingest_lines)"
181
- fi
182
-
183
- # --- Case 4: missing transcript_path → trigger-skipped missing-transcript -
184
- : > "$REQ_LOG"
185
- BAD_ENVELOPE=$(python3 -c 'import json,sys; print(json.dumps({"session_id": sys.argv[1]}))' "$OP_ID")
186
- run_hook "admin" "" "$BAD_ENVELOPE"
187
- [[ "$HOOK_RC" -eq 0 ]] || fail "case-4 rc=$HOOK_RC"
188
- if ingest_lines | grep -qE "^trigger-skipped sessionId=${OP_ID} reason=missing-transcript$"; then
189
- pass "case-4 missing transcript → trigger-skipped reason=missing-transcript emitted"
190
- else
191
- fail "case-4 expected trigger-skipped missing-transcript, got: $(ingest_lines)"
192
- fi
193
-
194
- # --- Case 5 (Task 177): ordered-turns happy path -----------------------
195
- # Transcript with four turns: user-A, asst-A-reply, user-B, asst-B-reply.
196
- # Envelope `turns` array must carry exactly that order, oldest first.
197
- # Envelope shape: { turns, sessionId, accountId, occurredAt } — no
198
- # top-level operatorMessage / assistantReply.
199
- ORDERED_TRANSCRIPT=$(mktemp); TMPFILES+=("$ORDERED_TRANSCRIPT")
200
- {
201
- printf '{"type":"user","timestamp":"2026-05-19T21:27:42.258Z","message":{"role":"user","content":"New Real Agent session"}}\n'
202
- printf '{"type":"assistant","timestamp":"2026-05-19T21:27:45.158Z","message":{"id":"msg_a","role":"assistant","content":[{"type":"text","text":"Session noted. What would you like to work on?"}]}}\n'
203
- printf '{"type":"user","timestamp":"2026-05-19T21:27:57.314Z","message":{"role":"user","content":"you tell me"}}\n'
204
- printf '{"type":"assistant","timestamp":"2026-05-19T21:28:00.847Z","message":{"id":"msg_b","role":"assistant","content":[{"type":"text","text":"One question to anchor the next move: what is your name?"}]}}\n'
205
- } > "$ORDERED_TRANSCRIPT"
206
- ORDERED_ENVELOPE=$(python3 -c '
207
- import json, sys
208
- print(json.dumps({"session_id": sys.argv[1], "transcript_path": sys.argv[2]}))
209
- ' "$OP_ID" "$ORDERED_TRANSCRIPT")
210
- : > "$REQ_LOG"
211
- run_hook "admin" "" "$ORDERED_ENVELOPE"
212
- [[ "$HOOK_RC" -eq 0 ]] || fail "case-5 rc=$HOOK_RC stderr=$HOOK_STDERR"
213
- [[ -z "$HOOK_STDERR" ]] || fail "case-5 stderr must be empty, got: $HOOK_STDERR"
214
-
215
- # 5a. exactly ONE `trigger` log-ingest line carrying the operator session id.
216
- TRIGGER_COUNT=$(ingest_lines | grep -cE "^trigger sessionId=${OP_ID} turnIndex=0 transcriptBytes=[0-9]+ conversationBytes=[0-9]+$" || true)
217
- if [[ "$TRIGGER_COUNT" -eq 1 ]]; then
218
- pass "case-5a exactly one trigger line via log-ingest"
219
- else
220
- fail "case-5a expected exactly 1 trigger line, got $TRIGGER_COUNT (lines: $(ingest_lines))"
221
- fi
222
-
223
- # 5b. exactly ONE POST to /api/admin/claude-sessions
224
- RECORDER_LINES=$(grep -cE '^/api/admin/claude-sessions ' "$REQ_LOG" || true)
225
- if [[ "$RECORDER_LINES" -eq 1 ]]; then
226
- pass "case-5b exactly one recorder-spawn POST observed"
227
- else
228
- fail "case-5b expected exactly 1 recorder-spawn POST, got $RECORDER_LINES"
229
- fi
230
-
231
- # 5c. (Task 216) initialMessage is the literal post-frontmatter body of
232
- # `database-operator.md`. Task 214 stripped the `{schema}` /
233
- # `{conversation}` / `{accountId}` placeholders from the template,
234
- # so the substitution layer in the hook is now a no-op pass-through
235
- # and the agent body lands in `initialMessage` verbatim. Assertions
236
- # are property-based — they guard the regression class (substitution
237
- # layer reintroduced, transcript leak, schema content leak, outer
238
- # spawn-body drift) without coupling to the prompt's exact wording.
239
- RECORDER_BODY=$(grep -E '^/api/admin/claude-sessions ' "$REQ_LOG" | head -1 | cut -f2-)
240
- BODY_OK=$(printf '%s' "$RECORDER_BODY" | python3 -c '
241
- import sys, json
242
- op = sys.argv[1]
243
- acct = sys.argv[2]
244
- try:
245
- outer = json.load(sys.stdin)
246
- msg = outer.get("initialMessage")
247
- if not isinstance(msg, str):
248
- print("no:initialMessage-not-string:"+repr(type(msg).__name__))
249
- sys.exit(0)
250
- conds = []
251
- # Template-header anchor — the post-Task-214 body opens with this
252
- # sentence. A different opener means the agent file was renamed,
253
- # rewritten, or its frontmatter parse drifted.
254
- conds.append(("starts-with-template",
255
- msg.startswith("You are an expert Neo4J graph operator.")))
256
- # Placeholder-reintroduction guards. If any of these reappear, Task
257
- # 214 was reverted without updating the template.
258
- conds.append(("no-placeholder-schema", "{schema}" not in msg))
259
- conds.append(("no-placeholder-conversation", "{conversation}" not in msg))
260
- conds.append(("no-placeholder-accountid", "{accountId}" not in msg))
261
- # Transcript-leak guard. The walker still parses the JSONL, but its
262
- # `turns` array no longer reaches `initialMessage`. Any `\nuser: ` or
263
- # `\nassistant: ` line means substitution crept back in.
264
- conds.append(("no-transcript-user", "\nuser: " not in msg))
265
- conds.append(("no-transcript-asst", "\nassistant: " not in msg))
266
- # Schema-leak guard. schema-base.md is still read (and its bytes are
267
- # reported in the substitution log line), but its content is no
268
- # longer substituted into the body. `Schema Reference` is the
269
- # opening header of `platform/plugins/memory/references/schema-base.md`
270
- # (`# Schema Reference — Base`); its appearance in the body means
271
- # `{schema}` re-entered the template.
272
- conds.append(("no-schema-content", "Schema Reference" not in msg))
273
- # AccountId-value leak guard. If `{accountId}` is re-added in
274
- # isolation (no `{schema}` / `{conversation}` reintroduction), the
275
- # placeholder guards above pass (because substitution replaces the
276
- # token cleanly), but the operator accountId still leaks into the
277
- # prompt. The test fixture passes ACCOUNT_ID="$ACCT_ID" into the
278
- # hook env, so the literal acct id must NOT appear anywhere in the
279
- # body.
280
- conds.append(("no-accountid-value-leak", acct not in msg))
281
- # Pre-Task-177 envelope-shape guard. `initialMessage` is a plain
282
- # string, never a JSON envelope.
283
- conds.append(("no-leading-json", not msg.lstrip().startswith("{")))
284
- conds.append(("no-operatorMessage", "operatorMessage" not in msg))
285
- conds.append(("no-assistantReply", "assistantReply" not in msg))
286
- # Spawn-body outer-shape sanity (unchanged from Task 195/200).
287
- conds.append(("outer-specialist", outer.get("specialist") == "database-operator"))
288
- conds.append(("outer-no-model-key", "model" not in outer))
289
- conds.append(("outer-adminSessionId", outer.get("adminSessionId") == op))
290
- conds.append(("outer-channel", outer.get("channel") == "browser"))
291
- failed = [name for name, ok in conds if not ok]
292
- print("yes" if not failed else "no:"+repr(failed))
293
- except Exception as e:
294
- print("parse-fail:"+str(e))
295
- ' "$OP_ID" "$ACCT_ID" 2>/dev/null)
296
- if [[ "$BODY_OK" == "yes" ]]; then
297
- pass "case-5c initialMessage is the literal template body (no placeholders, no transcript leak, no schema leak); outer spawn shape intact"
298
- else
299
- fail "case-5c literal-body shape wrong ($BODY_OK)"
300
- fi
301
-
302
- # 5h. (Task 195 + 199) exactly one `substitution` log-ingest line carrying
303
- # positive byte counts for schema / conversation / body AND
304
- # accountIdPresent=yes (the envelope's accountId is non-empty here —
305
- # run_hook always passes ACCOUNT_ID="$ACCT_ID" into the hook env).
306
- SUBST_LINE_COUNT=$(ingest_lines | grep -cE "^substitution sessionId=${OP_ID} schemaBytes=[1-9][0-9]* conversationBytes=[1-9][0-9]* bodyBytes=[1-9][0-9]* accountIdPresent=yes$" || true)
307
- if [[ "$SUBST_LINE_COUNT" -eq 1 ]]; then
308
- pass "case-5h substitution log line emitted once with positive byte counts and accountIdPresent=yes"
309
- else
310
- fail "case-5h expected exactly 1 substitution line w/ positive bytes + accountIdPresent=yes, got $SUBST_LINE_COUNT (lines: $(ingest_lines))"
311
- fi
312
-
313
- # 5i. (Task 195) substitution line lands between envelope and spawn-request.
314
- ALL_5I_LINES=$(ingest_lines)
315
- ENV_POS_5I=$(printf '%s\n' "$ALL_5I_LINES" | grep -nE "^envelope sessionId=${OP_ID}" | head -1 | cut -d: -f1)
316
- SUB_POS_5I=$(printf '%s\n' "$ALL_5I_LINES" | grep -nE "^substitution sessionId=${OP_ID}" | head -1 | cut -d: -f1)
317
- SPAWN_POS_5I=$(printf '%s\n' "$ALL_5I_LINES" | grep -nE "^spawn-request sessionId=${OP_ID}" | head -1 | cut -d: -f1)
318
- if [[ -n "$ENV_POS_5I" && -n "$SUB_POS_5I" && -n "$SPAWN_POS_5I" \
319
- && "$ENV_POS_5I" -lt "$SUB_POS_5I" && "$SUB_POS_5I" -lt "$SPAWN_POS_5I" ]]; then
320
- pass "case-5i envelope < substitution < spawn-request ordering preserved"
321
- else
322
- fail "case-5i ordering wrong (env=$ENV_POS_5I sub=$SUB_POS_5I spawn=$SPAWN_POS_5I)"
323
- fi
324
-
325
- # 5j. (Task 195) `initialMessageBytes` on spawn-request equals `bodyBytes`
326
- # on substitution — the two log lines are reading the same number.
327
- BODY_BYTES_5J=$(printf '%s\n' "$ALL_5I_LINES" | grep -E "^substitution sessionId=${OP_ID}" | head -1 \
328
- | sed -E 's/.*bodyBytes=([0-9]+).*/\1/')
329
- INIT_BYTES_5J=$(printf '%s\n' "$ALL_5I_LINES" | grep -E "^spawn-request sessionId=${OP_ID}" | head -1 \
330
- | sed -E 's/.*initialMessageBytes=([0-9]+).*/\1/')
331
- if [[ -n "$BODY_BYTES_5J" && "$BODY_BYTES_5J" == "$INIT_BYTES_5J" ]]; then
332
- pass "case-5j substitution.bodyBytes == spawn-request.initialMessageBytes (=$BODY_BYTES_5J)"
333
- else
334
- fail "case-5j bodyBytes ($BODY_BYTES_5J) does not match initialMessageBytes ($INIT_BYTES_5J)"
335
- fi
336
-
337
- # 5d. exactly ONE `spawn-request` log-ingest line
338
- SPAWN_REQ_COUNT=$(ingest_lines | grep -cE "^spawn-request sessionId=${OP_ID} specialist=database-operator initialMessageBytes=[0-9]+$" || true)
339
- if [[ "$SPAWN_REQ_COUNT" -eq 1 ]]; then
340
- pass "case-5d exactly one spawn-request line emitted"
341
- else
342
- fail "case-5d expected 1 spawn-request, got $SPAWN_REQ_COUNT (lines: $(ingest_lines))"
343
- fi
344
-
345
- # 5e. Task 177 observability — exactly ONE `[turn-recorder] envelope ...`
346
- # line emitted before `spawn-request`. The line carries turnsCount,
347
- # userTurns, assistantTurns, toolCallTurns.
348
- ENVELOPE_LINE_COUNT=$(ingest_lines | grep -cE "^envelope sessionId=${OP_ID} turnsCount=4 userTurns=2 assistantTurns=2 toolCallTurns=0$" || true)
349
- if [[ "$ENVELOPE_LINE_COUNT" -eq 1 ]]; then
350
- pass "case-5e envelope log line emitted with correct counts"
351
- else
352
- fail "case-5e expected exactly 1 envelope line w/ turnsCount=4 userTurns=2 assistantTurns=2 toolCallTurns=0, got $ENVELOPE_LINE_COUNT (lines: $(ingest_lines))"
353
- fi
354
-
355
- # 5f. envelope line precedes spawn-request line.
356
- ALL_LINES=$(ingest_lines)
357
- ENV_POS=$(printf '%s\n' "$ALL_LINES" | grep -nE "^envelope sessionId=${OP_ID}" | head -1 | cut -d: -f1)
358
- SPAWN_POS=$(printf '%s\n' "$ALL_LINES" | grep -nE "^spawn-request sessionId=${OP_ID}" | head -1 | cut -d: -f1)
359
- if [[ -n "$ENV_POS" && -n "$SPAWN_POS" && "$ENV_POS" -lt "$SPAWN_POS" ]]; then
360
- pass "case-5f envelope line precedes spawn-request"
361
- else
362
- fail "case-5f envelope line must precede spawn-request (env=$ENV_POS spawn=$SPAWN_POS)"
363
- fi
364
-
365
- # 5g. no legacy stderr emissions
366
- if echo "$HOOK_STDERR" | grep -qE 'spawn-with-input|spawn-with-stdin|turn-completed-graph-write fired'; then
367
- fail "case-5g legacy stderr line emitted: $HOOK_STDERR"
368
- else
369
- pass "case-5g no legacy stderr emissions"
370
- fi
371
-
372
- # --- Case 6: multi-block assistant collapse (walker coverage) ---------
373
- # Two assistant records sharing `message.id`, BOTH carrying text. The
374
- # walker's collapse branch (turn-completed-graph-write.sh, the
375
- # `msg_id in msg_id_to_turn_index` branch in the assistant arm) merges
376
- # them into ONE assistant turn whose text is the concatenation. Without
377
- # the collapse the walker would emit two assistant turns. Pre-Task-214
378
- # this was visible in `initialMessage` via a single `\nassistant: ...`
379
- # line carrying both fragments; post-Task-214 the transcript no longer
380
- # reaches the body, so coverage moves to the envelope log line —
381
- # `turnsCount=2 userTurns=1 assistantTurns=1 toolCallTurns=0` proves
382
- # the two assistant records collapsed to one (otherwise the line would
383
- # read assistantTurns=2). Fixture must use TEXT-bearing records on both
384
- # sides; a thinking-only first record would be skipped at line 202 of
385
- # the walker before the collapse branch ever runs, so the test would
386
- # only prove the skip path (already covered by case 9) instead of the
387
- # collapse path.
388
- MULTI_TRANSCRIPT=$(mktemp); TMPFILES+=("$MULTI_TRANSCRIPT")
389
- {
390
- printf '{"type":"user","timestamp":"2026-05-19T21:30:00.000Z","message":{"role":"user","content":"split your answer"}}\n'
391
- printf '{"type":"assistant","timestamp":"2026-05-19T21:30:01.000Z","message":{"id":"msg_multi","role":"assistant","content":[{"type":"text","text":"part one"}]}}\n'
392
- printf '{"type":"assistant","timestamp":"2026-05-19T21:30:02.000Z","message":{"id":"msg_multi","role":"assistant","content":[{"type":"text","text":" and part two"}]}}\n'
393
- } > "$MULTI_TRANSCRIPT"
394
- MULTI_ENVELOPE=$(python3 -c '
395
- import json, sys
396
- print(json.dumps({"session_id": sys.argv[1], "transcript_path": sys.argv[2]}))
397
- ' "$OP_ID" "$MULTI_TRANSCRIPT")
398
- : > "$REQ_LOG"
399
- run_hook "admin" "" "$MULTI_ENVELOPE"
400
- [[ "$HOOK_RC" -eq 0 ]] || fail "case-6 rc=$HOOK_RC stderr=$HOOK_STDERR"
401
- echo "case-6 SKIPPED — dormant since Task 214; transcript no longer substituted into initialMessage. Walker collapse now asserted via envelope log line below."
402
- ENV_LINE_6=$(ingest_lines | grep -cE "^envelope sessionId=${OP_ID} turnsCount=2 userTurns=1 assistantTurns=1 toolCallTurns=0$" || true)
403
- if [[ "$ENV_LINE_6" -eq 1 ]]; then
404
- pass "case-6 envelope log line counts multi-block collapse: turnsCount=2 userTurns=1 assistantTurns=1 toolCallTurns=0"
405
- else
406
- fail "case-6 expected 1 envelope line w/ turnsCount=2 userTurns=1 assistantTurns=1 toolCallTurns=0, got $ENV_LINE_6 (lines: $(ingest_lines))"
407
- fi
408
-
409
- # --- Case 7: tool-call pairing (walker coverage) ----------------------
410
- # Assistant tool_use + user tool_result fold into one `toolCalls` entry
411
- # on the assistant turn; the user-record carrying only the tool_result
412
- # never creates its own user turn. Pre-Task-214 this was visible in
413
- # `initialMessage` as "exactly one user line, two assistant lines (one
414
- # empty-bodied)". Post-Task-214 the transcript no longer reaches the
415
- # body, so the pairing is now only observable through the envelope log
416
- # line — `turnsCount=3 userTurns=1 assistantTurns=2 toolCallTurns=1`
417
- # proves the tool_result was absorbed (no extra user turn) and the
418
- # tool_use was paired on the owning assistant turn (toolCallTurns=1).
419
- TOOL_TRANSCRIPT=$(mktemp); TMPFILES+=("$TOOL_TRANSCRIPT")
420
- python3 - "$TOOL_TRANSCRIPT" <<'PY'
421
- import json, sys
422
- out = sys.argv[1]
423
- recs = [
424
- {"type":"user","timestamp":"2026-05-19T21:32:00.000Z","message":{"role":"user","content":"please write a node"}},
425
- {"type":"assistant","timestamp":"2026-05-19T21:32:01.000Z","message":{"id":"msg_tu","role":"assistant","content":[
426
- {"type":"tool_use","id":"toolu_001","name":"mcp__memory__memory-write","input":{"nodeType":"LocalBusiness","name":"Smalleys","nested":{"k":1}}}
427
- ]}},
428
- {"type":"user","timestamp":"2026-05-19T21:32:02.000Z","message":{"role":"user","content":[
429
- {"type":"tool_result","tool_use_id":"toolu_001","content":"ok: wrote LocalBusiness"}
430
- ]}},
431
- {"type":"assistant","timestamp":"2026-05-19T21:32:03.000Z","message":{"id":"msg_done","role":"assistant","content":[
432
- {"type":"text","text":"done"}
433
- ]}},
434
- ]
435
- with open(out,"w") as f:
436
- for r in recs: f.write(json.dumps(r)+"\n")
437
- PY
438
- TOOL_ENVELOPE=$(python3 -c '
439
- import json, sys
440
- print(json.dumps({"session_id": sys.argv[1], "transcript_path": sys.argv[2]}))
441
- ' "$OP_ID" "$TOOL_TRANSCRIPT")
442
- : > "$REQ_LOG"
443
- run_hook "admin" "" "$TOOL_ENVELOPE"
444
- [[ "$HOOK_RC" -eq 0 ]] || fail "case-7 rc=$HOOK_RC stderr=$HOOK_STDERR"
445
- echo "case-7 SKIPPED — dormant since Task 214; transcript no longer substituted into initialMessage. Tool-call pairing now asserted via envelope log line below."
446
-
447
- # Envelope log line for case-7: turnsCount=3, userTurns=1, assistantTurns=2, toolCallTurns=1
448
- ENV_LINE_7=$(ingest_lines | grep -cE "^envelope sessionId=${OP_ID} turnsCount=3 userTurns=1 assistantTurns=2 toolCallTurns=1$" || true)
449
- if [[ "$ENV_LINE_7" -eq 1 ]]; then
450
- pass "case-7 envelope log line counts tool-call turn correctly: turnsCount=3 userTurns=1 assistantTurns=2 toolCallTurns=1"
451
- else
452
- fail "case-7 expected envelope line w/ turnsCount=3 userTurns=1 assistantTurns=2 toolCallTurns=1, got $ENV_LINE_7 (lines: $(ingest_lines))"
453
- fi
454
-
455
- # --- Case 8: long-conversation 100 turns (walker coverage) -----------
456
- # Transcript with 100 turns (50 user + 50 assistant, interleaved).
457
- # Pre-Task-214 the body had to contain all 100 transcript lines and
458
- # exceed 5_000 bytes. Post-Task-214 the transcript no longer reaches
459
- # the body, so the "no windowing" invariant is asserted via the
460
- # envelope log line: `turnsCount=100 userTurns=50 assistantTurns=50
461
- # toolCallTurns=0`. A flat or shrinking turnsCount means windowing or
462
- # truncation crept back into the walker.
463
- LONG_TRANSCRIPT=$(mktemp); TMPFILES+=("$LONG_TRANSCRIPT")
464
- python3 - "$LONG_TRANSCRIPT" <<'PY'
465
- import json, sys
466
- out = sys.argv[1]
467
- with open(out,"w") as f:
468
- for i in range(50):
469
- ts_u = f"2026-05-19T21:{(i*2)//60:02d}:{(i*2)%60:02d}.000Z"
470
- ts_a = f"2026-05-19T21:{(i*2+1)//60:02d}:{(i*2+1)%60:02d}.000Z"
471
- f.write(json.dumps({"type":"user","timestamp":ts_u,"message":{"role":"user","content":f"user msg {i}"}})+"\n")
472
- f.write(json.dumps({"type":"assistant","timestamp":ts_a,"message":{"id":f"msg_{i}","role":"assistant","content":[{"type":"text","text":f"asst reply {i}"}]}})+"\n")
473
- PY
474
- LONG_ENVELOPE=$(python3 -c '
475
- import json, sys
476
- print(json.dumps({"session_id": sys.argv[1], "transcript_path": sys.argv[2]}))
477
- ' "$OP_ID" "$LONG_TRANSCRIPT")
478
- : > "$REQ_LOG"
479
- run_hook "admin" "" "$LONG_ENVELOPE"
480
- [[ "$HOOK_RC" -eq 0 ]] || fail "case-8 rc=$HOOK_RC stderr=$HOOK_STDERR"
481
- echo "case-8 SKIPPED — dormant since Task 214; transcript no longer substituted into initialMessage. No-windowing invariant now asserted via envelope log line below."
482
- ENV_LINE_8=$(ingest_lines | grep -cE "^envelope sessionId=${OP_ID} turnsCount=100 userTurns=50 assistantTurns=50 toolCallTurns=0$" || true)
483
- if [[ "$ENV_LINE_8" -eq 1 ]]; then
484
- pass "case-8 envelope log line counts 100-turn transcript: turnsCount=100 userTurns=50 assistantTurns=50 toolCallTurns=0 (no windowing)"
485
- else
486
- fail "case-8 expected 1 envelope line w/ turnsCount=100 userTurns=50 assistantTurns=50 toolCallTurns=0, got $ENV_LINE_8 (lines: $(ingest_lines))"
487
- fi
488
-
489
- # --- Case 9 (Task 177): empty-skip ------------------------------------
490
- # Transcript with no text content and no tool_use → turns array is empty
491
- # → trigger-skipped reason=conversation-empty, no spawn POST.
492
- EMPTY_TRANSCRIPT=$(mktemp); TMPFILES+=("$EMPTY_TRANSCRIPT")
493
- {
494
- printf '{"type":"system","summary":"noop"}\n'
495
- printf '{"type":"assistant","timestamp":"2026-05-19T21:40:00.000Z","message":{"id":"msg_t","role":"assistant","content":[{"type":"thinking","thinking":"silent"}]}}\n'
496
- } > "$EMPTY_TRANSCRIPT"
497
- EMPTY_ENVELOPE=$(python3 -c '
498
- import json, sys
499
- print(json.dumps({"session_id": sys.argv[1], "transcript_path": sys.argv[2]}))
500
- ' "$OP_ID" "$EMPTY_TRANSCRIPT")
501
- : > "$REQ_LOG"
502
- run_hook "admin" "" "$EMPTY_ENVELOPE"
503
- [[ "$HOOK_RC" -eq 0 ]] || fail "case-9 rc=$HOOK_RC"
504
- if ingest_lines | grep -qE "^trigger-skipped sessionId=${OP_ID} reason=conversation-empty$"; then
505
- pass "case-9 thinking-only transcript → trigger-skipped reason=conversation-empty"
506
- else
507
- fail "case-9 expected trigger-skipped conversation-empty, got: $(ingest_lines)"
508
- fi
509
- if grep -qE '^/api/admin/claude-sessions ' "$REQ_LOG"; then
510
- fail "case-9 recorder-spawn must NOT be called when conversation-empty"
511
- else
512
- pass "case-9 no recorder-spawn POST"
513
- fi
514
-
515
- # --- Case 10: escaping round-trip (walker coverage) -------------------
516
- # User and assistant text with quotes / backslashes / em-dash / emoji.
517
- # Pre-Task-214 these had to survive interpolation into the body and
518
- # appear verbatim. Post-Task-214 the transcript no longer reaches the
519
- # body, but the walker still parses the JSONL — the envelope log line
520
- # `turnsCount=2 userTurns=1 assistantTurns=1 toolCallTurns=0` proves
521
- # the walker did NOT crash on the special-character text. A parser
522
- # regression would either drop a turn (zero counts) or fail the hook
523
- # (non-zero rc).
524
- ESC_TRANSCRIPT=$(mktemp); TMPFILES+=("$ESC_TRANSCRIPT")
525
- python3 - "$ESC_TRANSCRIPT" <<'PY'
526
- import json, sys
527
- out = sys.argv[1]
528
- op_msg = 'hello "world"\\nline2\nlast — café 🦊'
529
- asst_msg = 'reply with backslash \\\\ and newline\nhere'
530
- recs = [
531
- {"type":"user","timestamp":"2026-05-19T21:50:00.000Z","message":{"role":"user","content":op_msg}},
532
- {"type":"assistant","timestamp":"2026-05-19T21:50:01.000Z","message":{"id":"msg_esc","role":"assistant","content":[{"type":"text","text":asst_msg}]}},
533
- ]
534
- with open(out,"w") as f:
535
- for r in recs: f.write(json.dumps(r)+"\n")
536
- PY
537
- ESC_ENVELOPE=$(python3 -c '
538
- import json, sys
539
- print(json.dumps({"session_id": sys.argv[1], "transcript_path": sys.argv[2]}))
540
- ' "$OP_ID" "$ESC_TRANSCRIPT")
541
- : > "$REQ_LOG"
542
- run_hook "admin" "" "$ESC_ENVELOPE"
543
- [[ "$HOOK_RC" -eq 0 ]] || fail "case-10 rc=$HOOK_RC stderr=$HOOK_STDERR"
544
- echo "case-10 SKIPPED — dormant since Task 214; transcript no longer substituted into initialMessage. Walker-survives-special-chars now asserted via envelope log line below."
545
- ENV_LINE_10=$(ingest_lines | grep -cE "^envelope sessionId=${OP_ID} turnsCount=2 userTurns=1 assistantTurns=1 toolCallTurns=0$" || true)
546
- if [[ "$ENV_LINE_10" -eq 1 ]]; then
547
- pass "case-10 envelope log line counts special-character transcript: turnsCount=2 userTurns=1 assistantTurns=1 toolCallTurns=0 (walker survived quotes/backslashes/em-dash/emoji)"
548
- else
549
- fail "case-10 expected 1 envelope line w/ turnsCount=2 userTurns=1 assistantTurns=1 toolCallTurns=0, got $ENV_LINE_10 (lines: $(ingest_lines))"
550
- fi
551
-
552
- # --- Case 11 (Task 199): missing accountId → accountIdPresent=no, no crash ---
553
- # When ACCOUNT_ID env is empty, the envelope's accountId field is "" and
554
- # the {accountId} placeholder collapses to an empty string. The hook does
555
- # NOT loud-fail (the agent's new clarifier sentence + the server-side
556
- # validator on the writers handle the bad signal). The substitution log
557
- # line emits accountIdPresent=no — the regression signature for upstream
558
- # envelope-shape drift.
559
- : > "$REQ_LOG"
560
- NO_ACCT_STDIN_FILE=$(mktemp); TMPFILES+=("$NO_ACCT_STDIN_FILE")
561
- printf '%s' "$ORDERED_ENVELOPE" > "$NO_ACCT_STDIN_FILE"
562
- NO_ACCT_STDERR=$(mktemp); TMPFILES+=("$NO_ACCT_STDERR")
563
- NO_ACCT_STDOUT=$(mktemp); TMPFILES+=("$NO_ACCT_STDOUT")
564
- MAXY_SESSION_ROLE="admin" \
565
- MAXY_SPECIALIST="" \
566
- MAXY_UI_INTERNAL_PORT="$LISTENER_PORT" \
567
- ACCOUNT_ID="" \
568
- bash "$HOOK" <"$NO_ACCT_STDIN_FILE" >"$NO_ACCT_STDOUT" 2>"$NO_ACCT_STDERR"
569
- NO_ACCT_RC=$?
570
- sleep 0.1
571
- [[ "$NO_ACCT_RC" -eq 0 ]] || fail "case-11 rc=$NO_ACCT_RC stderr=$(cat "$NO_ACCT_STDERR")"
572
- [[ -z "$(cat "$NO_ACCT_STDERR")" ]] || fail "case-11 stderr must be empty, got: $(cat "$NO_ACCT_STDERR")"
573
-
574
- # 11a. substitution log line carries accountIdPresent=no
575
- SUBST_NO_COUNT=$(ingest_lines | grep -cE "^substitution sessionId=${OP_ID} schemaBytes=[1-9][0-9]* conversationBytes=[1-9][0-9]* bodyBytes=[1-9][0-9]* accountIdPresent=no$" || true)
576
- if [[ "$SUBST_NO_COUNT" -eq 1 ]]; then
577
- pass "case-11a empty ACCOUNT_ID → substitution log carries accountIdPresent=no"
578
- else
579
- fail "case-11a expected 1 substitution line w/ accountIdPresent=no, got $SUBST_NO_COUNT (lines: $(ingest_lines))"
580
- fi
581
-
582
- # 11b. recorder spawn still fires — the hook does NOT loud-fail on absent
583
- # accountId. Pre-Task-214 the {accountId} placeholder collapsed to
584
- # empty backticks in the body. Post-Task-214 the template has no
585
- # {accountId} placeholder, so the body is the literal template
586
- # regardless of ACCOUNT_ID — the regression surface for "hook
587
- # survives empty ACCOUNT_ID" is now 11a (substitution log line
588
- # carries accountIdPresent=no) plus the spawn POST landing at all.
589
- echo "case-11b SKIPPED — dormant since Task 214; {accountId} placeholder removed from template, body is literal regardless of ACCOUNT_ID. Survival on empty accountId is now asserted by 11a (substitution log line) + spawn-POST presence below."
590
- NO_ACCT_POST=$(grep -cE '^/api/admin/claude-sessions ' "$REQ_LOG" || true)
591
- if [[ "$NO_ACCT_POST" -eq 1 ]]; then
592
- pass "case-11b spawn POST fires on empty ACCOUNT_ID (hook does not loud-fail)"
593
- else
594
- fail "case-11b expected 1 spawn POST on empty ACCOUNT_ID, got $NO_ACCT_POST"
595
- fi
596
-
597
- # --- Summary ------------------------------------------------------------
598
- echo "---"
599
- echo "PASSED: $PASS FAILED: $FAIL"
600
- [[ "$FAIL" -eq 0 ]] || exit 1
601
- exit 0