@mirnoorata/codexa 0.2.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/README.md +110 -31
  2. package/dist/cli/hooks.js +11 -6
  3. package/dist/cli/hooks.js.map +1 -1
  4. package/dist/cli.js +13 -4
  5. package/dist/cli.js.map +1 -1
  6. package/dist/eval/scoring.js +17 -0
  7. package/dist/eval/scoring.js.map +1 -1
  8. package/dist/implicit-baseline.d.ts +8 -0
  9. package/dist/implicit-baseline.js +94 -0
  10. package/dist/implicit-baseline.js.map +1 -0
  11. package/dist/init.d.ts +3 -0
  12. package/dist/init.js +129 -15
  13. package/dist/init.js.map +1 -1
  14. package/dist/mcp/compaction.d.ts +1 -0
  15. package/dist/mcp/compaction.js +24 -0
  16. package/dist/mcp/compaction.js.map +1 -1
  17. package/dist/mcp/envelope.d.ts +4 -1
  18. package/dist/mcp/envelope.js +45 -5
  19. package/dist/mcp/envelope.js.map +1 -1
  20. package/dist/mcp/prompts.d.ts +1 -1
  21. package/dist/mcp/prompts.js +5 -2
  22. package/dist/mcp/prompts.js.map +1 -1
  23. package/dist/mcp/tool-registry.d.ts +20 -19
  24. package/dist/mcp/tool-registry.js +24 -19
  25. package/dist/mcp/tool-registry.js.map +1 -1
  26. package/dist/mcp/tools.d.ts +1 -0
  27. package/dist/mcp/tools.js +11 -2
  28. package/dist/mcp/tools.js.map +1 -1
  29. package/dist/mcp-tool-catalog.d.ts +1 -1
  30. package/dist/mcp-tool-catalog.js +1 -1
  31. package/dist/mcp-tool-catalog.js.map +1 -1
  32. package/dist/mcp.js +10 -5
  33. package/dist/mcp.js.map +1 -1
  34. package/dist/query/post-edit/decision.d.ts +1 -0
  35. package/dist/query/post-edit/decision.js +13 -4
  36. package/dist/query/post-edit/decision.js.map +1 -1
  37. package/dist/query/post-edit.js +46 -16
  38. package/dist/query/post-edit.js.map +1 -1
  39. package/dist/task-snapshots.js +29 -0
  40. package/dist/task-snapshots.js.map +1 -1
  41. package/dist/types.d.ts +2 -0
  42. package/dist/types.js.map +1 -1
  43. package/integrations/.claude-plugin/marketplace.json +23 -0
  44. package/integrations/claude-code/.claude-plugin/plugin.json +16 -0
  45. package/integrations/claude-code/.mcp.json +8 -0
  46. package/integrations/claude-code/README.md +177 -0
  47. package/integrations/claude-code/commands/codexa-brief.md +14 -0
  48. package/integrations/claude-code/commands/codexa-impact.md +14 -0
  49. package/integrations/claude-code/commands/codexa-plan.md +20 -0
  50. package/integrations/claude-code/commands/codexa-review.md +23 -0
  51. package/integrations/claude-code/commands/codexa-status.md +10 -0
  52. package/integrations/claude-code/hooks/hooks.json +39 -0
  53. package/integrations/claude-code/scripts/cmd/brief.sh +18 -0
  54. package/integrations/claude-code/scripts/cmd/impact.sh +35 -0
  55. package/integrations/claude-code/scripts/cmd/lib.sh +136 -0
  56. package/integrations/claude-code/scripts/cmd/plan.sh +52 -0
  57. package/integrations/claude-code/scripts/cmd/review.sh +66 -0
  58. package/integrations/claude-code/scripts/cmd/status.sh +52 -0
  59. package/integrations/claude-code/scripts/codexa-mcp.js +111 -0
  60. package/integrations/claude-code/scripts/lib/codexa-repo.sh +773 -0
  61. package/integrations/claude-code/scripts/pre-edit.sh +116 -0
  62. package/integrations/claude-code/scripts/session-start.sh +201 -0
  63. package/integrations/claude-code/scripts/stop.sh +443 -0
  64. package/integrations/claude-code/tests/cmd-smoke.sh +310 -0
  65. package/integrations/claude-code/tests/hook-smoke.sh +1412 -0
  66. package/package.json +6 -3
  67. package/plugins/codexa/.codex-plugin/plugin.json +1 -1
@@ -0,0 +1,773 @@
1
+ #!/usr/bin/env bash
2
+ # Shared helpers for claudio hooks. Never write to the user's repo. Fail safe —
3
+ # if anything goes wrong, exit 0 so the session never gets blocked.
4
+ #
5
+ # All functions:
6
+ # - take primitive args (paths, tool names) and return via stdout
7
+ # - never read stdin (hook scripts read stdin once, then pass fields), except claudio_json_field / claudio_tool_input_field, which parse the JSON payload piped to them
8
+ # - emit diagnostic output to stderr under CLAUDIO_DEBUG=1
9
+
10
+ set -u
11
+
12
# Work out how to launch Codexa. Resolution order:
#   1. $CODEXA_CLI from the environment — an explicit path to dist/cli.js,
#      always run through node.
#   2. <checkout>/dist/cli.js, four directories above this file — the case
#      where the plugin is loaded from a codexa checkout via --plugin-dir.
#   3. `codexa` on $PATH — e.g. after `npm install -g @mirnoorata/codexa`;
#      this is the path that works once the plugin has been copied into the
#      Claude Code plugin cache, where the walk-up no longer lands in a
#      real checkout.
#
# Results left in the sourcing shell:
#   NODE_BIN        node executable ($CLAUDIO_NODE_BIN overrides; default "node")
#   CODEXA_CLI      resolved CLI path, or "" when nothing was found
#   _CODEXA_INVOKE  bash array with the argv prefix; hooks call
#                   `"${_CODEXA_INVOKE[@]}" <codexa args...>`
NODE_BIN="${CLAUDIO_NODE_BIN:-node}"
# `CDPATH=` stops a relative dirname from being resolved through the caller's
# CDPATH (which would also make `cd` echo the directory into the capture).
_codexa_lib_dir="$(CDPATH= cd -- "$(dirname "${BASH_SOURCE[0]}")" 2>/dev/null && pwd -P)" || _codexa_lib_dir=""
# Only walk up when this file's directory actually resolved; otherwise the
# walk-up would start from "/" and probe an unrelated /dist/cli.js.
_codexa_root_guess=""
_codexa_cli_guess=""
if [[ -n "$_codexa_lib_dir" ]]; then
  _codexa_root_guess="$(CDPATH= cd -- "${_codexa_lib_dir}/../../../.." 2>/dev/null && pwd -P)" || _codexa_root_guess=""
fi
if [[ -n "$_codexa_root_guess" ]]; then
  _codexa_cli_guess="${_codexa_root_guess}/dist/cli.js"
fi
_CODEXA_INVOKE=()
if [[ -n "${CODEXA_CLI:-}" ]]; then
  _CODEXA_INVOKE=("$NODE_BIN" "$CODEXA_CLI")
elif [[ -n "$_codexa_cli_guess" && -f "$_codexa_cli_guess" ]]; then
  CODEXA_CLI="$_codexa_cli_guess"
  _CODEXA_INVOKE=("$NODE_BIN" "$CODEXA_CLI")
elif command -v codexa >/dev/null 2>&1; then
  CODEXA_CLI="$(command -v codexa)"
  _CODEXA_INVOKE=("$CODEXA_CLI")
else
  CODEXA_CLI=""
fi
38
+
39
# Debug trace: writes "[claudio] <args>" to stderr, but only when
# CLAUDIO_DEBUG is exactly "1". Silent (status 0) otherwise.
claudio_log() {
  [[ "${CLAUDIO_DEBUG:-0}" == "1" ]] || return 0
  printf '[claudio] %s\n' "$*" >&2
}
44
+
45
# Return 0 when REPO is a directory holding a regular file at
# .codex/config.toml, 1 otherwise. The repo dir, `.codex`, and `config.toml`
# are each opened with O_NOFOLLOW relative to the previous descriptor, so a
# symlink at any of those three positions makes the check fail.
claudio_is_wired_repo() {
  local repo="${1:-}"
  if [[ -z "$repo" || ! -d "$repo" ]]; then
    return 1
  fi
  python3 - "$repo" <<'PY' 2>/dev/null
import os
import stat
import sys

DIR_FLAGS = os.O_RDONLY | os.O_NOFOLLOW | os.O_DIRECTORY | os.O_CLOEXEC
FILE_FLAGS = os.O_RDONLY | os.O_NOFOLLOW | os.O_CLOEXEC

opened = []
try:
    opened.append(os.open(sys.argv[1], DIR_FLAGS))
    opened.append(os.open(".codex", DIR_FLAGS, dir_fd=opened[-1]))
    opened.append(os.open("config.toml", FILE_FLAGS, dir_fd=opened[-1]))
    is_regular = stat.S_ISREG(os.fstat(opened[-1]).st_mode)
    sys.exit(0 if is_regular else 1)
except OSError:
    sys.exit(1)
finally:
    # Innermost descriptor first; close errors are irrelevant to the verdict.
    for fd in reversed(opened):
        try:
            os.close(fd)
        except OSError:
            pass
PY
}
73
+
74
# Print the nearest ancestor of START_DIR (inclusive) that is a codexa-wired
# repo, i.e. passes claudio_is_wired_repo. Prints nothing when none is found.
# The walk stops after checking the user's home directory and never tests "/".
# Always returns 0.
claudio_find_codexa_repo() {
  local start_dir="${1:-}"
  [[ -z "$start_dir" ]] && return 0
  [[ ! -d "$start_dir" ]] && return 0
  local dir
  # `--` protects a start dir beginning with "-"; `CDPATH=` keeps a relative
  # start dir from being looked up through the caller's CDPATH.
  dir="$(CDPATH= cd -- "$start_dir" 2>/dev/null && pwd -P)" || return 0
  # This library runs under `set -u`: expanding an unset HOME would print an
  # "unbound variable" error. No HOME simply means no home boundary.
  local home_real=""
  if [[ -n "${HOME:-}" ]]; then
    home_real="$(CDPATH= cd -- "$HOME" 2>/dev/null && pwd -P)" || home_real=""
  fi
  while [[ -n "$dir" && "$dir" != "/" ]]; do
    if claudio_is_wired_repo "$dir"; then
      printf '%s\n' "$dir"
      return 0
    fi
    if [[ -n "$home_real" && "$dir" == "$home_real" ]]; then
      return 0
    fi
    dir="$(dirname "$dir")"
  done
  return 0
}
97
+
98
# Succeed (0) when the given tool name is one of the code-editing tools the
# hooks guard: Edit, Write, MultiEdit, NotebookEdit. Anything else, including
# a missing argument, yields 1. Matching is exact and case-sensitive.
claudio_is_edit_tool() {
  local tool="${1:-}"
  local guarded
  for guarded in Edit Write MultiEdit NotebookEdit; do
    if [[ "$tool" == "$guarded" ]]; then
      return 0
    fi
  done
  return 1
}
107
+
108
# Succeed (0) when Codexa can be launched: _CODEXA_INVOKE must be non-empty,
# and when its first word is $NODE_BIN that binary must be resolvable via
# `command -v`. Returns 1 otherwise.
claudio_codexa_available() {
  if (( ${#_CODEXA_INVOKE[@]} == 0 )); then
    return 1
  fi
  # A PATH-installed `codexa` launcher needs no separate node check.
  if [[ "${_CODEXA_INVOKE[0]}" != "$NODE_BIN" ]]; then
    return 0
  fi
  command -v "$NODE_BIN" >/dev/null 2>&1 || return 1
  return 0
}
117
+
118
# Run the codexa CLI under a hard wall-clock limit.
# Usage: claudio_codexa_run SECONDS [codexa args...]
# Exit status:
#   127   codexa is not invocable (or the fallback could not spawn it)
#   124   the limit expired
#   else  the CLI's own exit status
# The child's stdout/stderr pass straight through; nothing in this function
# caps output size, so callers must bound what they capture.
# Stock macOS ships no coreutils `timeout`; python3 (already required by
# every parser in this lib) enforces the same bound there.
claudio_codexa_run() {
  local seconds="$1"
  shift
  if ! claudio_codexa_available; then
    claudio_log "codexa CLI unavailable; skipping"
    return 127
  fi
  if command -v timeout >/dev/null 2>&1; then
    # Deliberately no --preserve-status: with it, an expired run reports the
    # child's signal status (typically 143) instead of 124, which breaks the
    # contract above and disagrees with the python3 fallback below.
    timeout "${seconds}s" "${_CODEXA_INVOKE[@]}" "$@"
    return $?
  fi
  python3 - "$seconds" "${_CODEXA_INVOKE[@]}" "$@" <<'PY'
import subprocess
import sys

# An unparsable limit falls back to 30 seconds rather than failing the hook.
try:
    seconds = float(sys.argv[1])
except ValueError:
    seconds = 30.0
try:
    proc = subprocess.Popen(sys.argv[2:])
except OSError:
    sys.exit(127)
try:
    sys.exit(proc.wait(timeout=seconds))
except subprocess.TimeoutExpired:
    # Kill and reap so no child outlives the hook, then report the timeout.
    proc.kill()
    proc.wait()
    sys.exit(124)
except KeyboardInterrupt:
    proc.kill()
    proc.wait()
    sys.exit(130)
PY
}
157
+
158
# Print the argument converted to lowercase, without a trailing newline.
# Goes through `tr` because `${var,,}` needs bash 4+, and stock macOS ships
# bash 3.2, where that expansion aborts the script with "bad substitution".
claudio_lowercase() {
  local text="${1:-}"
  printf '%s' "$text" | tr '[:upper:]' '[:lower:]'
}
163
+
164
# Print the argument escaped for use INSIDE a JSON string literal (the
# surrounding double quotes are not emitted). Backslash and double-quote get
# a backslash prefix, tab and carriage return become \t and \r, other bytes
# below 0x20 become \u00XX, and line breaks between input lines become \n.
# Implemented in awk so hooks do not need jq.
claudio_json_escape() {
  local value="${1:-}"
  printf '%s' "$value" | awk '
    function encode(ch) {
      return (ch in table) ? table[ch] : ch
    }
    BEGIN {
      # Generic \u00XX forms first, then the short forms override them.
      for (code = 0; code < 32; code++) {
        table[sprintf("%c", code)] = sprintf("\\u%04x", code)
      }
      table["\\"] = "\\\\"
      table["\""] = "\\\""
      table["\n"] = "\\n"
      table["\t"] = "\\t"
      table["\r"] = "\\r"
    }
    {
      # awk splits records on newline, so re-insert each break as \n.
      if (NR > 1) {
        printf "\\n"
      }
      width = length($0)
      for (pos = 1; pos <= width; pos++) {
        printf "%s", encode(substr($0, pos, 1))
      }
    }
  '
}
193
+
194
# Print the value of a top-level field of the JSON object read from stdin.
# Usage: claudio_json_field KEY < payload
#   - string/number/boolean values are printed via Python str()
#   - object/array values are printed re-serialised as JSON
#   - missing key, null value, invalid JSON, or a non-object payload print
#     nothing
# KEY is handed to python3 as an argument and never spliced into the Python
# source, so a key containing quotes cannot change the program that runs.
# Note: this function consumes stdin.
claudio_json_field() {
  local key="${1:-}"
  [[ -z "$key" ]] && return 0
  python3 -c '
import json
import sys

try:
    data = json.load(sys.stdin)
except Exception:
    sys.exit(0)
# Only a JSON object has fields; arrays and scalars yield no output.
if not isinstance(data, dict):
    sys.exit(0)
value = data.get(sys.argv[1], "")
if isinstance(value, (dict, list)):
    value = json.dumps(value)
elif value is None:
    value = ""
sys.stdout.write(str(value))
' "$key"
}
215
+
216
# Print one field of the `tool_input` object inside the JSON payload read
# from stdin. Example:
#   claudio_tool_input_field file_path < payload
# Object/array values are printed re-serialised as JSON; other values via
# Python str(). Missing key, null value, invalid JSON, a non-object payload,
# or a non-object `tool_input` print nothing.
# KEY is handed to python3 as an argument and never spliced into the Python
# source, so a key containing quotes cannot change the program that runs.
# Note: this function consumes stdin.
claudio_tool_input_field() {
  local key="${1:-}"
  [[ -z "$key" ]] && return 0
  python3 -c '
import json
import sys

try:
    data = json.load(sys.stdin)
except Exception:
    sys.exit(0)
tool_input = data.get("tool_input") if isinstance(data, dict) else None
if not isinstance(tool_input, dict):
    sys.exit(0)
value = tool_input.get(sys.argv[1], "")
if isinstance(value, (dict, list)):
    value = json.dumps(value)
elif value is None:
    value = ""
sys.stdout.write(str(value))
' "$key"
}
239
+
240
# Print TARGET as an absolute path with symlinks collapsed; the path does not
# have to exist. Prints nothing on failure.
# `target` is untrusted hook input (tool_input.file_path / notebook_path),
# so it must never be interpolated into shell or Python source. It is passed
# after the `--` sentinel to realpath, or as argv[1] to python3.
claudio_realpath() {
  local target="${1:-}"
  [[ -z "$target" ]] && return 0
  local resolved
  if command -v realpath >/dev/null 2>&1; then
    # `-m` (resolve even when components are missing) is a GNU extension; a
    # BSD/macOS realpath rejects it. Only trust a successful, non-empty
    # answer and otherwise fall through to python3 instead of returning
    # nothing.
    if resolved="$(realpath -m -- "$target" 2>/dev/null)" && [[ -n "$resolved" ]]; then
      printf '%s\n' "$resolved"
      return 0
    fi
  fi
  python3 - "$target" <<'PY' 2>/dev/null
import os
import sys

try:
    sys.stdout.write(os.path.realpath(sys.argv[1]))
except Exception:
    sys.exit(0)
PY
}
262
+
263
# Render a filesystem-controlled string safely for transcript / stderr
# output. Control characters, backslashes and non-ASCII characters are turned
# into visible backslash escapes, then the result is shell-quoted, giving one
# single-line token. Escaping comes first because `shlex.quote` alone would
# keep a real newline inside the quotes, letting a hostile filename inject
# extra lines into hook output. Prints nothing for an empty argument or on
# any Python-side failure.
claudio_display_path() {
  local target="${1:-}"
  [[ -z "$target" ]] && return 0
  python3 - "$target" <<'PY' 2>/dev/null
import shlex
import sys

try:
    # unicode_escape output is pure ASCII, so the decode cannot fail.
    ascii_only = sys.argv[1].encode("unicode_escape").decode("ascii")
    sys.stdout.write(shlex.quote(ascii_only))
except Exception:
    sys.exit(0)
PY
}
288
+
289
# List the direct-child directories of START_DIR that contain a regular
# `.codex/config.toml`. Output: one absolute path per line, most recently
# modified config first. Always returns via python3 with stderr discarded.
#
# Symlinks are never followed: the child must lstat as a real directory, and
# the child dir, `.codex`, and `config.toml` are each opened with O_NOFOLLOW
# relative to the previous descriptor, ending in a regular-file fstat check.
# A layout where `<child>/.codex` is a symlink is rejected even if it points
# at a genuine `.codex` directory elsewhere.
#
# Skipped outright: names starting with "." and names containing control
# characters (the output is line-oriented, so such a name could fake a second
# repo). At most 256 remaining entries are inspected and at most 64 wired
# repos are reported.
claudio_list_child_codexa_repos() {
  local start_dir="${1:-}"
  [[ -z "$start_dir" ]] && return 0
  [[ ! -d "$start_dir" ]] && return 0
  local dir
  # `--` protects a start dir beginning with "-"; `CDPATH=` keeps a relative
  # start dir from being looked up through the caller's CDPATH.
  dir="$(CDPATH= cd -- "$start_dir" 2>/dev/null && pwd -P)" || return 0
  python3 - "$dir" <<'PY' 2>/dev/null
import os
import stat
import sys

# Two independent caps so a parent full of non-wired dirs cannot burn the
# hook budget:
#   MAX_ENTRIES_SCANNED bounds how many entries get an lstat/open attempt.
#   MAX_WIRED_ACCEPTED bounds how many wired repos are reported.
MAX_ENTRIES_SCANNED = 256
MAX_WIRED_ACCEPTED = 64

DIR_FLAGS = os.O_RDONLY | os.O_NOFOLLOW | os.O_DIRECTORY | os.O_CLOEXEC
FILE_FLAGS = os.O_RDONLY | os.O_NOFOLLOW | os.O_CLOEXEC

start = sys.argv[1]
try:
    # Sorted so the scan caps cut off deterministically.
    entries = sorted(os.listdir(start))
except OSError:
    sys.exit(0)


def safe_check_wired(parent_path):
    """Return the st_mtime of parent_path/.codex/config.toml, or None.

    None means some component is missing, is a symlink, is of the wrong
    type, or could not be opened.
    """
    opened = []
    try:
        opened.append(os.open(parent_path, DIR_FLAGS))
        opened.append(os.open(".codex", DIR_FLAGS, dir_fd=opened[-1]))
        opened.append(os.open("config.toml", FILE_FLAGS, dir_fd=opened[-1]))
        info = os.fstat(opened[-1])
    except OSError:
        return None
    finally:
        for fd in reversed(opened):
            try:
                os.close(fd)
            except OSError:
                pass
    return info.st_mtime if stat.S_ISREG(info.st_mode) else None


def has_control_char(name):
    """True when name contains an ASCII control character or DEL."""
    return any(ord(ch) < 32 or ord(ch) == 127 for ch in name)


candidates = []
scanned = 0
for name in entries:
    # Filtered names do not count against the scan cap.
    if name.startswith(".") or has_control_char(name):
        continue
    if scanned >= MAX_ENTRIES_SCANNED:
        break
    scanned += 1
    path = os.path.join(start, name)
    try:
        is_real_dir = stat.S_ISDIR(os.lstat(path).st_mode)
    except OSError:
        continue
    # lstat, not stat: a symlink to a directory is not a real directory.
    if not is_real_dir:
        continue
    mtime = safe_check_wired(path)
    if mtime is None:
        continue
    candidates.append((mtime, path))
    if len(candidates) >= MAX_WIRED_ACCEPTED:
        break

# Most-recently-modified .codex/config.toml first.
candidates.sort(key=lambda item: item[0], reverse=True)
for _, path in candidates:
    sys.stdout.write(path)
    sys.stdout.write("\n")
PY
}
411
+
412
# Rank repos by the mtime of `<repo>/.codex/cache/codexa-tasks/latest.json`.
# Usage: claudio_rank_child_repos_by_snapshot LIMIT REPO...
# Prints "<repo>\t<mtime_epoch>" for the LIMIT most recent snapshots, newest
# first; repos without a valid snapshot are skipped. LIMIT is clamped to
# 1..32, and an empty or non-numeric LIMIT falls back to 3.
# The repo dir and every intermediate (.codex/, cache/, codexa-tasks/,
# latest.json) is opened with O_NOFOLLOW relative to the previous descriptor,
# so a symlinked component is rejected before its mtime is trusted.
claudio_rank_child_repos_by_snapshot() {
  local limit="${1:-3}"
  shift || return 0
  [[ $# -eq 0 ]] && return 0
  python3 - "$limit" "$@" <<'PY' 2>/dev/null
import os
import stat
import sys

DIR_FLAGS = os.O_RDONLY | os.O_NOFOLLOW | os.O_DIRECTORY | os.O_CLOEXEC
FILE_FLAGS = os.O_RDONLY | os.O_NOFOLLOW | os.O_CLOEXEC
SNAPSHOT_DIRS = (".codex", "cache", "codexa-tasks")

# A malformed limit must not silently suppress all output; use the default.
try:
    limit = int(sys.argv[1] or "3")
except ValueError:
    limit = 3
limit = max(1, min(limit, 32))


def safe_snapshot_mtime(repo):
    """Return the st_mtime of the repo's latest.json snapshot, or None.

    None means some component is missing, is a symlink, is of the wrong
    type, or could not be opened.
    """
    fds = []
    try:
        fds.append(os.open(repo, DIR_FLAGS))
        for name in SNAPSHOT_DIRS:
            fds.append(os.open(name, DIR_FLAGS, dir_fd=fds[-1]))
        fds.append(os.open("latest.json", FILE_FLAGS, dir_fd=fds[-1]))
        info = os.fstat(fds[-1])
    except OSError:
        return None
    finally:
        for fd in fds:
            try:
                os.close(fd)
            except OSError:
                pass
    return info.st_mtime if stat.S_ISREG(info.st_mode) else None


scored = []
for repo in sys.argv[2:]:
    mtime = safe_snapshot_mtime(repo)
    if mtime is not None:
        scored.append((mtime, repo))

scored.sort(key=lambda item: item[0], reverse=True)
for mtime, repo in scored[:limit]:
    sys.stdout.write(f"{repo}\t{int(mtime)}\n")
PY
}
494
+
495
# Wrap repo-controlled text in an unambiguous data fence:
#   <<TITLE>>
#     escaped line
#   <<END_TITLE>>
# Each line has control and non-ASCII characters escaped to visible backslash
# forms and is indented by two spaces, so a hostile line starting with
# "SYSTEM:", "USER:", etc. never sits at column 0 where it could read as a
# prompt-turn boundary. Input longer than MAX_BYTES characters (default 8192)
# is cut there and a truncation marker line is appended. The cut is applied
# before escaping, so escaped output may be longer than MAX_BYTES.
#
# Usage: claudio_fence_block TITLE "raw text" [MAX_BYTES]
# Prints nothing when the text is empty.
claudio_fence_block() {
  local title="${1:-DATA}"
  local text="${2:-}"
  local max_bytes="${3:-8192}"
  [[ -z "$text" ]] && return 0
  python3 - "$title" "$text" "$max_bytes" <<'PY' 2>/dev/null
import sys

title, text = sys.argv[1], sys.argv[2]
try:
    max_bytes = int(sys.argv[3])
except ValueError:
    max_bytes = 8192
# A negative cap would slice from the end of the text; treat it as invalid.
if max_bytes < 0:
    max_bytes = 8192

overflow = len(text) - max_bytes
if overflow > 0:
    text = text[:max_bytes] + f"\n[...truncated, {overflow} bytes omitted...]"

# Escape first (no hidden bytes, no embedded line breaks), then indent by
# two spaces so nothing repo-controlled starts at column 0.
safe_lines = [
    "  " + raw_line.encode("unicode_escape").decode("ascii")
    for raw_line in text.splitlines()
]

fence_open = f"<<{title}>>"
fence_close = f"<<END_{title}>>"
sys.stdout.write(fence_open + "\n" + "\n".join(safe_lines) + "\n" + fence_close)
PY
}
537
+
538
+ # Strict parsers for codexa CLI/README output. These do NOT escape repo-
539
+ # controlled text — they parse known-shape lines against narrow regex +
540
+ # character allowlists and DROP anything that does not match. The output
541
+ # is plugin-controlled labels + validated field values, never free-form
542
+ # repo prose. This is the trust boundary that prevents prompt injection
543
+ # through `additionalContext` / hook stderr.
544
+
545
# Reduce `codexa status` output to strict key=value lines. Each input line is
# trimmed and must match one whole-line pattern built from narrow character
# classes; lines that match nothing are dropped silently. Every field is
# optional. Possible output lines (newline-joined, input order):
#   freshness=<token>
#   commit=<hex>
#   indexed_at=<iso8601>
#   dirty_files=<int>
#   parser_errors=<int>
claudio_parse_codexa_status() {
  local raw="${1:-}"
  [[ -z "$raw" ]] && return 0
  python3 - "$raw" <<'PY' 2>/dev/null
import re
import sys

# (label, pattern) pairs; the first pattern matching a line wins.
FIELD_PATTERNS = (
    ("freshness",
     re.compile(r"^Codexa status:\s+(?P<v>[a-z][a-z0-9_\-]{0,32})(?:\s+\([a-z0-9_\-]{0,64}\))?\s*$")),
    ("commit",
     re.compile(r"^Commit:\s+(?P<v>[0-9a-f]{7,40})\s*$")),
    ("indexed_at",
     re.compile(r"^Indexed:\s+(?P<v>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?Z?)\s*$")),
    ("dirty_files",
     re.compile(r"^Dirty files:\s+(?P<v>\d{1,6})\s*$")),
    ("parser_errors",
     re.compile(r"^Parser errors:\s+(?P<v>\d{1,6})\s*$")),
)


def classify(text):
    """Return "label=value" for a recognised status line, else None."""
    for label, pattern in FIELD_PATTERNS:
        hit = pattern.match(text)
        if hit:
            return f"{label}={hit.group('v')}"
    return None


parsed = []
for source_line in sys.argv[1].splitlines():
    candidate = source_line.strip()
    if not candidate:
        continue
    pair = classify(candidate)
    if pair is not None:
        parsed.append(pair)
sys.stdout.write("\n".join(parsed))
PY
}
589
+
590
# Pull up to MAX_ENTRIES (default 8; values outside 1..64 revert to 8)
# numbered bullets out of the "## Read First" section of a
# `.codex/codebase/README.md`. Accepted bullet shapes:
#   N. `path/to/file.ts` - rank 42.68 ...rest ignored...
#   N. path/to/file.ts - rank 42.68 ...rest ignored...
# A path is kept only if it consists of [A-Za-z0-9._/-], contains no "..",
# is not absolute, and is at most 256 characters. Rank must be a plain
# decimal. Anything else is dropped — there is no escaping fallback.
# Output: one "path<TAB>rank" entry per line.
claudio_parse_read_first() {
  local raw="${1:-}"
  local max_entries="${2:-8}"
  [[ -z "$raw" ]] && return 0
  python3 - "$raw" "$max_entries" <<'PY' 2>/dev/null
import re
import sys

raw = sys.argv[1]
try:
    max_entries = int(sys.argv[2])
except ValueError:
    max_entries = 8
if not 1 <= max_entries <= 64:
    max_entries = 8

PATH_RX = re.compile(r"^[A-Za-z0-9_.\-/]+$")
BULLET_RX = re.compile(
    r"^\s*\d+\.\s+`?([^`\s]{1,256})`?\s+-\s+rank\s+([0-9]{1,4}(?:\.[0-9]{1,4})?)"
)
HEADING_RX = re.compile(r"^##\s+Read First\b")


def accepted_rows(text):
    """Yield "path<TAB>rank" for each valid bullet in the Read First section."""
    inside = False
    for line in text.splitlines():
        trimmed = line.strip()
        # Every "## " heading toggles the section on or off.
        if trimmed.startswith("## "):
            inside = HEADING_RX.match(trimmed) is not None
            continue
        if not inside:
            continue
        hit = BULLET_RX.match(line)
        if hit is None:
            continue
        path, rank = hit.groups()
        if ".." in path or path.startswith("/") or not PATH_RX.match(path):
            continue
        yield f"{path}\t{rank}"


rows = []
for row in accepted_rows(raw):
    rows.append(row)
    if len(rows) >= max_entries:
        break
sys.stdout.write("\n".join(rows))
PY
}
642
+
643
# Reduce `codexa post-edit` output to per-section bullet counts. Raw content
# is never carried through, so hostile CLI output cannot smuggle prose via
# this channel. Always prints five lines, in this order:
#   section=<slug> count=<n>
# with slug in {drift_reasons, next_actions, tests_unaccounted, known_gaps,
# verification_ledger}.
claudio_parse_post_edit_summary() {
  local raw="${1:-}"
  [[ -z "$raw" ]] && return 0
  python3 - "$raw" <<'PY' 2>/dev/null
import re
import sys

# Exact heading text (after right-trimming) -> output slug, in output order.
HEADINGS = {
    "Drift reasons:": "drift_reasons",
    "Next actions:": "next_actions",
    "Tests still unaccounted for:": "tests_unaccounted",
    "Known gaps:": "known_gaps",
    "Verification ledger:": "verification_ledger",
}
BULLET_RX = re.compile(r"^\s*[-*\u2022]\s+\S")

tally = dict.fromkeys(HEADINGS.values(), 0)
active = None
for line in sys.argv[1].splitlines():
    text = line.rstrip()
    if not text:
        # A blank line ends the current section.
        active = None
    elif text in HEADINGS:
        active = HEADINGS[text]
    elif active and BULLET_RX.match(text):
        tally[active] += 1
    elif text.startswith("## "):
        # Only a "## " heading closes the section here; other non-bullet
        # lines leave it open and are simply not counted.
        active = None

for slug in ["drift_reasons", "next_actions", "tests_unaccounted", "known_gaps", "verification_ledger"]:
    sys.stdout.write(f"section={slug} count={tally[slug]}\n")
PY
}
688
+
689
# Extract the post-edit review's verdict, inspect classification, and
# baseline origin as strict tokens. Output lines (only those matched; the
# first occurrence of each wins; always in this order):
#   verdict=<continue|run_tests|inspect|replan>
#   inspect=<none|blocking|advisory>
#   origin=implicit
# Nothing before the literal "Codexa post-edit review" header line is
# considered, so repo-controlled text in the freshness banner (e.g. a repo
# path embedding a newline + "Verdict: replan") cannot inject a token. Lines
# that do not match the narrow enum patterns are dropped.
claudio_parse_post_edit_verdict() {
  local raw="${1:-}"
  [[ -z "$raw" ]] && return 0
  python3 - "$raw" <<'PY' 2>/dev/null
import re
import sys

verdict_rx = re.compile(r"^Verdict:\s+(continue|run_tests|inspect|replan)\s*$")
inspect_rx = re.compile(r"^Inspect classification:\s+(none|blocking|advisory)\s*;\s*authority\s+[a-z_]{1,40}\s*$")
origin_rx = re.compile(r"^Snapshot:\s+\S+\s+\(.{0,80}implicit pre-edit baseline\)$")

lines = iter(sys.argv[1].splitlines())
# Consume everything up to and including the review header.
for line in lines:
    if line.strip() == "Codexa post-edit review":
        break

tokens = {}
for line in lines:
    text = line.strip()
    if "origin" not in tokens and origin_rx.match(text):
        tokens["origin"] = "implicit"
        continue
    if "verdict" not in tokens:
        hit = verdict_rx.match(text)
        if hit:
            tokens["verdict"] = hit.group(1)
            continue
    if "inspect" not in tokens:
        hit = inspect_rx.match(text)
        if hit:
            tokens["inspect"] = hit.group(1)

out = [f"{name}={tokens[name]}" for name in ("verdict", "inspect", "origin") if name in tokens]
sys.stdout.write("\n".join(out))
PY
}
741
+
742
# Return 0 when REPO has a change-plan snapshot, i.e. a regular file at
# <repo>/.codex/cache/codexa-tasks/latest.json; return 1 otherwise (including
# an empty argument). The repo dir and each component below it are opened
# with O_NOFOLLOW relative to the previous descriptor, so a symlink at any of
# those positions counts as "no snapshot".
claudio_has_snapshot() {
  local repo="${1:-}"
  if [[ -z "$repo" ]]; then
    return 1
  fi
  python3 - "$repo" <<'PY' 2>/dev/null
import os
import stat
import sys

DIR_FLAGS = os.O_RDONLY | os.O_NOFOLLOW | os.O_DIRECTORY | os.O_CLOEXEC
FILE_FLAGS = os.O_RDONLY | os.O_NOFOLLOW | os.O_CLOEXEC

chain = []
try:
    chain.append(os.open(sys.argv[1], DIR_FLAGS))
    for part in (".codex", "cache", "codexa-tasks"):
        chain.append(os.open(part, DIR_FLAGS, dir_fd=chain[-1]))
    chain.append(os.open("latest.json", FILE_FLAGS, dir_fd=chain[-1]))
    is_regular = stat.S_ISREG(os.fstat(chain[-1]).st_mode)
    sys.exit(0 if is_regular else 1)
except OSError:
    sys.exit(1)
finally:
    # Innermost descriptor first; close errors do not affect the verdict.
    while chain:
        try:
            os.close(chain.pop())
        except OSError:
            pass
PY
}