npm - codex-genesis-harness - Versions diffs - 0.1.0 → 0.1.4 - Mend

codex-genesis-harness 0.1.0 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (328) hide show

package/.codex/skills/{project-genesis-harness → genesis-harness}/scripts/list-changed-files.sh RENAMED Viewed

@@ -1,12 +1,12 @@
-#!/usr/bin/env bash
-set -euo pipefail
-root="${1:-.}"
-cd "$root"
-if git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
-  git status --short
-else
-  echo "Not a git repository; changed files cannot be listed with git."
-fi
+#!/usr/bin/env bash
+set -euo pipefail
+root="${1:-.}"
+cd "$root"
+if git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
+  git status --short
+else
+  echo "Not a git repository; changed files cannot be listed with git."
+fi

package/.codex/skills/genesis-harness/scripts/offload-log.sh ADDED Viewed

@@ -0,0 +1,72 @@
+#!/usr/bin/env bash
+# ==============================================================================
+# Genesis Codex Harness — Tool Call Offloading Script
+# ==============================================================================
+# Captures standard output & error of any command, saves it to a durable log file,
+# and prints a trimmed head-and-tail summary if it exceeds a line count threshold.
+# Protects the AI context window against Context Rot.
+# ==============================================================================
+set -uo pipefail
+THRESHOLD_LINES=50
+SCRATCH_DIR="scratch/tool_outputs"
+usage() {
+  echo "Usage: $0 [--threshold lines] <command> [args...]" >&2
+  exit 2
+}
+if [ "$#" -eq 0 ]; then
+  usage
+fi
+if [ "$1" = "--threshold" ]; then
+  THRESHOLD_LINES="$2"
+  shift 2
+fi
+if [ "$#" -eq 0 ]; then
+  usage
+fi
+mkdir -p "$SCRATCH_DIR"
+# Generate unique log file path
+TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
+RAND_ID=$((RANDOM % 10000))
+LOG_FILE="$SCRATCH_DIR/output_${TIMESTAMP}_${RAND_ID}.log"
+# Execute command and capture output
+set +e
+"$@" > "$LOG_FILE" 2>&1
+EXIT_CODE=$?
+set -e
+# Calculate output size
+LINE_COUNT=$(wc -l < "$LOG_FILE")
+if [ "$LINE_COUNT" -le "$THRESHOLD_LINES" ]; then
+  # Print normally if within threshold
+  cat "$LOG_FILE"
+else
+  # Offload and print head/tail summary
+  HEAD_LINES=$((THRESHOLD_LINES / 2))
+  TAIL_LINES=$((THRESHOLD_LINES / 2))
+  echo "=============================================================================="
+  echo "==> [OFFLOADED] Command output exceeded threshold ($LINE_COUNT lines > $THRESHOLD_LINES lines)."
+  echo "==> Full log written to disk: file://$(pwd)/$LOG_FILE"
+  echo "=============================================================================="
+  head -n "$HEAD_LINES" "$LOG_FILE"
+  echo ""
+  echo "... [TRUNCATED $(($LINE_COUNT - THRESHOLD_LINES)) LINES] ..."
+  echo "Read file://$LOG_FILE to view the full execution history."
+  echo ""
+  tail -n "$TAIL_LINES" "$LOG_FILE"
+fi
+exit $EXIT_CODE

package/.codex/skills/{project-genesis-harness → genesis-harness}/scripts/run-verification.sh RENAMED Viewed

@@ -1,47 +1,47 @@
-#!/usr/bin/env bash
-set -euo pipefail
-root="${1:-.}"
-cd "$root"
-run_if() {
-  local label="$1"
-  shift
-  echo "==> $label"
-  "$@"
-}
-if [ -f package.json ]; then
-  if command -v npm >/dev/null 2>&1; then
-    npm run lint --if-present
-    npm run typecheck --if-present
-    npm test --if-present
-    npm run build --if-present
-  fi
-elif [ -f pyproject.toml ] || [ -f requirements.txt ]; then
-  command -v pytest >/dev/null 2>&1 && run_if "pytest" pytest -q
-  command -v ruff >/dev/null 2>&1 && run_if "ruff" ruff check .
-  command -v mypy >/dev/null 2>&1 && run_if "mypy" mypy .
-elif [ -f Cargo.toml ]; then
-  run_if "cargo test" cargo test
-  run_if "cargo clippy" cargo clippy -- -D warnings
-elif [ -f go.mod ]; then
-  run_if "go test" go test ./...
-elif [ -f composer.json ]; then
-  command -v composer >/dev/null 2>&1 && composer test || true
-else
-  echo "No known verification command detected. Add project-specific commands to .planning/SMOKE_TESTS.md."
-fi
-for check in \
-  check-required-planning-files.sh \
-  check-task-tracking.sh \
-  check-no-debug-logs.sh \
-  check-spec-changelog.sh \
-  check-architecture-boundaries.sh
-do
-  script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-  if [ -x "$script_dir/$check" ]; then
-    run_if "$check" "$script_dir/$check" "$root"
-  fi
-done
+#!/usr/bin/env bash
+# Run the project's verification commands, then the harness check scripts.
+#
+# Usage: run-verification.sh [root]   (root defaults to the current directory)
+#
+# The toolchain is picked from marker files found in <root>: package.json,
+# pyproject.toml / requirements.txt, Cargo.toml, go.mod or composer.json.
+# Afterwards each listed check-*.sh script that is executable and sits next to
+# this script is run with the project root as its only argument.
+set -euo pipefail
+root="${1:-.}"
+# Resolve this script's directory before changing directory: BASH_SOURCE may be
+# a relative path that no longer resolves once the working directory is $root.
+script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "$root"
+# Absolute form of the root, so the check scripts receive a path that is still
+# valid from the new working directory even when $root was relative.
+root_abs="$(pwd)"
+# Print a "==> label" banner, then run the remaining arguments as a command.
+run_if() {
+  local label="$1"
+  shift
+  echo "==> $label"
+  "$@"
+}
+if [ -f package.json ]; then
+  if command -v npm >/dev/null 2>&1; then
+    run_if "npm run lint" npm run lint --if-present
+    run_if "npm run typecheck" npm run typecheck --if-present
+    run_if "npm test" npm test --if-present
+    run_if "npm run build" npm run build --if-present
+  fi
+elif [ -f pyproject.toml ] || [ -f requirements.txt ]; then
+  command -v pytest >/dev/null 2>&1 && run_if "pytest" pytest -q
+  command -v ruff >/dev/null 2>&1 && run_if "ruff" ruff check .
+  command -v mypy >/dev/null 2>&1 && run_if "mypy" mypy .
+elif [ -f Cargo.toml ]; then
+  run_if "cargo test" cargo test
+  run_if "cargo clippy" cargo clippy -- -D warnings
+elif [ -f go.mod ]; then
+  run_if "go test" go test ./...
+elif [ -f composer.json ]; then
+  # Best-effort step: a failing `composer test` stays non-fatal, as before,
+  # but is now reported instead of being swallowed silently.
+  if command -v composer >/dev/null 2>&1; then
+    composer test || echo "composer test failed or is not defined; continuing." >&2
+  fi
+else
+  echo "No known verification command detected. Add project-specific commands to .planning/SMOKE_TESTS.md."
+fi
+for check in \
+  check-required-planning-files.sh \
+  check-task-tracking.sh \
+  check-no-debug-logs.sh \
+  check-spec-changelog.sh \
+  check-architecture-boundaries.sh
+do
+  if [ -x "$script_dir/$check" ]; then
+    run_if "$check" "$script_dir/$check" "$root_abs"
+  fi
+done

package/.codex/skills/genesis-harness/scripts/run-verify-loop.sh ADDED Viewed

@@ -0,0 +1,75 @@
+#!/usr/bin/env bash
+# ==============================================================================
+# Genesis Codex Harness — Ralph Loop (Verify-Fix) Orchestrator
+# ==============================================================================
+# Executes verification commands, tracks iteration counts to prevent infinite loops,
+# and logs diagnostic failure reports to disk for step resumption.
+# Enforces the self-verification safety invariant.
+#
+# Usage:  run-verify-loop.sh [--max-iterations N] <verification_command> [args...]
+# State:  .planning/VERIFY_LOOP_COUNT holds the iteration counter between runs;
+#         .planning/LAST_VERIFICATION_FAILURE.md holds the last command output.
+# Exit:   the verification command's exit status; 2 on a usage error; 99 when
+#         the iteration limit is exceeded; 1 when .planning cannot be created.
+# ==============================================================================
+set -uo pipefail
+MAX_ITERATIONS=5
+PLANNING_DIR=".planning"
+LOOP_COUNT_FILE="$PLANNING_DIR/VERIFY_LOOP_COUNT"
+FAILURE_LOG="$PLANNING_DIR/LAST_VERIFICATION_FAILURE.md"
+usage() {
+  echo "Usage: $0 [--max-iterations N] <verification_command> [args...]" >&2
+  exit 2
+}
+if [ "$#" -eq 0 ]; then
+  usage
+fi
+if [ "$1" = "--max-iterations" ]; then
+  # Under `set -u` a missing value would abort with "unbound variable", and a
+  # non-numeric one would break the integer comparison below: reject both.
+  if [ "$#" -lt 2 ]; then
+    usage
+  fi
+  case "$2" in
+    '' | *[!0-9]*) usage ;;
+  esac
+  # Force base 10 so a value such as "08" is not parsed as invalid octal.
+  MAX_ITERATIONS=$((10#$2))
+  shift 2
+fi
+if [ "$#" -eq 0 ]; then
+  usage
+fi
+if ! mkdir -p "$PLANNING_DIR"; then
+  echo "Cannot create planning directory: $PLANNING_DIR" >&2
+  exit 1
+fi
+# Initialize loop count
+CURRENT_ITERATION=1
+if [ -f "$LOOP_COUNT_FILE" ]; then
+  # Strip whitespace (including a stray CR) and accept digits only, so that
+  # file content is never evaluated as an arithmetic expression.
+  PREVIOUS_ITERATION="$(tr -d '[:space:]' < "$LOOP_COUNT_FILE")"
+  case "$PREVIOUS_ITERATION" in
+    '')
+      # Empty counter file: start at iteration 1.
+      ;;
+    *[!0-9]*)
+      echo "==> [VERIFY LOOP] Ignoring invalid counter in $LOOP_COUNT_FILE; restarting at 1." >&2
+      ;;
+    *)
+      CURRENT_ITERATION=$((10#$PREVIOUS_ITERATION + 1))
+      ;;
+  esac
+fi
+echo "$CURRENT_ITERATION" > "$LOOP_COUNT_FILE"
+echo "==> [VERIFY LOOP] Iteration $CURRENT_ITERATION of $MAX_ITERATIONS"
+if [ "$CURRENT_ITERATION" -gt "$MAX_ITERATIONS" ]; then
+  echo "=============================================================================="
+  echo "==> [BLOCKED] Verify-Fix loop exceeded maximum iteration count ($MAX_ITERATIONS)."
+  echo "==> Potentially stuck in an infinite loop. Halting for human intervention."
+  echo "=============================================================================="
+  # Reset the counter so the next run after human intervention starts fresh.
+  rm -f "$LOOP_COUNT_FILE"
+  exit 99
+fi
+# Execute verification command. errexit is never enabled in this script, so
+# the status can be read directly and nothing later can replace it.
+"$@" > "$FAILURE_LOG" 2>&1
+EXIT_CODE=$?
+if [ "$EXIT_CODE" -eq 0 ]; then
+  echo "==> [VERIFY LOOP] Pass! Verification completed successfully."
+  rm -f "$LOOP_COUNT_FILE"
+  rm -f "$FAILURE_LOG"
+else
+  echo "=============================================================================="
+  echo "==> [FAILURE] Verification failed with exit code $EXIT_CODE."
+  echo "==> Failure diagnostic written to: file://$(pwd)/$FAILURE_LOG"
+  echo "==> Please inspect the log, fix the root cause, and re-run verification."
+  echo "=============================================================================="
+fi
+exit "$EXIT_CODE"

package/.codex/skills/{project-genesis-harness → genesis-harness}/scripts/update-state.sh RENAMED Viewed

@@ -1,33 +1,33 @@
-#!/usr/bin/env bash
-set -euo pipefail
-usage() {
-  echo "Usage: $0 <root> <current-phase> <active-work> <last-completed-task> <next-task> <latest-verification>" >&2
-  exit 2
-}
-root="${1:-}"
-phase="${2:-}"
-active="${3:-}"
-last="${4:-}"
-next="${5:-}"
-verification="${6:-}"
-[ -n "$root" ] && [ -n "$phase" ] && [ -n "$active" ] && [ -n "$last" ] && [ -n "$next" ] && [ -n "$verification" ] || usage
-cd "$root"
-mkdir -p .planning
-cat > .planning/STATE.md <<EOF
-# State
-Current project state: [~] Active
-Current phase: $phase
-Current feature or bug: $active
-Last completed task: $last
-Next task: $next
-Blocked items: None recorded
-Latest verification result: $verification
-EOF
-echo ".planning/STATE.md"
+#!/usr/bin/env bash
+set -euo pipefail
+usage() {
+  echo "Usage: $0 <root> <current-phase> <active-work> <last-completed-task> <next-task> <latest-verification>" >&2
+  exit 2
+}
+root="${1:-}"
+phase="${2:-}"
+active="${3:-}"
+last="${4:-}"
+next="${5:-}"
+verification="${6:-}"
+[ -n "$root" ] && [ -n "$phase" ] && [ -n "$active" ] && [ -n "$last" ] && [ -n "$next" ] && [ -n "$verification" ] || usage
+cd "$root"
+mkdir -p .planning
+cat > .planning/STATE.md <<EOF
+# State
+Current project state: [~] Active
+Current phase: $phase
+Current feature or bug: $active
+Last completed task: $last
+Next task: $next
+Blocked items: None recorded
+Latest verification result: $verification
+EOF
+echo ".planning/STATE.md"

package/.codex/skills/genesis-harness/templates/harness-workflow-template.md ADDED Viewed

@@ -0,0 +1,10 @@
+# Harness Workflow Template
+## Intent
+## Failing Test
+## Fixture
+## Contract
+## Implementation
+## Verification
+## Memory Update

package/.codex/skills/genesis-harness-engineering/SKILL.md ADDED Viewed

@@ -0,0 +1,159 @@
+---
+name: harness-engineering-skill
+description: "Evolve the Codex harness itself: verification loops, repository memory, test-first scaffolds, resumability, observability, and autonomous workflow reliability. Use for changes to this repository's skill system or harness architecture."
+---
+# Harness Engineering Skill
+**Automated evolution, verification scaling, and structural safety loop engineering for Codex developer environments**
+---
+## Purpose
+To provide a robust, self-healing, and deterministic development environment for autonomous coding agents (Codex). Harness Engineering shifts the focus from writing soft, non-deterministic natural language prompts to building hard, mechanically-enforced environmental constraints (feedback loops, test runners, custom lints, and state managers) that prevent agentic drift and guarantee software quality.
+By scaling and optimizing this harness, we achieve two primary objectives:
+1.  **Extreme Feedback Velocity**: Compressing the time from code generation to execution feedback (TDD loop) to allow rapid self-correction.
+2.  **Safety & Invariance Enforcement**: Ensuring that all changes satisfy strict codebase constraints (lints, typechecks, API contracts, and E2E journeys) before completion is declared.
+---
+## When to use
+Use this skill when:
+-   **Modifying Core Verification Logic**: Adding new checks to `scripts/verify.sh` or expanding `scripts/run-evals.sh`.
+-   **Evolving Codex Skills**: Modifying existing `.codex/skills/` metadata, workflows, templates, or checklists.
+-   **Updating API, Agent, or Event Contracts**: Defining new schemas, request/response fixtures, or event topologies.
+-   **Extending Observability**: Adding tracking files, decision-logging formats, or failure telemetry inside `observability/`.
+-   **Configuring Distribution Channels**: Modifying the npm CLI wrapper (`bin/genesis-harness.js`), installers (`install.sh`), or CI/CD pipelines.
+---
+## When NOT to use
+Do not use this skill for:
+-   Implementing application-specific business logic or customer features.
+-   General codebase research that does not affect the agentic operating environment.
+-   One-off scripting tasks unrelated to the reliability and verification loop of Codex.
+---
+## Inputs required
+To evolve the harness, Codex must have access to:
+1.  **Current Operating Memory**: `.codebase/CURRENT_STATE.md` and `.codebase/MODULE_INDEX.md`.
+2.  **Harness Test Matrix**: `.codebase/TEST_MATRIX.md` showing existing verification paths.
+3.  **Target Workflow Specs**: The contract or script being evolved (e.g., `bin/genesis-harness.js`, `scripts/verify.sh`).
+4.  **Verification Gap**: The failure scenario or regression that is not currently caught by the test suite.
+---
+## Outputs required
+Every harness evolution must deliver:
+1.  **Failing Regression Test**: A new verification case added to `scripts/verify.sh` or `scripts/run-evals.sh` that fails *before* implementation.
+2.  **Atomic Implementation Change**: The minimum code, script, or structural edit required to pass the test.
+3.  **Clean Run Log**: Execution proof showing `scripts/verify.sh` and `scripts/run-evals.sh` passing cleanly with exit code `0`.
+4.  **Normalized Script Files**: LF line ending assurance on all modified `.sh` files to prevent Windows compatibility bugs.
+5.  **Durable Memory Update**: A recovery checkpoint written to `.codebase/RECOVERY_POINTS.md`.
+---
+## Required tests
+-   **Red-Green-Refactor Flow**: A failing assertion must be added to the test suite (`scripts/verify.sh` or `scripts/run-evals.sh`) prior to code modification.
+-   **Command Integrity Check**: The CLI `genesis-harness` must be verified using `npm run pack:check` and local execution tests in a separate sandbox.
+-   **Cross-Platform Validation**: All scripts must be checked for line-ending and interpreter syntax compatibility across POSIX/Bash environments on Windows/WSL and Linux.
+---
+## Required fixtures
+-   **Contract Templates**: Up-to-date `.json` schema templates for all agent, event, API, and UI interaction contracts.
+-   **Scaffold Fixtures**: Reusable sample files representing typical project states (like the `sample-feature` and `sample-bug` structures generated in test sandboxes).
+-   **Expected Validation Outputs**: JSON or Markdown files representing the expected logs or decision outputs of a successful run.
+---
+## Required contract updates
+-   **CLI-Skill Protocol**: Any change to CLI arguments, environment variable mappings, or directory output paths must be immediately reflected in `.codex-plugin/plugin.json`.
+-   **Agent Contracts**: Changes to agent execution behavior must be documented in `contracts/agents/` schemas.
+-   **Event Topologies**: Updates to the event broker simulation must be verified against `contracts/events/` schemas.
+---
+## Required codebase map updates
+-   **Module Index Sync**: Newly added skills or templates must be registered in `.codebase/MODULE_INDEX.md` and `.codex/SKILLS_INDEX.md`.
+-   **Architecture Log**: Major design shifts in the harness workflow must be logged as Architecture Decision Records under `.codebase/decisions/` or `.planning/decisions/`.
+-   **State Transition**: Recalculate and update the architecture and quality scores inside `.codebase/CURRENT_STATE.md`.
+---
+## Token saving rules
+-   **Strict Ephemerality**: Treat the model's context window as volatile. Write key state updates to on-disk files (`task.md` and `.codebase/` memory) between steps instead of carrying huge logs in the prompt history.
+-   **On-Demand Loading**: Never load multiple `SKILL.md` files at once. Read the `.codebase/context/` summaries to target specific sub-directories.
+-   **Keep Commands Silent**: Suppress verbose compiler or script outputs (`>/dev/null`) unless a failure occurs, saving significant context tokens.
+---
+## Acceptance criteria
+A harness modification is accepted *only* when:
+-   [x] The new check fails first in a fresh, isolated run of the verification pipeline.
+-   [x] The minimal change is implemented, resulting in a successful pass of `verify.sh` and `run-evals.sh` with exit code `0`.
+-   [x] All changed `.sh` files are validated to have LF line endings and parse cleanly without syntax warnings.
+-   [x] CLI dry-run package checks (`npm run pack:check`) confirm package integrity.
+-   [x] The evolution is documented in `.codebase/RECOVERY_POINTS.md` with exact recovery instructions.
+---
+## Common mistakes
+-   **Writing prose instructions**: Attempting to guide the model using soft natural-language descriptions in READMEs instead of writing hard validation scripts.
+-   **Duplicating context maps**: Storing identical configuration maps across multiple skills instead of importing standard `.codebase/` definitions.
+-   **CRLF Line Ending Traps**: Checking in scripts with Windows line-endings (`\r\n`), causing execution syntax errors in WSL/POSIX shells.
+-   **Generator-Evaluator Co-location**: Allowing the agent to grade its own output format using regex within its prompt instead of executing an external compiler or validator command.
+---
+## Recovery workflow
+If a harness evolution breaks the local environment or causes test failures:
+1.  **Isolate the Failing State**: Stop all active background processes and subagents.
+2.  **Revert to Safety**: Run `git checkout -- <file>` to revert to the last committed stable state.
+3.  **Read Recovery Point**: Inspect `.codebase/RECOVERY_POINTS.md` to identify the nearest stable checkpoint.
+4.  **Write Minimal Reproducer**: Create a single, isolated script in the `scratch/` directory that demonstrates only the failure.
+5.  **Re-attempt TDD**: Implement the fix on top of the reproducer before integrating back into the main harness.
+---
+## Core Invariants: The PEV Loop
+Every agentic activity within this harness must strictly adhere to the **Plan-Execute-Verify (PEV)** pipeline:
+```txt
+   +---------------------------------------------------+
+   |             Plan Phase (DoR Verified)             |
+   |  - Research patterns and write on-disk PLAN.md    |
+   +---------------------------------------------------+
+                             |
+                             v
+   +---------------------------------------------------+
+   |            Execute Phase (TDD Enforced)           |
+   |  - Create failing unit test/verification check    |
+   |  - Write minimal code to pass verification        |
+   +---------------------------------------------------+
+                             |
+                             v
+   +---------------------------------------------------+
+   |             Verify Phase (DoD Verified)           |
+   |  - Execute verify.sh and run-evals.sh             |
+   |  - Normalize line-endings and format files        |
+   +---------------------------------------------------+
+```
+Never skip a phase, and never claim completion without verifiable evidence of test passing.

package/.codex/skills/genesis-harness-engineering/agents/openai.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+interface:
+  display_name: "Harness Engineering Skill"
+  short_description: "Improve Codex harness reliability"
+  default_prompt: "Use $harness-engineering-skill to evolve the harness with tests first."
+policy:
+  allow_implicit_invocation: true

package/.codex/skills/genesis-harness-engineering/checklists/checklist.md ADDED Viewed

@@ -0,0 +1,48 @@
+# Checklist: Pre-Flight Harness Validation & Compliance
+Use this checklist prior to committing or declaring completion on any evolution of the **Genesis Codex Harness** operating environment.
+---
+## 🏗 Part 1: Structural & File Invariants
+- [ ] **Folder Structure Compliance**: Every skill has a `SKILL.md`, `agents/openai.yaml`, `templates/`, and `examples/`. Playbook-based skills also include `checklists/`, `playbooks/`, and `observability/`.
+- [ ] **Line Ending Invariants**: All script files (`*.sh`, checked recursively) are verified to have LF line endings, so that scripts edited on Windows do not cause parse failures in WSL, Linux, or macOS shells.
+- [ ] **Executable Bit**: All scripts inside `scripts/` and skill-specific script folders have executable flags set (`chmod +x`).
+---
+## 📝 Part 2: Skill Frontmatter & Section Checks
+- [ ] **Casing Validation**: Standard skill sections match exact capitalization rules:
+  - `## Purpose`
+  - `## When to use`
+  - `## When NOT to use`
+  - `## Inputs required`
+  - `## Outputs required`
+  - `## Required tests`
+  - `## Required fixtures`
+  - `## Required contract updates`
+  - `## Required codebase map updates`
+  - `## Token saving rules`
+  - `## Acceptance criteria`
+  - `## Common mistakes`
+  - `## Recovery workflow`
+- [ ] **Metadata Alignment**: The frontmatter `name:` matches the legacy expected string checked inside `scripts/verify.sh` for directory mapping.
+---
+## 🧪 Part 3: Test Suite & Distribution Execution
+- [ ] **Red-State Invariant**: The test failure was demonstrated *first* before implementing the changes.
+- [ ] **Verification Smoke Pass**: `bash scripts/verify.sh` completes cleanly with exit code `0`.
+- [ ] **Regression Eval Pass**: `bash scripts/run-evals.sh` completes cleanly with exit code `0`.
+- [ ] **CLI Dry-Run Integrity**: `npm run pack:check` runs successfully and package size is correct.
+- [ ] **Uninstall Isolation**: The `uninstall.sh` script leaves no stale files behind in custom home locations.
+---
+## 💾 Part 4: Memory Invariance
+- [ ] **Current State Updated**: Skill counts, quality metrics, and completion statuses are synchronized in `.codebase/CURRENT_STATE.md`.
+- [ ] **Recovery Point Saved**: Exact rollback steps and verification logs are saved in `.codebase/RECOVERY_POINTS.md`.

package/.codex/skills/genesis-harness-engineering/examples/example.md ADDED Viewed

@@ -0,0 +1,57 @@
+# Example: Harness Evolution - Integrating `genesis-performance-profiling`
+This example demonstrates how an engineer (human or AI) safely scales the **Genesis Codex Harness** to support a new playbook-based skill called `genesis-performance-profiling`.
+---
+## Step 1: Write a Failing Verification Test (RED State)
+First, we add the new skill directory to the `skill_names` array inside `scripts/verify.sh` so that it is included in the test loop:
+```diff
+ skill_names=(
+   genesis-harness
+   genesis-new-design
+   genesis-upgrade-design
++  genesis-performance-profiling
+ )
+```
+And we add its case mapping inside `verify_one`:
+```diff
++    genesis-performance-profiling)
++      verify_skill_metadata "$skill_dir" "genesis-performance-profiling"
++      verify_playbook_skill "$skill_dir"
++      ;;
+```
+When we run `bash scripts/verify.sh`, it fails immediately because the directory `genesis-performance-profiling` does not exist yet. This confirms our validation test is active and skeptical!
+---
+## Step 2: Implement the Skill (GREEN State)
+We create the directory and populate all required files according to the playbook protocol:
+- `SKILL.md` (with all 13 standard headers and metadata).
+- `agents/openai.yaml`.
+- `templates/`, `checklists/`, `playbooks/`, `observability/`, and `examples/` directories.
+---
+## Step 3: Normalize Line Endings
+We run our CRLF-to-LF conversion command to guarantee cross-platform shell compatibility:
+```bash
+python -c "import os; [open(os.path.join(root, f), 'wb').write(content) for root, dirs, files in os.walk('.') for f in files if f.endswith('.sh') for content in [open(os.path.join(root, f), 'rb').read().replace(b'\r\n', b'\n')]]"
+```
+---
+## Step 4: Run E2E Verification
+We run all validation checks locally to confirm that the new skill is fully integrated and compliant:
+```bash
+$ bash scripts/verify.sh
+verify passed: /mnt/d/PROJECT/codex-genesis-harness/.codex/skills
+$ bash scripts/run-evals.sh
+evals passed
+```
+---
+## Step 5: Update Codebase Memory
+We register our evolutionary step in [.codebase/CURRENT_STATE.md](file:///.codebase/CURRENT_STATE.md) and [.codebase/RECOVERY_POINTS.md](file:///.codebase/RECOVERY_POINTS.md) before pushing our changes to version control!