codeharness 0.13.2 → 0.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codeharness",
3
- "version": "0.13.2",
3
+ "version": "0.16.1",
4
4
  "type": "module",
5
5
  "description": "CLI for codeharness — makes autonomous coding agents produce software that actually works",
6
6
  "bin": {
@@ -9,6 +9,7 @@
9
9
  "files": [
10
10
  "dist",
11
11
  "bin",
12
+ "templates/Dockerfile.verify",
12
13
  "ralph/**/*.sh",
13
14
  "ralph/AGENTS.md"
14
15
  ],
package/ralph/ralph.sh CHANGED
@@ -7,6 +7,9 @@
7
7
 
8
8
  set -e
9
9
 
10
+ # DEBUG: catch unexpected exits from set -e
11
+ trap 'echo "[$(date "+%Y-%m-%d %H:%M:%S")] [FATAL] ralph.sh died at line $LINENO (exit code: $?)" >> "${LOG_DIR:-ralph/logs}/ralph_crash.log" 2>/dev/null' ERR
12
+
10
13
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
14
  source "$SCRIPT_DIR/lib/date_utils.sh"
12
15
  source "$SCRIPT_DIR/lib/timeout_utils.sh"
@@ -581,6 +584,11 @@ execute_iteration() {
581
584
  local deadline=$(( $(date +%s) + timeout_seconds ))
582
585
  echo "$deadline" > "ralph/.iteration_deadline"
583
586
 
587
+ # DEBUG: log the command being run
588
+ log_status "DEBUG" "Command: ${CLAUDE_CMD_ARGS[*]}"
589
+ log_status "DEBUG" "Output file: $output_file"
590
+ log_status "DEBUG" "LIVE_OUTPUT=$LIVE_OUTPUT, timeout=${timeout_seconds}s"
591
+
584
592
  log_status "INFO" "Starting $(driver_display_name) (timeout: ${ITERATION_TIMEOUT_MINUTES}m)..."
585
593
 
586
594
  # Execute with timeout
@@ -606,6 +614,8 @@ execute_iteration() {
606
614
  local claude_pid=$!
607
615
  local progress_counter=0
608
616
 
617
+ log_status "DEBUG" "Background PID: $claude_pid"
618
+
609
619
  while kill -0 $claude_pid 2>/dev/null; do
610
620
  progress_counter=$((progress_counter + 1))
611
621
  if [[ -f "$output_file" && -s "$output_file" ]]; then
@@ -614,8 +624,23 @@ execute_iteration() {
614
624
  sleep 10
615
625
  done
616
626
 
627
+ # Protect wait from set -e — capture exit code without crashing
628
+ set +e
617
629
  wait $claude_pid
618
630
  exit_code=$?
631
+ set -e
632
+ log_status "DEBUG" "Claude exited with code: $exit_code, output size: $(wc -c < "$output_file" 2>/dev/null || echo 0) bytes"
633
+
634
+ # If output is empty and exit code is non-zero, log diagnostic info
635
+ if [[ ! -s "$output_file" && $exit_code -ne 0 ]]; then
636
+ log_status "ERROR" "Claude produced no output and exited with code $exit_code"
637
+ log_status "DEBUG" "Checking if claude binary is responsive..."
638
+ if claude --version > /dev/null 2>&1; then
639
+ log_status "DEBUG" "claude binary OK: $(claude --version 2>&1)"
640
+ else
641
+ log_status "ERROR" "claude binary not responding"
642
+ fi
643
+ fi
619
644
  fi
620
645
 
621
646
  if [[ $exit_code -eq 0 ]]; then
package/templates/Dockerfile.verify ADDED
@@ -0,0 +1,28 @@
1
+ # Black-box verification environment
2
+ # Installs the project as a user would (from tarball), plus verification tools.
3
+ # NO source code enters the image — only the built artifact.
4
+ FROM node:20-slim
5
+
6
+ ARG TARBALL=package.tgz
7
+
8
+ # System utilities
9
+ RUN apt-get update && apt-get install -y --no-install-recommends \
10
+ curl \
11
+ jq \
12
+ && rm -rf /var/lib/apt/lists/*
13
+
14
+ # Verification tools + Claude Code CLI
15
+ RUN npm install -g showboat @anthropic-ai/claude-code
16
+
17
+ # Install the project from tarball (same as a user would)
18
+ COPY ${TARBALL} /tmp/${TARBALL}
19
+ RUN npm install -g /tmp/${TARBALL} && rm /tmp/${TARBALL}
20
+
21
+ # OTEL environment pointing to host observability stack
22
+ ENV OTEL_EXPORTER_OTLP_ENDPOINT=http://host.docker.internal:4318
23
+ ENV OTEL_SERVICE_NAME=codeharness-verify
24
+ ENV OTEL_TRACES_EXPORTER=otlp
25
+ ENV OTEL_METRICS_EXPORTER=otlp
26
+ ENV OTEL_LOGS_EXPORTER=otlp
27
+
28
+ WORKDIR /workspace