codeharness 0.25.1 → 0.25.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -52,6 +52,8 @@ interface AnalyzerConfig {
52
52
  readonly tool?: string;
53
53
  /** Directory containing analysis rules, relative to project root. Default: 'patches/observability/' */
54
54
  readonly rulesDir?: string;
55
+ /** Additional rule directories to include in the scan, relative to project root. Default: ['patches/error-handling/'] */
56
+ readonly additionalRulesDirs?: readonly string[];
55
57
  /** Timeout for the analysis subprocess in milliseconds. Default: 60000 */
56
58
  readonly timeout?: number;
57
59
  /**
@@ -15,6 +15,7 @@ function fail(error, context) {
15
15
 
16
16
  // src/modules/observability/analyzer.ts
17
17
  var DEFAULT_RULES_DIR = "patches/observability/";
18
+ var ADDITIONAL_RULES_DIRS = ["patches/error-handling/"];
18
19
  var DEFAULT_TIMEOUT = 6e4;
19
20
  var FUNCTION_NO_LOG_RULE = "function-no-debug-log";
20
21
  var CATCH_WITHOUT_LOGGING_RULE = "catch-without-logging";
@@ -48,7 +49,8 @@ function analyze(projectDir, config) {
48
49
  const rulesDir = config?.rulesDir ?? DEFAULT_RULES_DIR;
49
50
  const timeout = config?.timeout ?? DEFAULT_TIMEOUT;
50
51
  const fullRulesDir = join(projectDir, rulesDir);
51
- const rawResult = runSemgrep(projectDir, fullRulesDir, timeout);
52
+ const additionalDirs = (config?.additionalRulesDirs ?? ADDITIONAL_RULES_DIRS).map((d) => join(projectDir, d));
53
+ const rawResult = runSemgrep(projectDir, fullRulesDir, timeout, additionalDirs);
52
54
  if (!rawResult.success) {
53
55
  return fail(rawResult.error);
54
56
  }
@@ -73,11 +75,15 @@ function checkSemgrepInstalled() {
73
75
  return false;
74
76
  }
75
77
  }
76
- function runSemgrep(projectDir, rulesDir, timeout = DEFAULT_TIMEOUT) {
78
+ function runSemgrep(projectDir, rulesDir, timeout = DEFAULT_TIMEOUT, additionalRulesDirs = []) {
77
79
  try {
80
+ const configArgs = ["--config", rulesDir];
81
+ for (const dir of additionalRulesDirs) {
82
+ configArgs.push("--config", dir);
83
+ }
78
84
  const stdout = execFileSync(
79
85
  "semgrep",
80
- ["scan", "--config", rulesDir, "--json", projectDir],
86
+ ["scan", ...configArgs, "--json", projectDir],
81
87
  { encoding: "utf-8", timeout, stdio: ["pipe", "pipe", "pipe"] }
82
88
  );
83
89
  const parsed = JSON.parse(stdout);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codeharness",
3
- "version": "0.25.1",
3
+ "version": "0.25.2",
4
4
  "type": "module",
5
5
  "description": "CLI for codeharness — makes autonomous coding agents produce software that actually works",
6
6
  "bin": {
@@ -13,6 +13,11 @@
13
13
  "templates/Dockerfile.verify",
14
14
  "templates/Dockerfile.verify.rust",
15
15
  "templates/Dockerfile.verify.generic",
16
+ "templates/dockerfiles/",
17
+ "templates/compose/",
18
+ "templates/prompts/",
19
+ "templates/docs/",
20
+ "templates/otlp/",
16
21
  "ralph/**/*.sh",
17
22
  "ralph/AGENTS.md"
18
23
  ],
@@ -28,7 +33,9 @@
28
33
  "build": "tsup",
29
34
  "test": "bats tests/",
30
35
  "test:unit": "vitest run",
31
- "test:coverage": "vitest run --coverage"
36
+ "test:coverage": "vitest run --coverage",
37
+ "lint": "eslint src/",
38
+ "lint:sizes": "bash scripts/check-file-sizes.sh"
32
39
  },
33
40
  "dependencies": {
34
41
  "@inkjs/ui": "^2.0.0",
@@ -38,6 +45,7 @@
38
45
  "yaml": "^2.8.2"
39
46
  },
40
47
  "devDependencies": {
48
+ "@eslint/js": "^10.0.1",
41
49
  "@opentelemetry/auto-instrumentations-node": "^0.71.0",
42
50
  "@opentelemetry/exporter-logs-otlp-http": "^0.213.0",
43
51
  "@opentelemetry/exporter-metrics-otlp-http": "^0.213.0",
@@ -45,9 +53,11 @@
45
53
  "@types/node": "^25.5.0",
46
54
  "@types/react": "^19.2.14",
47
55
  "@vitest/coverage-v8": "^4.1.0",
56
+ "eslint": "^10.1.0",
48
57
  "ink-testing-library": "^4.0.0",
49
58
  "tsup": "^8.5.1",
50
59
  "typescript": "^5.9.3",
60
+ "typescript-eslint": "^8.57.2",
51
61
  "vitest": "^4.1.0"
52
62
  }
53
63
  }
@@ -18,7 +18,7 @@ Before writing code, read the relevant `AGENTS.md` file for the module being cha
18
18
 
19
19
  ### Observability
20
20
 
21
- Run `semgrep scan --config patches/observability/` before committing and fix any gaps.
21
+ Run `semgrep scan --config patches/observability/ --config patches/error-handling/` before committing and fix any gaps.
22
22
 
23
23
  After running tests, verify telemetry is flowing:
24
24
  - Query VictoriaLogs to confirm log events from test runs
@@ -0,0 +1,29 @@
1
+ # patches/error-handling/ — Semgrep Rules for Error Handling Enforcement
2
+
3
+ Standalone Semgrep YAML rules for detecting dangerous error-swallowing patterns. Each `.yaml` file is a complete Semgrep config — no build step, no TypeScript. Deleting a rule file removes that check.
4
+
5
+ ## Rules
6
+
7
+ ### Python
8
+
9
+ | File | Purpose | Severity |
10
+ |------|---------|----------|
11
+ | no-bare-except.yaml | Detects `except Exception: pass` and `except Exception: ...` (bare error swallowing) | ERROR |
12
+
13
+ ## Test Fixtures
14
+
15
+ ### Python
16
+
17
+ | File | Purpose |
18
+ |------|---------|
19
+ | __tests__/no-bare-except.py | Test cases for no-bare-except rules (annotated with `# ruleid:` / `# ok:`) |
20
+
21
+ ## Testing
22
+
23
+ Run `semgrep --test patches/error-handling/` to execute all test fixtures against their rules.
24
+
25
+ ## Integration
26
+
27
+ The review enforcement patch (`patches/review/enforcement.md`) and dev enforcement patch (`patches/dev/enforcement.md`) instruct agents to run Semgrep with `--config patches/error-handling/` in addition to `--config patches/observability/`.
28
+
29
+ The `hooks/post-write-check.sh` hook provides fast grep-based detection of `except Exception: pass` in Python files for immediate feedback during development.
@@ -0,0 +1,38 @@
1
+ # Test cases for no-bare-except-pass and no-bare-except-ellipsis Semgrep rules
2
+
3
+ # ruleid: no-bare-except-pass
4
+ try:
5
+ do_something()
6
+ except Exception:
7
+ pass
8
+
9
+ try:
10
+ do_something()
11
+ # ruleid: no-bare-except-ellipsis
12
+ except Exception:
13
+ ...
14
+
15
+ # ok: no-bare-except-pass
16
+ try:
17
+ do_something()
18
+ except Exception as e:
19
+ logger.error("Failed: %s", e)
20
+
21
+ # ok: no-bare-except-pass
22
+ try:
23
+ do_something()
24
+ except Exception as e:
25
+ print(f"Error: {e}")
26
+ raise
27
+
28
+ # ok: no-bare-except-pass
29
+ try:
30
+ do_something()
31
+ except ValueError:
32
+ pass
33
+
34
+ # ok: no-bare-except-ellipsis
35
+ try:
36
+ do_something()
37
+ except Exception as e:
38
+ logging.warning("Ignored: %s", e)
@@ -0,0 +1,22 @@
1
+ rules:
2
+ - id: no-bare-except-pass
3
+ pattern: |
4
+ try:
5
+ ...
6
+ except Exception:
7
+ pass
8
+ message: "Bare `except Exception: pass` swallows errors silently. Handle the error, log it, or add a # IGNORE: comment explaining why."
9
+ severity: ERROR
10
+ languages: [python]
11
+ metadata:
12
+ category: error-handling
13
+ confidence: HIGH
14
+
15
+ - id: no-bare-except-ellipsis
16
+ pattern-regex: 'except\s+Exception\s*:\s*\n\s+\.\.\.\s*$'
17
+ message: "Bare `except Exception: ...` swallows errors silently. Handle the error, log it, or add a # IGNORE: comment explaining why."
18
+ severity: ERROR
19
+ languages: [python]
20
+ metadata:
21
+ category: error-handling
22
+ confidence: HIGH
@@ -27,7 +27,7 @@ The proof must pass black-box enforcement:
27
27
 
28
28
  ### Observability
29
29
 
30
- Run `semgrep scan --config patches/observability/ --json` against changed files and report gaps.
30
+ Run `semgrep scan --config patches/observability/ --config patches/error-handling/ --json` against changed files and report gaps.
31
31
 
32
32
  - For each gap found, list it as a review issue: file path, line number, and description (e.g., "src/lib/docker.ts:42 — catch block without logging")
33
33
  - Semgrep JSON output fields to extract: `check_id`, `path`, `start.line`, `extra.message`
package/ralph/ralph.sh CHANGED
@@ -192,6 +192,10 @@ update_status() {
192
192
  flagged_json=$(jq -R -s 'split("\n") | map(select(length > 0))' < "$FLAGGED_STORIES_FILE")
193
193
  fi
194
194
 
195
+ # Get current story key for status tracking
196
+ local current_story
197
+ current_story=$(get_current_task)
198
+
195
199
  jq -n \
196
200
  --arg timestamp "$(get_iso_timestamp)" \
197
201
  --argjson loop_count "$loop_count" \
@@ -202,6 +206,7 @@ update_status() {
202
206
  --arg status "$status" \
203
207
  --arg exit_reason "$exit_reason" \
204
208
  --arg version "$VERSION" \
209
+ --arg story "${current_story:-}" \
205
210
  --argjson stories_total "$stories_total" \
206
211
  --argjson stories_completed "$stories_completed" \
207
212
  --argjson stories_remaining "$stories_remaining" \
@@ -216,6 +221,7 @@ update_status() {
216
221
  max_iterations: $max_iterations,
217
222
  last_action: $last_action,
218
223
  status: $status,
224
+ story: $story,
219
225
  exit_reason: $exit_reason,
220
226
  stories_total: $stories_total,
221
227
  stories_completed: $stories_completed,
@@ -228,8 +234,36 @@ update_status() {
228
234
  # codeharness: Task picking is handled by /harness-run skill inside each Claude session.
229
235
  # Ralph just spawns sessions and checks sprint-status.yaml for completion.
230
236
  get_current_task() {
231
- # No-op task picking is done by the /harness-run skill, not Ralph.
232
- echo ""
237
+ # Read the first in-progress or ready-for-dev story from sprint-state.json.
238
+ # Task picking is done by /harness-run, but Ralph needs the story key
239
+ # for timeout reports and status tracking.
240
+ local state_file="sprint-state.json"
241
+ if [[ ! -f "$state_file" ]]; then
242
+ echo ""
243
+ return 0
244
+ fi
245
+
246
+ # First try to find an in-progress story
247
+ local story_key
248
+ story_key=$(jq -r '
249
+ .stories // {} | to_entries[]
250
+ | select(.value.status == "in-progress")
251
+ | .key
252
+ ' "$state_file" 2>/dev/null | head -1)
253
+
254
+ if [[ -n "$story_key" ]]; then
255
+ echo "$story_key"
256
+ return 0
257
+ fi
258
+
259
+ # Fall back to the first ready-for-dev story
260
+ story_key=$(jq -r '
261
+ .stories // {} | to_entries[]
262
+ | select(.value.status == "ready-for-dev")
263
+ | .key
264
+ ' "$state_file" 2>/dev/null | head -1)
265
+
266
+ echo "${story_key:-}"
233
267
  return 0
234
268
  }
235
269
 
@@ -243,6 +277,16 @@ check_sprint_complete() {
243
277
 
244
278
  local total=0
245
279
  local done_count=0
280
+ local flagged_count=0
281
+
282
+ # Load flagged stories for comparison
283
+ local -A flagged_map
284
+ if [[ -f "$FLAGGED_STORIES_FILE" ]]; then
285
+ while IFS= read -r flagged_key; do
286
+ flagged_key=$(echo "$flagged_key" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
287
+ [[ -n "$flagged_key" ]] && flagged_map["$flagged_key"]=1
288
+ done < "$FLAGGED_STORIES_FILE"
289
+ fi
246
290
 
247
291
  while IFS=: read -r key value; do
248
292
  # Trim whitespace
@@ -257,6 +301,10 @@ check_sprint_complete() {
257
301
  total=$((total + 1))
258
302
  if [[ "$value" == "done" ]]; then
259
303
  done_count=$((done_count + 1))
304
+ elif [[ -n "${flagged_map[$key]+x}" ]]; then
305
+ # Retry-exhausted/flagged stories count as "effectively done"
306
+ # — no autonomous work can be done on them
307
+ flagged_count=$((flagged_count + 1))
260
308
  fi
261
309
  fi
262
310
  done < "$SPRINT_STATUS_FILE"
@@ -265,7 +313,8 @@ check_sprint_complete() {
265
313
  return 1
266
314
  fi
267
315
 
268
- [[ $done_count -eq $total ]]
316
+ # Sprint is complete if all stories are either done or flagged (no autonomous work left)
317
+ [[ $((done_count + flagged_count)) -eq $total ]]
269
318
  }
270
319
 
271
320
  # codeharness: Replaces all_tasks_complete() with sprint-status.yaml check.
@@ -781,6 +830,13 @@ execute_iteration() {
781
830
  fi
782
831
  fi
783
832
 
833
+ # If harness-run reported NO_WORK, don't count file changes as progress.
834
+ # Writing session-issues.md with "NO_WORK" creates git diffs but is NOT real progress.
835
+ if grep -qE 'Result: NO_WORK|no actionable stories remain' "$output_file" 2>/dev/null; then
836
+ files_changed=0
837
+ log_status "INFO" "NO_WORK detected — overriding files_changed to 0 for circuit breaker"
838
+ fi
839
+
784
840
  local has_errors="false"
785
841
  # Only check non-JSON lines for errors. Stream-json output is NDJSON
786
842
  # (one JSON object per line), so any line starting with '{' is Claude
@@ -0,0 +1,18 @@
1
+ # Generated by codeharness — do not edit manually
2
+ name: codeharness-collector
3
+
4
+ services:
5
+ otel-collector:
6
+ image: otel/opentelemetry-collector-contrib:0.96.0
7
+ labels:
8
+ com.codeharness.stack: collector
9
+ ports:
10
+ - "4317:4317"
11
+ - "4318:4318"
12
+ volumes:
13
+ - ./otel-collector-config.yaml:/etc/otelcol-contrib/config.yaml:ro
14
+ restart: unless-stopped
15
+
16
+ networks:
17
+ default:
18
+ name: codeharness-collector-net
@@ -0,0 +1,55 @@
1
+ # Generated by codeharness — do not edit manually
2
+ receivers:
3
+ otlp:
4
+ protocols:
5
+ grpc:
6
+ endpoint: 0.0.0.0:4317
7
+ http:
8
+ endpoint: 0.0.0.0:4318
9
+
10
+ processors:
11
+ resource/default:
12
+ attributes:
13
+ - key: service.name
14
+ value: "unknown"
15
+ action: insert
16
+
17
+ exporters:
18
+ otlphttp/logs:
19
+ endpoint: http://victoria-logs:9428/insert/opentelemetry
20
+ tls:
21
+ insecure: true
22
+
23
+ prometheusremotewrite:
24
+ endpoint: http://victoria-metrics:8428/api/v1/write
25
+ tls:
26
+ insecure: true
27
+
28
+ otlphttp/traces:
29
+ endpoint: http://victoria-traces:4318
30
+ tls:
31
+ insecure: true
32
+
33
+ service:
34
+ pipelines:
35
+ logs:
36
+ receivers:
37
+ - otlp
38
+ processors:
39
+ - resource/default
40
+ exporters:
41
+ - otlphttp/logs
42
+ metrics:
43
+ receivers:
44
+ - otlp
45
+ processors:
46
+ - resource/default
47
+ exporters:
48
+ - prometheusremotewrite
49
+ traces:
50
+ receivers:
51
+ - otlp
52
+ processors:
53
+ - resource/default
54
+ exporters:
55
+ - otlphttp/traces
@@ -0,0 +1,55 @@
1
+ # Generated by codeharness — do not edit manually
2
+ receivers:
3
+ otlp:
4
+ protocols:
5
+ grpc:
6
+ endpoint: 0.0.0.0:4317
7
+ http:
8
+ endpoint: 0.0.0.0:4318
9
+
10
+ processors:
11
+ resource/default:
12
+ attributes:
13
+ - key: service.name
14
+ value: "unknown"
15
+ action: insert
16
+
17
+ exporters:
18
+ otlphttp/logs:
19
+ endpoint: {{LOGS_URL}}/insert/opentelemetry
20
+ tls:
21
+ insecure: true
22
+
23
+ prometheusremotewrite:
24
+ endpoint: {{METRICS_URL}}/api/v1/write
25
+ tls:
26
+ insecure: true
27
+
28
+ otlphttp/traces:
29
+ endpoint: {{TRACES_URL}}
30
+ tls:
31
+ insecure: true
32
+
33
+ service:
34
+ pipelines:
35
+ logs:
36
+ receivers:
37
+ - otlp
38
+ processors:
39
+ - resource/default
40
+ exporters:
41
+ - otlphttp/logs
42
+ metrics:
43
+ receivers:
44
+ - otlp
45
+ processors:
46
+ - resource/default
47
+ exporters:
48
+ - prometheusremotewrite
49
+ traces:
50
+ receivers:
51
+ - otlp
52
+ processors:
53
+ - resource/default
54
+ exporters:
55
+ - otlphttp/traces
@@ -0,0 +1,57 @@
1
+ # Generated by codeharness — do not edit manually
2
+ name: codeharness-shared
3
+
4
+ services:
5
+ victoria-logs:
6
+ image: victoriametrics/victoria-logs:v1.15.0-victorialogs
7
+ labels:
8
+ com.codeharness.stack: shared
9
+ ports:
10
+ - "9428:9428"
11
+ volumes:
12
+ - victoria-logs-data:/vlogs
13
+ restart: unless-stopped
14
+
15
+ victoria-metrics:
16
+ image: victoriametrics/victoria-metrics:v1.106.1
17
+ labels:
18
+ com.codeharness.stack: shared
19
+ ports:
20
+ - "8428:8428"
21
+ volumes:
22
+ - victoria-metrics-data:/victoria-metrics-data
23
+ restart: unless-stopped
24
+
25
+ victoria-traces:
26
+ image: jaegertracing/all-in-one:1.56
27
+ labels:
28
+ com.codeharness.stack: shared
29
+ ports:
30
+ - "14268:14268"
31
+ - "16686:16686"
32
+ environment:
33
+ - COLLECTOR_OTLP_ENABLED=true
34
+ restart: unless-stopped
35
+
36
+ otel-collector:
37
+ image: otel/opentelemetry-collector-contrib:0.96.0
38
+ labels:
39
+ com.codeharness.stack: shared
40
+ ports:
41
+ - "4317:4317"
42
+ - "4318:4318"
43
+ volumes:
44
+ - ./otel-collector-config.yaml:/etc/otelcol-contrib/config.yaml:ro
45
+ depends_on:
46
+ - victoria-logs
47
+ - victoria-metrics
48
+ - victoria-traces
49
+ restart: unless-stopped
50
+
51
+ volumes:
52
+ victoria-logs-data:
53
+ victoria-metrics-data:
54
+
55
+ networks:
56
+ default:
57
+ name: codeharness-shared-net
@@ -0,0 +1,13 @@
1
+ # Base image — pinned version for reproducibility
2
+ FROM node:22-slim
3
+
4
+ # System utilities for verification
5
+ RUN apt-get update && apt-get install -y --no-install-recommends bash curl jq git && rm -rf /var/lib/apt/lists/*
6
+
7
+ # Install project binary (update this for your project)
8
+ RUN npm install -g placeholder && npm cache clean --force
9
+
10
+ # Run as non-root user
11
+ USER node
12
+
13
+ WORKDIR /workspace
@@ -0,0 +1,15 @@
1
+ # NOTE: Customize COPY paths for your monorepo layout. Each build stage should only copy its own sources.
2
+ {{BUILD_STAGES}}
3
+ # === Runtime stage ===
4
+ FROM debian:bookworm-slim
5
+
6
+ # System utilities for verification
7
+ RUN apt-get update && apt-get install -y --no-install-recommends curl jq && rm -rf /var/lib/apt/lists/*
8
+
9
+ # Install artifacts from build stages
10
+ {{COPY_DIRECTIVES}}
11
+
12
+ # Run as non-root user
13
+ USER nobody
14
+
15
+ WORKDIR /workspace
@@ -0,0 +1,16 @@
1
+ # Base image — pinned version for reproducibility
2
+ FROM node:22-slim
3
+
4
+ ARG TARBALL=package.tgz
5
+
6
+ # System utilities for verification
7
+ RUN apt-get update && apt-get install -y --no-install-recommends curl jq && rm -rf /var/lib/apt/lists/*
8
+
9
+ # Install project from tarball (black-box: no source code)
10
+ COPY ${TARBALL} /tmp/${TARBALL}
11
+ RUN npm install -g /tmp/${TARBALL} && rm /tmp/${TARBALL}
12
+
13
+ # Run as non-root user
14
+ USER node
15
+
16
+ WORKDIR /workspace
@@ -0,0 +1,14 @@
1
+ # Base image — pinned version for reproducibility
2
+ FROM python:3.12-slim
3
+
4
+ # System utilities for verification
5
+ RUN apt-get update && apt-get install -y --no-install-recommends curl jq && rm -rf /var/lib/apt/lists/*
6
+
7
+ # Install project from wheel or sdist
8
+ COPY dist/ /tmp/dist/
9
+ RUN pip install /tmp/dist/*.whl && rm -rf /tmp/dist/ && pip cache purge
10
+
11
+ # Run as non-root user
12
+ USER nobody
13
+
14
+ WORKDIR /workspace
@@ -0,0 +1,24 @@
1
+ # === Builder stage ===
2
+ FROM rust:1.82-slim AS builder
3
+
4
+ WORKDIR /build
5
+
6
+ # Copy project files
7
+ COPY . .
8
+
9
+ # Build release binary
10
+ RUN cargo build --release
11
+
12
+ # === Runtime stage ===
13
+ FROM debian:bookworm-slim
14
+
15
+ # System utilities for verification
16
+ RUN apt-get update && apt-get install -y --no-install-recommends curl jq && rm -rf /var/lib/apt/lists/*
17
+
18
+ # Install compiled binary from builder (update 'myapp' to your binary name)
19
+ COPY --from=builder /build/target/release/myapp /usr/local/bin/myapp
20
+
21
+ # Run as non-root user
22
+ USER nobody
23
+
24
+ WORKDIR /workspace
@@ -0,0 +1,36 @@
1
+ # {{PROJECT_NAME}}
2
+
3
+ ## Quick Start
4
+
5
+ ```bash
6
+ # Install
7
+ {{INSTALL_COMMAND}}
8
+
9
+ # Initialize the project
10
+ codeharness init
11
+
12
+ # Check project status
13
+ codeharness status
14
+ ```
15
+
16
+ ## Installation
17
+
18
+ ```bash
19
+ {{INSTALL_COMMAND}}
20
+ ```
21
+
22
+ ## Usage
23
+
24
+ After installation, initialize {{PROJECT_NAME}} in your project directory:
25
+
26
+ ```bash
27
+ codeharness init
28
+ ```
29
+
30
+ This sets up the harness with stack detection, observability, and documentation scaffolding.
31
+
32
+ ## CLI Reference
33
+
34
+ ```
35
+ {{CLI_HELP_OUTPUT}}
36
+ ```