job-forge 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79):
  1. package/.codex/config.toml +8 -0
  2. package/.cursor/mcp.json +21 -0
  3. package/.cursor/rules/main.mdc +519 -0
  4. package/.mcp.json +21 -0
  5. package/.opencode/agents/general-free.md +85 -0
  6. package/.opencode/agents/general-paid.md +39 -0
  7. package/.opencode/agents/glm-minimal.md +50 -0
  8. package/.opencode/skills/job-forge.md +185 -0
  9. package/AGENTS.md +514 -0
  10. package/CLAUDE.md +514 -0
  11. package/LICENSE +21 -0
  12. package/README.md +195 -0
  13. package/batch/README.md +60 -0
  14. package/batch/batch-prompt.md +399 -0
  15. package/batch/batch-runner.sh +673 -0
  16. package/bin/create-job-forge.mjs +375 -0
  17. package/bin/job-forge.mjs +120 -0
  18. package/bin/sync.mjs +141 -0
  19. package/config/profile.example.yml +67 -0
  20. package/cv-sync-check.mjs +128 -0
  21. package/dedup-tracker.mjs +201 -0
  22. package/docs/ARCHITECTURE.md +220 -0
  23. package/docs/CUSTOMIZATION.md +101 -0
  24. package/docs/MODEL-ROUTING.md +195 -0
  25. package/docs/README.md +54 -0
  26. package/docs/SETUP.md +186 -0
  27. package/docs/demo.gif +0 -0
  28. package/fonts/dm-sans-latin-ext.woff2 +0 -0
  29. package/fonts/dm-sans-latin.woff2 +0 -0
  30. package/fonts/space-grotesk-latin-ext.woff2 +0 -0
  31. package/fonts/space-grotesk-latin.woff2 +0 -0
  32. package/generate-pdf.mjs +168 -0
  33. package/iso/agents/general-free.md +90 -0
  34. package/iso/agents/general-paid.md +44 -0
  35. package/iso/agents/glm-minimal.md +55 -0
  36. package/iso/commands/job-forge.md +188 -0
  37. package/iso/config.json +7 -0
  38. package/iso/instructions.md +514 -0
  39. package/iso/mcp.json +15 -0
  40. package/merge-tracker.mjs +377 -0
  41. package/modes/README.md +30 -0
  42. package/modes/_shared-calibration.md +26 -0
  43. package/modes/_shared.md +272 -0
  44. package/modes/apply.md +257 -0
  45. package/modes/auto-pipeline.md +70 -0
  46. package/modes/batch.md +110 -0
  47. package/modes/compare.md +23 -0
  48. package/modes/contact.md +82 -0
  49. package/modes/deep.md +99 -0
  50. package/modes/followup.md +68 -0
  51. package/modes/negotiation.md +146 -0
  52. package/modes/offer.md +199 -0
  53. package/modes/pdf.md +121 -0
  54. package/modes/pipeline.md +83 -0
  55. package/modes/project.md +30 -0
  56. package/modes/rejection.md +92 -0
  57. package/modes/scan.md +185 -0
  58. package/modes/tracker.md +31 -0
  59. package/modes/training.md +27 -0
  60. package/normalize-statuses.mjs +152 -0
  61. package/opencode.json +28 -0
  62. package/package.json +78 -0
  63. package/scripts/add-tags.mjs +894 -0
  64. package/scripts/cursor-agent-loop.sh +211 -0
  65. package/scripts/cursor-agent-stream-format.py +134 -0
  66. package/scripts/next-num.mjs +33 -0
  67. package/scripts/release/check-source.mjs +37 -0
  68. package/scripts/render-report-header.mjs +78 -0
  69. package/scripts/session-report.mjs +129 -0
  70. package/scripts/slugify.mjs +27 -0
  71. package/scripts/today.mjs +20 -0
  72. package/scripts/token-usage-report.mjs +315 -0
  73. package/scripts/tracker-line.mjs +67 -0
  74. package/scripts/verify-greenhouse-urls.mjs +195 -0
  75. package/templates/cv-template.html +395 -0
  76. package/templates/portals.example.yml +3140 -0
  77. package/templates/states.yml +62 -0
  78. package/tracker-lib.mjs +257 -0
  79. package/verify-pipeline.mjs +267 -0
@@ -0,0 +1,673 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ # job-forge batch runner — standalone orchestrator for opencode run workers
5
+ # Reads batch-input.tsv, delegates each offer to an opencode run worker,
6
+ # tracks state in batch-state.tsv for resumability.
7
+
8
# --- Locations ---------------------------------------------------------------
# Every path is derived from the directory holding this script, so the runner
# behaves the same regardless of the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
BATCH_DIR="$SCRIPT_DIR"

# Files and directories that live next to the script.
INPUT_FILE="$BATCH_DIR/batch-input.tsv"
STATE_FILE="$BATCH_DIR/batch-state.tsv"
PROMPT_FILE="$BATCH_DIR/batch-prompt.md"
LOCK_FILE="$BATCH_DIR/batch-runner.pid"
LOGS_DIR="$BATCH_DIR/logs"
TRACKER_DIR="$BATCH_DIR/tracker-additions"

# Files and directories that live at the project root.
REPORTS_DIR="$PROJECT_DIR/reports"
APPLICATIONS_FILE="$PROJECT_DIR/data/applications.md"

# --- Defaults (each can be overridden from the command line) ------------------
PARALLEL=1
DRY_RUN=false
RETRY_FAILED=false
START_FROM=0
MAX_RETRIES=2
# Bundle size: each worker processes N offers sequentially in one opencode run.
# This amortizes the ~10K-token system prompt + tool schema prefix across N
# offers instead of paying it per offer. Set to 1 for legacy per-offer mode.
BUNDLE_SIZE=5
30
+
31
# Print the command-line help text to stdout.
# Takes no arguments; the text is emitted one line per printf argument.
usage() {
  printf '%s\n' \
    'job-forge batch runner — process job offers in batch via opencode run workers' \
    'Uses your default opencode model.' \
    '' \
    'Usage: batch-runner.sh [OPTIONS]' \
    '' \
    'Options:' \
    '--parallel N Number of parallel workers (default: 1)' \
    '--bundle-size N Offers per worker invocation (default: 5, use 1 for' \
    'legacy per-offer mode). Each worker processes N' \
    'offers sequentially, amortizing the system prompt.' \
    "--dry-run Show what would be processed, don't execute" \
    '--retry-failed Only retry offers marked as "failed" in state' \
    '--start-from N Start from offer ID N (skip earlier IDs)' \
    '--max-retries N Max retry attempts per offer (default: 2)' \
    '-h, --help Show this help' \
    '' \
    'Files:' \
    'batch-input.tsv Input offers (id, url, source, notes)' \
    'batch-state.tsv Processing state (auto-managed)' \
    'batch-prompt.md Prompt template for workers' \
    'logs/ Per-offer logs' \
    'tracker-additions/ Tracker lines for post-batch merge' \
    '' \
    'Examples:' \
    '# Dry run to see pending offers' \
    './batch-runner.sh --dry-run' \
    '' \
    '# Process all pending' \
    './batch-runner.sh' \
    '' \
    '# Retry only failed offers' \
    './batch-runner.sh --retry-failed' \
    '' \
    '# Process 2 at a time starting from ID 10' \
    './batch-runner.sh --parallel 2 --start-from 10'
}
70
+
71
#######################################
# Parse command-line options into the runner's global settings.
# Globals:   PARALLEL, BUNDLE_SIZE, START_FROM, MAX_RETRIES, DRY_RUN,
#            RETRY_FAILED (written)
# Arguments: the script's own "$@"
# Outputs:   help text / error messages
# Returns:   exits 0 after --help, exits 1 on an unknown option or a
#            missing / non-integer option value
#######################################
parse_args() {
  local opt value
  while [[ $# -gt 0 ]]; do
    opt="$1"
    case "$opt" in
      --parallel|--bundle-size|--start-from|--max-retries)
        # These values are later used inside (( )) arithmetic, so reject
        # anything that is not a plain non-negative integer up front instead
        # of crashing later with "unbound variable" or a syntax error.
        if [[ $# -lt 2 || ! "${2-}" =~ ^[0-9]+$ ]]; then
          echo "ERROR: $opt requires a non-negative integer value" >&2
          usage
          exit 1
        fi
        # Force base 10 so zero-padded input such as "08" is not read as octal.
        value=$((10#$2))
        case "$opt" in
          --parallel) PARALLEL="$value" ;;
          --bundle-size) BUNDLE_SIZE="$value" ;;
          --start-from) START_FROM="$value" ;;
          --max-retries) MAX_RETRIES="$value" ;;
        esac
        shift 2
        ;;
      --dry-run) DRY_RUN=true; shift ;;
      --retry-failed) RETRY_FAILED=true; shift ;;
      -h|--help) usage; exit 0 ;;
      *) echo "Unknown option: $opt"; usage; exit 1 ;;
    esac
  done
}

parse_args "$@"
84
+
85
#######################################
# Take the single-instance lock by writing our PID into $LOCK_FILE.
# Globals:   LOCK_FILE (read; file is created)
# Arguments: none
# Outputs:   diagnostics on stdout
# Returns:   0 once the lock is held; exits 1 if a live runner holds it or
#            the lock file cannot be created
#######################################
acquire_lock() {
  local old_pid attempt
  for attempt in 1 2; do
    # With noclobber the redirection fails when the file already exists, so
    # creating the lock is a single atomic step rather than test-then-write.
    if ( set -o noclobber; echo "$$" > "$LOCK_FILE" ) 2>/dev/null; then
      return 0
    fi

    old_pid=$(cat "$LOCK_FILE" 2>/dev/null || true)
    if [[ "$old_pid" =~ ^[0-9]+$ ]] && kill -0 "$old_pid" 2>/dev/null; then
      echo "ERROR: Another batch-runner is already running (PID $old_pid)"
      echo "If this is stale, remove $LOCK_FILE"
      # The lock belongs to the other process: make sure an EXIT cleanup
      # handler installed by this script does not delete it on our way out.
      trap - EXIT
      exit 1
    fi

    echo "WARN: Stale lock file found (PID ${old_pid:-unknown} not running). Removing."
    rm -f "$LOCK_FILE"
  done

  echo "ERROR: Could not create lock file $LOCK_FILE"
  trap - EXIT
  exit 1
}
101
+
102
#######################################
# Remove the lock file, but only if this process owns it.
# This runs from the EXIT trap on every exit path, including --dry-run (which
# never takes the lock) and the "another runner is active" abort, so it must
# not delete a lock that was written by a different, still-running runner.
# Globals:   LOCK_FILE (read; file may be removed)
# Arguments: none
# Returns:   0
#######################################
release_lock() {
  [[ -f "$LOCK_FILE" ]] || return 0
  local owner=""
  owner=$(cat "$LOCK_FILE" 2>/dev/null) || owner=""
  if [[ "$owner" == "$$" ]]; then
    rm -f "$LOCK_FILE"
  fi
  return 0
}
105
+
106
+ trap release_lock EXIT
107
+
108
# Verify that everything the batch needs is present, then create the output
# directories. Exits 1 with an explanatory message on the first missing piece:
# the input TSV, the worker prompt, or the opencode CLI.
check_prerequisites() {
  [[ -f "$INPUT_FILE" ]] || {
    echo "ERROR: $INPUT_FILE not found. Add offers first."
    exit 1
  }

  [[ -f "$PROMPT_FILE" ]] || {
    echo "ERROR: $PROMPT_FILE not found."
    exit 1
  }

  command -v opencode &>/dev/null || {
    echo "ERROR: 'opencode' CLI not found in PATH."
    exit 1
  }

  mkdir -p "$LOGS_DIR" "$TRACKER_DIR" "$REPORTS_DIR"
}
127
+
128
# Create the state TSV with its header row when it does not exist yet.
# An existing state file is left untouched.
init_state() {
  [[ -f "$STATE_FILE" ]] && return 0

  local -a columns=(id url status started_at completed_at report_num score error retries)
  local IFS=$'\t'
  # "${columns[*]}" joins the names with the first IFS character (a tab).
  printf '%s\n' "${columns[*]}" > "$STATE_FILE"
}
134
+
135
# Print the status column (3rd field) recorded for offer $1 in the state file,
# or "none" when the state file is missing or has no row for that id.
get_status() {
  local id="$1"
  local recorded=""

  if [[ -f "$STATE_FILE" ]]; then
    recorded=$(awk -F'\t' -v id="$id" '$1 == id { print $3 }' "$STATE_FILE")
  fi

  echo "${recorded:-none}"
}
146
+
147
#######################################
# Print the retry count recorded for an offer.
# Callers feed the result straight into arithmetic, so the output is always a
# plain base-10 integer: a missing row, a placeholder such as "-", or any other
# non-numeric cell yields 0, and only the first matching row is considered.
# Globals:   STATE_FILE (read)
# Arguments: $1 - offer id
# Outputs:   the retry count on stdout
#######################################
get_retries() {
  local id="$1"
  local retries=""

  if [[ -f "$STATE_FILE" ]]; then
    retries=$(awk -F'\t' -v id="$id" '$1 == id { print $9; exit }' "$STATE_FILE")
  fi

  if [[ "$retries" =~ ^[0-9]+$ ]]; then
    # 10# keeps zero-padded values like "08" from being parsed as octal.
    echo "$((10#$retries))"
  else
    echo "0"
  fi
}
158
+
159
#######################################
# Compute the next free report number as a zero-padded 3-digit string.
# The highest number in use is taken from two places: the numeric prefix of
# reports/*.md file names ("012-acme.md" -> 12) and the report_num column of
# the state file. Anything that is not purely numeric (README.md, "-", the
# header cell) is ignored rather than aborting the arithmetic.
# Globals:   REPORTS_DIR, STATE_FILE (read)
# Arguments: none
# Outputs:   next report number on stdout, e.g. "013"
#######################################
next_report_num() {
  local max_num=0
  local f name prefix rnum n

  if [[ -d "$REPORTS_DIR" ]]; then
    for f in "$REPORTS_DIR"/*.md; do
      [[ -f "$f" ]] || continue # also covers the unmatched-glob case
      name=${f##*/}
      prefix=${name%%-*}
      [[ "$prefix" =~ ^[0-9]+$ ]] || continue
      n=$((10#$prefix)) # base 10: leading zeros must not mean octal
      if (( n > max_num )); then
        max_num=$n
      fi
    done
  fi

  # Also check the state file for report numbers already handed out.
  if [[ -f "$STATE_FILE" ]]; then
    while IFS=$'\t' read -r _ _ _ _ _ rnum _ _ _ || [[ -n "$rnum" ]]; do
      [[ "$rnum" =~ ^[0-9]+$ ]] || continue
      n=$((10#$rnum))
      if (( n > max_num )); then
        max_num=$n
      fi
    done < "$STATE_FILE"
  fi

  printf '%03d' $((max_num + 1))
}
186
+
187
#######################################
# Insert or replace the state row for one offer.
# Globals:   STATE_FILE (read, rewritten)
# Arguments: $1 id, $2 url, $3 status, $4 started_at, $5 completed_at,
#            $6 report_num, $7 score, $8 error, $9 retries
# Returns:   0 on success, non-zero if the state file could not be rewritten
#
# Notes on robustness:
#   * Bundles run as background jobs when --parallel > 1, so several processes
#     may call this at once. The rewrite is serialized with a mkdir-based lock
#     and uses a per-process temp file, so concurrent updates are not lost.
#   * Tab is an IFS *whitespace* character: `read` collapses consecutive tabs,
#     so an empty cell silently shifts every later column. Every field is
#     therefore flattened to one line without tabs, and empty values are
#     stored as "-".
#######################################
update_state() {
  local id="$1" url="$2" status="$3" started="$4" completed="$5"
  local report_num="$6" score="$7" error="$8" retries="$9"

  if [[ ! -f "$STATE_FILE" ]]; then
    init_state
  fi

  # Sanitize every field in place (indirect read, printf -v write).
  local name value
  for name in id url status started completed report_num score error retries; do
    value=${!name}
    value=${value//[$'\t\r\n']/ }
    [[ -n "$value" ]] || value="-"
    printf -v "$name" '%s' "$value"
  done

  # Serialize writers. mkdir is atomic; after ~10s the lock is assumed to be
  # left over from a killed process and we proceed anyway.
  local lock_dir="${STATE_FILE}.lock"
  local waited=0
  until mkdir "$lock_dir" 2>/dev/null; do
    waited=$((waited + 1))
    if (( waited > 100 )); then
      echo "WARN: state lock $lock_dir looks stale; continuing without it" >&2
      break
    fi
    sleep 0.1
  done

  # BASHPID (unlike $$) differs between background subshells.
  local tmp="${STATE_FILE}.tmp.${BASHPID:-$$}"
  local found=false
  local rc=0
  local sid surl sstatus sstarted scompleted sreport sscore serror sretries

  {
    # Header first, then every existing row with ours swapped in.
    head -1 "$STATE_FILE"
    while IFS=$'\t' read -r sid surl sstatus sstarted scompleted sreport sscore serror sretries; do
      [[ "$sid" == "id" ]] && continue # skip header
      if [[ "$sid" == "$id" ]]; then
        # Emit the new row once, even if the file holds duplicate rows.
        if [[ "$found" == "false" ]]; then
          printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
            "$id" "$url" "$status" "$started" "$completed" "$report_num" "$score" "$error" "$retries"
          found=true
        fi
      else
        printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
          "$sid" "$surl" "$sstatus" "$sstarted" "$scompleted" "$sreport" "$sscore" "$serror" "$sretries"
      fi
    done < "$STATE_FILE"

    if [[ "$found" == "false" ]]; then
      printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
        "$id" "$url" "$status" "$started" "$completed" "$report_num" "$score" "$error" "$retries"
    fi
  } > "$tmp" && mv -f "$tmp" "$STATE_FILE" || rc=$?

  rm -f "$tmp" # no-op after a successful mv
  rmdir "$lock_dir" 2>/dev/null || true
  return "$rc"
}
221
+
222
#######################################
# Process a single offer with one `opencode run` worker.
# Globals:   LOGS_DIR, PROMPT_FILE (read); state file via update_state
# Arguments: $1 - offer id
#            $2 - offer URL
#            $3 - source (accepted for interface compatibility; not used here)
#            $4 - notes  (accepted for interface compatibility; not used here)
# Outputs:   progress lines on stdout; worker output in
#            $LOGS_DIR/<report_num>-<id>.log
# Returns:   0 (worker failures are recorded in the state file)
#######################################
process_offer() {
  # shellcheck disable=SC2034 # source/notes are part of the call signature
  local id="$1" url="$2" source="$3" notes="$4"

  local report_num today started_at retries
  report_num=$(next_report_num)
  today=$(date +%Y-%m-%d)
  started_at=$(date -u +%Y-%m-%dT%H:%M:%SZ)
  retries=$(get_retries "$id")
  local jd_file="/tmp/batch-jd-${id}.txt"

  echo "--- Processing offer #$id: $url (report $report_num, attempt $((retries + 1)))"

  # Mark as in-progress
  update_state "$id" "$url" "processing" "$started_at" "-" "$report_num" "-" "-" "$retries"

  # Per-job values travel in the user message (see the note further down).
  local prompt
  prompt="Process this job offer. Execute the full pipeline: A-F evaluation + report .md + PDF + tracker line."
  prompt="$prompt URL: $url"
  prompt="$prompt JD file: $jd_file"
  prompt="$prompt Report number: $report_num"
  prompt="$prompt Date: $today"
  prompt="$prompt Batch ID: $id"

  local log_file="$LOGS_DIR/${report_num}-${id}.log"

  # Launch opencode run worker (uses default model).
  # batch-prompt.md is passed unmodified so every worker shares a
  # byte-identical system prompt; substituting per-job values into it would
  # bust the opencode prompt cache on every run. The worker resolves the
  # {{...}} placeholders itself from the user message.
  local exit_code=0
  opencode run \
    --dangerously-skip-permissions \
    --file "$PROMPT_FILE" \
    "$prompt" \
    > "$log_file" 2>&1 || exit_code=$?

  local completed_at
  completed_at=$(date -u +%Y-%m-%dT%H:%M:%SZ)

  if [[ $exit_code -eq 0 ]]; then
    # Try to extract the score from worker output. Uses POSIX ERE (-E) rather
    # than PCRE (-P), which BSD/macOS grep does not support.
    local score="-"
    local score_match
    score_match=$(grep -oE '"score":[[:space:]]*[0-9.]+' "$log_file" 2>/dev/null \
      | head -1 \
      | grep -oE '[0-9.]+$' || true)
    if [[ -n "$score_match" ]]; then
      score="$score_match"
    fi

    update_state "$id" "$url" "completed" "$started_at" "$completed_at" "$report_num" "$score" "-" "$retries"
    echo " ✅ Completed (score: $score, report: $report_num)"
  else
    retries=$((retries + 1))
    # Flatten the log tail to one tab-free line so it fits a single TSV cell.
    local error_msg
    error_msg=$(tail -5 "$log_file" 2>/dev/null | tr '\t\r\n' '   ' | cut -c1-200 || true)
    # An empty or blank tail must not be stored as an empty cell: fall back to
    # a message that at least carries the exit code.
    if [[ -z "${error_msg//[[:space:]]/}" ]]; then
      error_msg="Unknown error (exit code $exit_code)"
    fi
    update_state "$id" "$url" "failed" "$started_at" "$completed_at" "$report_num" "-" "$error_msg" "$retries"
    echo " ❌ Failed (attempt $retries, exit code $exit_code)"
  fi
}
287
+
288
#######################################
# Process a BUNDLE of offers in one `opencode run`.
# Amortizes the ~10K-token system prompt across N offers instead of paying it
# per offer. A bundle of exactly one offer is delegated to process_offer.
# Globals:   INPUT_FILE, LOGS_DIR, PROMPT_FILE (read); state via update_state
# Arguments: the offer ids of the bundle, one per argument
# Outputs:   progress lines on stdout; worker output in
#            $LOGS_DIR/bundle-<ids joined by _>.log
# Returns:   0 (per-offer failures are recorded in the state file)
#######################################
process_bundle() {
  local -a bundle_ids=("$@")
  local count=${#bundle_ids[@]}
  local id row _id url source notes

  if (( count == 0 )); then return 0; fi
  if (( count == 1 )); then
    # Single-offer bundle is just legacy behavior — use the per-offer path.
    id="${bundle_ids[0]}"
    row=$(awk -F'\t' -v id="$id" '$1 == id { print $0; exit }' "$INPUT_FILE")
    IFS=$'\t' read -r _id url source notes <<< "$row"
    process_offer "$id" "$url" "$source" "$notes"
    return
  fi

  local today started_at
  today=$(date +%Y-%m-%d)
  started_at=$(date -u +%Y-%m-%dT%H:%M:%SZ)

  # Build the per-offer spec array. Report numbers are consecutive, starting
  # at the next free one.
  local spec_json="["
  local first=true
  local -A in_bundle=()
  local next_num n report_num jd_file retries url_json
  next_num=$(next_report_num)
  n=$((10#$next_num))

  for id in "${bundle_ids[@]}"; do
    in_bundle[$id]=1
    row=$(awk -F'\t' -v id="$id" '$1 == id { print $0; exit }' "$INPUT_FILE")
    IFS=$'\t' read -r _id url source notes <<< "$row"
    printf -v report_num '%03d' "$n"
    n=$((n + 1))
    jd_file="/tmp/batch-jd-${id}.txt"
    retries=$(get_retries "$id")

    update_state "$id" "$url" "processing" "$started_at" "-" "$report_num" "-" "-" "$retries"

    # Escape backslashes and double quotes so the URL cannot break the JSON.
    url_json=${url//\\/\\\\}
    url_json=${url_json//\"/\\\"}

    if [[ "$first" == "true" ]]; then first=false; else spec_json+=","; fi
    spec_json+=$(printf '{"id":"%s","url":"%s","jd_file":"%s","report_num":"%s","date":"%s"}' \
      "$id" "$url_json" "$jd_file" "$report_num" "$today")
  done
  spec_json+="]"

  local bundle_tag
  bundle_tag="bundle-$(IFS='_'; echo "${bundle_ids[*]}")"
  local log_file="$LOGS_DIR/${bundle_tag}.log"
  echo "--- Processing bundle of $count offers: ${bundle_ids[*]}"

  local prompt
  prompt=$(cat <<EOF
Process these $count offers sequentially using the full pipeline in batch-prompt.md
(Step 1 JD retrieval → Steps 2-6 evaluate/report/PDF/tracker line). **Do each
offer fully before starting the next.** Continue to the next offer even if one
fails. After each offer, emit ONE single-line JSON on its own line with this
exact shape (no extra prose, no code fences around it):

{"id":"<id>","status":"completed|failed","report_num":"<num>","company":"...","role":"...","score":<num-or-null>,"pdf":"<path-or-null>","report":"<path-or-null>","error":"<msg-or-null>"}

The orchestrator parses these lines to update state — anything between status
JSONs is fine but do NOT omit or reorder the required keys.

Offers:
$spec_json
EOF
)

  local exit_code=0
  opencode run \
    --dangerously-skip-permissions \
    --file "$PROMPT_FILE" \
    "$prompt" \
    > "$log_file" 2>&1 || exit_code=$?

  local completed_at
  completed_at=$(date -u +%Y-%m-%dT%H:%M:%SZ)

  # Parse per-offer status JSONs from the log, one per line. A line only
  # counts as a status when it carries a "status" key AND its id belongs to
  # this bundle; that keeps an echoed copy of the prompt (the template line,
  # the spec array) or a stray id from being written into the state file.
  # The first status seen for an id wins.
  local re_id='"id":[[:space:]]*"([^"]+)"'
  local re_status='"status":[[:space:]]*"'
  local re_completed='"status":[[:space:]]*"completed"'
  local re_score='"score":[[:space:]]*([0-9.]+)'
  local re_report='"report_num":[[:space:]]*"([^"]+)"'
  local re_error='"error":[[:space:]]*"([^"]+)"'
  local -A seen=()
  local json_line status score error_msg

  while IFS= read -r json_line || [[ -n "$json_line" ]]; do
    [[ "$json_line" =~ $re_status ]] || continue
    [[ "$json_line" =~ $re_id ]] || continue
    id="${BASH_REMATCH[1]}"
    [[ -n "${in_bundle[$id]:-}" ]] || continue
    [[ -z "${seen[$id]:-}" ]] || continue
    seen[$id]=1

    status="failed"
    if [[ "$json_line" =~ $re_completed ]]; then status="completed"; fi
    score="-"
    if [[ "$json_line" =~ $re_score ]]; then score="${BASH_REMATCH[1]}"; fi
    report_num="-"
    if [[ "$json_line" =~ $re_report ]]; then report_num="${BASH_REMATCH[1]}"; fi
    error_msg="-"
    if [[ "$json_line" =~ $re_error ]]; then error_msg="${BASH_REMATCH[1]}"; fi
    error_msg=${error_msg//$'\t'/ } # must fit one TSV cell

    url=$(awk -F'\t' -v id="$id" '$1 == id { print $2; exit }' "$INPUT_FILE")
    retries=$(get_retries "$id")
    if [[ "$status" == "failed" ]]; then retries=$((retries + 1)); fi
    update_state "$id" "$url" "$status" "$started_at" "$completed_at" "$report_num" "$score" "$error_msg" "$retries"
    echo " $([ "$status" == "completed" ] && echo ✅ || echo ❌) #${id} (status=$status, score=$score, report=$report_num)"
  done < "$log_file"

  # Any offer in the bundle not seen in the output → mark failed
  for id in "${bundle_ids[@]}"; do
    if [[ -z "${seen[$id]:-}" ]]; then
      url=$(awk -F'\t' -v id="$id" '$1 == id { print $2; exit }' "$INPUT_FILE")
      retries=$(get_retries "$id")
      retries=$((retries + 1))
      update_state "$id" "$url" "failed" "$started_at" "$completed_at" "-" "-" \
        "Worker finished without emitting status JSON for this offer" "$retries"
      echo " ❌ #${id} (no status emitted — worker may have stopped early)"
    fi
  done

  if [[ $exit_code -ne 0 ]]; then
    echo " ⚠️ Worker exit code $exit_code — see $log_file"
  fi
}
415
+
416
# Fold the per-offer tracker additions into applications.md by running
# merge-tracker.mjs, then run verify-pipeline.mjs. A failing verification only
# prints a warning; it does not fail the batch.
merge_tracker() {
  printf '\n%s\n' "=== Merging tracker additions ==="
  node "$PROJECT_DIR/merge-tracker.mjs"

  printf '\n%s\n' "=== Verifying pipeline integrity ==="
  if ! node "$PROJECT_DIR/verify-pipeline.mjs"; then
    echo "⚠️ Verification found issues (see above)"
  fi
}
425
+
426
# Log per-session token usage and warn on expensive sessions.
# (Opencode has no SessionEnd hook; this is the closest substitute for batch
# runs.) $1 is the look-back window in minutes (default 120); it should be
# generous enough to cover the whole batch. Does nothing beyond printing the
# heading when npx is not installed.
cost_report() {
  local window_min=${1:-120}

  printf '\n=== Token usage (last %s min, warn at $1.00) ===\n' "$window_min"

  command -v npx &>/dev/null || return 0

  npx --no-install job-forge session-report --since-minutes "$window_min" --log --warn-at 1.00 \
    || echo "(session-report unavailable; run 'job-forge session-report' manually)"
}
439
+
440
#######################################
# Print totals per status and the average score of completed offers.
# Only well-formed numeric scores take part in the average, so a placeholder
# or malformed cell cannot break the arithmetic. The average is computed with
# awk (already required elsewhere in this script) and rounded to one decimal.
# Globals:   STATE_FILE (read)
# Arguments: none
# Outputs:   summary on stdout
#######################################
print_summary() {
  echo ""
  echo "=== Batch Summary ==="

  if [[ ! -f "$STATE_FILE" ]]; then
    echo "No state file found."
    return
  fi

  local total=0 completed=0 failed=0 pending=0
  local -a scores=()
  local sid sstatus sscore

  while IFS=$'\t' read -r sid _ sstatus _ _ _ sscore _ _ || [[ -n "$sid" ]]; do
    [[ "$sid" == "id" ]] && continue # header
    [[ -n "$sid" ]] || continue      # blank line
    total=$((total + 1))
    case "$sstatus" in
      completed)
        completed=$((completed + 1))
        if [[ "$sscore" =~ ^[0-9]*[.]?[0-9]+$ ]]; then
          scores+=("$sscore")
        fi
        ;;
      failed)
        failed=$((failed + 1))
        ;;
      *)
        # Anything else (e.g. "processing") counts as not finished.
        pending=$((pending + 1))
        ;;
    esac
  done < "$STATE_FILE"

  echo "Total: $total | Completed: $completed | Failed: $failed | Pending: $pending"

  local score_count=${#scores[@]}
  if (( score_count > 0 )); then
    local avg
    avg=$(printf '%s\n' "${scores[@]}" \
      | awk '{ sum += $1 } END { printf "%.1f", sum / NR }')
    echo "Average score: $avg/5 ($score_count scored)"
  fi
}
476
+
477
#######################################
# Entry point: select the offers to run, group them into bundles, run the
# bundles (sequentially or in parallel), then merge tracker lines and print
# the summary and token-usage report.
# Globals:   INPUT_FILE, DRY_RUN, RETRY_FAILED, START_FROM, MAX_RETRIES,
#            PARALLEL, BUNDLE_SIZE (read)
# Arguments: the script's "$@" (options were already parsed at top level)
# Returns:   exits 0 when there is nothing to do or after a dry run
#######################################
main() {
  check_prerequisites

  if [[ "$DRY_RUN" == "false" ]]; then
    acquire_lock
  fi

  init_state

  # Count input offers (skip header, ignore blank lines)
  local total_input
  total_input=$(tail -n +2 "$INPUT_FILE" | grep -c '[^[:space:]]' 2>/dev/null || true)
  total_input="${total_input:-0}"

  if (( total_input == 0 )); then
    echo "No offers in $INPUT_FILE. Add offers first."
    exit 0
  fi

  echo "=== job-forge batch runner ==="
  echo "Parallel: $PARALLEL | Max retries: $MAX_RETRIES"
  echo "Input: $total_input offers"
  echo ""

  # --start-from as a base-10 integer; a non-numeric value disables the filter.
  local start_from=0
  if [[ "$START_FROM" =~ ^[0-9]+$ ]]; then
    start_from=$((10#$START_FROM))
  fi

  # Build list of offers to process
  local -a pending_ids=()
  local -a pending_urls=()
  local -a pending_sources=()
  local id url source notes status retries

  # `|| [[ -n ... ]]` keeps the final row when the file lacks a trailing newline.
  while IFS=$'\t' read -r id url source notes || [[ -n "${id:-}" ]]; do
    # Tolerate CRLF line endings.
    id=${id%$'\r'}
    url=${url%$'\r'}
    source=${source%$'\r'}

    [[ "$id" == "id" ]] && continue # skip header
    [[ -z "$id" || -z "$url" ]] && continue

    # Skip if before start-from. Only numeric ids can be ordered; 10# keeps
    # zero-padded ids such as "08" from being rejected as invalid octal.
    if [[ "$id" =~ ^[0-9]+$ ]] && (( 10#$id < start_from )); then
      continue
    fi

    status=$(get_status "$id")

    if [[ "$RETRY_FAILED" == "true" ]]; then
      # Only process failed offers
      if [[ "$status" != "failed" ]]; then
        continue
      fi
      # Check retry limit
      retries=$(get_retries "$id")
      if (( retries >= MAX_RETRIES )); then
        echo "SKIP #$id: max retries ($MAX_RETRIES) reached"
        continue
      fi
    else
      # Skip completed offers
      if [[ "$status" == "completed" ]]; then
        continue
      fi
      # Skip failed offers that hit retry limit (unless --retry-failed)
      if [[ "$status" == "failed" ]]; then
        retries=$(get_retries "$id")
        if (( retries >= MAX_RETRIES )); then
          echo "SKIP #$id: failed and max retries reached (use --retry-failed to force)"
          continue
        fi
      fi
    fi

    pending_ids+=("$id")
    pending_urls+=("$url")
    pending_sources+=("$source")
  done < "$INPUT_FILE"

  local pending_count=${#pending_ids[@]}

  if (( pending_count == 0 )); then
    echo "No offers to process."
    print_summary
    exit 0
  fi

  echo "Pending: $pending_count offers"
  echo ""

  local i

  # Dry run: just list
  if [[ "$DRY_RUN" == "true" ]]; then
    echo "=== DRY RUN (no processing) ==="
    for i in "${!pending_ids[@]}"; do
      status=$(get_status "${pending_ids[$i]}")
      echo " #${pending_ids[$i]}: ${pending_urls[$i]} [${pending_sources[$i]}] (status: $status)"
    done
    echo ""
    echo "Would process $pending_count offers"
    exit 0
  fi

  # Partition pending into bundles of BUNDLE_SIZE. Each bundle is stored as a
  # space-separated string of ids.
  local -a bundles=()
  local b_current=""
  local b_count=0
  for id in "${pending_ids[@]}"; do
    if [[ -z "$b_current" ]]; then
      b_current="$id"
    else
      b_current+=" $id"
    fi
    b_count=$((b_count + 1))
    if (( b_count >= BUNDLE_SIZE )); then
      bundles+=("$b_current")
      b_current=""
      b_count=0
    fi
  done
  if [[ -n "$b_current" ]]; then bundles+=("$b_current"); fi
  local bundle_count=${#bundles[@]}
  echo "Partitioned into $bundle_count bundle(s) of up to $BUNDLE_SIZE offer(s) each"

  local b pid
  local -a ids_in_bundle=()

  # Process bundles
  if (( PARALLEL <= 1 )); then
    # Sequential processing (one bundle at a time)
    for b in "${bundles[@]}"; do
      read -r -a ids_in_bundle <<< "$b"
      process_bundle "${ids_in_bundle[@]}"
    done
  else
    # Prime the opencode prompt cache with the first bundle alone so its
    # ~10K-token system prompt is written to cache, then remaining parallel
    # bundles read from cache instead of each writing their own copy.
    local start_idx=0
    if (( bundle_count > 1 )); then
      echo "Priming prompt cache with first bundle: ${bundles[0]}"
      read -r -a ids_in_bundle <<< "${bundles[0]}"
      process_bundle "${ids_in_bundle[@]}"
      start_idx=1
    fi

    # Parallel processing: never more than PARALLEL background workers.
    local -a pids=()
    local -a still_running=()

    for i in "${!bundles[@]}"; do
      (( i >= start_idx )) || continue

      # At the limit: reap finished workers, polling once per second.
      while (( ${#pids[@]} >= PARALLEL )); do
        still_running=()
        for pid in "${pids[@]}"; do
          if kill -0 "$pid" 2>/dev/null; then
            still_running+=("$pid")
          else
            wait "$pid" 2>/dev/null || true
          fi
        done
        # Guarded copy: expanding an empty array trips `set -u` on older Bash.
        if (( ${#still_running[@]} > 0 )); then
          pids=("${still_running[@]}")
        else
          pids=()
        fi
        if (( ${#pids[@]} >= PARALLEL )); then
          sleep 1
        fi
      done

      # Launch a bundle worker in background
      read -r -a ids_in_bundle <<< "${bundles[$i]}"
      process_bundle "${ids_in_bundle[@]}" &
      pids+=("$!")
    done

    # Wait for remaining workers
    if (( ${#pids[@]} > 0 )); then
      for pid in "${pids[@]}"; do
        wait "$pid" 2>/dev/null || true
      done
    fi
  fi

  # Merge tracker additions
  merge_tracker

  # Print summary
  print_summary

  # Auto-log token usage for this batch to data/token-usage.tsv and
  # flag any session that exceeded the $1 budget. No-op if opencode DB
  # isn't available (e.g. batch ran on a CI runner without opencode).
  cost_report 180
}
672
+
673
+ main "$@"