job-forge 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.codex/config.toml +8 -0
- package/.cursor/mcp.json +21 -0
- package/.cursor/rules/main.mdc +519 -0
- package/.mcp.json +21 -0
- package/.opencode/agents/general-free.md +85 -0
- package/.opencode/agents/general-paid.md +39 -0
- package/.opencode/agents/glm-minimal.md +50 -0
- package/.opencode/skills/job-forge.md +185 -0
- package/AGENTS.md +514 -0
- package/CLAUDE.md +514 -0
- package/LICENSE +21 -0
- package/README.md +195 -0
- package/batch/README.md +60 -0
- package/batch/batch-prompt.md +399 -0
- package/batch/batch-runner.sh +673 -0
- package/bin/create-job-forge.mjs +375 -0
- package/bin/job-forge.mjs +120 -0
- package/bin/sync.mjs +141 -0
- package/config/profile.example.yml +67 -0
- package/cv-sync-check.mjs +128 -0
- package/dedup-tracker.mjs +201 -0
- package/docs/ARCHITECTURE.md +220 -0
- package/docs/CUSTOMIZATION.md +101 -0
- package/docs/MODEL-ROUTING.md +195 -0
- package/docs/README.md +54 -0
- package/docs/SETUP.md +186 -0
- package/docs/demo.gif +0 -0
- package/fonts/dm-sans-latin-ext.woff2 +0 -0
- package/fonts/dm-sans-latin.woff2 +0 -0
- package/fonts/space-grotesk-latin-ext.woff2 +0 -0
- package/fonts/space-grotesk-latin.woff2 +0 -0
- package/generate-pdf.mjs +168 -0
- package/iso/agents/general-free.md +90 -0
- package/iso/agents/general-paid.md +44 -0
- package/iso/agents/glm-minimal.md +55 -0
- package/iso/commands/job-forge.md +188 -0
- package/iso/config.json +7 -0
- package/iso/instructions.md +514 -0
- package/iso/mcp.json +15 -0
- package/merge-tracker.mjs +377 -0
- package/modes/README.md +30 -0
- package/modes/_shared-calibration.md +26 -0
- package/modes/_shared.md +272 -0
- package/modes/apply.md +257 -0
- package/modes/auto-pipeline.md +70 -0
- package/modes/batch.md +110 -0
- package/modes/compare.md +23 -0
- package/modes/contact.md +82 -0
- package/modes/deep.md +99 -0
- package/modes/followup.md +68 -0
- package/modes/negotiation.md +146 -0
- package/modes/offer.md +199 -0
- package/modes/pdf.md +121 -0
- package/modes/pipeline.md +83 -0
- package/modes/project.md +30 -0
- package/modes/rejection.md +92 -0
- package/modes/scan.md +185 -0
- package/modes/tracker.md +31 -0
- package/modes/training.md +27 -0
- package/normalize-statuses.mjs +152 -0
- package/opencode.json +28 -0
- package/package.json +78 -0
- package/scripts/add-tags.mjs +894 -0
- package/scripts/cursor-agent-loop.sh +211 -0
- package/scripts/cursor-agent-stream-format.py +134 -0
- package/scripts/next-num.mjs +33 -0
- package/scripts/release/check-source.mjs +37 -0
- package/scripts/render-report-header.mjs +78 -0
- package/scripts/session-report.mjs +129 -0
- package/scripts/slugify.mjs +27 -0
- package/scripts/today.mjs +20 -0
- package/scripts/token-usage-report.mjs +315 -0
- package/scripts/tracker-line.mjs +67 -0
- package/scripts/verify-greenhouse-urls.mjs +195 -0
- package/templates/cv-template.html +395 -0
- package/templates/portals.example.yml +3140 -0
- package/templates/states.yml +62 -0
- package/tracker-lib.mjs +257 -0
- package/verify-pipeline.mjs +267 -0
|
@@ -0,0 +1,673 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
set -euo pipefail
|
|
3
|
+
|
|
4
|
+
# job-forge batch runner — standalone orchestrator for opencode run workers
|
|
5
|
+
# Reads batch-input.tsv, delegates each offer to an opencode run worker,
|
|
6
|
+
# tracks state in batch-state.tsv for resumability.
|
|
7
|
+
|
|
8
|
+
# --- Locations ---------------------------------------------------------------
# Every path is anchored on the directory that holds this script, so the
# runner behaves the same regardless of the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
BATCH_DIR="${SCRIPT_DIR}"

# Batch working files and directories (all live next to this script).
INPUT_FILE="${BATCH_DIR}/batch-input.tsv"
STATE_FILE="${BATCH_DIR}/batch-state.tsv"
PROMPT_FILE="${BATCH_DIR}/batch-prompt.md"
LOGS_DIR="${BATCH_DIR}/logs"
TRACKER_DIR="${BATCH_DIR}/tracker-additions"
LOCK_FILE="${BATCH_DIR}/batch-runner.pid"

# Project-level outputs (one directory above the batch directory).
REPORTS_DIR="${PROJECT_DIR}/reports"
APPLICATIONS_FILE="${PROJECT_DIR}/data/applications.md"

# --- Option defaults (overridable from the command line) ---------------------
PARALLEL=1
DRY_RUN=false
RETRY_FAILED=false
START_FROM=0
MAX_RETRIES=2
# Number of offers handed to a single opencode run. Bundling lets N offers
# share one copy of the ~10K-token system prompt + tool schema prefix rather
# than paying for it once per offer. A value of 1 gives the legacy
# one-offer-per-worker behaviour.
BUNDLE_SIZE=5
|
|
30
|
+
|
|
31
|
+
# Print the command-line help to stdout.
# Takes no arguments and reads no globals; the text is a literal heredoc
# (quoted delimiter), so nothing inside it is expanded.
usage() {
  cat <<'HELP_TEXT'
job-forge batch runner — process job offers in batch via opencode run workers
Uses your default opencode model.

Usage: batch-runner.sh [OPTIONS]

Options:
--parallel N Number of parallel workers (default: 1)
--bundle-size N Offers per worker invocation (default: 5, use 1 for
legacy per-offer mode). Each worker processes N
offers sequentially, amortizing the system prompt.
--dry-run Show what would be processed, don't execute
--retry-failed Only retry offers marked as "failed" in state
--start-from N Start from offer ID N (skip earlier IDs)
--max-retries N Max retry attempts per offer (default: 2)
-h, --help Show this help

Files:
batch-input.tsv Input offers (id, url, source, notes)
batch-state.tsv Processing state (auto-managed)
batch-prompt.md Prompt template for workers
logs/ Per-offer logs
tracker-additions/ Tracker lines for post-batch merge

Examples:
# Dry run to see pending offers
./batch-runner.sh --dry-run

# Process all pending
./batch-runner.sh

# Retry only failed offers
./batch-runner.sh --retry-failed

# Process 2 at a time starting from ID 10
./batch-runner.sh --parallel 2 --start-from 10
HELP_TEXT
}
|
|
70
|
+
|
|
71
|
+
# Parse command-line arguments into the global option variables.
#
# Globals written: PARALLEL, BUNDLE_SIZE, START_FROM, MAX_RETRIES,
#                  DRY_RUN, RETRY_FAILED
# Arguments:       the script's command-line arguments
# Exits:           0 after printing help for -h/--help;
#                  1 (message on stderr) for an unknown option, a value option
#                  with no value, or a value that is not a non-negative integer.
#
# Numeric values are validated because they are later used inside (( ... )),
# where arbitrary text would be evaluated as an arithmetic expression, and are
# normalised to base 10 so that e.g. "08" is not rejected as invalid octal.
parse_args() {
  local opt value
  while (( $# > 0 )); do
    opt="$1"
    case "$opt" in
      --parallel | --bundle-size | --start-from | --max-retries)
        if (( $# < 2 )); then
          echo "ERROR: $opt requires a value" >&2
          exit 1
        fi
        value="$2"
        if [[ ! "$value" =~ ^[0-9]+$ ]]; then
          echo "ERROR: $opt expects a non-negative integer, got '$value'" >&2
          exit 1
        fi
        value=$((10#$value))
        case "$opt" in
          --parallel) PARALLEL="$value" ;;
          --bundle-size) BUNDLE_SIZE="$value" ;;
          --start-from) START_FROM="$value" ;;
          --max-retries) MAX_RETRIES="$value" ;;
        esac
        shift 2
        ;;
      --dry-run)
        DRY_RUN=true
        shift
        ;;
      --retry-failed)
        RETRY_FAILED=true
        shift
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        usage >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
|
|
84
|
+
|
|
85
|
+
# Take the single-instance lock by writing our PID to $LOCK_FILE.
#
# Globals read: LOCK_FILE
# Exits 1 (messages on stderr) when the lock names a live process, or when the
# lock file cannot be created because another runner created it first.
#
# A lock whose content is not a positive integer, or whose PID is no longer
# running, is treated as stale and removed. The PID must be a positive integer
# because `kill -0 0` and `kill -0 -N` address process groups and would
# "succeed" for a lock that names no real runner.
acquire_lock() {
  local old_pid=""
  if [[ -f "$LOCK_FILE" ]]; then
    old_pid=$(cat "$LOCK_FILE" 2>/dev/null || true)
    if [[ "$old_pid" =~ ^[1-9][0-9]*$ ]] && kill -0 "$old_pid" 2>/dev/null; then
      echo "ERROR: Another batch-runner is already running (PID $old_pid)" >&2
      echo "If this is stale, remove $LOCK_FILE" >&2
      exit 1
    fi
    echo "WARN: Stale lock file found (PID $old_pid not running). Removing." >&2
    rm -f "$LOCK_FILE"
  fi

  # With noclobber, '>' refuses to overwrite an existing file, so if two
  # runners reach this point together only one of them creates the lock.
  # The subshell keeps the option from leaking; $$ is still the main PID there.
  if ! (set -o noclobber; echo "$$" > "$LOCK_FILE") 2>/dev/null; then
    echo "ERROR: Could not create $LOCK_FILE (another batch-runner may have just started)" >&2
    exit 1
  fi
}
|
|
101
|
+
|
|
102
|
+
# Remove the lock file, but only if it is ours.
#
# Globals read: LOCK_FILE
# Always returns 0.
#
# This runs from the EXIT trap on every exit path, including paths where this
# process never took the lock (--dry-run, or being refused because another
# runner is active). Deleting unconditionally would remove the running
# instance's lock, so the file is removed only when it contains our own PID.
release_lock() {
  [[ -f "$LOCK_FILE" ]] || return 0
  local owner
  owner=$(cat "$LOCK_FILE" 2>/dev/null || true)
  if [[ "$owner" == "$$" ]]; then
    rm -f "$LOCK_FILE"
  fi
  return 0
}

trap release_lock EXIT
|
|
107
|
+
|
|
108
|
+
# Verify everything the batch needs before any work starts.
#
# Globals read: INPUT_FILE, PROMPT_FILE, LOGS_DIR, TRACKER_DIR, REPORTS_DIR
# Exits 1 with an ERROR line if the input file, the prompt file or the
# `opencode` command is missing; otherwise creates the output directories.
check_prerequisites() {
  [[ -f "$INPUT_FILE" ]] || {
    echo "ERROR: $INPUT_FILE not found. Add offers first."
    exit 1
  }

  [[ -f "$PROMPT_FILE" ]] || {
    echo "ERROR: $PROMPT_FILE not found."
    exit 1
  }

  command -v opencode > /dev/null 2>&1 || {
    echo "ERROR: 'opencode' CLI not found in PATH."
    exit 1
  }

  mkdir -p "$LOGS_DIR" "$TRACKER_DIR" "$REPORTS_DIR"
}
|
|
127
|
+
|
|
128
|
+
# Create the state file with its header row unless it already exists.
#
# Globals read: STATE_FILE
# The header names the nine tab-separated columns used by the state helpers.
init_state() {
  [[ -f "$STATE_FILE" ]] && return 0

  local -a columns=(id url status started_at completed_at report_num score error retries)
  local header
  header=$(IFS=$'\t'; printf '%s' "${columns[*]}")
  printf '%s\n' "$header" > "$STATE_FILE"
}
|
|
134
|
+
|
|
135
|
+
# Print the recorded status (column 3) for an offer id.
#
# Arguments:    $1 - offer id
# Globals read: STATE_FILE
# Outputs:      the status, or "none" when the state file is missing or has
#               no row whose first column equals the id.
get_status() {
  local offer_id="$1"
  local found=""

  if [[ -f "$STATE_FILE" ]]; then
    found=$(awk -F'\t' -v id="$offer_id" '$1 == id { print $3 }' "$STATE_FILE")
  fi

  echo "${found:-none}"
}
|
|
146
|
+
|
|
147
|
+
# Print the recorded retry count (column 9) for an offer id.
#
# Arguments:    $1 - offer id
# Globals read: STATE_FILE
# Outputs:      the stored count, or "0" when the state file is missing or
#               has no row whose first column equals the id.
get_retries() {
  local offer_id="$1"
  local count=""

  if [[ -f "$STATE_FILE" ]]; then
    count=$(awk -F'\t' -v id="$offer_id" '$1 == id { print $9 }' "$STATE_FILE")
  fi

  echo "${count:-0}"
}
|
|
158
|
+
|
|
159
|
+
# Print the next free report number, zero-padded to three digits.
#
# Globals read: REPORTS_DIR, STATE_FILE
# Outputs:      (highest number seen + 1) formatted with %03d
#
# The highest number is taken from two places:
#   * the numeric prefix (text before the first '-') of each *.md file in
#     REPORTS_DIR, and
#   * column 6 (report_num) of every row in STATE_FILE.
# Anything that is not purely digits — a README.md, a "-" placeholder, the
# header row — is skipped rather than fed to $((10#...)), which would abort
# the script with an arithmetic error.
#
# NOTE(review): nothing here reserves the number; concurrent callers can
# compute the same value.
next_report_num() {
  local highest=0
  local path prefix rnum value

  if [[ -d "$REPORTS_DIR" ]]; then
    for path in "$REPORTS_DIR"/*.md; do
      [[ -f "$path" ]] || continue
      prefix="${path##*/}"
      prefix="${prefix%%-*}"
      [[ "$prefix" =~ ^[0-9]+$ ]] || continue
      value=$((10#$prefix)) # force base 10: "009" must not be read as octal
      if (( value > highest )); then
        highest=$value
      fi
    done
  fi

  if [[ -f "$STATE_FILE" ]]; then
    # awk splits on every tab, so an empty column cannot shift report_num.
    while IFS= read -r rnum; do
      [[ "$rnum" =~ ^[0-9]+$ ]] || continue
      value=$((10#$rnum))
      if (( value > highest )); then
        highest=$value
      fi
    done < <(awk -F'\t' '{ print $6 }' "$STATE_FILE")
  fi

  printf '%03d' "$((highest + 1))"
}
|
|
186
|
+
|
|
187
|
+
# Insert or replace the state row for one offer.
#
# Arguments: $1 id  $2 url  $3 status  $4 started_at  $5 completed_at
#            $6 report_num  $7 score  $8 error  $9 retries
# Globals read: STATE_FILE (created via init_state when missing)
#
# Every row whose first column equals the id is replaced with the new values;
# if none matches, the row is appended. All other rows are copied verbatim.
#
# Field hygiene (keeps the file a strict 9-column TSV):
#   * tabs, CRs and newlines inside a value become spaces;
#   * an empty value is stored as "-" (retries as "0"), because readers that
#     split with IFS=$'\t' collapse adjacent tabs and would otherwise shift
#     the later columns.
#
# The rewrite goes through a per-process temp file so that background workers
# do not write into the same temp file.
# NOTE(review): the read-modify-write itself is not locked; two workers
# updating at the same moment can still lose one of the updates.
update_state() {
  local id="$1" url="$2" status="$3" started="$4" completed="$5" report_num="$6" score="$7" error="$8" retries="$9"

  if [[ ! -f "$STATE_FILE" ]]; then
    init_state
  fi

  [[ -n "$retries" ]] || retries=0

  local -a fields=("$id" "$url" "$status" "$started" "$completed" "$report_num" "$score" "$error" "$retries")
  local ws=$'\t\r\n'
  local idx
  for idx in "${!fields[@]}"; do
    fields[idx]=${fields[idx]//[$ws]/ }
    if [[ -z "${fields[idx]}" ]]; then
      fields[idx]="-"
    fi
  done
  id="${fields[0]}"

  local new_row
  new_row=$(IFS=$'\t'; printf '%s' "${fields[*]}")

  local tmp="${STATE_FILE}.tmp.${BASHPID:-$$}"
  local replaced=false
  local line row_id

  {
    head -n 1 "$STATE_FILE"
    while IFS= read -r line || [[ -n "$line" ]]; do
      [[ -n "$line" ]] || continue
      row_id="${line%%$'\t'*}"
      [[ "$row_id" == "id" ]] && continue # header already written above
      if [[ "$row_id" == "$id" ]]; then
        printf '%s\n' "$new_row"
        replaced=true
      else
        printf '%s\n' "$line"
      fi
    done < "$STATE_FILE"

    if [[ "$replaced" == "false" ]]; then
      printf '%s\n' "$new_row"
    fi
  } > "$tmp"

  mv "$tmp" "$STATE_FILE"
}
|
|
221
|
+
|
|
222
|
+
# Run one opencode worker for a single offer and record the outcome.
#
# Arguments: $1 id  $2 url  $3 source  $4 notes
#            (source and notes are accepted for interface compatibility; the
#            worker prompt does not use them)
# Globals read: LOGS_DIR, PROMPT_FILE
# Calls: next_report_num, get_retries, update_state, opencode
#
# Flow: reserve a report number, mark the offer "processing", run the worker
# with its combined stdout/stderr written to a per-offer log, then mark the
# offer "completed" (with the score, if one can be found in the log) or
# "failed" (with a short error excerpt and the retry count incremented).
process_offer() {
  # shellcheck disable=SC2034  # source/notes kept for the call signature
  local id="$1" url="$2" source="${3-}" notes="${4-}"

  local report_num today started_at retries
  report_num=$(next_report_num)
  today=$(date +%Y-%m-%d)
  started_at=$(date -u +%Y-%m-%dT%H:%M:%SZ)
  retries=$(get_retries "$id")
  local jd_file="/tmp/batch-jd-${id}.txt"

  echo "--- Processing offer #$id: $url (report $report_num, attempt $((retries + 1)))"

  # Mark as in-progress
  update_state "$id" "$url" "processing" "$started_at" "-" "$report_num" "-" "-" "$retries"

  # Per-job values travel in the user message; the worker fills the {{...}}
  # placeholders of batch-prompt.md from them.
  local prompt
  prompt="Process this job offer. Execute the full pipeline: A-F evaluation + report .md + PDF + tracker line."
  prompt="$prompt URL: $url"
  prompt="$prompt JD file: $jd_file"
  prompt="$prompt Report number: $report_num"
  prompt="$prompt Date: $today"
  prompt="$prompt Batch ID: $id"

  local log_file="$LOGS_DIR/${report_num}-${id}.log"

  # batch-prompt.md is passed unmodified so every worker shares a
  # byte-identical system prompt; substituting per-job values into it would
  # defeat the opencode prompt cache on every run.
  local exit_code=0
  opencode run \
    --dangerously-skip-permissions \
    --file "$PROMPT_FILE" \
    "$prompt" \
    > "$log_file" 2>&1 || exit_code=$?

  local completed_at
  completed_at=$(date -u +%Y-%m-%dT%H:%M:%SZ)

  if [[ $exit_code -eq 0 ]]; then
    # First `"score": <number>` in the log wins. POSIX ERE (grep -E) is used
    # instead of grep -P, which BSD/macOS grep does not provide.
    local score="-"
    local score_match
    score_match=$(grep -oE '"score":[[:space:]]*[0-9.]+' "$log_file" 2>/dev/null \
      | head -n 1 \
      | grep -oE '[0-9.]+' || true)
    if [[ -n "$score_match" ]]; then
      score="$score_match"
    fi

    update_state "$id" "$url" "completed" "$started_at" "$completed_at" "$report_num" "$score" "-" "$retries"
    echo " ✅ Completed (score: $score, report: $report_num)"
  else
    retries=$((retries + 1))
    # Last five log lines flattened to one line, capped at 200 characters.
    # Tabs are flattened too so the message stays a single TSV field, and a
    # blank excerpt (empty log) falls back to a generic message.
    local error_msg
    error_msg=$(tail -n 5 "$log_file" 2>/dev/null | tr '\n\t' '  ' | cut -c1-200 || true)
    if [[ -z "${error_msg//[[:space:]]/}" ]]; then
      error_msg="Unknown error (exit code $exit_code)"
    fi
    update_state "$id" "$url" "failed" "$started_at" "$completed_at" "$report_num" "-" "$error_msg" "$retries"
    echo " ❌ Failed (attempt $retries, exit code $exit_code)"
  fi
}
|
|
287
|
+
|
|
288
|
+
# Process a bundle of offers in a single opencode run.
#
# Arguments: the offer ids that make up the bundle (one per argument)
# Globals read: INPUT_FILE, LOGS_DIR, PROMPT_FILE
# Calls: process_offer, next_report_num, get_retries, update_state, opencode
#
# Bundling amortizes the ~10K-token system prompt across N offers.
#   * 0 ids: nothing to do.
#   * 1 id:  delegates to process_offer (legacy per-offer path).
#   * N ids: assigns consecutive report numbers, marks every offer
#            "processing", sends one prompt listing all offers, then reads the
#            worker log for one status JSON line per offer.
#
# Log parsing rules:
#   * a line counts only if it carries an "id" that belongs to this bundle AND
#     a "status" of exactly "completed" or "failed". This keeps an echoed copy
#     of the prompt (its template line and the offers list also contain
#     "id":"...") or any unrelated JSON from being recorded as a result;
#   * the first qualifying line per id wins;
#   * a status line without a report_num falls back to the number assigned
#     here;
#   * bundle members with no qualifying line are marked failed.
process_bundle() {
  local -a bundle_ids=("$@")
  local count=${#bundle_ids[@]}
  if (( count == 0 )); then return 0; fi

  local id row _id url source notes
  if (( count == 1 )); then
    # Single-offer bundle is just legacy behavior — use the per-offer path
    id="${bundle_ids[0]}"
    row=$(awk -F'\t' -v id="$id" '$1 == id { print $0; exit }' "$INPUT_FILE")
    IFS=$'\t' read -r _id url source notes <<< "$row"
    process_offer "$id" "$url" "$source" "$notes"
    return
  fi

  local today started_at
  today=$(date +%Y-%m-%d)
  started_at=$(date -u +%Y-%m-%dT%H:%M:%SZ)

  # Build the per-offer spec array and reserve report numbers.
  local spec_json=""
  local -A in_bundle=()
  local -A assigned=()
  local next_num n report_num jd_file retries
  next_num=$(next_report_num)
  n=$((10#$next_num))

  for id in "${bundle_ids[@]}"; do
    row=$(awk -F'\t' -v id="$id" '$1 == id { print $0; exit }' "$INPUT_FILE")
    IFS=$'\t' read -r _id url source notes <<< "$row"
    report_num=$(printf '%03d' "$n")
    n=$((n + 1))
    in_bundle[$id]=1
    assigned[$id]="$report_num"
    jd_file="/tmp/batch-jd-${id}.txt"
    retries=$(get_retries "$id")

    update_state "$id" "$url" "processing" "$started_at" "-" "$report_num" "-" "-" "$retries"

    if [[ -n "$spec_json" ]]; then spec_json+=","; fi
    spec_json+=$(printf '{"id":"%s","url":"%s","jd_file":"%s","report_num":"%s","date":"%s"}' \
      "$id" "$url" "$jd_file" "$report_num" "$today")
  done
  spec_json="[${spec_json}]"

  local bundle_tag
  bundle_tag="bundle-$(IFS='_'; echo "${bundle_ids[*]}")"
  local log_file="$LOGS_DIR/${bundle_tag}.log"
  echo "--- Processing bundle of $count offers: ${bundle_ids[*]}"

  local prompt
  prompt=$(cat <<EOF
Process these $count offers sequentially using the full pipeline in batch-prompt.md
(Step 1 JD retrieval → Steps 2-6 evaluate/report/PDF/tracker line). **Do each
offer fully before starting the next.** Continue to the next offer even if one
fails. After each offer, emit ONE single-line JSON on its own line with this
exact shape (no extra prose, no code fences around it):

{"id":"<id>","status":"completed|failed","report_num":"<num>","company":"...","role":"...","score":<num-or-null>,"pdf":"<path-or-null>","report":"<path-or-null>","error":"<msg-or-null>"}

The orchestrator parses these lines to update state — anything between status
JSONs is fine but do NOT omit or reorder the required keys.

Offers:
$spec_json
EOF
)

  local exit_code=0
  opencode run \
    --dangerously-skip-permissions \
    --file "$PROMPT_FILE" \
    "$prompt" \
    > "$log_file" 2>&1 || exit_code=$?

  local completed_at
  completed_at=$(date -u +%Y-%m-%dT%H:%M:%SZ)

  # Regexes live in variables so the quotes and parentheses reach the regex
  # engine unchanged.
  local re_id='"id":"([^"]+)"'
  local re_status='"status":"(completed|failed)"'
  local re_score='"score":[[:space:]]*([0-9]+(\.[0-9]+)?)'
  local re_report='"report_num":"([^"]+)"'
  local re_error='"error":"([^"]+)"'

  local -A seen=()
  local json_line status score error_msg icon
  while IFS= read -r json_line || [[ -n "$json_line" ]]; do
    [[ "$json_line" =~ $re_id ]] || continue
    id="${BASH_REMATCH[1]}"
    [[ -n "${in_bundle[$id]:-}" ]] || continue # not one of ours
    [[ "$json_line" =~ $re_status ]] || continue # not a status line
    status="${BASH_REMATCH[1]}"
    [[ -z "${seen[$id]:-}" ]] || continue # first status line wins
    seen[$id]=1

    score="-"
    if [[ "$json_line" =~ $re_score ]]; then score="${BASH_REMATCH[1]}"; fi
    report_num="${assigned[$id]}"
    if [[ "$json_line" =~ $re_report ]]; then report_num="${BASH_REMATCH[1]}"; fi
    error_msg="-"
    if [[ "$json_line" =~ $re_error ]]; then error_msg="${BASH_REMATCH[1]}"; fi

    url=$(awk -F'\t' -v id="$id" '$1 == id { print $2; exit }' "$INPUT_FILE")
    retries=$(get_retries "$id")
    if [[ "$status" == "failed" ]]; then retries=$((retries + 1)); fi
    update_state "$id" "$url" "$status" "$started_at" "$completed_at" "$report_num" "$score" "$error_msg" "$retries"

    icon="❌"
    if [[ "$status" == "completed" ]]; then icon="✅"; fi
    echo " ${icon} #${id} (status=$status, score=$score, report=$report_num)"
  done < "$log_file"

  # Any offer in the bundle not seen in the output → mark failed
  for id in "${bundle_ids[@]}"; do
    if [[ -z "${seen[$id]:-}" ]]; then
      url=$(awk -F'\t' -v id="$id" '$1 == id { print $2; exit }' "$INPUT_FILE")
      retries=$(get_retries "$id")
      retries=$((retries + 1))
      update_state "$id" "$url" "failed" "$started_at" "$completed_at" "-" "-" \
        "Worker finished without emitting status JSON for this offer" "$retries"
      echo " ❌ #${id} (no status emitted — worker may have stopped early)"
    fi
  done

  if [[ $exit_code -ne 0 ]]; then
    echo " ⚠️ Worker exit code $exit_code — see $log_file"
  fi
}
|
|
415
|
+
|
|
416
|
+
# Post-batch step: merge tracker additions, then verify the pipeline.
#
# Globals read: PROJECT_DIR
# Runs merge-tracker.mjs and then verify-pipeline.mjs with node. A failing
# verification only prints a warning; it does not fail this function.
merge_tracker() {
  printf '\n%s\n' "=== Merging tracker additions ==="
  node "$PROJECT_DIR/merge-tracker.mjs"

  printf '\n%s\n' "=== Verifying pipeline integrity ==="
  if ! node "$PROJECT_DIR/verify-pipeline.mjs"; then
    echo "⚠️ Verification found issues (see above)"
  fi
}
|
|
425
|
+
|
|
426
|
+
# Report token usage for recent sessions and warn on expensive ones.
# (Opencode has no SessionEnd hook; this is the closest substitute for batch runs.)
#
# Arguments: $1 - look-back window in minutes (default: 120)
# Does nothing beyond printing the heading when `npx` is not available; if the
# session-report command fails, a hint to run it manually is printed instead.
cost_report() {
  local window_min="${1:-120}"

  printf '\n'
  echo "=== Token usage (last ${window_min} min, warn at \$1.00) ==="

  command -v npx > /dev/null 2>&1 || return 0

  npx --no-install job-forge session-report --since-minutes "$window_min" --log --warn-at 1.00 \
    || echo "(session-report unavailable; run 'job-forge session-report' manually)"
}
|
|
439
|
+
|
|
440
|
+
# Print totals per status and, when any completed offer has a score, the
# average score.
#
# Globals read: STATE_FILE
# Rows are counted as completed, failed, or (anything else) pending. Scores
# are summed and averaged with `bc`; if `bc` fails, the running sum is left
# unchanged and the average prints as "N/A".
print_summary() {
  printf '\n%s\n' "=== Batch Summary ==="

  if [[ ! -f "$STATE_FILE" ]]; then
    echo "No state file found."
    return
  fi

  local n_total=0 n_completed=0 n_failed=0 n_pending=0
  local sum=0 scored=0
  local row_id row_status row_score

  while IFS=$'\t' read -r row_id _ row_status _ _ _ row_score _ _; do
    if [[ "$row_id" == "id" ]]; then
      continue # header row
    fi
    n_total=$((n_total + 1))

    if [[ "$row_status" == "completed" ]]; then
      n_completed=$((n_completed + 1))
      if [[ -n "$row_score" && "$row_score" != "-" ]]; then
        sum=$(echo "$sum + $row_score" | bc 2>/dev/null || echo "$sum")
        scored=$((scored + 1))
      fi
    elif [[ "$row_status" == "failed" ]]; then
      n_failed=$((n_failed + 1))
    else
      n_pending=$((n_pending + 1))
    fi
  done < "$STATE_FILE"

  echo "Total: $n_total | Completed: $n_completed | Failed: $n_failed | Pending: $n_pending"

  if (( scored > 0 )); then
    local avg
    avg=$(echo "scale=1; $sum / $scored" | bc 2>/dev/null || echo "N/A")
    echo "Average score: $avg/5 ($scored scored)"
  fi
}
|
|
476
|
+
|
|
477
|
+
# Entry point: select the offers to process, bundle them, run the workers,
# then merge tracker lines and print the summary and cost report.
#
# Globals read: INPUT_FILE, PARALLEL, BUNDLE_SIZE, START_FROM, MAX_RETRIES,
#               DRY_RUN, RETRY_FAILED
# Calls: check_prerequisites, acquire_lock, init_state, get_status,
#        get_retries, process_bundle, merge_tracker, print_summary, cost_report
#
# Selection rules per input row (header and rows lacking id or url skipped):
#   * rows whose id is not a plain non-negative integer are skipped with a
#     warning — ids are compared numerically against START_FROM, and raw text
#     inside (( ... )) would be evaluated as an expression;
#   * ids below START_FROM are skipped;
#   * with --retry-failed: only "failed" rows still under MAX_RETRIES;
#   * otherwise: everything except "completed" rows and "failed" rows that
#     have reached MAX_RETRIES.
#
# The input loop also accepts a final row with no trailing newline.
main() {
  # The numeric options are used in arithmetic below; reject anything that is
  # not a non-negative integer and normalise to base 10 (so "08" is 8).
  local opt_name
  for opt_name in PARALLEL BUNDLE_SIZE START_FROM MAX_RETRIES; do
    if [[ ! "${!opt_name}" =~ ^[0-9]+$ ]]; then
      echo "ERROR: $opt_name must be a non-negative integer (got '${!opt_name}')" >&2
      exit 1
    fi
    printf -v "$opt_name" '%d' "$((10#${!opt_name}))"
  done

  check_prerequisites

  if [[ "$DRY_RUN" == "false" ]]; then
    acquire_lock
  fi

  init_state

  # Count input offers (skip header, ignore blank lines)
  local total_input
  total_input=$(tail -n +2 "$INPUT_FILE" | grep -c '[^[:space:]]' 2>/dev/null || true)
  total_input="${total_input:-0}"

  if (( total_input == 0 )); then
    echo "No offers in $INPUT_FILE. Add offers first."
    exit 0
  fi

  echo "=== job-forge batch runner ==="
  echo "Parallel: $PARALLEL | Max retries: $MAX_RETRIES"
  echo "Input: $total_input offers"
  echo ""

  # Build list of offers to process
  local -a pending_ids=()
  local -a pending_urls=()
  local -a pending_sources=()
  local id url source notes status retries

  while IFS=$'\t' read -r id url source notes || [[ -n "$id" ]]; do
    [[ "$id" == "id" ]] && continue # skip header
    [[ -z "$id" || -z "$url" ]] && continue

    if [[ ! "$id" =~ ^[0-9]+$ ]]; then
      echo "WARN: skipping input row with non-numeric id '$id'" >&2
      continue
    fi

    # Skip if before start-from (10# keeps ids such as "08" out of octal)
    if (( 10#$id < START_FROM )); then
      continue
    fi

    status=$(get_status "$id")

    if [[ "$RETRY_FAILED" == "true" ]]; then
      # Only process failed offers
      if [[ "$status" != "failed" ]]; then
        continue
      fi
      # Check retry limit
      retries=$(get_retries "$id")
      if (( retries >= MAX_RETRIES )); then
        echo "SKIP #$id: max retries ($MAX_RETRIES) reached"
        continue
      fi
    else
      # Skip completed offers
      if [[ "$status" == "completed" ]]; then
        continue
      fi
      # Skip failed offers that hit retry limit (unless --retry-failed)
      if [[ "$status" == "failed" ]]; then
        retries=$(get_retries "$id")
        if (( retries >= MAX_RETRIES )); then
          echo "SKIP #$id: failed and max retries reached (use --retry-failed to force)"
          continue
        fi
      fi
    fi

    pending_ids+=("$id")
    pending_urls+=("$url")
    pending_sources+=("$source")
  done < "$INPUT_FILE"

  local pending_count=${#pending_ids[@]}

  if (( pending_count == 0 )); then
    echo "No offers to process."
    print_summary
    exit 0
  fi

  echo "Pending: $pending_count offers"
  echo ""

  local i

  # Dry run: just list
  if [[ "$DRY_RUN" == "true" ]]; then
    echo "=== DRY RUN (no processing) ==="
    for i in "${!pending_ids[@]}"; do
      status=$(get_status "${pending_ids[$i]}")
      echo " #${pending_ids[$i]}: ${pending_urls[$i]} [${pending_sources[$i]}] (status: $status)"
    done
    echo ""
    echo "Would process $pending_count offers"
    exit 0
  fi

  # Partition pending into bundles of BUNDLE_SIZE. Each bundle is stored as a
  # space-separated id string; a size below 1 behaves like 1.
  local size=$BUNDLE_SIZE
  if (( size < 1 )); then size=1; fi
  local -a bundles=()
  for (( i = 0; i < pending_count; i += size )); do
    bundles+=("${pending_ids[*]:i:size}")
  done
  local bundle_count=${#bundles[@]}
  echo "Partitioned into $bundle_count bundle(s) of up to $BUNDLE_SIZE offer(s) each"

  local b
  local -a ids_in_bundle=()

  # Process bundles
  if (( PARALLEL <= 1 )); then
    # Sequential processing (one bundle at a time)
    for b in "${bundles[@]}"; do
      read -r -a ids_in_bundle <<< "$b"
      process_bundle "${ids_in_bundle[@]}"
    done
  else
    # Prime the opencode prompt cache with the first bundle alone so its
    # ~10K-token system prompt is written to cache, then remaining parallel
    # bundles read from cache instead of each writing their own copy.
    local start_idx=0
    if (( bundle_count > 1 )); then
      echo "Priming prompt cache with first bundle: ${bundles[0]}"
      read -r -a ids_in_bundle <<< "${bundles[0]}"
      process_bundle "${ids_in_bundle[@]}"
      start_idx=1
    fi

    # Parallel processing: keep at most PARALLEL background workers alive.
    local -a pids=()
    local -a alive=()
    local pid

    for (( i = start_idx; i < bundle_count; i++ )); do
      # Wait while we're at the parallel limit
      while (( ${#pids[@]} >= PARALLEL )); do
        alive=()
        for pid in "${pids[@]}"; do
          if kill -0 "$pid" 2>/dev/null; then
            alive+=("$pid")
          else
            wait "$pid" 2>/dev/null || true # reap the finished worker
          fi
        done
        # ${arr[@]+...} keeps an empty array from tripping `set -u` on older bash
        pids=(${alive[@]+"${alive[@]}"})
        if (( ${#pids[@]} >= PARALLEL )); then
          sleep 1
        fi
      done

      # Launch a bundle worker in background
      read -r -a ids_in_bundle <<< "${bundles[$i]}"
      process_bundle "${ids_in_bundle[@]}" &
      pids+=("$!")
    done

    # Wait for remaining workers
    for pid in ${pids[@]+"${pids[@]}"}; do
      wait "$pid" 2>/dev/null || true
    done
  fi

  # Merge tracker additions
  merge_tracker

  # Print summary
  print_summary

  # Auto-log token usage for this batch to data/token-usage.tsv and
  # flag any session that exceeded the $1 budget. No-op if opencode DB
  # isn't available (e.g. batch ran on a CI runner without opencode).
  cost_report 180
}
|
|
672
|
+
|
|
673
|
+
main "$@"
|