job-forge 2.14.12 → 2.14.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.codex/config.toml +1 -2
- package/.cursor/mcp.json +1 -6
- package/.cursor/rules/main.mdc +4 -1
- package/.mcp.json +1 -6
- package/.opencode/skills/job-forge.md +8 -3
- package/AGENTS.md +4 -1
- package/CLAUDE.md +4 -1
- package/README.md +4 -1
- package/batch/README.md +9 -2
- package/batch/batch-runner.sh +18 -2
- package/docs/ARCHITECTURE.md +11 -7
- package/docs/SETUP.md +1 -1
- package/iso/commands/job-forge.md +8 -3
- package/iso/instructions.md +4 -1
- package/iso/mcp.json +1 -2
- package/modes/apply.md +5 -2
- package/modes/batch.md +9 -0
- package/opencode.json +0 -3
- package/package.json +3 -2
- package/scripts/batch-orchestrator.mjs +886 -0
- package/scripts/telemetry.mjs +256 -20
package/.codex/config.toml
CHANGED
|
@@ -25,6 +25,5 @@ args = ["-y", "@razroo/gmail-mcp"]
|
|
|
25
25
|
env = { DISABLE_HTTP = "true" }
|
|
26
26
|
|
|
27
27
|
[mcp_servers.state-trace]
|
|
28
|
-
command = "uvx"
|
|
29
|
-
args = ["--from", "state-trace[mcp]", "state-trace-mcp"]
|
|
28
|
+
command = "state-trace-mcp"
|
|
30
29
|
env = { STATE_TRACE_STORAGE_PATH = ".state-trace/memory.db", STATE_TRACE_NAMESPACE = "job-forge", STATE_TRACE_CAPACITY_LIMIT = "256" }
|
package/.cursor/mcp.json
CHANGED
|
@@ -18,12 +18,7 @@
|
|
|
18
18
|
}
|
|
19
19
|
},
|
|
20
20
|
"state-trace": {
|
|
21
|
-
"command": "uvx",
|
|
22
|
-
"args": [
|
|
23
|
-
"--from",
|
|
24
|
-
"state-trace[mcp]",
|
|
25
|
-
"state-trace-mcp"
|
|
26
|
-
],
|
|
21
|
+
"command": "state-trace-mcp",
|
|
27
22
|
"env": {
|
|
28
23
|
"STATE_TRACE_STORAGE_PATH": ".state-trace/memory.db",
|
|
29
24
|
"STATE_TRACE_NAMESPACE": "job-forge",
|
package/.cursor/rules/main.mdc
CHANGED
|
@@ -12,7 +12,7 @@ AI-powered job search pipeline: scans portals, evaluates offers, generates CVs v
|
|
|
12
12
|
- [H1] Max 2 parallel `task` dispatches per message. For N jobs, run `ceil(N/2)` sequential rounds of 2. A round is not complete until both subagents return a final outcome (`APPLIED`, `APPLY FAILED`, `SKIP`, `Discarded`, or a written TSV path). A `task` tool result that only gives a session id / title is a launch acknowledgement, not completion. Applies in all modes, for all user phrasings ("urgent", "apply to 10 jobs now").
|
|
13
13
|
why: each subagent requires post-cleanup and racing more than 2 reliably loses at least one result. On 2026-04-25 the orchestrator launched round 2 while round 1 had only returned task ids, leaving four application subagents in flight and losing two provider recoveries
|
|
14
14
|
|
|
15
|
-
- [H2] Max 1 application per company+role. Before every `apply` dispatch, grep all four sources for the URL and for `company+role`: `data/pipeline.md`, all `data/applications/*.md` day files, `batch/tracker-additions/*.tsv`, `batch/tracker-additions/merged/*.tsv`. If any source shows APPLIED / Applied, skip the dispatch.
|
|
15
|
+
- [H2] Max 1 application per company+role. Before every `apply` dispatch, grep all four sources for the URL and for `company+role`: `data/pipeline.md`, all `data/applications/*.md` day files, `batch/tracker-additions/*.tsv`, `batch/tracker-additions/merged/*.tsv`. If any source shows APPLIED / Applied, skip the dispatch and pick a replacement from the remaining candidate list. Do not count duplicates toward a requested "apply to N jobs" total, and do not delegate obvious duplicates just so a subagent can return SKIP.
|
|
16
16
|
why: 2026-04 same-day batch collision — when two batches target the same role, `npx job-forge merge` updates the existing day-file row rather than appending, so grepping day files alone misses earlier-batch applies; merged/*.tsv is the only place the breadcrumb remains
|
|
17
17
|
|
|
18
18
|
- [H3] Before every batch of `task` dispatches that will use Geometra, call `geometra_list_sessions` then `geometra_disconnect({closeBrowser: true})`. Every round, no exceptions. Name this cleanup as an explicit "step 0" in your first-response plan for any multi-apply request — it is the most frequently skipped guardrail in practice, and skipping it produces cascade "Not connected" failures on the next dispatch.
|
|
@@ -56,6 +56,9 @@ AI-powered job search pipeline: scans portals, evaluates offers, generates CVs v
|
|
|
56
56
|
- [D6] Pick the mode from the **Routing** table below AND name it explicitly in your first response (e.g., "running auto-pipeline mode", "this is a `compare` request"). If no row matches the user's intent, ask which mode fits; do not guess.
|
|
57
57
|
why: silent mode picks mis-route work (a "negotiation" question answered in `offer` mode produces the wrong report shape); naming the mode out loud makes the routing decision reviewable and gives downstream dispatches a reliable anchor
|
|
58
58
|
|
|
59
|
+
- [D7] For standalone `batch` runs, prefer `batch/batch-runner.sh` instead of hand-rolling the loop. It delegates to `@razroo/iso-orchestrator`, persists workflow records in `.jobforge-runs/`, caps bundle fan-out, and mutexes state/report-number writes. Use `JOBFORGE_LEGACY_BATCH_RUNNER=1` only as a fallback.
|
|
60
|
+
why: the old Bash loop encoded resumability and parallelism manually; the iso-orchestrator path makes the durable control state inspectable and prevents report-number collisions under parallel bundles
|
|
61
|
+
|
|
59
62
|
## Procedure
|
|
60
63
|
|
|
61
64
|
1. Check `cv.md`, `profile.yml`, and `portals.yml`; onboard if any file is missing.
|
package/.mcp.json
CHANGED
|
@@ -18,12 +18,7 @@
|
|
|
18
18
|
}
|
|
19
19
|
},
|
|
20
20
|
"state-trace": {
|
|
21
|
-
"command": "
|
|
22
|
-
"args": [
|
|
23
|
-
"--from",
|
|
24
|
-
"state-trace[mcp]",
|
|
25
|
-
"state-trace-mcp"
|
|
26
|
-
],
|
|
21
|
+
"command": "state-trace-mcp",
|
|
27
22
|
"env": {
|
|
28
23
|
"STATE_TRACE_STORAGE_PATH": ".state-trace/memory.db",
|
|
29
24
|
"STATE_TRACE_NAMESPACE": "job-forge",
|
package/.opencode/skills/job-forge.md
CHANGED
|
@@ -137,13 +137,18 @@ When the user says "apply to N jobs", "process the pipeline", or similar, execut
|
|
|
137
137
|
|
|
138
138
|
```
|
|
139
139
|
Step 1 — Enumerate candidates
|
|
140
|
-
- Grep data/applications
|
|
140
|
+
- Grep data/applications/*.md for status "Evaluated" without loading every file into context
|
|
141
141
|
- Also read data/pipeline.md for unprocessed URLs
|
|
142
142
|
- Build ordered list: candidates = [job_1, job_2, ..., job_N]
|
|
143
143
|
|
|
144
144
|
Step 2 — Dedup against already-applied
|
|
145
|
-
- For each candidate,
|
|
146
|
-
|
|
145
|
+
- For each candidate, grep all four sources for URL and company+role:
|
|
146
|
+
data/pipeline.md, data/applications/*.md, batch/tracker-additions/*.tsv,
|
|
147
|
+
batch/tracker-additions/merged/*.tsv
|
|
148
|
+
- Drop any APPLIED / Applied match before counting toward N. Never re-apply.
|
|
149
|
+
- If a subagent later returns SKIP because it found a duplicate, treat that as
|
|
150
|
+
a missed preflight check; finish the current round, re-run dedupe, then pick
|
|
151
|
+
a replacement from the remaining candidates.
|
|
147
152
|
|
|
148
153
|
Step 3 — Pre-flight cleanup (once, before the loop)
|
|
149
154
|
- geometra_list_sessions()
|
package/AGENTS.md
CHANGED
|
@@ -7,7 +7,7 @@ AI-powered job search pipeline: scans portals, evaluates offers, generates CVs v
|
|
|
7
7
|
- [H1] Max 2 parallel `task` dispatches per message. For N jobs, run `ceil(N/2)` sequential rounds of 2. A round is not complete until both subagents return a final outcome (`APPLIED`, `APPLY FAILED`, `SKIP`, `Discarded`, or a written TSV path). A `task` tool result that only gives a session id / title is a launch acknowledgement, not completion. Applies in all modes, for all user phrasings ("urgent", "apply to 10 jobs now").
|
|
8
8
|
why: each subagent requires post-cleanup and racing more than 2 reliably loses at least one result. On 2026-04-25 the orchestrator launched round 2 while round 1 had only returned task ids, leaving four application subagents in flight and losing two provider recoveries
|
|
9
9
|
|
|
10
|
-
- [H2] Max 1 application per company+role. Before every `apply` dispatch, grep all four sources for the URL and for `company+role`: `data/pipeline.md`, all `data/applications/*.md` day files, `batch/tracker-additions/*.tsv`, `batch/tracker-additions/merged/*.tsv`. If any source shows APPLIED / Applied, skip the dispatch.
|
|
10
|
+
- [H2] Max 1 application per company+role. Before every `apply` dispatch, grep all four sources for the URL and for `company+role`: `data/pipeline.md`, all `data/applications/*.md` day files, `batch/tracker-additions/*.tsv`, `batch/tracker-additions/merged/*.tsv`. If any source shows APPLIED / Applied, skip the dispatch and pick a replacement from the remaining candidate list. Do not count duplicates toward a requested "apply to N jobs" total, and do not delegate obvious duplicates just so a subagent can return SKIP.
|
|
11
11
|
why: 2026-04 same-day batch collision — when two batches target the same role, `npx job-forge merge` updates the existing day-file row rather than appending, so grepping day files alone misses earlier-batch applies; merged/*.tsv is the only place the breadcrumb remains
|
|
12
12
|
|
|
13
13
|
- [H3] Before every batch of `task` dispatches that will use Geometra, call `geometra_list_sessions` then `geometra_disconnect({closeBrowser: true})`. Every round, no exceptions. Name this cleanup as an explicit "step 0" in your first-response plan for any multi-apply request — it is the most frequently skipped guardrail in practice, and skipping it produces cascade "Not connected" failures on the next dispatch.
|
|
@@ -51,6 +51,9 @@ AI-powered job search pipeline: scans portals, evaluates offers, generates CVs v
|
|
|
51
51
|
- [D6] Pick the mode from the **Routing** table below AND name it explicitly in your first response (e.g., "running auto-pipeline mode", "this is a `compare` request"). If no row matches the user's intent, ask which mode fits; do not guess.
|
|
52
52
|
why: silent mode picks mis-route work (a "negotiation" question answered in `offer` mode produces the wrong report shape); naming the mode out loud makes the routing decision reviewable and gives downstream dispatches a reliable anchor
|
|
53
53
|
|
|
54
|
+
- [D7] For standalone `batch` runs, prefer `batch/batch-runner.sh` instead of hand-rolling the loop. It delegates to `@razroo/iso-orchestrator`, persists workflow records in `.jobforge-runs/`, caps bundle fan-out, and mutexes state/report-number writes. Use `JOBFORGE_LEGACY_BATCH_RUNNER=1` only as a fallback.
|
|
55
|
+
why: the old Bash loop encoded resumability and parallelism manually; the iso-orchestrator path makes the durable control state inspectable and prevents report-number collisions under parallel bundles
|
|
56
|
+
|
|
54
57
|
## Procedure
|
|
55
58
|
|
|
56
59
|
1. Check `cv.md`, `profile.yml`, and `portals.yml`; onboard if any file is missing.
|
package/CLAUDE.md
CHANGED
|
@@ -7,7 +7,7 @@ AI-powered job search pipeline: scans portals, evaluates offers, generates CVs v
|
|
|
7
7
|
- [H1] Max 2 parallel `task` dispatches per message. For N jobs, run `ceil(N/2)` sequential rounds of 2. A round is not complete until both subagents return a final outcome (`APPLIED`, `APPLY FAILED`, `SKIP`, `Discarded`, or a written TSV path). A `task` tool result that only gives a session id / title is a launch acknowledgement, not completion. Applies in all modes, for all user phrasings ("urgent", "apply to 10 jobs now").
|
|
8
8
|
why: each subagent requires post-cleanup and racing more than 2 reliably loses at least one result. On 2026-04-25 the orchestrator launched round 2 while round 1 had only returned task ids, leaving four application subagents in flight and losing two provider recoveries
|
|
9
9
|
|
|
10
|
-
- [H2] Max 1 application per company+role. Before every `apply` dispatch, grep all four sources for the URL and for `company+role`: `data/pipeline.md`, all `data/applications/*.md` day files, `batch/tracker-additions/*.tsv`, `batch/tracker-additions/merged/*.tsv`. If any source shows APPLIED / Applied, skip the dispatch.
|
|
10
|
+
- [H2] Max 1 application per company+role. Before every `apply` dispatch, grep all four sources for the URL and for `company+role`: `data/pipeline.md`, all `data/applications/*.md` day files, `batch/tracker-additions/*.tsv`, `batch/tracker-additions/merged/*.tsv`. If any source shows APPLIED / Applied, skip the dispatch and pick a replacement from the remaining candidate list. Do not count duplicates toward a requested "apply to N jobs" total, and do not delegate obvious duplicates just so a subagent can return SKIP.
|
|
11
11
|
why: 2026-04 same-day batch collision — when two batches target the same role, `npx job-forge merge` updates the existing day-file row rather than appending, so grepping day files alone misses earlier-batch applies; merged/*.tsv is the only place the breadcrumb remains
|
|
12
12
|
|
|
13
13
|
- [H3] Before every batch of `task` dispatches that will use Geometra, call `geometra_list_sessions` then `geometra_disconnect({closeBrowser: true})`. Every round, no exceptions. Name this cleanup as an explicit "step 0" in your first-response plan for any multi-apply request — it is the most frequently skipped guardrail in practice, and skipping it produces cascade "Not connected" failures on the next dispatch.
|
|
@@ -51,6 +51,9 @@ AI-powered job search pipeline: scans portals, evaluates offers, generates CVs v
|
|
|
51
51
|
- [D6] Pick the mode from the **Routing** table below AND name it explicitly in your first response (e.g., "running auto-pipeline mode", "this is a `compare` request"). If no row matches the user's intent, ask which mode fits; do not guess.
|
|
52
52
|
why: silent mode picks mis-route work (a "negotiation" question answered in `offer` mode produces the wrong report shape); naming the mode out loud makes the routing decision reviewable and gives downstream dispatches a reliable anchor
|
|
53
53
|
|
|
54
|
+
- [D7] For standalone `batch` runs, prefer `batch/batch-runner.sh` instead of hand-rolling the loop. It delegates to `@razroo/iso-orchestrator`, persists workflow records in `.jobforge-runs/`, caps bundle fan-out, and mutexes state/report-number writes. Use `JOBFORGE_LEGACY_BATCH_RUNNER=1` only as a fallback.
|
|
55
|
+
why: the old Bash loop encoded resumability and parallelism manually; the iso-orchestrator path makes the durable control state inspectable and prevents report-number collisions under parallel bundles
|
|
56
|
+
|
|
54
57
|
## Procedure
|
|
55
58
|
|
|
56
59
|
1. Check `cv.md`, `profile.yml`, and `portals.yml`; onboard if any file is missing.
|
package/README.md
CHANGED
|
@@ -29,7 +29,7 @@ The scaffolded `opencode.json` already has three MCPs wired up — they launch a
|
|
|
29
29
|
|
|
30
30
|
- **Geometra** — browser automation + PDF generation
|
|
31
31
|
- **Gmail** — reads replies from recruiters
|
|
32
|
-
- **state-trace** — typed working memory for cross-session context (resumed batches, recent decisions, repeated portal quirks).
|
|
32
|
+
- **state-trace** — typed working memory for cross-session context (resumed batches, recent decisions, repeated portal quirks). Install once with `python3 -m pip install "state-trace[mcp]"`; the MCP command is `state-trace-mcp`.
|
|
33
33
|
|
|
34
34
|
`npm install` also materializes symlinks for every supported agent harness — OpenCode, Cursor, Claude Code, and Codex — so you can run `opencode`, `cursor`, `claude`, or `codex` in the same project and each picks up the shared MCP config and instructions.
|
|
35
35
|
|
|
@@ -73,6 +73,7 @@ JobForge turns opencode into a full job search command center. Instead of manual
|
|
|
73
73
|
| **Smart LinkedIn Outreach** | Reads evaluation reports to craft targeted messages using top proof points |
|
|
74
74
|
| **Portal Scanner** | 45+ companies pre-configured with fuzzy dedup for reposts |
|
|
75
75
|
| **Batch Processing** | Parallel evaluation with `opencode run` workers, with honest verification flagging |
|
|
76
|
+
| **Durable Batch Orchestration** | `batch-runner.sh` uses `@razroo/iso-orchestrator` for resumable bundle execution, bounded fan-out, mutexed state writes, and workflow records in `.jobforge-runs/`. |
|
|
76
77
|
| **Pipeline Integrity** | Automated merge, dedup, status normalization, health checks |
|
|
77
78
|
| **Cost-Aware Agent Routing** | Three subagents (`@general-free`, `@general-paid`, `@glm-minimal`) with per-task tool surfaces. On OpenCode, JobForge pins all tiers to `opencode-go/deepseek-v4-flash` so application runs avoid overloaded free-model pools. See [Subagent Routing in AGENTS.md](AGENTS.md) for the task-to-agent mapping. |
|
|
78
79
|
| **Trace + Telemetry** | `job-forge trace:*` exposes local OpenCode transcripts, and `job-forge telemetry:*` summarizes runs, child outcomes, provider errors, and pending tracker TSVs. |
|
|
@@ -144,6 +145,7 @@ my-search/
|
|
|
144
145
|
├── data/ # applications, pipeline, scan history (personal, gitignored)
|
|
145
146
|
├── reports/ # generated evaluation reports (personal, gitignored)
|
|
146
147
|
├── batch/{batch-input,batch-state}.tsv, tracker-additions/, logs/ # personal
|
|
148
|
+
├── .jobforge-runs/ # durable batch workflow records (generated)
|
|
147
149
|
├── AGENTS.md # personal overrides (opencode + codex)
|
|
148
150
|
├── CLAUDE.md # personal overrides (Claude Code), @-imports CLAUDE.harness.md
|
|
149
151
|
│
|
|
@@ -187,6 +189,7 @@ JobForge/
|
|
|
187
189
|
├── config/profile.example.yml # template for consumer's profile.yml
|
|
188
190
|
├── batch/{batch-prompt.md,batch-runner.sh} # batch orchestrator
|
|
189
191
|
├── scripts/
|
|
192
|
+
│ ├── batch-orchestrator.mjs # iso-orchestrator-backed batch control loop
|
|
190
193
|
│ ├── token-usage-report.mjs # opencode cost analyzer
|
|
191
194
|
│ └── release/check-source.mjs # version gate for npm publish
|
|
192
195
|
├── tracker-lib.mjs / merge-tracker.mjs / dedup-tracker.mjs / verify-pipeline.mjs
|
package/batch/README.md
CHANGED
|
@@ -6,13 +6,20 @@ The `batch/` folder holds the **parallel batch runner** for processing 10+ job U
|
|
|
6
6
|
|
|
7
7
|
| Path | Role |
|
|
8
8
|
|------|------|
|
|
9
|
-
| `batch-runner.sh` |
|
|
9
|
+
| `batch-runner.sh` | Compatibility entrypoint; delegates to the durable Node orchestrator by default |
|
|
10
10
|
| `batch-prompt.md` | Prompt template passed to each worker (keep evaluation and scoring instructions aligned with the canonical model in [`modes/_shared.md`](../modes/_shared.md) so batch scores match single-offer runs) |
|
|
11
11
|
| `README.md` | This file |
|
|
12
12
|
|
|
13
13
|
## Local-only files (gitignored when present)
|
|
14
14
|
|
|
15
|
-
Per [`.gitignore`](../.gitignore): `batch-input.tsv`, `batch-state.tsv`, `logs/*`,
|
|
15
|
+
Per [`.gitignore`](../.gitignore): `batch-input.tsv`, `batch-state.tsv`, `logs/*`, `tracker-additions/*.tsv`, and `.jobforge-runs/`. Empty dirs (`logs/`, `tracker-additions/`) use `.gitkeep` so the tree exists in a fresh clone.
|
|
16
|
+
|
|
17
|
+
The default runner uses `@razroo/iso-orchestrator` through
|
|
18
|
+
`scripts/batch-orchestrator.mjs`. It persists bundle steps and events in
|
|
19
|
+
`.jobforge-runs/`, caps worker fan-out with `workflow.forEach`, and serializes
|
|
20
|
+
state/report-number writes while parallel bundles run. Use
|
|
21
|
+
`JOBFORGE_LEGACY_BATCH_RUNNER=1 ./batch/batch-runner.sh` only to fall back to
|
|
22
|
+
the old shell loop.
|
|
16
23
|
|
|
17
24
|
## Input: `batch-input.tsv`
|
|
18
25
|
|
package/batch/batch-runner.sh
CHANGED
|
@@ -6,8 +6,24 @@ set -euo pipefail
|
|
|
6
6
|
# tracks state in batch-state.tsv for resumability.
|
|
7
7
|
|
|
8
8
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
9
|
-
PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
|
|
10
|
-
|
|
9
|
+
PROJECT_DIR="${JOB_FORGE_PROJECT:-$(cd "$SCRIPT_DIR/.." && pwd)}"
|
|
10
|
+
|
|
11
|
+
# Default path: delegate to the durable Node orchestrator. Keep the legacy
|
|
12
|
+
# shell implementation below as an escape hatch while the new runner settles.
|
|
13
|
+
SOURCE="${BASH_SOURCE[0]}"
|
|
14
|
+
while [[ -L "$SOURCE" ]]; do
|
|
15
|
+
SOURCE_DIR="$(cd -P "$(dirname "$SOURCE")" && pwd)"
|
|
16
|
+
SOURCE="$(readlink "$SOURCE")"
|
|
17
|
+
[[ "$SOURCE" != /* ]] && SOURCE="$SOURCE_DIR/$SOURCE"
|
|
18
|
+
done
|
|
19
|
+
HARNESS_BATCH_DIR="$(cd -P "$(dirname "$SOURCE")" && pwd)"
|
|
20
|
+
HARNESS_DIR="$(cd "$HARNESS_BATCH_DIR/.." && pwd)"
|
|
21
|
+
if [[ "${JOBFORGE_LEGACY_BATCH_RUNNER:-}" != "1" && -f "$HARNESS_DIR/scripts/batch-orchestrator.mjs" ]]; then
|
|
22
|
+
export JOB_FORGE_PROJECT="$PROJECT_DIR"
|
|
23
|
+
exec node "$HARNESS_DIR/scripts/batch-orchestrator.mjs" "$@"
|
|
24
|
+
fi
|
|
25
|
+
|
|
26
|
+
BATCH_DIR="$PROJECT_DIR/batch"
|
|
11
27
|
INPUT_FILE="$BATCH_DIR/batch-input.tsv"
|
|
12
28
|
STATE_FILE="$BATCH_DIR/batch-state.tsv"
|
|
13
29
|
PROMPT_FILE="$BATCH_DIR/batch-prompt.md"
|
package/docs/ARCHITECTURE.md
CHANGED
|
@@ -131,11 +131,11 @@ For customization (archetypes, weights, tone), start with `_shared.md` and [CUST
|
|
|
131
131
|
The batch system processes multiple offers in parallel:
|
|
132
132
|
|
|
133
133
|
```
|
|
134
|
-
batch-input.tsv
|
|
135
|
-
(id, url, source, notes) (orchestrator)
|
|
136
|
-
|
|
137
|
-
batch-state.tsv
|
|
138
|
-
(
|
|
134
|
+
batch-input.tsv -> batch-runner.sh -> N x opencode run workers
|
|
135
|
+
(id, url, source, notes) (iso-orchestrator) (self-contained prompt)
|
|
136
|
+
|
|
|
137
|
+
batch-state.tsv + .jobforge-runs/
|
|
138
|
+
(progress + durable workflow record)
|
|
139
139
|
```
|
|
140
140
|
|
|
141
141
|
Each worker is a headless opencode instance (`opencode run`) that receives the full `batch-prompt.md` as context. Workers produce:
|
|
@@ -143,9 +143,13 @@ Each worker is a headless opencode instance (`opencode run`) that receives the f
|
|
|
143
143
|
- PDF
|
|
144
144
|
- Tracker TSV line
|
|
145
145
|
|
|
146
|
-
The orchestrator manages parallelism, state, retries, and resume.
|
|
146
|
+
The orchestrator manages parallelism, state, retries, and resume. The default
|
|
147
|
+
runner delegates to `scripts/batch-orchestrator.mjs`, which uses
|
|
148
|
+
`@razroo/iso-orchestrator` for bounded bundle fan-out, idempotent bundle steps,
|
|
149
|
+
and mutexed report-number/state writes. Set `JOBFORGE_LEGACY_BATCH_RUNNER=1`
|
|
150
|
+
only if you need the old shell loop.
|
|
147
151
|
|
|
148
|
-
**Local batch artifacts:** `batch/batch-input.tsv`, `batch/batch-state.tsv`, `batch/logs/`,
|
|
152
|
+
**Local batch artifacts:** `batch/batch-input.tsv`, `batch/batch-state.tsv`, `batch/logs/`, `batch/tracker-additions/*.tsv`, and `.jobforge-runs/` are created when you run the runner; they are gitignored (with `.gitkeep` in `batch/logs/` and `batch/tracker-additions/`). A fresh clone ships `batch/batch-runner.sh` and `batch/batch-prompt.md` only until you add an input file — see [`batch/README.md`](../batch/README.md) and `batch/batch-runner.sh --help` for the TSV layout and workflow.
|
|
149
153
|
|
|
150
154
|
## Data Flow
|
|
151
155
|
|
package/docs/SETUP.md
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
## Prerequisites
|
|
4
4
|
|
|
5
5
|
- [opencode](https://opencode.ai) installed and configured
|
|
6
|
-
- Node.js
|
|
6
|
+
- Node.js 20.6+ (for the CLI, PDF generation, tracker scripts, and durable batch orchestration)
|
|
7
7
|
- [`uv`](https://docs.astral.sh/uv/) installed (`brew install uv` on macOS, or `pipx install uv`). Used by the state-trace MCP to spawn its Python entry point on demand via `uvx`. Without `uv`, the state-trace MCP fails to start; the rest of JobForge keeps working.
|
|
8
8
|
- (Optional) Go (for the dashboard TUI) — use a toolchain that satisfies the `go` directive in [`dashboard/go.mod`](../dashboard/go.mod)
|
|
9
9
|
|
package/iso/commands/job-forge.md
CHANGED
|
@@ -140,13 +140,18 @@ When the user says "apply to N jobs", "process the pipeline", or similar, execut
|
|
|
140
140
|
|
|
141
141
|
```
|
|
142
142
|
Step 1 — Enumerate candidates
|
|
143
|
-
- Grep data/applications
|
|
143
|
+
- Grep data/applications/*.md for status "Evaluated" without loading every file into context
|
|
144
144
|
- Also read data/pipeline.md for unprocessed URLs
|
|
145
145
|
- Build ordered list: candidates = [job_1, job_2, ..., job_N]
|
|
146
146
|
|
|
147
147
|
Step 2 — Dedup against already-applied
|
|
148
|
-
- For each candidate,
|
|
149
|
-
|
|
148
|
+
- For each candidate, grep all four sources for URL and company+role:
|
|
149
|
+
data/pipeline.md, data/applications/*.md, batch/tracker-additions/*.tsv,
|
|
150
|
+
batch/tracker-additions/merged/*.tsv
|
|
151
|
+
- Drop any APPLIED / Applied match before counting toward N. Never re-apply.
|
|
152
|
+
- If a subagent later returns SKIP because it found a duplicate, treat that as
|
|
153
|
+
a missed preflight check; finish the current round, re-run dedupe, then pick
|
|
154
|
+
a replacement from the remaining candidates.
|
|
150
155
|
|
|
151
156
|
Step 3 — Pre-flight cleanup (once, before the loop)
|
|
152
157
|
- geometra_list_sessions()
|
package/iso/instructions.md
CHANGED
|
@@ -7,7 +7,7 @@ AI-powered job search pipeline: scans portals, evaluates offers, generates CVs v
|
|
|
7
7
|
- [H1] Max 2 parallel `task` dispatches per message. For N jobs, run `ceil(N/2)` sequential rounds of 2. A round is not complete until both subagents return a final outcome (`APPLIED`, `APPLY FAILED`, `SKIP`, `Discarded`, or a written TSV path). A `task` tool result that only gives a session id / title is a launch acknowledgement, not completion. Applies in all modes, for all user phrasings ("urgent", "apply to 10 jobs now").
|
|
8
8
|
why: each subagent requires post-cleanup and racing more than 2 reliably loses at least one result. On 2026-04-25 the orchestrator launched round 2 while round 1 had only returned task ids, leaving four application subagents in flight and losing two provider recoveries
|
|
9
9
|
|
|
10
|
-
- [H2] Max 1 application per company+role. Before every `apply` dispatch, grep all four sources for the URL and for `company+role`: `data/pipeline.md`, all `data/applications/*.md` day files, `batch/tracker-additions/*.tsv`, `batch/tracker-additions/merged/*.tsv`. If any source shows APPLIED / Applied, skip the dispatch.
|
|
10
|
+
- [H2] Max 1 application per company+role. Before every `apply` dispatch, grep all four sources for the URL and for `company+role`: `data/pipeline.md`, all `data/applications/*.md` day files, `batch/tracker-additions/*.tsv`, `batch/tracker-additions/merged/*.tsv`. If any source shows APPLIED / Applied, skip the dispatch and pick a replacement from the remaining candidate list. Do not count duplicates toward a requested "apply to N jobs" total, and do not delegate obvious duplicates just so a subagent can return SKIP.
|
|
11
11
|
why: 2026-04 same-day batch collision — when two batches target the same role, `npx job-forge merge` updates the existing day-file row rather than appending, so grepping day files alone misses earlier-batch applies; merged/*.tsv is the only place the breadcrumb remains
|
|
12
12
|
|
|
13
13
|
- [H3] Before every batch of `task` dispatches that will use Geometra, call `geometra_list_sessions` then `geometra_disconnect({closeBrowser: true})`. Every round, no exceptions. Name this cleanup as an explicit "step 0" in your first-response plan for any multi-apply request — it is the most frequently skipped guardrail in practice, and skipping it produces cascade "Not connected" failures on the next dispatch.
|
|
@@ -51,6 +51,9 @@ AI-powered job search pipeline: scans portals, evaluates offers, generates CVs v
|
|
|
51
51
|
- [D6] Pick the mode from the **Routing** table below AND name it explicitly in your first response (e.g., "running auto-pipeline mode", "this is a `compare` request"). If no row matches the user's intent, ask which mode fits; do not guess.
|
|
52
52
|
why: silent mode picks mis-route work (a "negotiation" question answered in `offer` mode produces the wrong report shape); naming the mode out loud makes the routing decision reviewable and gives downstream dispatches a reliable anchor
|
|
53
53
|
|
|
54
|
+
- [D7] For standalone `batch` runs, prefer `batch/batch-runner.sh` instead of hand-rolling the loop. It delegates to `@razroo/iso-orchestrator`, persists workflow records in `.jobforge-runs/`, caps bundle fan-out, and mutexes state/report-number writes. Use `JOBFORGE_LEGACY_BATCH_RUNNER=1` only as a fallback.
|
|
55
|
+
why: the old Bash loop encoded resumability and parallelism manually; the iso-orchestrator path makes the durable control state inspectable and prevents report-number collisions under parallel bundles
|
|
56
|
+
|
|
54
57
|
## Procedure
|
|
55
58
|
|
|
56
59
|
1. Check `cv.md`, `profile.yml`, and `portals.yml`; onboard if any file is missing.
|
package/iso/mcp.json
CHANGED
package/modes/apply.md
CHANGED
|
@@ -176,7 +176,10 @@ When `location_constraints` is absent, use the prose fields:
|
|
|
176
176
|
|
|
177
177
|
```
|
|
178
178
|
Step 1 — Build the job list (N items)
|
|
179
|
-
Step 2 — Dedup:
|
|
179
|
+
Step 2 — Dedup: for each candidate, grep all four sources for the URL and for company+role:
|
|
180
|
+
data/pipeline.md, all data/applications/*.md day files,
|
|
181
|
+
batch/tracker-additions/*.tsv, batch/tracker-additions/merged/*.tsv.
|
|
182
|
+
Drop any already APPLIED before counting toward N; pick replacements from the remaining list.
|
|
180
183
|
Step 3 — geometra_list_sessions() + geometra_disconnect({closeBrowser: true}) [once, before loop]
|
|
181
184
|
Step 4 — For round in ceil(N/2):
|
|
182
185
|
pair = jobs[round*2 : round*2 + 2]
|
|
@@ -192,7 +195,7 @@ Step 6 — Reconcile outcomes (Hard Limit #6):
|
|
|
192
195
|
Step 7 — Summarize outcomes; do NOT auto-retry failures.
|
|
193
196
|
```
|
|
194
197
|
|
|
195
|
-
If a subagent fails, report it in the summary and let the user decide whether to retry. Never auto-retry — re-running a submit step risks duplicate applications.
|
|
198
|
+
If a subagent fails, report it in the summary and let the user decide whether to retry. Never auto-retry — re-running a submit step risks duplicate applications. If a subagent returns SKIP because it discovered a duplicate, treat that as a missed preflight check: finish the current round, then choose a replacement candidate only after re-running dedupe against all four sources.
|
|
196
199
|
|
|
197
200
|
**Outcome routing (Hard Limit #6 in `AGENTS.md`):**
|
|
198
201
|
- Subagents write `batch/tracker-additions/{num}-{slug}.tsv` — one TSV per job.
|
package/modes/batch.md
CHANGED
|
@@ -30,6 +30,7 @@ Each worker is a child `opencode run` with a clean 200K token context. The condu
|
|
|
30
30
|
## Read These Files
|
|
31
31
|
|
|
32
32
|
```
|
|
33
|
+
.jobforge-runs/ # Durable iso-orchestrator records (gitignored)
|
|
33
34
|
batch/
|
|
34
35
|
batch-input.tsv # URLs (from conductor or manual)
|
|
35
36
|
batch-state.tsv # Progress (auto-generated, gitignored)
|
|
@@ -66,12 +67,19 @@ d. Execute via Bash:
|
|
|
66
67
|
batch/batch-runner.sh [OPTIONS]
|
|
67
68
|
```
|
|
68
69
|
|
|
70
|
+
`batch-runner.sh` delegates to `scripts/batch-orchestrator.mjs` by default.
|
|
71
|
+
That Node runner uses `@razroo/iso-orchestrator` to persist workflow records in
|
|
72
|
+
`.jobforge-runs/`, cap bundle fan-out with `workflow.forEach`, and serialize
|
|
73
|
+
report-number/state writes while workers run in parallel. If a regression
|
|
74
|
+
requires the old shell loop, run with `JOBFORGE_LEGACY_BATCH_RUNNER=1`.
|
|
75
|
+
|
|
69
76
|
Options:
|
|
70
77
|
- `--dry-run` — list pending without executing
|
|
71
78
|
- `--retry-failed` — only retry failed ones
|
|
72
79
|
- `--start-from N` — start from ID N
|
|
73
80
|
- `--parallel N` — N workers in parallel
|
|
74
81
|
- `--max-retries N` — attempts per offer (default: 2)
|
|
82
|
+
- `--workflow-id ID` — durable workflow id (default: `jobforge-batch`)
|
|
75
83
|
|
|
76
84
|
## Read batch-state.tsv Format
|
|
77
85
|
|
|
@@ -85,6 +93,7 @@ id url status started_at completed_at report_num score error retries
|
|
|
85
93
|
## Use Resumability
|
|
86
94
|
|
|
87
95
|
- If it dies → re-run → reads `batch-state.tsv` → skips completed
|
|
96
|
+
- `.jobforge-runs/` keeps the durable run record, step outcomes, and bundle events
|
|
88
97
|
- Lock file (`batch-runner.pid`) prevents double execution
|
|
89
98
|
- Each worker is independent: failure on offer #47 does not affect the rest
|
|
90
99
|
|
package/opencode.json
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "job-forge",
|
|
3
|
-
"version": "2.14.12",
|
|
3
|
+
"version": "2.14.14",
|
|
4
4
|
"description": "AI-powered job search pipeline built on opencode",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -86,9 +86,10 @@
|
|
|
86
86
|
},
|
|
87
87
|
"license": "MIT",
|
|
88
88
|
"engines": {
|
|
89
|
-
"node": ">=
|
|
89
|
+
"node": ">=20.6.0"
|
|
90
90
|
},
|
|
91
91
|
"dependencies": {
|
|
92
|
+
"@razroo/iso-orchestrator": "^0.1.0",
|
|
92
93
|
"@razroo/iso-trace": "^0.4.0",
|
|
93
94
|
"playwright": "^1.58.1"
|
|
94
95
|
},
|