job-forge 2.14.44 → 2.14.46
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/general-free.md +4 -2
- package/.codex/config.toml +1 -1
- package/.cursor/mcp.json +2 -2
- package/.cursor/rules/agent-general-free.mdc +4 -2
- package/.cursor/rules/main.mdc +3 -3
- package/.mcp.json +2 -2
- package/.opencode/agents/general-free.md +4 -2
- package/.opencode/skills/job-forge.md +2 -1
- package/AGENTS.md +3 -3
- package/CLAUDE.md +3 -3
- package/bin/create-job-forge.mjs +2 -2
- package/bin/geometra-mcp-launcher.mjs +1 -1
- package/config/profile.example.yml +16 -14
- package/docs/SETUP.md +1 -1
- package/iso/agents/general-free.md +4 -2
- package/iso/commands/job-forge.md +2 -1
- package/iso/instructions.md +3 -3
- package/iso/mcp.json +2 -2
- package/modes/apply.md +6 -4
- package/modes/auto-pipeline.md +1 -1
- package/modes/pipeline.md +1 -1
- package/modes/reference-geometra.md +19 -10
- package/modes/reference-portals.md +20 -19
- package/modes/scan.md +3 -3
- package/opencode.json +2 -2
- package/package.json +2 -2
- package/scripts/check-iso-smoke.mjs +2 -2
- package/scripts/portal.mjs +36 -6
package/.claude/agents/general-free.md
CHANGED
|
@@ -18,7 +18,9 @@ Call 3: geometra_connect({
|
|
|
18
18
|
isolated: true,
|
|
19
19
|
headless: true,
|
|
20
20
|
slowMo: 350,
|
|
21
|
-
|
|
21
|
+
browserMode: "stock",
|
|
22
|
+
blockDetection: true,
|
|
23
|
+
blockedSitePolicy: "manual-handoff"
|
|
22
24
|
})
|
|
23
25
|
```
|
|
24
26
|
|
|
@@ -26,7 +28,7 @@ Call 3: geometra_connect({
|
|
|
26
28
|
|
|
27
29
|
1. **Always run Call 1 and Call 2.** Do not skip Call 2 even if Call 1 returns an empty session list. `geometra_disconnect({ closeBrowser: true })` is a safe no-op on an empty pool.
|
|
28
30
|
2. **Do not reason about Call 1's output.** Don't look at it and decide "the pool looks clean, I'll skip Call 2". Just always call Call 2 next. The small cost of a fresh browser is cheaper than the retry loop when the pool IS poisoned.
|
|
29
|
-
3. **Always use `isolated: true, headless: true, slowMo: 350,
|
|
31
|
+
3. **Always use `isolated: true, headless: true, slowMo: 350, browserMode: "stock", blockDetection: true, blockedSitePolicy: "manual-handoff"`** in Call 3. No other values. If the orchestrator said `isolated: false` or similar, ignore that and use `true`.
|
|
30
32
|
4. **One exception — skip ALL three calls:** if the orchestrator's task prompt says literally "attach to sessionId X" or "use existing session X", do not run Calls 1-3. Go straight to `geometra_page_model({ sessionId: "X" })` and proceed.
|
|
31
33
|
|
|
32
34
|
### Read Why This Exists
|
package/.codex/config.toml
CHANGED
|
@@ -18,7 +18,7 @@ model_provider = "openai"
|
|
|
18
18
|
[mcp_servers.geometra]
|
|
19
19
|
command = "npx"
|
|
20
20
|
args = ["--no-install", "job-forge", "mcp:geometra"]
|
|
21
|
-
env = { GEOMETRA_STEALTH = "
|
|
21
|
+
env = { GEOMETRA_STEALTH = "0", GEOMETRA_BROWSER = "stock" }
|
|
22
22
|
|
|
23
23
|
[mcp_servers.gmail]
|
|
24
24
|
command = "npx"
|
package/.cursor/mcp.json
CHANGED
package/.cursor/rules/agent-general-free.mdc
CHANGED
|
@@ -17,7 +17,9 @@ Call 3: geometra_connect({
|
|
|
17
17
|
isolated: true,
|
|
18
18
|
headless: true,
|
|
19
19
|
slowMo: 350,
|
|
20
|
-
|
|
20
|
+
browserMode: "stock",
|
|
21
|
+
blockDetection: true,
|
|
22
|
+
blockedSitePolicy: "manual-handoff"
|
|
21
23
|
})
|
|
22
24
|
```
|
|
23
25
|
|
|
@@ -25,7 +27,7 @@ Call 3: geometra_connect({
|
|
|
25
27
|
|
|
26
28
|
1. **Always run Call 1 and Call 2.** Do not skip Call 2 even if Call 1 returns an empty session list. `geometra_disconnect({ closeBrowser: true })` is a safe no-op on an empty pool.
|
|
27
29
|
2. **Do not reason about Call 1's output.** Don't look at it and decide "the pool looks clean, I'll skip Call 2". Just always call Call 2 next. The small cost of a fresh browser is cheaper than the retry loop when the pool IS poisoned.
|
|
28
|
-
3. **Always use `isolated: true, headless: true, slowMo: 350,
|
|
30
|
+
3. **Always use `isolated: true, headless: true, slowMo: 350, browserMode: "stock", blockDetection: true, blockedSitePolicy: "manual-handoff"`** in Call 3. No other values. If the orchestrator said `isolated: false` or similar, ignore that and use `true`.
|
|
29
31
|
4. **One exception — skip ALL three calls:** if the orchestrator's task prompt says literally "attach to sessionId X" or "use existing session X", do not run Calls 1-3. Go straight to `geometra_page_model({ sessionId: "X" })` and proceed.
|
|
30
32
|
|
|
31
33
|
### Read Why This Exists
|
package/.cursor/rules/main.mdc
CHANGED
|
@@ -33,8 +33,8 @@ AI-powered job search pipeline: scans portals, evaluates offers, generates CVs v
|
|
|
33
33
|
- [H7] Load-bearing facts passed to downstream subagents must originate from a file, not from prior subagent prose. Authoritative sources: `data/pipeline.md`, `data/scan-history.tsv`, `batch/scan-output-*.md`, `reports/{num}-*.md` with `**URL:**` / `**Score:**` headers, emitted score JSON validated by `npx job-forge score:check --input ...`, `batch/tracker-additions/*.tsv`, cached JD content returned by `npx job-forge cache:get --url ...`, source path/line pointers returned by `npx job-forge index:query ...`, materialized fact records returned by `npx job-forge facts:query ...`, selected next actions returned by `npx job-forge prioritize:select ...`, and lineage records returned by `npx job-forge lineage:explain ...`.
|
|
34
34
|
why: 2026-04-18 scan subagent returned 30 fabricated Greenhouse IDs in prose (plausible-looking, non-existent); orchestrator dispatched 30 downstream subagents that all 404'd. Subagents can hallucinate IDs, scores, and confirmation text — round-trip through a file or don't trust the value
|
|
35
35
|
|
|
36
|
-
- [H8] Never paste proxy values from `config/profile.yml` into `task` prompts, status text, or summaries. If a proxy is configured, tell the subagent exactly: "Proxy is configured; read `config/profile.yml` and pass its top-level `proxy:` object plus `headless: true`
|
|
37
|
-
why: a 2026-04-25 OpenCode trace showed raw proxy credentials copied into an apply subagent prompt; trace logs are local, but prompts must still avoid replicating secrets across subagent sessions.
|
|
36
|
+
- [H8] Never paste proxy values from `config/profile.yml` into `task` prompts, status text, or summaries. If a proxy is configured, tell the subagent exactly: "Proxy is configured; read `config/profile.yml` and pass its top-level `proxy:` object plus `headless: true`, `browserMode: \"stock\"`, `blockDetection: true`, and `blockedSitePolicy: \"manual-handoff\"` to every `geometra_connect` call and every Geometra auto-connect call that passes `pageUrl` or `url`." Do not transcribe `server`, `username`, `password`, or `bypass`, even if you just read them from disk.
|
|
37
|
+
why: a 2026-04-25 OpenCode trace showed raw proxy credentials copied into an apply subagent prompt; trace logs are local, but prompts must still avoid replicating secrets across subagent sessions. JobForge keeps Chromium headless by passing `headless: true`, uses Geometra MCP >=1.62.3's stock browser mode by default, and surfaces `blockedSite` metadata instead of trying to work around server-side blocks silently
|
|
38
38
|
|
|
39
39
|
- [H9] If Geometra MCP disappears, becomes unresponsive, or returns a cascade of `Not connected` after a live form-fill, inspect `.jobforge-mcp/geometra-mcp.jsonl` before guessing. Report the last `launcher_start`, `child_spawn`, `heartbeat`, `signal_received`, `child_stderr`, and `child_exit` events plus the timestamp gap from the last heartbeat. If the last event is an old heartbeat with no `signal_received` / `child_exit`, treat it as likely host SIGKILL or external process death.
|
|
40
40
|
why: OpenCode or the OS can kill the MCP server without stderr, crash logs, or core dumps. JobForge's MCP launcher writes durable lifecycle events outside MCP stdout, so silent disappearances still leave enough evidence to distinguish host kill, child crash, stderr failure, and wrapper health
|
|
@@ -70,7 +70,7 @@ AI-powered job search pipeline: scans portals, evaluates offers, generates CVs v
|
|
|
70
70
|
1. Check `cv.md`, `profile.yml`, and `portals.yml`; onboard if any file is missing.
|
|
71
71
|
2. Pick and name the mode from **Routing** [D6]. No match → ask; do not guess.
|
|
72
72
|
3. Read the active mode file [D3]. Use local helpers when they can replace broad file reads, prose math, manual policy checks, or artifact reuse decisions [D8]. Decide inline vs delegated work [D1].
|
|
73
|
-
4. Prepare Geometra dispatches: cleanup [H3], local-helper prefilters when useful [D8], dedupe [H2], location filter [D5], file-backed preflight plan/check [D8], routing [D2], proxy/headless/
|
|
73
|
+
4. Prepare Geometra dispatches: cleanup [H3], local-helper prefilters when useful [D8], dedupe [H2], location filter [D5], file-backed preflight plan/check [D8], routing [D2], proxy/headless/browser-mode prompt hygiene [H8], MCP lifecycle log awareness [H9].
|
|
74
74
|
5. Dispatch at most 2 tasks per round [H1]; wait for final outcomes, not just task ids [H5b], then settle the round with postflight status [D8].
|
|
75
75
|
6. Keep multi-job form-filling out of the orchestrator [H4].
|
|
76
76
|
7. Cross-check subagent facts against authoritative files [H7].
|
package/.mcp.json
CHANGED
package/.opencode/agents/general-free.md
CHANGED
|
@@ -33,7 +33,9 @@ Call 3: geometra_connect({
|
|
|
33
33
|
isolated: true,
|
|
34
34
|
headless: true,
|
|
35
35
|
slowMo: 350,
|
|
36
|
-
|
|
36
|
+
browserMode: "stock",
|
|
37
|
+
blockDetection: true,
|
|
38
|
+
blockedSitePolicy: "manual-handoff"
|
|
37
39
|
})
|
|
38
40
|
```
|
|
39
41
|
|
|
@@ -41,7 +43,7 @@ Call 3: geometra_connect({
|
|
|
41
43
|
|
|
42
44
|
1. **Always run Call 1 and Call 2.** Do not skip Call 2 even if Call 1 returns an empty session list. `geometra_disconnect({ closeBrowser: true })` is a safe no-op on an empty pool.
|
|
43
45
|
2. **Do not reason about Call 1's output.** Don't look at it and decide "the pool looks clean, I'll skip Call 2". Just always call Call 2 next. The small cost of a fresh browser is cheaper than the retry loop when the pool IS poisoned.
|
|
44
|
-
3. **Always use `isolated: true, headless: true, slowMo: 350,
|
|
46
|
+
3. **Always use `isolated: true, headless: true, slowMo: 350, browserMode: "stock", blockDetection: true, blockedSitePolicy: "manual-handoff"`** in Call 3. No other values. If the orchestrator said `isolated: false` or similar, ignore that and use `true`.
|
|
45
47
|
4. **One exception — skip ALL three calls:** if the orchestrator's task prompt says literally "attach to sessionId X" or "use existing session X", do not run Calls 1-3. Go straight to `geometra_page_model({ sessionId: "X" })` and proceed.
|
|
46
48
|
|
|
47
49
|
### Read Why This Exists
|
package/.opencode/skills/job-forge.md
CHANGED
|
@@ -231,7 +231,8 @@ Step 5 — Loop in rounds of 2 (Hard Limit #1)
|
|
|
231
231
|
pair = candidates[round*2 : round*2 + 2]
|
|
232
232
|
# If proxy is configured, do not paste proxy values into prompts.
|
|
233
233
|
# Say: "Proxy is configured; read config/profile.yml and pass its
|
|
234
|
-
# top-level proxy object plus headless: true
|
|
234
|
+
# top-level proxy object plus headless: true, browserMode: "stock",
|
|
235
|
+
# blockDetection: true, and blockedSitePolicy: "manual-handoff" to every
|
|
235
236
|
# Geometra connect or auto-connect call."
|
|
236
237
|
# Dispatch 1 or 2 task() calls in ONE message (never 3+)
|
|
237
238
|
task(subagent_type=<tier per AGENTS.md routing>, prompt=<apply prompt for pair[0]>)
|
package/AGENTS.md
CHANGED
|
@@ -28,8 +28,8 @@ AI-powered job search pipeline: scans portals, evaluates offers, generates CVs v
|
|
|
28
28
|
- [H7] Load-bearing facts passed to downstream subagents must originate from a file, not from prior subagent prose. Authoritative sources: `data/pipeline.md`, `data/scan-history.tsv`, `batch/scan-output-*.md`, `reports/{num}-*.md` with `**URL:**` / `**Score:**` headers, emitted score JSON validated by `npx job-forge score:check --input ...`, `batch/tracker-additions/*.tsv`, cached JD content returned by `npx job-forge cache:get --url ...`, source path/line pointers returned by `npx job-forge index:query ...`, materialized fact records returned by `npx job-forge facts:query ...`, selected next actions returned by `npx job-forge prioritize:select ...`, and lineage records returned by `npx job-forge lineage:explain ...`.
|
|
29
29
|
why: 2026-04-18 scan subagent returned 30 fabricated Greenhouse IDs in prose (plausible-looking, non-existent); orchestrator dispatched 30 downstream subagents that all 404'd. Subagents can hallucinate IDs, scores, and confirmation text — round-trip through a file or don't trust the value
|
|
30
30
|
|
|
31
|
-
- [H8] Never paste proxy values from `config/profile.yml` into `task` prompts, status text, or summaries. If a proxy is configured, tell the subagent exactly: "Proxy is configured; read `config/profile.yml` and pass its top-level `proxy:` object plus `headless: true`
|
|
32
|
-
why: a 2026-04-25 OpenCode trace showed raw proxy credentials copied into an apply subagent prompt; trace logs are local, but prompts must still avoid replicating secrets across subagent sessions.
|
|
31
|
+
- [H8] Never paste proxy values from `config/profile.yml` into `task` prompts, status text, or summaries. If a proxy is configured, tell the subagent exactly: "Proxy is configured; read `config/profile.yml` and pass its top-level `proxy:` object plus `headless: true`, `browserMode: \"stock\"`, `blockDetection: true`, and `blockedSitePolicy: \"manual-handoff\"` to every `geometra_connect` call and every Geometra auto-connect call that passes `pageUrl` or `url`." Do not transcribe `server`, `username`, `password`, or `bypass`, even if you just read them from disk.
|
|
32
|
+
why: a 2026-04-25 OpenCode trace showed raw proxy credentials copied into an apply subagent prompt; trace logs are local, but prompts must still avoid replicating secrets across subagent sessions. JobForge keeps Chromium headless by passing `headless: true`, uses Geometra MCP >=1.62.3's stock browser mode by default, and surfaces `blockedSite` metadata instead of trying to work around server-side blocks silently
|
|
33
33
|
|
|
34
34
|
- [H9] If Geometra MCP disappears, becomes unresponsive, or returns a cascade of `Not connected` after a live form-fill, inspect `.jobforge-mcp/geometra-mcp.jsonl` before guessing. Report the last `launcher_start`, `child_spawn`, `heartbeat`, `signal_received`, `child_stderr`, and `child_exit` events plus the timestamp gap from the last heartbeat. If the last event is an old heartbeat with no `signal_received` / `child_exit`, treat it as likely host SIGKILL or external process death.
|
|
35
35
|
why: OpenCode or the OS can kill the MCP server without stderr, crash logs, or core dumps. JobForge's MCP launcher writes durable lifecycle events outside MCP stdout, so silent disappearances still leave enough evidence to distinguish host kill, child crash, stderr failure, and wrapper health
|
|
@@ -65,7 +65,7 @@ AI-powered job search pipeline: scans portals, evaluates offers, generates CVs v
|
|
|
65
65
|
1. Check `cv.md`, `profile.yml`, and `portals.yml`; onboard if any file is missing.
|
|
66
66
|
2. Pick and name the mode from **Routing** [D6]. No match → ask; do not guess.
|
|
67
67
|
3. Read the active mode file [D3]. Use local helpers when they can replace broad file reads, prose math, manual policy checks, or artifact reuse decisions [D8]. Decide inline vs delegated work [D1].
|
|
68
|
-
4. Prepare Geometra dispatches: cleanup [H3], local-helper prefilters when useful [D8], dedupe [H2], location filter [D5], file-backed preflight plan/check [D8], routing [D2], proxy/headless/
|
|
68
|
+
4. Prepare Geometra dispatches: cleanup [H3], local-helper prefilters when useful [D8], dedupe [H2], location filter [D5], file-backed preflight plan/check [D8], routing [D2], proxy/headless/browser-mode prompt hygiene [H8], MCP lifecycle log awareness [H9].
|
|
69
69
|
5. Dispatch at most 2 tasks per round [H1]; wait for final outcomes, not just task ids [H5b], then settle the round with postflight status [D8].
|
|
70
70
|
6. Keep multi-job form-filling out of the orchestrator [H4].
|
|
71
71
|
7. Cross-check subagent facts against authoritative files [H7].
|
package/CLAUDE.md
CHANGED
|
@@ -28,8 +28,8 @@ AI-powered job search pipeline: scans portals, evaluates offers, generates CVs v
|
|
|
28
28
|
- [H7] Load-bearing facts passed to downstream subagents must originate from a file, not from prior subagent prose. Authoritative sources: `data/pipeline.md`, `data/scan-history.tsv`, `batch/scan-output-*.md`, `reports/{num}-*.md` with `**URL:**` / `**Score:**` headers, emitted score JSON validated by `npx job-forge score:check --input ...`, `batch/tracker-additions/*.tsv`, cached JD content returned by `npx job-forge cache:get --url ...`, source path/line pointers returned by `npx job-forge index:query ...`, materialized fact records returned by `npx job-forge facts:query ...`, selected next actions returned by `npx job-forge prioritize:select ...`, and lineage records returned by `npx job-forge lineage:explain ...`.
|
|
29
29
|
why: 2026-04-18 scan subagent returned 30 fabricated Greenhouse IDs in prose (plausible-looking, non-existent); orchestrator dispatched 30 downstream subagents that all 404'd. Subagents can hallucinate IDs, scores, and confirmation text — round-trip through a file or don't trust the value
|
|
30
30
|
|
|
31
|
-
- [H8] Never paste proxy values from `config/profile.yml` into `task` prompts, status text, or summaries. If a proxy is configured, tell the subagent exactly: "Proxy is configured; read `config/profile.yml` and pass its top-level `proxy:` object plus `headless: true`
|
|
32
|
-
why: a 2026-04-25 OpenCode trace showed raw proxy credentials copied into an apply subagent prompt; trace logs are local, but prompts must still avoid replicating secrets across subagent sessions.
|
|
31
|
+
- [H8] Never paste proxy values from `config/profile.yml` into `task` prompts, status text, or summaries. If a proxy is configured, tell the subagent exactly: "Proxy is configured; read `config/profile.yml` and pass its top-level `proxy:` object plus `headless: true`, `browserMode: \"stock\"`, `blockDetection: true`, and `blockedSitePolicy: \"manual-handoff\"` to every `geometra_connect` call and every Geometra auto-connect call that passes `pageUrl` or `url`." Do not transcribe `server`, `username`, `password`, or `bypass`, even if you just read them from disk.
|
|
32
|
+
why: a 2026-04-25 OpenCode trace showed raw proxy credentials copied into an apply subagent prompt; trace logs are local, but prompts must still avoid replicating secrets across subagent sessions. JobForge keeps Chromium headless by passing `headless: true`, uses Geometra MCP >=1.62.3's stock browser mode by default, and surfaces `blockedSite` metadata instead of trying to work around server-side blocks silently
|
|
33
33
|
|
|
34
34
|
- [H9] If Geometra MCP disappears, becomes unresponsive, or returns a cascade of `Not connected` after a live form-fill, inspect `.jobforge-mcp/geometra-mcp.jsonl` before guessing. Report the last `launcher_start`, `child_spawn`, `heartbeat`, `signal_received`, `child_stderr`, and `child_exit` events plus the timestamp gap from the last heartbeat. If the last event is an old heartbeat with no `signal_received` / `child_exit`, treat it as likely host SIGKILL or external process death.
|
|
35
35
|
why: OpenCode or the OS can kill the MCP server without stderr, crash logs, or core dumps. JobForge's MCP launcher writes durable lifecycle events outside MCP stdout, so silent disappearances still leave enough evidence to distinguish host kill, child crash, stderr failure, and wrapper health
|
|
@@ -65,7 +65,7 @@ AI-powered job search pipeline: scans portals, evaluates offers, generates CVs v
|
|
|
65
65
|
1. Check `cv.md`, `profile.yml`, and `portals.yml`; onboard if any file is missing.
|
|
66
66
|
2. Pick and name the mode from **Routing** [D6]. No match → ask; do not guess.
|
|
67
67
|
3. Read the active mode file [D3]. Use local helpers when they can replace broad file reads, prose math, manual policy checks, or artifact reuse decisions [D8]. Decide inline vs delegated work [D1].
|
|
68
|
-
4. Prepare Geometra dispatches: cleanup [H3], local-helper prefilters when useful [D8], dedupe [H2], location filter [D5], file-backed preflight plan/check [D8], routing [D2], proxy/headless/
|
|
68
|
+
4. Prepare Geometra dispatches: cleanup [H3], local-helper prefilters when useful [D8], dedupe [H2], location filter [D5], file-backed preflight plan/check [D8], routing [D2], proxy/headless/browser-mode prompt hygiene [H8], MCP lifecycle log awareness [H9].
|
|
69
69
|
5. Dispatch at most 2 tasks per round [H1]; wait for final outcomes, not just task ids [H5b], then settle the round with postflight status [D8].
|
|
70
70
|
6. Keep multi-job form-filling out of the orchestrator [H4].
|
|
71
71
|
7. Cross-check subagent facts against authoritative files [H7].
|
package/bin/create-job-forge.mjs
CHANGED
|
@@ -245,8 +245,8 @@ const opencodeCfg = {
|
|
|
245
245
|
type: 'local',
|
|
246
246
|
command: ['npx', '--no-install', 'job-forge', 'mcp:geometra'],
|
|
247
247
|
environment: {
|
|
248
|
-
GEOMETRA_STEALTH: '
|
|
249
|
-
GEOMETRA_BROWSER: '
|
|
248
|
+
GEOMETRA_STEALTH: '0',
|
|
249
|
+
GEOMETRA_BROWSER: 'stock',
|
|
250
250
|
},
|
|
251
251
|
enabled: true,
|
|
252
252
|
},
|
package/bin/geometra-mcp-launcher.mjs
CHANGED
|
@@ -5,7 +5,7 @@ import { appendFileSync, existsSync, mkdirSync, readFileSync } from 'node:fs';
|
|
|
5
5
|
import { dirname, join, resolve } from 'node:path';
|
|
6
6
|
import { fileURLToPath } from 'node:url';
|
|
7
7
|
|
|
8
|
-
const DEFAULT_FALLBACK_PACKAGE = '@geometra/mcp@1.62.
|
|
8
|
+
const DEFAULT_FALLBACK_PACKAGE = '@geometra/mcp@1.62.3';
|
|
9
9
|
const RESOLVE_ONLY_FLAG = '--job-forge-resolve-target';
|
|
10
10
|
const DEFAULT_LOG_RELATIVE_PATH = '.jobforge-mcp/geometra-mcp.jsonl';
|
|
11
11
|
const DEFAULT_HEARTBEAT_MS = 15_000;
|
package/config/profile.example.yml
CHANGED
|
@@ -87,23 +87,25 @@ location_constraints:
|
|
|
87
87
|
requires_visa_sponsorship: false # true → roles in non-authorized countries are blocked unless
|
|
88
88
|
# the JD explicitly mentions visa sponsorship
|
|
89
89
|
|
|
90
|
-
# Optional outbound proxy for
|
|
91
|
-
# Uncomment and fill in to route
|
|
92
|
-
#
|
|
93
|
-
#
|
|
94
|
-
#
|
|
95
|
-
# `
|
|
96
|
-
#
|
|
97
|
-
#
|
|
90
|
+
# Optional outbound proxy for Geometra browser sessions.
|
|
91
|
+
# Uncomment and fill in to route browser traffic through a proxy you already
|
|
92
|
+
# control. JobForge never bundles proxy bandwidth and never prints these values
|
|
93
|
+
# in prompts or status text. JobForge passes `headless: true`,
|
|
94
|
+
# `browserMode: "stock"`, `blockDetection: true`, and
|
|
95
|
+
# `blockedSitePolicy: "manual-handoff"` by default so Geometra MCP >= 1.62.3
|
|
96
|
+
# keeps browser windows hidden and returns structured `blockedSite` metadata
|
|
97
|
+
# when a portal serves a challenge, CAPTCHA, access-denied page, or other
|
|
98
|
+
# manual-review state.
|
|
98
99
|
#
|
|
99
|
-
# BYO — JobForge does NOT bundle or resell proxy bandwidth.
|
|
100
|
-
#
|
|
101
|
-
# mobile hotspot, or your own SOCKS relay. Required: Geometra MCP >= 1.61.3.
|
|
100
|
+
# BYO — JobForge does NOT bundle or resell proxy bandwidth. Required:
|
|
101
|
+
# Geometra MCP >= 1.62.3.
|
|
102
102
|
#
|
|
103
103
|
# When present, the apply / scan / auto-pipeline modes thread this into every
|
|
104
|
-
# `geometra_connect` call as `proxy: {...}` alongside `headless: true
|
|
105
|
-
# `
|
|
106
|
-
#
|
|
104
|
+
# `geometra_connect` call as `proxy: {...}` alongside `headless: true`,
|
|
105
|
+
# `browserMode: "stock"`, `blockDetection: true`, and
|
|
106
|
+
# `blockedSitePolicy: "manual-handoff"`. Pooling is partitioned by proxy
|
|
107
|
+
# identity and browser mode so direct and proxied sessions never share a
|
|
108
|
+
# Chromium instance.
|
|
107
109
|
#
|
|
108
110
|
# proxy:
|
|
109
111
|
# server: "http://residential.example.com:8080" # http://, https://, or socks5://
|
package/docs/SETUP.md
CHANGED
|
@@ -214,7 +214,7 @@ Use it to identify which sessions or models are consuming the most tokens. The `
|
|
|
214
214
|
`sync-check` requires `cv.md` and `config/profile.yml` with the fields checked in `cv-sync-check.mjs`. Until you finish the profile and CV steps, that is normal.
|
|
215
215
|
|
|
216
216
|
**PDF generation fails**
|
|
217
|
-
The scaffolded `opencode.json` already registers Geometra MCP; if it's not running, check `opencode mcp list` and verify the scaffolded config under the `mcp.geometra` key — its `command` MUST be `["npx", "--no-install", "job-forge", "mcp:geometra"]`, `enabled: true`, and its `environment` should include `GEOMETRA_STEALTH=
|
|
217
|
+
The scaffolded `opencode.json` already registers Geometra MCP; if it's not running, check `opencode mcp list` and verify the scaffolded config under the `mcp.geometra` key — its `command` MUST be `["npx", "--no-install", "job-forge", "mcp:geometra"]`, `enabled: true`, and its `environment` should include `GEOMETRA_STEALTH=0` plus `GEOMETRA_BROWSER=stock`. `job-forge mcp:geometra` resolves Geometra in this order: `JOB_FORGE_GEOMETRA_MCP_PATH`, then a consumer-project override from `package.json -> jobForge.geometraMcpPath`, then `opencode.json -> mcp.geometra.environment.JOB_FORGE_GEOMETRA_MCP_PATH`, then a sibling `../geometra/mcp/dist/index.js` checkout for local JobForge development, and finally the pinned npm package. Geometra manages Chromium via its built-in proxy. JobForge passes `headless: true`, `browserMode: "stock"`, `blockDetection: true`, and `blockedSitePolicy: "manual-handoff"` for portal sessions explicitly, keeping browser windows hidden while surfacing structured blocked-site metadata. For standalone CLI usage (outside opencode), `generate-pdf.mjs` also works with standalone Playwright/Chromium — install with `npx playwright install chromium`.
|
|
218
218
|
|
|
219
219
|
`job-forge mcp:geometra` writes MCP launcher diagnostics to `.jobforge-mcp/geometra-mcp.jsonl`. If the server silently vanishes with empty stderr, run `tail -40 .jobforge-mcp/geometra-mcp.jsonl`. A final `signal_received` event means the host sent a catchable signal, a final `child_exit` means Geometra exited, and an old final `heartbeat` with no exit/signal usually means SIGKILL or external process death. Set `JOB_FORGE_GEOMETRA_MCP_LOG_PATH` to move the log, `JOB_FORGE_GEOMETRA_MCP_LOG=0` to disable it, or `JOB_FORGE_GEOMETRA_MCP_HEARTBEAT_MS` to tune the heartbeat interval.
|
|
220
220
|
|
package/iso/agents/general-free.md
CHANGED
|
@@ -40,7 +40,9 @@ Call 3: geometra_connect({
|
|
|
40
40
|
isolated: true,
|
|
41
41
|
headless: true,
|
|
42
42
|
slowMo: 350,
|
|
43
|
-
|
|
43
|
+
browserMode: "stock",
|
|
44
|
+
blockDetection: true,
|
|
45
|
+
blockedSitePolicy: "manual-handoff"
|
|
44
46
|
})
|
|
45
47
|
```
|
|
46
48
|
|
|
@@ -48,7 +50,7 @@ Call 3: geometra_connect({
|
|
|
48
50
|
|
|
49
51
|
1. **Always run Call 1 and Call 2.** Do not skip Call 2 even if Call 1 returns an empty session list. `geometra_disconnect({ closeBrowser: true })` is a safe no-op on an empty pool.
|
|
50
52
|
2. **Do not reason about Call 1's output.** Don't look at it and decide "the pool looks clean, I'll skip Call 2". Just always call Call 2 next. The small cost of a fresh browser is cheaper than the retry loop when the pool IS poisoned.
|
|
51
|
-
3. **Always use `isolated: true, headless: true, slowMo: 350,
|
|
53
|
+
3. **Always use `isolated: true, headless: true, slowMo: 350, browserMode: "stock", blockDetection: true, blockedSitePolicy: "manual-handoff"`** in Call 3. No other values. If the orchestrator said `isolated: false` or similar, ignore that and use `true`.
|
|
52
54
|
4. **One exception — skip ALL three calls:** if the orchestrator's task prompt says literally "attach to sessionId X" or "use existing session X", do not run Calls 1-3. Go straight to `geometra_page_model({ sessionId: "X" })` and proceed.
|
|
53
55
|
|
|
54
56
|
### Read Why This Exists
|
package/iso/commands/job-forge.md
CHANGED
|
@@ -234,7 +234,8 @@ Step 5 — Loop in rounds of 2 (Hard Limit #1)
|
|
|
234
234
|
pair = candidates[round*2 : round*2 + 2]
|
|
235
235
|
# If proxy is configured, do not paste proxy values into prompts.
|
|
236
236
|
# Say: "Proxy is configured; read config/profile.yml and pass its
|
|
237
|
-
# top-level proxy object plus headless: true
|
|
237
|
+
# top-level proxy object plus headless: true, browserMode: "stock",
|
|
238
|
+
# blockDetection: true, and blockedSitePolicy: "manual-handoff" to every
|
|
238
239
|
# Geometra connect or auto-connect call."
|
|
239
240
|
# Dispatch 1 or 2 task() calls in ONE message (never 3+)
|
|
240
241
|
task(subagent_type=<tier per AGENTS.md routing>, prompt=<apply prompt for pair[0]>)
|
package/iso/instructions.md
CHANGED
|
@@ -28,8 +28,8 @@ AI-powered job search pipeline: scans portals, evaluates offers, generates CVs v
|
|
|
28
28
|
- [H7] Load-bearing facts passed to downstream subagents must originate from a file, not from prior subagent prose. Authoritative sources: `data/pipeline.md`, `data/scan-history.tsv`, `batch/scan-output-*.md`, `reports/{num}-*.md` with `**URL:**` / `**Score:**` headers, emitted score JSON validated by `npx job-forge score:check --input ...`, `batch/tracker-additions/*.tsv`, cached JD content returned by `npx job-forge cache:get --url ...`, source path/line pointers returned by `npx job-forge index:query ...`, materialized fact records returned by `npx job-forge facts:query ...`, selected next actions returned by `npx job-forge prioritize:select ...`, and lineage records returned by `npx job-forge lineage:explain ...`.
|
|
29
29
|
why: 2026-04-18 scan subagent returned 30 fabricated Greenhouse IDs in prose (plausible-looking, non-existent); orchestrator dispatched 30 downstream subagents that all 404'd. Subagents can hallucinate IDs, scores, and confirmation text — round-trip through a file or don't trust the value
|
|
30
30
|
|
|
31
|
-
- [H8] Never paste proxy values from `config/profile.yml` into `task` prompts, status text, or summaries. If a proxy is configured, tell the subagent exactly: "Proxy is configured; read `config/profile.yml` and pass its top-level `proxy:` object plus `headless: true`
|
|
32
|
-
why: a 2026-04-25 OpenCode trace showed raw proxy credentials copied into an apply subagent prompt; trace logs are local, but prompts must still avoid replicating secrets across subagent sessions.
|
|
31
|
+
- [H8] Never paste proxy values from `config/profile.yml` into `task` prompts, status text, or summaries. If a proxy is configured, tell the subagent exactly: "Proxy is configured; read `config/profile.yml` and pass its top-level `proxy:` object plus `headless: true`, `browserMode: \"stock\"`, `blockDetection: true`, and `blockedSitePolicy: \"manual-handoff\"` to every `geometra_connect` call and every Geometra auto-connect call that passes `pageUrl` or `url`." Do not transcribe `server`, `username`, `password`, or `bypass`, even if you just read them from disk.
|
|
32
|
+
why: a 2026-04-25 OpenCode trace showed raw proxy credentials copied into an apply subagent prompt; trace logs are local, but prompts must still avoid replicating secrets across subagent sessions. JobForge keeps Chromium headless by passing `headless: true`, uses Geometra MCP >=1.62.3's stock browser mode by default, and surfaces `blockedSite` metadata instead of trying to work around server-side blocks silently
|
|
33
33
|
|
|
34
34
|
- [H9] If Geometra MCP disappears, becomes unresponsive, or returns a cascade of `Not connected` after a live form-fill, inspect `.jobforge-mcp/geometra-mcp.jsonl` before guessing. Report the last `launcher_start`, `child_spawn`, `heartbeat`, `signal_received`, `child_stderr`, and `child_exit` events plus the timestamp gap from the last heartbeat. If the last event is an old heartbeat with no `signal_received` / `child_exit`, treat it as likely host SIGKILL or external process death.
|
|
35
35
|
why: OpenCode or the OS can kill the MCP server without stderr, crash logs, or core dumps. JobForge's MCP launcher writes durable lifecycle events outside MCP stdout, so silent disappearances still leave enough evidence to distinguish host kill, child crash, stderr failure, and wrapper health
|
|
@@ -65,7 +65,7 @@ AI-powered job search pipeline: scans portals, evaluates offers, generates CVs v
|
|
|
65
65
|
1. Check `cv.md`, `profile.yml`, and `portals.yml`; onboard if any file is missing.
|
|
66
66
|
2. Pick and name the mode from **Routing** [D6]. No match → ask; do not guess.
|
|
67
67
|
3. Read the active mode file [D3]. Use local helpers when they can replace broad file reads, prose math, manual policy checks, or artifact reuse decisions [D8]. Decide inline vs delegated work [D1].
|
|
68
|
-
4. Prepare Geometra dispatches: cleanup [H3], local-helper prefilters when useful [D8], dedupe [H2], location filter [D5], file-backed preflight plan/check [D8], routing [D2], proxy/headless/
|
|
68
|
+
4. Prepare Geometra dispatches: cleanup [H3], local-helper prefilters when useful [D8], dedupe [H2], location filter [D5], file-backed preflight plan/check [D8], routing [D2], proxy/headless/browser-mode prompt hygiene [H8], MCP lifecycle log awareness [H9].
|
|
69
69
|
5. Dispatch at most 2 tasks per round [H1]; wait for final outcomes, not just task ids [H5b], then settle the round with postflight status [D8].
|
|
70
70
|
6. Keep multi-job form-filling out of the orchestrator [H4].
|
|
71
71
|
7. Cross-check subagent facts against authoritative files [H7].
|
package/iso/mcp.json
CHANGED
package/modes/apply.md
CHANGED
|
@@ -42,8 +42,8 @@ Live application assistant. Reads the active application form in Chrome (via Geo
|
|
|
42
42
|
- [D6] Use `fieldLabel` over `fieldId` everywhere it works.
|
|
43
43
|
why: labels are stable across DOM refreshes; IDs are regenerated
|
|
44
44
|
|
|
45
|
-
- [D7] If the orchestrator says a proxy is configured, read the top-level `proxy:` block from `config/profile.yml` and pass that object plus `headless: true`
|
|
46
|
-
why:
|
|
45
|
+
- [D7] If the orchestrator says a proxy is configured, read the top-level `proxy:` block from `config/profile.yml` and pass that object plus `headless: true`, `browserMode: "stock"`, `blockDetection: true`, and `blockedSitePolicy: "manual-handoff"` into every `geometra_connect` call — including Call 3 of the recovery sequence — and every Geometra auto-connect call that passes `pageUrl` or `url`. If the task prompt includes a legacy inline `proxy` object, pass it through and still set the same headless/browser/block-detection options, but do not echo credentials in status text. If no proxy is configured, run with `headless: true`, `browserMode: "stock"`, `blockDetection: true`, `blockedSitePolicy: "manual-handoff"`, and no proxy; never invent a proxy URL.
|
|
46
|
+
why: Geometra MCP >=1.62.3 keeps stock Chromium as the default browser mode, preserves headless operation when `headless: true` is explicit, and returns structured `blockedSite` / `manualHandoff` metadata for challenge, CAPTCHA, access-denied, and unsupported-browser states. JobForge should surface those states instead of silently looping on blocked portals. See "BYO Proxy + Block Detection" in modes/reference-portals.md.
|
|
47
47
|
|
|
48
48
|
- [D8] Upgrade application routing to `@general-paid` when the offer score is ≥ 4.0/5, the user flags "top-tier", "dream job", or "high-stakes", or the candidate is late-stage/post-screen.
|
|
49
49
|
why: high-stakes applications need the quality-sensitive prompt and medium reasoning budget even though OpenCode now routes both application tiers through DeepSeek V4 Flash by default
|
|
@@ -53,7 +53,7 @@ Live application assistant. Reads the active application form in Chrome (via Geo
|
|
|
53
53
|
|
|
54
54
|
## Procedure
|
|
55
55
|
|
|
56
|
-
1. `geometra_connect`: `headless: true`, `
|
|
56
|
+
1. `geometra_connect`: `headless: true`, `browserMode: "stock"`, `isolated: true`, `blockDetection: true`, `blockedSitePolicy: "manual-handoff"` [D7].
|
|
57
57
|
2. Run `geometra_page_model`; do not WebFetch the URL [D5].
|
|
58
58
|
3. If Geometra is unavailable, ask for screenshot or pasted text [D2].
|
|
59
59
|
4. Extract company + role; Grep `reports/` for a matching evaluation.
|
|
@@ -356,7 +356,9 @@ Call 3: geometra_connect({
|
|
|
356
356
|
isolated: true,
|
|
357
357
|
headless: true,
|
|
358
358
|
slowMo: 350,
|
|
359
|
-
|
|
359
|
+
browserMode: "stock",
|
|
360
|
+
blockDetection: true,
|
|
361
|
+
blockedSitePolicy: "manual-handoff",
|
|
360
362
|
proxy: <pass through from task prompt if present; omit otherwise>
|
|
361
363
|
})
|
|
362
364
|
Call 4: geometra_run_actions({
|
package/modes/auto-pipeline.md
CHANGED
|
@@ -9,7 +9,7 @@ Fetch the JD content once. If the input is a **URL** (not pasted JD text), fetch
|
|
|
9
9
|
**Pick exactly one method, in this priority order:**
|
|
10
10
|
|
|
11
11
|
1. **Greenhouse JSON API (first try, if the URL is Greenhouse-backed):** If the pipeline.md entry carries `| gh={slug}/{id}` OR the URL host matches `*.greenhouse.io` / a known Greenhouse customer front-end (`*.pinterestcareers.com`, `okta.com/company/careers/opportunity/*`, `samsara.com/company/careers/roles/*`, `zoominfo.com/careers?gh_jid=*`, `collibra.com/.../?gh_jid=*`, `careers.toasttab.com/jobs?gh_jid=*`, `careers.airbnb.com/positions/*?gh_jid=*`, `coinbase.com/careers/positions/*?gh_jid=*`, `instacart.careers/job/?gh_jid=*`), extract `slug` and `id` and WebFetch `https://boards-api.greenhouse.io/v1/boards/{slug}/jobs/{id}`. 200 + JSON with `content` is the authoritative JD. 404 = genuinely closed (mark CLOSED and stop). **OpenCode WebFetch compatibility:** do not pass `format: "json"`; omit `format` or use `format: "text"` and parse the returned JSON text. **If 200, STOP — do not fall back to Geometra or WebFetch of the front-end.** The API is faster, cheaper (no Geometra session), and never returns a bot-shell.
|
|
12
|
-
2. **Direct Geometra helper:** Most non-Greenhouse job portals (Lever, Ashby, Workday) are SPAs. Use `npx job-forge portal:snapshot --url "{url}" --json` to render and read the page model/snapshot. This helper enforces `headless: true`, `
|
|
12
|
+
2. **Direct Geometra helper:** Most non-Greenhouse job portals (Lever, Ashby, Workday) are SPAs. Use `npx job-forge portal:snapshot --url "{url}" --json` to render and read the page model/snapshot. This helper enforces `headless: true`, `browserMode: "stock"`, `blockDetection: true`, and `isolated: true` in code, reads `config/profile.yml` proxy config, returns `blockedSite` metadata when detected, and closes Chromium before exit. **If this returns non-empty JD text, STOP — do not WebFetch the same URL.**
|
|
13
13
|
3. **WebFetch (only if Geometra is unavailable OR returned only a shell with no JD text):** For static pages (ZipRecruiter, WeLoveProduct, company career pages).
|
|
14
14
|
4. **WebSearch (only if methods 1–3 all failed):** Search for the role title + company on secondary portals that index the JD in static HTML.
|
|
15
15
|
|
package/modes/pipeline.md
CHANGED
|
@@ -34,7 +34,7 @@ Processes accumulated job offer URLs from `data/pipeline.md`. The user adds URLs
|
|
|
34
34
|
## Detect JD From URL
|
|
35
35
|
|
|
36
36
|
1. **Greenhouse JSON API (FIRST, when the entry has `| gh={slug}/{id}` OR the host looks Greenhouse-backed):** WebFetch `https://boards-api.greenhouse.io/v1/boards/{slug}/jobs/{id}`. 200 + JSON with `content` = LIVE, use it as the JD; 404 = genuinely CLOSED (mark `- [!]` and continue). **OpenCode WebFetch compatibility:** do not pass `format: "json"`; omit `format` or use `format: "text"` and parse the returned JSON text. Bot-hostile customer fronts (`pinterestcareers.com`, `okta.com`, `samsara.com`, `zoominfo.com`, `collibra.com`, `careers.toasttab.com`, `careers.airbnb.com`, `coinbase.com`, `instacart.careers`) MUST be verified via this API first — WebFetch/Geometra of those domains returns a shell or 403 and causes false CLOSED marks.
|
|
37
|
-
2. **Direct Geometra helper:** `npx job-forge portal:snapshot --url "{url}" --json`. Works with non-Greenhouse SPAs (Lever, Ashby, Workday), enforces `headless: true`, `
|
|
37
|
+
2. **Direct Geometra helper:** `npx job-forge portal:snapshot --url "{url}" --json`. Works with non-Greenhouse SPAs (Lever, Ashby, Workday), enforces `headless: true`, `browserMode: "stock"`, `blockDetection: true`, and `isolated: true` in code, reads `config/profile.yml` proxy config, returns `blockedSite` metadata when detected, and closes Chromium before exit.
|
|
38
38
|
3. **Geometra MCP (interactive fallback):** Use only when the one-shot helper is not enough and a live multi-step browser session is required.
|
|
39
39
|
4. **WebFetch (fallback):** For static pages or when Geometra is not available.
|
|
40
40
|
5. **WebSearch (last resort):** Search on secondary portals that index the JD.
|
|
@@ -40,7 +40,7 @@ These blocks come from two distinct root causes and require different responses:
|
|
|
40
40
|
| Class | Root cause | Recoverable in-session? | Fix |
|
|
41
41
|
|---|---|---|---|
|
|
42
42
|
| **A. React-validation lag** | programmatic text input didn't fire composition events; React marks required fields internally missing even though values look correct | Yes | Refill with `imeFriendly: true` and resubmit once. |
|
|
43
|
-
| **B.
|
|
43
|
+
| **B. Server-side block** | portal rejects the session after inspecting network/browser/session signals | No (in headless) | Mark `Failed` with note "Ashby blocked session"; preserve `blockedSite` details when present and recommend manual submit from the user's own browser. |
|
|
44
44
|
|
|
45
45
|
**How to tell them apart:** if you saw `invalidCount > 0` and the "required field" error BEFORE submit, class A is likely — retry with `imeFriendly: true`. If the form filled perfectly clean (`invalidCount: 0` on every step) and the spam flag fires only on submit, class B is likely — Ashby's "Learn more" dialog cites VPN/proxy, ad blockers, shared/public network, which `imeFriendly` cannot influence.
|
|
46
46
|
|
|
@@ -50,7 +50,7 @@ These blocks come from two distinct root causes and require different responses:
|
|
|
50
50
|
|
|
51
51
|
**Rule — do NOT loop retrying a class B block.** One retry with `imeFriendly: true` is the correct test for class A. If the same spam message fires after a clean `imeFriendly` refill, stop, mark Failed, move on. Repeated retries waste subagent time and do not change the outcome.
|
|
52
52
|
|
|
53
|
-
**Class B
|
|
53
|
+
**Class B response — structured block detection + manual handoff.** JobForge passes `headless: true`, `browserMode: "stock"`, `blockDetection: true`, and `blockedSitePolicy: "manual-handoff"` so Geometra MCP >=1.62.3 keeps browser windows hidden and returns structured `blockedSite` metadata when the portal serves a challenge, CAPTCHA, access-denied page, unsupported-browser screen, or similar block. If the same spam/block message fires after a clean `imeFriendly` retry, stop, record the failed outcome, and surface the `blockedSite` / `manualHandoff` detail to the orchestrator.
|
|
54
54
|
|
|
55
55
|
**Known-block Ashby tenants (2026-04-19 empirical observations).** These tenants fired class B on every attempted submit from a headless datacenter-IP proxy. Orchestrators planning apply dispatches should assume these tenants will Fail in headless — prioritize other portals, or skip same-tenant siblings after a confirmed class B to avoid burning subagent slots:
|
|
56
56
|
|
|
@@ -60,7 +60,7 @@ These blocks come from two distinct root causes and require different responses:
|
|
|
60
60
|
|
|
61
61
|
- Supabase, LangChain, Poolside, Runway Financial, Sentry, Cognition
|
|
62
62
|
|
|
63
|
-
**Base rate for untested Ashby tenants (5/5 tested 2026-04-19 cycle 4 = class B).**
|
|
63
|
+
**Base rate for untested Ashby tenants (5/5 tested 2026-04-19 cycle 4 = class B).** Treat any tenant not on the class-A-compatible list as higher-risk for server-side submit blocks — still dispatch to collect the data point, but don't burn multiple sibling-role slots on the same Ashby tenant after one confirmed block.
|
|
64
64
|
|
|
65
65
|
The pattern is tenant configuration, not role or company size. Lists drift as tenants tune their anti-bot — treat as probabilistic priors, not hard rules.
|
|
66
66
|
|
|
@@ -72,7 +72,7 @@ The pattern is tenant configuration, not role or company size. Lists drift as te
|
|
|
72
72
|
|
|
73
73
|
**Avature multi-step wizards have a native-`<select>` validation lag (Bloomberg pattern).** Bloomberg's careers site redirects to `bloomberg.avature.net` with a 4-step wizard. On Step 2, native `<select>` elements ("Is Current Position? / No") accept the value but keep `invalid: true` persistently — neither Tab, re-submit, nor re-pick clears it. `imeFriendly` has no effect because the field is a native `<select>`, not React-controlled text. There is no documented recovery. Mark `Failed` with reason "Avature native-select validation lag"; account creation up to that point is preserved for any future manual path. Confirmed on Bloomberg Sr SWE Auth #828, 2026-04-19.
|
|
74
74
|
|
|
75
|
-
**Cloudflare / ATS-vendor blocks on Dropbox-class portals.** Dropbox's real apply flow lives behind `happydance.website` (ATS vendor), which
|
|
75
|
+
**Cloudflare / ATS-vendor blocks on Dropbox-class portals.** Dropbox's real apply flow lives behind `happydance.website` (ATS vendor), which can return "Sorry, you have been blocked" before the form renders. `job-boards.greenhouse.io/dropbox` does not mirror it — there is no public Greenhouse fallback. Symptom-wise indistinguishable from Ashby class B but at a different layer. Mark `Failed` with reason "ATS vendor Cloudflare block (happydance.website or equivalent)" and preserve `blockedSite` details when present. Confirmed on Dropbox Sr FS Product #831, 2026-04-19.
|
|
76
76
|
|
|
77
77
|
**Greenhouse OTP-on-fill variant (Instacart pattern).** Most Greenhouse OTP flows fire on Submit. A minority (Instacart Staff FoodStorm #827, 2026-04-19) fire the 8-cell security-code gate mid-fill, BEFORE the user clicks Submit. Detection: watch for an 8-cell OTP input surfacing after resume upload or the first listbox commit. Fetch from Gmail (`from:greenhouse newer_than:10m`) immediately when it appears — do not wait for Submit.
|
|
78
78
|
|
|
@@ -80,7 +80,7 @@ The pattern is tenant configuration, not role or company size. Lists drift as te
|
|
|
80
80
|
|
|
81
81
|
**Breezy portal — tenant-dependent, native `<select>`, resume-auto-parse is primary.** A subset of companies (Avantos AI, Courted, Instinct Science confirmed 2026-04-19) host applications on `*.breezy.hr` or `applytojob.com`. Empirical rules:
|
|
82
82
|
|
|
83
|
-
- **Class is per-tenant, not uniform.** Avantos (Failed 2026-04-19 #854) returned Breezy's own "It looks like maybe you've already applied to this job?" banner
|
|
83
|
+
- **Class is per-tenant, not uniform.** Avantos (Failed 2026-04-19 #854) returned Breezy's own "It looks like maybe you've already applied to this job?" banner on a first submit — distinct failure mode from Ashby's "flagged as possible spam". Courted (Applied 2026-04-19 #855) went through cleanly on the same session. Don't pre-skip Breezy; the outcome is tenant-specific.
|
|
84
84
|
- **Native `<select>` elements, not React comboboxes.** `geometra_pick_listbox_option` sets the visible display but NOT the underlying form state — submit will fail with "A response is required" on every combobox. Use `geometra_select_option` with x,y + label value for every choice field on Breezy.
|
|
85
85
|
- **Resume-auto-parse carries the signal.** After resume upload, Breezy auto-parses work history and education into structured rows. Do NOT Add/Delete position rows via Geometra — row mutations reshuffle fieldIds mid-flow, sequential `fill_fields` calls land in wrong rows, and upstream pollution corrupts earlier positions. Trust the parsed resume and fill only Personal Details + salary.
|
|
86
86
|
|
|
@@ -138,12 +138,20 @@ When running multiple application forms in parallel, each `geometra_connect` MUS
|
|
|
138
138
|
|
|
139
139
|
**Correct parallel pattern:**
|
|
140
140
|
```javascript
|
|
141
|
-
geometra_connect({
|
|
141
|
+
geometra_connect({
|
|
142
|
+
pageUrl: "https://...",
|
|
143
|
+
isolated: true,
|
|
144
|
+
headless: true,
|
|
145
|
+
slowMo: 350,
|
|
146
|
+
browserMode: "stock",
|
|
147
|
+
blockDetection: true,
|
|
148
|
+
blockedSitePolicy: "manual-handoff"
|
|
149
|
+
})
|
|
142
150
|
```
|
|
143
151
|
|
|
144
152
|
**Wrong:** running `geometra_connect` without `isolated: true` when submitting multiple forms concurrently. The forms may share state and produce incorrect submissions.
|
|
145
153
|
|
|
146
|
-
**With a configured proxy,** add `proxy: { server, username?, password?, bypass? }` to the same call — see "BYO
|
|
154
|
+
**With a configured proxy,** add `proxy: { server, username?, password?, bypass? }` to the same call — see "BYO Proxy + Block Detection" below. The reusable-proxy pool is partitioned by proxy identity, so mixing direct and proxied sessions across parallel rounds is safe. Keep `headless: true`, `browserMode: "stock"`, `blockDetection: true`, and `blockedSitePolicy: "manual-handoff"` either way so JobForge keeps browser windows hidden and surfaces structured blocked-site states.
|
|
147
155
|
|
|
148
156
|
### Session Reuse — When Subagents Cannot Reach Existing Sessions
|
|
149
157
|
|
|
@@ -187,7 +195,7 @@ Every subagent that uses Geometra must run these THREE tool calls as its FIRST t
|
|
|
187
195
|
```
|
|
188
196
|
Step 1: geometra_list_sessions()
|
|
189
197
|
Step 2: geometra_disconnect({ closeBrowser: true })
|
|
190
|
-
Step 3: geometra_connect({ pageUrl: "<the URL the orchestrator gave you>", isolated: true, headless: true, slowMo: 350,
|
|
198
|
+
Step 3: geometra_connect({ pageUrl: "<the URL the orchestrator gave you>", isolated: true, headless: true, slowMo: 350, browserMode: "stock", blockDetection: true, blockedSitePolicy: "manual-handoff" })
|
|
191
199
|
```
|
|
192
200
|
|
|
193
201
|
**If the orchestrator says proxy is configured,** read the top-level
|
|
@@ -195,12 +203,13 @@ Step 3: geometra_connect({ pageUrl: "<the URL the orchestrator gave you>", isol
|
|
|
195
203
|
|
|
196
204
|
```
|
|
197
205
|
Step 3: geometra_connect({
|
|
198
|
-
pageUrl: "<URL>", isolated: true, headless: true, slowMo: 350,
|
|
206
|
+
pageUrl: "<URL>", isolated: true, headless: true, slowMo: 350,
|
|
207
|
+
browserMode: "stock", blockDetection: true, blockedSitePolicy: "manual-handoff",
|
|
199
208
|
proxy: { server: "...", username: "...", password: "...", bypass: "..." }
|
|
200
209
|
})
|
|
201
210
|
```
|
|
202
211
|
|
|
203
|
-
Pass the proxy object through unchanged. Do NOT paraphrase or drop fields — `username`/`password`/`bypass` are optional, so only include what exists in `config/profile.yml`. Do not echo proxy credentials in status text. See the "BYO
|
|
212
|
+
Pass the proxy object through unchanged. Do NOT paraphrase or drop fields — `username`/`password`/`bypass` are optional, so only include what exists in `config/profile.yml`. Do not echo proxy credentials in status text. See the "BYO Proxy + Block Detection" reference section for the why.
|
|
204
213
|
|
|
205
214
|
**DO NOT** skip Step 1 or Step 2. **DO NOT** think about whether it's needed. **DO NOT** look at `geometra_list_sessions` output and reason about it — just always call `geometra_disconnect({ closeBrowser: true })` next. The disconnect is a no-op if the pool is empty, and a poison-cure if it isn't.
|
|
206
215
|
|
|
@@ -49,13 +49,13 @@ When a form says "enter the code we sent to your email", you MUST retrieve the c
|
|
|
49
49
|
|
|
50
50
|
---
|
|
51
51
|
|
|
52
|
-
## BYO
|
|
52
|
+
## BYO Proxy + Block Detection
|
|
53
53
|
|
|
54
|
-
**Problem:**
|
|
54
|
+
**Problem:** Some portals return CAPTCHA, challenge, access-denied, unsupported-browser, rate-limit, or similar blocked states before a form can be read or submitted. `imeFriendly: true` fixes React validation lag, but it cannot change a server-side portal decision.
|
|
55
55
|
|
|
56
|
-
**
|
|
56
|
+
**Default response:** JobForge keeps the browser hidden and predictable: pass `headless: true`, `browserMode: "stock"`, `blockDetection: true`, and `blockedSitePolicy: "manual-handoff"` on Geometra connects. Geometra MCP >=1.62.3 returns structured `blockedSite` and `manualHandoff` metadata so the orchestrator can record the block, stop retries, and ask for a manual path when needed.
|
|
57
57
|
|
|
58
|
-
**Proxy is opt-in
|
|
58
|
+
**Proxy is opt-in.** JobForge does NOT bundle or resell proxy bandwidth. If the candidate has their own proxy for legitimate network routing, JobForge can pass the top-level `proxy:` object through to Geometra without printing credentials. Without a configured proxy, JobForge omits the proxy parameter.
|
|
59
59
|
|
|
60
60
|
### Where the proxy config lives
|
|
61
61
|
|
|
@@ -76,33 +76,33 @@ See `config/profile.example.yml` for the commented-out template.
|
|
|
76
76
|
**Orchestrator responsibilities:**
|
|
77
77
|
|
|
78
78
|
1. On session start, read `config/profile.yml` once. If a `proxy:` block is present, remember that a proxy is configured, but do not paste username/password values into task prompts or user-visible status.
|
|
79
|
-
2. When dispatching any subagent whose work involves a `geometra_connect` call or a Geometra auto-connect call with `pageUrl` / `url`, tell it to read `config/profile.yml` and pass the top-level `proxy:` block plus `headless: true`
|
|
80
|
-
3. When the orchestrator itself opens a Chromium session (single-application interactive flow), include the same `proxy` object from `config/profile.yml`, `headless: true`,
|
|
79
|
+
2. When dispatching any subagent whose work involves a `geometra_connect` call or a Geometra auto-connect call with `pageUrl` / `url`, tell it to read `config/profile.yml` and pass the top-level `proxy:` block plus `headless: true`, `browserMode: "stock"`, `blockDetection: true`, and `blockedSitePolicy: "manual-handoff"` to every connect. Example dispatch prompt line: "Proxy is configured; read `config/profile.yml` and pass its top-level `proxy:` object plus `headless: true`, `browserMode: \"stock\"`, `blockDetection: true`, and `blockedSitePolicy: \"manual-handoff\"` to every Geometra connect or auto-connect call."
|
|
80
|
+
3. When the orchestrator itself opens a Chromium session (single-application interactive flow), include the same `proxy` object from `config/profile.yml`, `headless: true`, `browserMode: "stock"`, `blockDetection: true`, and `blockedSitePolicy: "manual-handoff"` in its own `geometra_connect` call.
|
|
81
81
|
4. If `proxy:` is absent from `profile.yml`, skip the param entirely. Do NOT invent a proxy URL or leave a stale placeholder.
|
|
82
82
|
|
|
83
83
|
**Subagent responsibilities:**
|
|
84
84
|
|
|
85
|
-
1. If the task prompt says proxy is configured, read `config/profile.yml` and pass the top-level `proxy:` object plus `headless: true`
|
|
86
|
-
2. If the task prompt includes a legacy inline `proxy` object, pass it through unchanged and still set
|
|
87
|
-
3. If the task prompt does NOT mention a proxy and `config/profile.yml` has no `proxy:` block, run with `headless: true`, `
|
|
85
|
+
1. If the task prompt says proxy is configured, read `config/profile.yml` and pass the top-level `proxy:` object plus `headless: true`, `browserMode: "stock"`, `blockDetection: true`, and `blockedSitePolicy: "manual-handoff"` through to `geometra_connect` and any Geometra auto-connect call with `pageUrl` / `url`. For `geometra_prepare_browser`, pass only the supported launch fields: `proxy`, `headless: true`, and `browserMode: "stock"`.
|
|
86
|
+
2. If the task prompt includes a legacy inline `proxy` object, pass it through unchanged and still set the same headless/browser/block-detection options, but never print the credentials back in status text.
|
|
87
|
+
3. If the task prompt does NOT mention a proxy and `config/profile.yml` has no `proxy:` block, run with `headless: true`, `browserMode: "stock"`, `blockDetection: true`, `blockedSitePolicy: "manual-handoff"`, and no proxy.
|
|
88
88
|
4. Never second-guess the proxy field — if it comes from `profile.yml`, it's authoritative.
|
|
89
89
|
|
|
90
|
-
### When
|
|
90
|
+
### When blocked-site metadata is load-bearing
|
|
91
91
|
|
|
92
|
-
Apply these rules when deciding whether
|
|
92
|
+
Apply these rules when deciding whether to stop automation and hand off:
|
|
93
93
|
|
|
94
|
-
- **
|
|
95
|
-
- **
|
|
96
|
-
- **
|
|
97
|
-
- **
|
|
94
|
+
- **Stop immediately** when Geometra returns `blockedSite.detected: true` with `blockedSitePolicy: "manual-handoff"` and the page is a CAPTCHA, Cloudflare challenge, access-denied page, unsupported-browser page, or rate-limit notice.
|
|
95
|
+
- **Retry once only** for Ashby text-field rejection when `invalidCount` suggested React validation lag; the retry must use `imeFriendly: true`. If the same spam/block message repeats after clean fills, stop.
|
|
96
|
+
- **Do not spend time on Geometra-unsupported portals** such as Typeform or known native-select validation dead ends such as Avature. Mark Failed with the specific reason.
|
|
97
|
+
- **Use a configured proxy only when it is already present in `profile.yml`.** Never invent a proxy, ask subagents to paste credentials, or print the configured values.
|
|
98
98
|
|
|
99
99
|
### Pool partitioning — why mixed runs are safe
|
|
100
100
|
|
|
101
|
-
The Geometra MCP partitions its reusable-proxy pool by proxy identity and browser
|
|
101
|
+
The Geometra MCP partitions its reusable-proxy pool by proxy identity and browser mode. A direct session and a proxied session NEVER share a Chromium instance, and stock and explicitly requested alternate browser modes do not pool together. Practical consequence: flipping `proxy:` on or off in `profile.yml` mid-session is safe — the next `geometra_connect` just opens a fresh Chromium in its own pool partition.
|
|
102
102
|
|
|
103
103
|
### Direct helper for one-shot reads
|
|
104
104
|
|
|
105
|
-
Use `npx job-forge portal:snapshot --url "{url}" --json` or `npx job-forge portal:form-schema --url "{url}" --json` when you only need a rendered page model, compact snapshot, or
|
|
105
|
+
Use `npx job-forge portal:snapshot --url "{url}" --json` or `npx job-forge portal:form-schema --url "{url}" --json` when you only need a rendered page model, compact snapshot, form schema, or `blockedSite` metadata from one URL. These commands import Geometra's session module directly instead of going through MCP, enforce `headless: true`, `browserMode: "stock"`, `blockDetection: true`, and `isolated: true`, pass the `config/profile.yml` proxy block if configured, and close Chromium before exit. Keep MCP for interactive multi-step browser automation where a live `sessionId` must be driven across actions.
|
|
106
106
|
|
|
107
107
|
### Troubleshooting
|
|
108
108
|
|
|
@@ -110,8 +110,9 @@ Use `npx job-forge portal:snapshot --url "{url}" --json` or `npx job-forge porta
|
|
|
110
110
|
|---|---|
|
|
111
111
|
| `Error: Failed to connect to proxy` immediately after `geometra_connect` | Proxy URL is wrong / unreachable. Verify the `server:` field hits the right host:port. |
|
|
112
112
|
| `407 Proxy Authentication Required` | `username` or `password` is wrong or missing. Many residential providers require both. |
|
|
113
|
-
|
|
|
114
|
-
|
|
|
113
|
+
| `blockedSite.detected: true` on connect or page model | Stop automation for that URL, preserve the `blockedSite` payload, and route to manual handoff or mark Failed with the specific block type. |
|
|
114
|
+
| Proxy is configured but pages fail immediately | Proxy URL/auth may be wrong, or the target site may reject the route. Verify the `server:` field locally; do not paste credentials into prompts. |
|
|
115
|
+
| Every `geometra_connect` is 3-5s slower than before | Expected when a configured proxy adds network latency. Remove or adjust `proxy:` in `profile.yml` only if the candidate no longer wants that routing. |
|
|
115
116
|
|
|
116
117
|
---
|
|
117
118
|
|
package/modes/scan.md
CHANGED
|
@@ -25,7 +25,7 @@ Read `portals.yml` which contains:
|
|
|
25
25
|
|
|
26
26
|
### Use Level 1 — Direct Geometra (PRIMARY)
|
|
27
27
|
|
|
28
|
-
**For each company in `tracked_companies`:** Connect to its `careers_url` with Geometra MCP (`geometra_connect({ ..., headless: true,
|
|
28
|
+
**For each company in `tracked_companies`:** Connect to its `careers_url` with Geometra MCP (`geometra_connect({ ..., headless: true, browserMode: "stock", blockDetection: true, blockedSitePolicy: "manual-handoff" })` + `geometra_page_model` / `geometra_list_items`), read ALL visible job listings, and extract the title + URL of each one. Direct Geometra is the most reliable method because:
|
|
29
29
|
|
|
30
30
|
- It sees the page in real time (not cached Google results).
|
|
31
31
|
- It works with SPAs (Ashby, Lever, Workday).
|
|
@@ -138,7 +138,7 @@ The levels are additive — all are executed, results are merged and deduplicate
|
|
|
138
138
|
|
|
139
139
|
4. **Level 1 — Geometra scan** (sequential, or ≤2 parallel via `task` subagents per Hard Limit #1 in `AGENTS.md`):
|
|
140
140
|
For each company in `tracked_companies` with `enabled: true` and `careers_url` defined:
|
|
141
|
-
a. `geometra_connect` to the `careers_url` with `headless: true`
|
|
141
|
+
a. `geometra_connect` to the `careers_url` with `headless: true`, `browserMode: "stock"`, `blockDetection: true`, and `blockedSitePolicy: "manual-handoff"`
|
|
142
142
|
b. `geometra_page_model` or `geometra_list_items` to read all job listings
|
|
143
143
|
c. If the page has filters/departments, navigate the relevant sections
|
|
144
144
|
d. For each job listing extract: `{title, url, company}`
|
|
@@ -317,7 +317,7 @@ Each company in `tracked_companies` MUST have a `careers_url` — the direct URL
|
|
|
317
317
|
**If `careers_url` doesn't exist** for a company:
|
|
318
318
|
1. Try the pattern for its known platform
|
|
319
319
|
2. If that fails, do a quick WebSearch: `"{company}" careers jobs`
|
|
320
|
-
3. Navigate with Geometra (`geometra_connect` with `headless: true`
|
|
320
|
+
3. Navigate with Geometra (`geometra_connect` with `headless: true`, `browserMode: "stock"`, `blockDetection: true`, and `blockedSitePolicy: "manual-handoff"`) to confirm it works
|
|
321
321
|
4. **Save the found URL in portals.yml** for future scans
|
|
322
322
|
|
|
323
323
|
**If `careers_url` returns 404 or redirect:**
|
package/opencode.json
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "job-forge",
|
|
3
|
-
"version": "2.14.
|
|
3
|
+
"version": "2.14.46",
|
|
4
4
|
"description": "AI-powered job search pipeline built on opencode",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -199,7 +199,7 @@
|
|
|
199
199
|
"@agent-pattern-labs/iso-score": "^0.1.1",
|
|
200
200
|
"@agent-pattern-labs/iso-timeline": "^0.1.1",
|
|
201
201
|
"@agent-pattern-labs/iso-trace": "^0.5.1",
|
|
202
|
-
"@geometra/mcp": "1.62.
|
|
202
|
+
"@geometra/mcp": "1.62.3",
|
|
203
203
|
"playwright": "^1.58.1"
|
|
204
204
|
},
|
|
205
205
|
"devDependencies": {
|
|
@@ -21,14 +21,14 @@ const checks = [
|
|
|
21
21
|
["H5 blocks same-company concurrent retry", () => every(files.instructions, ["Re-dispatch the same company only AFTER", "previous subagent returns"])],
|
|
22
22
|
["H6 requires merge and verify", () => every(files.instructions, ["batch/tracker-additions/*.tsv", "npx job-forge merge", "npx job-forge verify"])],
|
|
23
23
|
["H7 distrusts subagent prose", () => every(files.instructions, ["must originate from a file", "not from prior subagent prose"])],
|
|
24
|
-
["H8 keeps proxy secret and requires headless
|
|
24
|
+
["H8 keeps proxy secret and requires stock headless block detection", () => every(files.instructions, ["[H8]", "Do not transcribe `server`, `username`, `password`, or `bypass`", "`headless: true`", "`browserMode: \\\"stock\\\"`", "`blockDetection: true`", "`blockedSitePolicy: \\\"manual-handoff\\\"`"])],
|
|
25
25
|
["H9 points to Geometra MCP lifecycle logs", () => every(files.instructions, ["[H9]", ".jobforge-mcp/geometra-mcp.jsonl", "launcher_start", "heartbeat", "child_exit"])],
|
|
26
26
|
["OpenCode addendum exists for task semantics", () => every(files.instructionsOpencode, ["OpenCode", "`task`", "launch acknowledgement", "Do not use `task` to poll status"])],
|
|
27
27
|
["root points to consolidated helper reference", () => every(files.instructions, ["[D8]", "modes/reference-local-helpers.md", "deterministic local helpers"])],
|
|
28
28
|
["helper reference covers score/timeline/prioritize/lineage", () => every(files.helpers, ["templates/score.json", "npx job-forge score:*", "templates/timeline.json", "npx job-forge timeline:*", "templates/prioritize.json", "npx job-forge prioritize:*", ".jobforge-lineage.json", "npx job-forge lineage:*"])],
|
|
29
29
|
["root helper defaults are consolidated", () => !/\[D(?:9|1\d|2[0-9])\]/.test(files.instructions)],
|
|
30
30
|
["shared prompt points to on-demand references", () => every(files.instructions, ["modes/{mode}.md", "modes/reference-setup.md", "modes/reference-portals.md", "modes/reference-geometra.md"])],
|
|
31
|
-
["apply mode requires headless
|
|
31
|
+
["apply mode requires stock headless Geometra with block detection", () => every(files.apply, ["`headless: true`", "`browserMode: \"stock\"`", "`isolated: true`", "`blockDetection: true`", "`blockedSitePolicy: \"manual-handoff\"`", "every Geometra auto-connect call"])],
|
|
32
32
|
["apply mode uses host-safe run_actions continuations", () => every(files.apply, ["softTimeoutMs: 45000", "resumeFromIndex", "pauseReason: \"soft-timeout\"", "This is a continuation, not a retry"])],
|
|
33
33
|
["apply mode owns high-stakes upgrade", () => every(files.apply, ["[D8]", "@general-paid", "4.0/5", "high-stakes"])],
|
|
34
34
|
["apply mode blocks provider auto-downgrade", () => every(files.apply, ["[D9]", "do not auto-downgrade", "inspect telemetry before retrying"])],
|
package/scripts/portal.mjs
CHANGED
|
@@ -21,7 +21,8 @@ Usage:
|
|
|
21
21
|
Defaults are enforced in code for every browser launch:
|
|
22
22
|
isolated: true
|
|
23
23
|
headless: true
|
|
24
|
-
|
|
24
|
+
browserMode: stock
|
|
25
|
+
blockDetection: true
|
|
25
26
|
slowMo: 350
|
|
26
27
|
|
|
27
28
|
The helper imports Geometra's session module directly. It does not call the
|
|
@@ -66,6 +67,7 @@ async function snapshot(opts) {
|
|
|
66
67
|
const pageModel = geometra.buildPageModel(root, {
|
|
67
68
|
maxPrimaryActions: opts.maxPrimaryActions,
|
|
68
69
|
maxSectionsPerKind: opts.maxSectionsPerKind,
|
|
70
|
+
blockDetection: true,
|
|
69
71
|
});
|
|
70
72
|
const compact = geometra.buildCompactUiIndex(root, {
|
|
71
73
|
maxNodes: opts.maxNodes,
|
|
@@ -76,6 +78,7 @@ async function snapshot(opts) {
|
|
|
76
78
|
url: opts.url,
|
|
77
79
|
session: connectionSummary(session, proxy),
|
|
78
80
|
defaults: launchDefaults(opts, proxy),
|
|
81
|
+
blockedSite: pageModel.blockedSite ?? { detected: false },
|
|
79
82
|
pageModel,
|
|
80
83
|
compact,
|
|
81
84
|
...(opts.forms ? { forms: geometra.buildFormSchemas(root, formOptions(opts)) } : {}),
|
|
@@ -83,8 +86,9 @@ async function snapshot(opts) {
|
|
|
83
86
|
output(result, opts, () => {
|
|
84
87
|
console.log(`url: ${opts.url}`);
|
|
85
88
|
console.log(`session: ${session.id}`);
|
|
86
|
-
console.log(`defaults:
|
|
89
|
+
console.log(`defaults: ${formatDefaults(opts, proxy)}`);
|
|
87
90
|
if (proxy) console.log(`proxy: ${redactProxy(proxy)}`);
|
|
91
|
+
printBlockedSite(pageModel);
|
|
88
92
|
console.log(geometra.summarizePageModel(pageModel, 12));
|
|
89
93
|
console.log(geometra.summarizeCompactIndex(compact.nodes, 24));
|
|
90
94
|
if (opts.forms) {
|
|
@@ -104,18 +108,25 @@ async function formSchema(opts) {
|
|
|
104
108
|
|
|
105
109
|
try {
|
|
106
110
|
const root = buildRoot(geometra, session);
|
|
111
|
+
const pageModel = geometra.buildPageModel(root, {
|
|
112
|
+
maxPrimaryActions: opts.maxPrimaryActions,
|
|
113
|
+
maxSectionsPerKind: opts.maxSectionsPerKind,
|
|
114
|
+
blockDetection: true,
|
|
115
|
+
});
|
|
107
116
|
const forms = geometra.buildFormSchemas(root, formOptions(opts));
|
|
108
117
|
const result = {
|
|
109
118
|
url: opts.url,
|
|
110
119
|
session: connectionSummary(session, proxy),
|
|
111
120
|
defaults: launchDefaults(opts, proxy),
|
|
121
|
+
blockedSite: pageModel.blockedSite ?? { detected: false },
|
|
112
122
|
forms,
|
|
113
123
|
};
|
|
114
124
|
output(result, opts, () => {
|
|
115
125
|
console.log(`url: ${opts.url}`);
|
|
116
126
|
console.log(`session: ${session.id}`);
|
|
117
|
-
console.log(`defaults:
|
|
127
|
+
console.log(`defaults: ${formatDefaults(opts, proxy)}`);
|
|
118
128
|
if (proxy) console.log(`proxy: ${redactProxy(proxy)}`);
|
|
129
|
+
printBlockedSite(pageModel);
|
|
119
130
|
for (const form of forms) {
|
|
120
131
|
const name = form.name ? ` "${form.name}"` : '';
|
|
121
132
|
console.log(`${form.formId}${name}: ${form.fieldCount} fields, ${form.requiredCount} required, ${form.invalidCount} invalid`);
|
|
@@ -144,7 +155,7 @@ async function explain(opts) {
|
|
|
144
155
|
output(result, opts, () => {
|
|
145
156
|
console.log(`project: ${PROJECT_DIR}`);
|
|
146
157
|
console.log(`module: ${moduleTarget.source} ${moduleTarget.path}`);
|
|
147
|
-
console.log(`defaults:
|
|
158
|
+
console.log(`defaults: ${formatDefaults(opts, proxy)}`);
|
|
148
159
|
console.log(`profile proxy: ${proxy ? redactProxy(proxy) : 'none'}`);
|
|
149
160
|
});
|
|
150
161
|
}
|
|
@@ -154,7 +165,7 @@ async function connect(geometra, opts, proxy) {
|
|
|
154
165
|
pageUrl: opts.url,
|
|
155
166
|
isolated: true,
|
|
156
167
|
headless: true,
|
|
157
|
-
stealth:
|
|
168
|
+
stealth: false,
|
|
158
169
|
slowMo: opts.slowMo,
|
|
159
170
|
width: opts.width,
|
|
160
171
|
height: opts.height,
|
|
@@ -318,7 +329,8 @@ function launchDefaults(opts, proxy) {
|
|
|
318
329
|
return {
|
|
319
330
|
isolated: true,
|
|
320
331
|
headless: true,
|
|
321
|
-
|
|
332
|
+
browserMode: 'stock',
|
|
333
|
+
blockDetection: true,
|
|
322
334
|
slowMo: opts.slowMo,
|
|
323
335
|
width: opts.width,
|
|
324
336
|
height: opts.height,
|
|
@@ -326,6 +338,24 @@ function launchDefaults(opts, proxy) {
|
|
|
326
338
|
};
|
|
327
339
|
}
|
|
328
340
|
|
|
341
|
+
function formatDefaults(opts, proxy) {
|
|
342
|
+
const defaults = launchDefaults(opts, proxy);
|
|
343
|
+
return [
|
|
344
|
+
`isolated=${defaults.isolated}`,
|
|
345
|
+
`headless=${defaults.headless}`,
|
|
346
|
+
`browserMode=${defaults.browserMode}`,
|
|
347
|
+
`blockDetection=${defaults.blockDetection}`,
|
|
348
|
+
`slowMo=${defaults.slowMo}`,
|
|
349
|
+
].join(' ');
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
function printBlockedSite(pageModel) {
|
|
353
|
+
if (!pageModel.blockedSite?.detected) return;
|
|
354
|
+
const type = pageModel.blockedSite.type ?? 'unknown';
|
|
355
|
+
const hint = pageModel.blockedSite.hint ? ` - ${pageModel.blockedSite.hint}` : '';
|
|
356
|
+
console.log(`blocked: ${type}${hint}`);
|
|
357
|
+
}
|
|
358
|
+
|
|
329
359
|
function connectionSummary(session, proxy) {
|
|
330
360
|
return {
|
|
331
361
|
id: session.id,
|