npm - job-forge - Versions diffs - 2.14.11 → 2.14.13 - Mend

job-forge 2.14.11 → 2.14.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32)

package/.claude/agents/general-free.md +2 -2
package/.claude/agents/general-paid.md +5 -5
package/.claude/agents/glm-minimal.md +1 -1
package/.cursor/rules/agent-general-free.mdc +2 -2
package/.cursor/rules/agent-general-paid.mdc +5 -5
package/.cursor/rules/agent-glm-minimal.mdc +1 -1
package/.cursor/rules/main.mdc +3 -3
package/.opencode/agents/general-free.md +3 -8
package/.opencode/agents/general-paid.md +6 -13
package/.opencode/agents/glm-minimal.md +2 -7
package/.opencode/skills/job-forge.md +8 -3
package/AGENTS.md +3 -3
package/CLAUDE.md +3 -3
package/README.md +2 -2
package/bin/create-job-forge.mjs +10 -48
package/bin/sync.mjs +0 -1
package/docs/ARCHITECTURE.md +2 -2
package/docs/MODEL-ROUTING.md +43 -76
package/docs/README.md +1 -1
package/iso/agents/general-free.md +5 -13
package/iso/agents/general-paid.md +8 -20
package/iso/agents/glm-minimal.md +4 -11
package/iso/commands/job-forge.md +8 -3
package/iso/config.json +1 -42
package/iso/instructions.md +3 -3
package/models.yaml +25 -11
package/modes/apply.md +13 -10
package/opencode.json +5 -26
package/package.json +2 -3
package/scripts/check-iso-smoke.mjs +7 -3
package/scripts/telemetry.mjs +256 -20
package/.opencode/opencode-model-fallback.json +0 -22

package/iso/agents/general-free.md CHANGED

@@ -1,24 +1,16 @@
 ---
-description: Procedural worker on free-tier model. Use for form filling via Geometra, tracker updates, TSV merges, scan dedup, OTP retrieval, and other mechanical/scripted tasks where quality-sensitive text generation is NOT required.
+description: Procedural worker on the low-cost DeepSeek V4 Flash OpenCode route. Use for form filling via Geometra, tracker updates, TSV merges, scan dedup, OTP retrieval, and other mechanical/scripted tasks where quality-sensitive text generation is NOT required.
 role: fast
 targets:
-  # No inline model: iso-route's "standard" preset maps role "fast" to
-  # each harness's cheapest reliable model. Claude Code reads
+  # No inline model: JobForge's models.yaml maps role "fast" to each
+  # harness's cheapest reliable model. On OpenCode that is pinned to
+  # opencode-go/deepseek-v4-flash. Claude Code reads
   # .claude/iso-route.resolved.json; OpenCode reads opencode.json's
   # agent.fast.model (iso-harness 0.6.0+).
   opencode:
     mode: subagent
     temperature: 0.1
     reasoningEffort: minimal
-    # Primary comes from models.yaml: opencode/big-pickle on OpenCode.
-    # Fallback chain stays free-only and intentionally excludes
-    # openrouter/minimax/minimax-m2.5:free because recent traces showed
-    # repeated read({ path|file_path }) schema drift on that route.
-    fallback_models:
-      - openrouter/z-ai/glm-4.5-air:free
-      - openrouter/openai/gpt-oss-20b:free
-      - openrouter/nvidia/nemotron-3-nano-30b-a3b:free
-      - openrouter/qwen/qwen3-coder:free
     tools:
       geometra_connect: true
       geometra_page_model: true
@@ -34,7 +26,7 @@ targets:
       task: false
 ---
-You are the @general-free subagent. You run on a free-tier model, which means the orchestrator has delegated this task to you **specifically because the work is procedural**: deterministic steps, scripted outputs, no nuanced writing required.
+You are the @general-free subagent. You run on JobForge's low-cost procedural model, which means the orchestrator has delegated this task to you **specifically because the work is procedural**: deterministic steps, scripted outputs, no nuanced writing required.
 ## Run This Pre-Flight First Every Time

package/iso/agents/general-paid.md CHANGED

@@ -1,28 +1,16 @@
 ---
-description: Quality-sensitive worker on the strongest free-tier OpenCode model by default. Use for offer evaluation narratives (Blocks A-F), cover letter generation, "Why X?" form answers, interview STAR stories, and other tasks where writing quality and judgment matter.
+description: Quality-sensitive worker on the low-cost DeepSeek V4 Flash OpenCode route by default. Use for offer evaluation narratives (Blocks A-F), cover letter generation, "Why X?" form answers, interview STAR stories, and other tasks where writing quality and judgment matter.
 role: quality
 targets:
-  # No inline model: JobForge's models.yaml maps role "quality" to a
-  # free OpenRouter model on OpenCode, while Claude/Codex keep their
-  # quality-tier defaults from the standard preset. Claude Code reads
+  # No inline model: JobForge's models.yaml maps role "quality" to
+  # opencode-go/deepseek-v4-flash on OpenCode, while Claude/Codex keep
+  # their quality-tier defaults from the standard preset. Claude Code reads
   # .claude/iso-route.resolved.json; OpenCode reads opencode.json's
   # agent.quality.model (iso-harness 0.6.0+).
   opencode:
     mode: subagent
     temperature: 0.3
     reasoningEffort: medium
-    # Primary (qwen/qwen3-next-80b-a3b-instruct:free) resolves from the
-    # openrouter-free preset. First fallbacks intentionally avoid another
-    # immediate hop through the same Venice/Qwen pool when OpenRouter
-    # returns "[Venice] insufficient …" — gpt-oss-120b + nemotron are
-    # usually different backends. Remaining picks stay free-only.
-    fallback_models:
-      - openrouter/openai/gpt-oss-120b:free
-      - openrouter/nvidia/nemotron-3-super-120b-a12b:free
-      - openrouter/z-ai/glm-4.5-air:free
-      - openrouter/qwen/qwen3-coder:free
-      - openrouter/google/gemma-4-31b-it:free
-      - openrouter/meta-llama/llama-3.3-70b-instruct:free
     tools:
       geometra_connect: true
       geometra_page_model: true
@@ -40,10 +28,10 @@ targets:
 You are the @general-paid subagent. The orchestrator delegated this task to you because it requires quality writing or judgment — the kind of work `@general-free` isn't well-suited for.
-On OpenCode, this agent now defaults to a free OpenRouter model. On other
-harnesses, the same role may still resolve to a premium model. Your job is
-still the same: produce the best final writing you can from the context you
-were given.
+On OpenCode, this agent defaults to DeepSeek V4 Flash so application work
+does not fall back into overloaded free OpenRouter pools. On other harnesses,
+the same role may still resolve to a premium model. Your job is still the
+same: produce the best final writing you can from the context you were given.
 ## Do These Tasks

package/iso/agents/glm-minimal.md CHANGED

@@ -1,23 +1,16 @@
 ---
-description: Narrow-scope extractor on free-tier model. Use for single-purpose tasks where the orchestrator passes the exact input and expects a small, structured output — e.g., "extract these 8 fields from this JD text" or "parse this form schema into a label→type map". NOT for multi-step workflows.
+description: Narrow-scope extractor on the low-cost DeepSeek V4 Flash OpenCode route. Use for single-purpose tasks where the orchestrator passes the exact input and expects a small, structured output — e.g., "extract these 8 fields from this JD text" or "parse this form schema into a label→type map". NOT for multi-step workflows.
 role: minimal
 targets:
-  # No inline model: iso-route's "standard" preset maps role "minimal" to
-  # each harness's smallest credible model. Claude Code reads
+  # No inline model: JobForge's models.yaml maps role "minimal" to each
+  # harness's smallest credible model. On OpenCode that is pinned to
+  # opencode-go/deepseek-v4-flash. Claude Code reads
   # .claude/iso-route.resolved.json; OpenCode reads opencode.json's
   # agent.minimal.model (iso-harness 0.6.0+).
   opencode:
     mode: subagent
     temperature: 0
     reasoningEffort: none
-    # Primary (openai/gpt-oss-20b:free) resolves from openrouter-free
-    # preset. Fallback chain sticks to small dense models with reliable
-    # structured-output behavior — no creative generation upstream.
-    fallback_models:
-      - openrouter/google/gemma-4-26b-a4b-it:free
-      - openrouter/nvidia/nemotron-nano-9b-v2:free
-      - openrouter/google/gemma-4-31b-it:free
-      - openrouter/z-ai/glm-4.5-air:free
     tools:
       geometra_*: false
       gmail_*: false

package/iso/commands/job-forge.md CHANGED

@@ -140,13 +140,18 @@ When the user says "apply to N jobs", "process the pipeline", or similar, execut
 ```
 Step 1  — Enumerate candidates
-  - Grep data/applications/$(date +%Y-%m-%d).md and the last 3 day files for status "Evaluated"
+  - Grep data/applications/*.md for status "Evaluated" without loading every file into context
   - Also read data/pipeline.md for unprocessed URLs
   - Build ordered list: candidates = [job_1, job_2, ..., job_N]
 Step 2  — Dedup against already-applied
-  - For each candidate, Grep data/pipeline.md + today's day file for "APPLIED" + company+role
-  - Drop any match. Never re-apply.
+  - For each candidate, grep all four sources for URL and company+role:
+    data/pipeline.md, data/applications/*.md, batch/tracker-additions/*.tsv,
+    batch/tracker-additions/merged/*.tsv
+  - Drop any APPLIED / Applied match before counting toward N. Never re-apply.
+  - If a subagent later returns SKIP because it found a duplicate, treat that as
+    a missed preflight check; finish the current round, re-run dedupe, then pick
+    a replacement from the remaining candidates.
 Step 3  — Pre-flight cleanup (once, before the loop)
   - geometra_list_sessions()

package/iso/config.json CHANGED

@@ -1,49 +1,8 @@
 {
-  "opencodeModelFallback": {
-    "cooldown_seconds": 60,
-    "timeout_seconds": 30,
-    "notify_on_fallback": true,
-    "fallback_models": [
-      "openrouter/openai/gpt-oss-120b:free",
-      "openrouter/z-ai/glm-4.5-air:free",
-      "openrouter/nvidia/nemotron-3-super-120b-a12b:free",
-      "openrouter/qwen/qwen3-next-80b-a3b-instruct:free"
-    ],
-    "retryable_error_patterns": [
-      "\\bvenice\\b",
-      "insufficient\\s+usd",
-      "insufficient\\s+.*\\s+diem",
-      "diem\\s+balance",
-      "add\\s+credits",
-      "chutes",
-      "insufficient\\s+(?:credits?|funds?|balance)",
-      "credit.*balance.*too.*low",
-      "(?:temporarily\\s+)?unavailable|overloaded|try\\s+again"
-    ]
-  },
   "targets": {
     "opencode": {
-      "plugin": ["@razroo/opencode-model-fallback"],
       "instructions": ["templates/states.yml"],
-      "small_model": "openrouter/google/gemma-4-26b-a4b-it:free",
-      "provider": {
-        "openrouter": {
-          "models": {
-            "qwen/qwen3-coder:free": {},
-            "z-ai/glm-4.5-air:free": {},
-            "qwen/qwen3-next-80b-a3b-instruct:free": {},
-            "openai/gpt-oss-20b:free": {},
-            "openai/gpt-oss-120b:free": {},
-            "minimax/minimax-m2.5:free": {},
-            "nvidia/nemotron-3-super-120b-a12b:free": {},
-            "nvidia/nemotron-3-nano-30b-a3b:free": {},
-            "nvidia/nemotron-nano-9b-v2:free": {},
-            "google/gemma-4-26b-a4b-it:free": {},
-            "google/gemma-4-31b-it:free": {},
-            "meta-llama/llama-3.3-70b-instruct:free": {}
-          }
-        }
-      }
+      "small_model": "opencode-go/deepseek-v4-flash"
     }
   }
 }

package/iso/instructions.md CHANGED

@@ -5,9 +5,9 @@ AI-powered job search pipeline: scans portals, evaluates offers, generates CVs v
 ## Hard limits
 - [H1] Max 2 parallel `task` dispatches per message. For N jobs, run `ceil(N/2)` sequential rounds of 2. A round is not complete until both subagents return a final outcome (`APPLIED`, `APPLY FAILED`, `SKIP`, `Discarded`, or a written TSV path). A `task` tool result that only gives a session id / title is a launch acknowledgement, not completion. Applies in all modes, for all user phrasings ("urgent", "apply to 10 jobs now").
-  why: higher parallelism blows through free-tier rate limits; each subagent requires post-cleanup and racing more than 2 reliably loses at least one result. On 2026-04-25 the orchestrator launched round 2 while round 1 had only returned task ids, leaving four application subagents in flight and losing two provider-fallback recoveries
+  why: each subagent requires post-cleanup and racing more than 2 reliably loses at least one result. On 2026-04-25 the orchestrator launched round 2 while round 1 had only returned task ids, leaving four application subagents in flight and losing two provider recoveries
-- [H2] Max 1 application per company+role. Before every `apply` dispatch, grep all four sources for the URL and for `company+role`: `data/pipeline.md`, all `data/applications/*.md` day files, `batch/tracker-additions/*.tsv`, `batch/tracker-additions/merged/*.tsv`. If any source shows APPLIED / Applied, skip the dispatch.
+- [H2] Max 1 application per company+role. Before every `apply` dispatch, grep all four sources for the URL and for `company+role`: `data/pipeline.md`, all `data/applications/*.md` day files, `batch/tracker-additions/*.tsv`, `batch/tracker-additions/merged/*.tsv`. If any source shows APPLIED / Applied, skip the dispatch and pick a replacement from the remaining candidate list. Do not count duplicates toward a requested "apply to N jobs" total, and do not delegate obvious duplicates just so a subagent can return SKIP.
   why: 2026-04 same-day batch collision — when two batches target the same role, `npx job-forge merge` updates the existing day-file row rather than appending, so grepping day files alone misses earlier-batch applies; merged/*.tsv is the only place the breadcrumb remains
 - [H3] Before every batch of `task` dispatches that will use Geometra, call `geometra_list_sessions` then `geometra_disconnect({closeBrowser: true})`. Every round, no exceptions. Name this cleanup as an explicit "step 0" in your first-response plan for any multi-apply request — it is the most frequently skipped guardrail in practice, and skipping it produces cascade "Not connected" failures on the next dispatch.
@@ -37,7 +37,7 @@ AI-powered job search pipeline: scans portals, evaluates offers, generates CVs v
   why: iso-trace showed 0.25% Agent calls across 5174 turns under a prior over-broad "delegate before 2nd tool call" rule — the rule was ignored in practice; narrowing matches the original cache-bust incident
 - [D2] Route subagent work by cost tier. `@general-free`: procedural — form-fill, TSV merge, verify, OTP retrieval, portal scan metadata extraction, one-shot structured-field transforms. `@general-paid`: quality-sensitive — offer evaluation narrative Blocks A-F, cover letters, "Why X?" answers, STAR interview stories, LinkedIn outreach. `@glm-minimal`: narrow ≤5K-input one-shot extract/classify jobs that do not need context.
-  why: GLM 5.1 doesn't discount cache reads so procedural work there costs ~10×; free-tier models handle procedural work fine empirically (`opencode/big-pickle` processed 1000+ messages at $0)
+  why: OpenCode routes all JobForge tiers through DeepSeek V4 Flash by default now; recent traces showed free OpenRouter fallbacks freezing or hitting provider balance errors during applications
 - [D3] Read the active mode file before dispatch. Mode files own score gates, provider fallback, portal runbooks, and output shape.
   why: mode-specific rules change faster than global orchestration rules; keeping them out of the shared prefix preserves cache efficiency and prevents stale branches

package/models.yaml CHANGED

@@ -1,17 +1,15 @@
 # JobForge model policy.
 #
-# Extends @razroo/iso-route's bundled "openrouter-free" preset — a
-# curated zero-cost routing config that keeps Claude Code / Codex on
-# their native providers but routes OpenCode through explicit free
-# OpenRouter model IDs. Preset content lives in
-# node_modules/@razroo/iso-route/presets/openrouter-free.yaml; run
-# `npx iso-route plan models.yaml` to see the resolved policy.
+# Extends @razroo/iso-route's bundled "standard" preset, then pins every
+# OpenCode route to DeepSeek V4 Flash. Recent traces showed free OpenRouter
+# routes freezing or falling through Venice balance errors, so JobForge's
+# OpenCode default is now "best affordable paid" rather than "free".
 #
 # JobForge's subagents bind to preset roles via the `role:` field in
 # iso/agents/<slug>.md:
-#   @general-free  → role: fast     (Haiku / OpenCode big-pickle / gpt-5.4-mini)
-#   @general-paid  → role: quality  (Opus 4.7 / OpenRouter Qwen3 Next 80B free / gpt-5.4)
-#   @glm-minimal   → role: minimal  (Haiku / OpenRouter GPT-OSS-20B free / gpt-5.4-nano)
+#   @general-free  → role: fast     (Haiku / DeepSeek V4 Flash / gpt-5.4-mini)
+#   @general-paid  → role: quality  (Opus 4.7 / DeepSeek V4 Flash / gpt-5.4)
+#   @glm-minimal   → role: minimal  (Haiku / DeepSeek V4 Flash / gpt-5.4-nano)
 #
 # Override anything by adding fields here. For example, to pin Opus on
 # Claude Code for the @general-paid (quality) role:
@@ -29,11 +27,27 @@
 #           provider: openai
 #           model: gpt-5.4
-extends: openrouter-free
+extends: standard
+default:
+  targets:
+    opencode:
+      provider: opencode
+      model: opencode-go/deepseek-v4-flash
 roles:
   fast:
     targets:
       opencode:
         provider: opencode
-        model: opencode/big-pickle
+        model: opencode-go/deepseek-v4-flash
+  quality:
+    targets:
+      opencode:
+        provider: opencode
+        model: opencode-go/deepseek-v4-flash
+  minimal:
+    targets:
+      opencode:
+        provider: opencode
+        model: opencode-go/deepseek-v4-flash

package/modes/apply.md CHANGED

@@ -17,7 +17,7 @@ Live application assistant. Reads the active application form in Chrome (via Geo
   why: prior aborted subagents leave Chromium sessions stuck in the pool; next `geometra_connect` fails with "Not connected" (see root `[H3]`)
 - [H5] Max 2 parallel `task` dispatches per round. For N jobs, run `ceil(N/2)` sequential rounds of 2. Never emit 3+ dispatches in a single message. Do not start the next round until both current-round subagents return final outcomes (`APPLIED`, `APPLY FAILED`, `SKIP`, `Discarded`, or a written TSV path); task/session ids are only launch receipts.
-  why: free-tier rate limits + subagent post-cleanup cost; racing more than 2 reliably loses at least one result (see root `[H1]`). A 2026-04-25 OpenCode trace launched round 2 while round 1 was still running, then lost two fallback recoveries
+  why: subagent post-cleanup cost and portal state make racing more than 2 unreliable (see root `[H1]`). A 2026-04-25 OpenCode trace launched round 2 while round 1 was still running, then lost two provider recoveries
 ## Defaults
@@ -46,10 +46,10 @@ Live application assistant. Reads the active application form in Chrome (via Geo
   why: class-B Ashby / Cloudflare-fronted portals need a residential outbound IP; the fix is wired in Geometra MCP v1.59.0 but the orchestrator owns the config pipe. See "BYO Residential Proxy" in modes/reference-portals.md.
 - [D8] Upgrade application routing to `@general-paid` when the offer score is ≥ 4.0/5, the user flags "top-tier", "dream job", or "high-stakes", or the candidate is late-stage/post-screen.
-  why: form-fill flows are 6+ steps; free-tier sometimes aborts mid-flow on large Greenhouse/Workday schemas; paid tier has more headroom
+  why: high-stakes applications need the quality-sensitive prompt and medium reasoning budget even though OpenCode now routes both application tiers through DeepSeek V4 Flash by default
-- [D9] If an upgraded `@general-paid` subagent fails with provider-side errors, re-dispatch the same URL once on `@general-free` before marking FAILED. Provider-side errors include Venice, Diem, Chutes, HTTP 402/429, insufficient credits/funds/balance, overload, and temporarily unavailable.
-  why: OpenCode paid-tier routing can still use free OpenRouter model IDs; backend pool limits are not evidence that a procedural free-tier worker cannot complete the same form after preflight gates pass
+- [D9] If a subagent fails with provider-side errors, do not auto-downgrade or re-dispatch the same URL. Report the provider failure, leave any TSV untouched unless there is a confirmed outcome, and inspect telemetry before retrying.
+  why: OpenCode now pins all JobForge application tiers to DeepSeek V4 Flash; switching `@general-paid` → `@general-free` changes the prompt/tool budget but not the provider route, so automatic duplicate dispatches add risk without fixing provider availability
 ## Procedure
@@ -65,7 +65,7 @@ Live application assistant. Reads the active application form in Chrome (via Geo
 10. Generate answers from Block B + Block F + Section G + JD.
 11. Submit as ONE `run_actions` call [H1] using labels [D6] with `imeFriendly: true` [D4].
 12. On session error, run the 4-step recovery; only one retry [H2].
-13. On upgraded-provider failure, downgrade once to `@general-free` [D9].
+13. On provider failure, stop and inspect telemetry before any retry [D9].
 14. On OTP prompt, fetch the code from Gmail via `gmail_get_message`.
 15. Submit the OTP with `geometra_fill_otp` and click Submit.
 16. Write outcome as `batch/tracker-additions/*.tsv` [H3].
@@ -98,7 +98,7 @@ Or, on failure:
 APPLY FAILED AFTER RECOVERY: <url>
   Error 1: <first error>
   Error 2: <post-recovery error>
-  Recommend: re-dispatch on @general-paid
+  Recommend: inspect telemetry before retrying this URL
 ```
 ---
@@ -176,7 +176,10 @@ When `location_constraints` is absent, use the prose fields:
 ```
 Step 1  — Build the job list (N items)
-Step 2  — Dedup: Grep data/pipeline.md + today's day file for each company+role. Drop any already APPLIED.
+Step 2  — Dedup: for each candidate, grep all four sources for the URL and for company+role:
+          data/pipeline.md, all data/applications/*.md day files,
+          batch/tracker-additions/*.tsv, batch/tracker-additions/merged/*.tsv.
+          Drop any already APPLIED before counting toward N; pick replacements from the remaining list.
 Step 3  — geometra_list_sessions() + geometra_disconnect({closeBrowser: true})  [once, before loop]
 Step 4  — For round in ceil(N/2):
             pair = jobs[round*2 : round*2 + 2]
@@ -192,7 +195,7 @@ Step 6  — Reconcile outcomes (Hard Limit #6):
 Step 7  — Summarize outcomes; do NOT auto-retry failures.
 ```
-If a subagent fails, report it in the summary and let the user decide whether to retry. Never auto-retry — re-running a submit step risks duplicate applications.
+If a subagent fails, report it in the summary and let the user decide whether to retry. Never auto-retry — re-running a submit step risks duplicate applications. If a subagent returns SKIP because it discovered a duplicate, treat that as a missed preflight check: finish the current round, then choose a replacement candidate only after re-running dedupe against all four sources.
 **Outcome routing (Hard Limit #6 in `AGENTS.md`):**
 - Subagents write `batch/tracker-additions/{num}-{slug}.tsv` — one TSV per job.
@@ -359,10 +362,10 @@ Call 4:  geometra_run_actions({
    APPLY FAILED AFTER RECOVERY: <URL>
    Error 1: <first error message>
    Error 2: <error after recovery>
-   Recommend: re-dispatch on @general-paid
+   Recommend: inspect telemetry before retrying this URL
    ```
-   Do NOT try a third time. Do NOT try a different approach. The orchestrator will decide whether to re-dispatch on a bigger model.
+   Do NOT try a third time. Do NOT try a different approach. The orchestrator will decide whether to retry after inspecting telemetry.
 ### Skip schema re-fetches mid-flow (Rule D)

package/opencode.json CHANGED

@@ -1,33 +1,15 @@
 {
   "$schema": "https://opencode.ai/config.json",
-  "model": "openrouter/qwen/qwen3-coder:free",
+  "model": "opencode-go/deepseek-v4-flash",
   "agent": {
     "fast": {
-      "model": "opencode/big-pickle"
+      "model": "opencode-go/deepseek-v4-flash"
     },
     "quality": {
-      "model": "openrouter/qwen/qwen3-next-80b-a3b-instruct:free"
+      "model": "opencode-go/deepseek-v4-flash"
     },
     "minimal": {
-      "model": "openrouter/openai/gpt-oss-20b:free"
-    }
-  },
-  "provider": {
-    "openrouter": {
-      "models": {
-        "qwen/qwen3-coder:free": {},
-        "z-ai/glm-4.5-air:free": {},
-        "qwen/qwen3-next-80b-a3b-instruct:free": {},
-        "openai/gpt-oss-20b:free": {},
-        "openai/gpt-oss-120b:free": {},
-        "minimax/minimax-m2.5:free": {},
-        "nvidia/nemotron-3-super-120b-a12b:free": {},
-        "nvidia/nemotron-3-nano-30b-a3b:free": {},
-        "nvidia/nemotron-nano-9b-v2:free": {},
-        "google/gemma-4-26b-a4b-it:free": {},
-        "google/gemma-4-31b-it:free": {},
-        "meta-llama/llama-3.3-70b-instruct:free": {}
-      }
+      "model": "opencode-go/deepseek-v4-flash"
     }
   },
   "mcp": {
@@ -66,11 +48,8 @@
       }
     }
   },
-  "plugin": [
-    "@razroo/opencode-model-fallback"
-  ],
   "instructions": [
     "templates/states.yml"
   ],
-  "small_model": "openrouter/google/gemma-4-26b-a4b-it:free"
+  "small_model": "opencode-go/deepseek-v4-flash"
 }

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "job-forge",
-  "version": "2.14.11",
+  "version": "2.14.13",
   "description": "AI-powered job search pipeline built on opencode",
   "type": "module",
   "bin": {
@@ -97,7 +97,6 @@
     "@razroo/iso": "^0.2.5",
     "@razroo/iso-eval": "^0.4.0",
     "@razroo/iso-harness": "^0.6.1",
-    "@razroo/iso-route": "^0.5.3",
-    "@razroo/opencode-model-fallback": "^0.3.1"
+    "@razroo/iso-route": "^0.5.3"
   }
 }

package/scripts/check-iso-smoke.mjs CHANGED

@@ -21,9 +21,9 @@ const checks = [
   ["H7 distrusts subagent prose", () => every(files.instructions, ["must originate from a file", "not from prior subagent prose"])],
   ["shared prompt points to on-demand references", () => every(files.instructions, ["modes/{mode}.md", "modes/reference-setup.md", "modes/reference-portals.md", "modes/reference-geometra.md"])],
   ["apply mode owns high-stakes upgrade", () => every(files.apply, ["[D8]", "@general-paid", "4.0/5", "high-stakes"])],
-  ["apply mode owns provider downgrade", () => every(files.apply, ["[D9]", "@general-free", "HTTP 402/429", "insufficient credits/funds/balance"])],
-  ["models policy extends free OpenRouter preset", () => /extends:\s*openrouter-free/.test(files.models)],
-  ["OpenCode fallback plugin is configured", () => every(files.config, ["opencodeModelFallback", "@razroo/opencode-model-fallback"])],
+  ["apply mode blocks provider auto-downgrade", () => every(files.apply, ["[D9]", "do not auto-downgrade", "inspect telemetry before retrying"])],
+  ["models policy pins OpenCode to DeepSeek V4 Flash", () => /extends:\s*standard/.test(files.models) && count(files.models, "opencode-go/deepseek-v4-flash") >= 4],
+  ["OpenCode fallback plugin is not configured", () => !every(files.config, ["opencodeModelFallback", "@razroo/opencode-model-fallback"])],
 ];
 const failures = checks
@@ -41,3 +41,7 @@ console.log(`JobForge iso smoke passed (${checks.length} checks).`);
 function every(source, needles) {
   return needles.every((needle) => source.includes(needle));
 }
+function count(source, needle) {
+  return source.split(needle).length - 1;
+}