opencode-lmstudio-warm 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,17 +1,19 @@
1
- ## [0.1.1](https://github.com/diegomarino/opencode-lmstudio-warm/compare/v0.1.0...v0.1.1) (2026-07-04)
2
-
3
1
  # Changelog
4
2
 
5
- All notable changes to this project are documented here.
3
+ ## [0.1.2](https://github.com/diegomarino/opencode-lmstudio-warm/compare/v0.1.1...v0.1.2) (2026-07-04)
4
+
5
+ All notable changes are documented here. From v0.1.1 onward, entries are
6
+ generated automatically by [semantic-release](https://github.com/semantic-release/semantic-release) from
7
+ [Conventional Commits](https://www.conventionalcommits.org). While the version
8
+ is `0.x`, a MINOR bump may include breaking changes (SemVer 0.x rule).
6
9
 
7
- The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
8
- and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
9
- While the version is `0.x`, a MINOR bump may include breaking changes (per the
10
- SemVer 0.x rule); such changes are called out explicitly below.
10
+ ## [0.1.1](https://github.com/diegomarino/opencode-lmstudio-warm/compare/v0.1.0...v0.1.1) (2026-07-04)
11
+
12
+ ### Bug Fixes
11
13
 
12
- ## [Unreleased]
14
+ - avoid redundant eager warm when `model` and `small_model` are identical ([0f16e8f](https://github.com/diegomarino/opencode-lmstudio-warm/commit/0f16e8f))
13
15
 
14
- ## [0.1.0] - 2026-07-03
16
+ ## 0.1.0 - 2026-07-04
15
17
 
16
18
  Initial public release.
17
19
 
@@ -44,6 +46,3 @@ Initial public release.
44
46
  lock immediately when its holder pid is dead (or the pid file is absent past a
45
47
  grace window), the release is synchronous, and a `process.on("exit")` handler
46
48
  is a last-resort cleanup. Verified 9/9 against a live LM Studio fleet.
47
-
48
- [Unreleased]: https://github.com/diegomarino/opencode-lmstudio-warm/compare/v0.1.0...HEAD
49
- [0.1.0]: https://github.com/diegomarino/opencode-lmstudio-warm/releases/tag/v0.1.0
package/README.md CHANGED
@@ -20,8 +20,12 @@ Per request, the plugin checks that the model is actually loaded and, when it
20
20
  isn't, performs exactly one `lms load` (even across parallel sessions) before
21
21
  letting the request through.
22
22
 
23
- Verified against opencode **v1.17.10** and the local LM Studio + `lms` CLI on
24
- macOS/Apple Silicon (see [`test/e2e/verify.sh`](./test/e2e/verify.sh), 9/9 passing).
23
+ Verified against opencode **v1.17.10** and **LM Studio 0.4.18** (`lms` CLI
24
+ commit `6041ae0`) on macOS/Apple Silicon (see
25
+ [`test/e2e/verify.sh`](./test/e2e/verify.sh), 9/9 passing). The LM Studio
26
+ behaviors the plugin depends on are the `lms ps --json` field names
27
+ (`modelKey` / `identifier` / `status` / `queued`) and the fact that
28
+ `lms load` is not idempotent.
25
29
 
26
30
  ## Quick start
27
31
 
@@ -128,15 +132,33 @@ the JSON/`jq` above.
128
132
 
129
133
  The plugin works with zero configuration. Optional tuning lives in
130
134
  `~/.config/opencode/lmstudio-warm.json` (or inline as
131
- `"plugin": [["opencode-lmstudio-warm", {...}]]`): `providers`,
132
- `ttlSeconds`, `parallel` (size ≈ concurrent fleet width; overflow queues
133
- server-side), `contextLength`,
134
- `perModel: { "<key>": { parallel, ttlSeconds, contextLength } }`,
135
- `verifyCacheMs`, `retryCooldownMs`, `failMode` (`hybrid` default: confirmed
136
- failures fail the request with a clear error; ambiguous lock contention
137
- proceeds fail-open), `reconcileDuplicates`, `eager`, `logFile`.
138
-
139
- Log: `~/.cache/opencode/lmstudio-warm.log`.
135
+ `"plugin": [["opencode-lmstudio-warm", {...}]]`).
136
+
137
+ > **Scope:** the plugin manages the **local** LM Studio through the `lms` CLI.
138
+ > `baseURL` (and any gated provider's `baseURL`) must point at this same
139
+ > machine — a non-loopback URL is logged as a warning, and the gate can
140
+ > neither verify nor load models on a remote server.
141
+
142
+ | Option | Default | What it does |
143
+ |--------|---------|--------------|
144
+ | `providers` | `["lmstudio"]` | Provider IDs to gate; requests on other providers are ignored. All listed providers must address the local LM Studio. |
145
+ | `lmsPath` | `~/.lmstudio/bin/lms` if present, else `lms` | Path to the `lms` CLI. |
146
+ | `baseURL` | `http://127.0.0.1:1234/v1` | Fallback base URL when the provider config doesn't carry one. Must be loopback. |
147
+ | `ttlSeconds` | `0` | `--ttl` for `lms load`; `0` omits the flag (resident until unloaded). |
148
+ | `parallel` | `0` | `--parallel` for `lms load`; `0` omits it (LM Studio default, currently 4). Size ≈ concurrent fleet width; overflow queues server-side. |
149
+ | `contextLength` | `0` | `--context-length` for `lms load`; `0` omits it (model default). |
150
+ | `perModel` | `{}` | Per-model-key overrides of `ttlSeconds` / `parallel` / `contextLength`. |
151
+ | `verifyCacheMs` | `30000` | How long a positive residency verdict is trusted before re-checking. |
152
+ | `retryCooldownMs` | `60000` | After a confirmed load failure, don't retry the same key for this long (prevents load storms). |
153
+ | `loadTimeoutMs` | `900000` | Hard cap on a single `lms load` (a cold big-model load can take minutes). |
154
+ | `serverStartTimeoutMs` | `90000` | Hard cap on bringing the HTTP server up. |
155
+ | `lockWaitTimeoutMs` | `1200000` | Max wait for another process's in-flight load before proceeding fail-open. |
156
+ | `failMode` | `"hybrid"` | `hybrid`: confirmed failures fail the request with a clear error, ambiguous ones proceed fail-open. `open`: never fail. `closed`: any warm failure fails the request. |
157
+ | `reconcileDuplicates` | `true` | Unload idle suffixed duplicates (`key:2` …) and load fresh when the bare key isn't addressable. |
158
+ | `launchAppFallback` | `true` | If the server won't start, try `open -ga "LM Studio"` once (macOS only). |
159
+ | `eager` | `true` | Background-warm `model` + `small_model` at instance start. |
160
+ | `logFile` | `~/.cache/opencode/lmstudio-warm.log` | Plugin log file; rotated to `<logFile>.old` once it grows past ~5 MB. |
161
+ | `lockDir` | `~/.cache/opencode/lmstudio-warm.lock` | Cross-process lock directory. |
140
162
 
141
163
  See `examples/lmstudio-warm.json` for a fleet-tuned starting point
142
164
  (`cp examples/lmstudio-warm.json ~/.config/opencode/lmstudio-warm.json`).
@@ -287,8 +309,9 @@ building, addressability, pid liveness, fail-mode decisions) is exported from
287
309
  `src/index.ts` and unit-tested under `test/`; the live system behavior is covered
288
310
  by the E2E fixture under [`test/e2e/`](./test/e2e/).
289
311
 
290
- Releases follow [SemVer](https://semver.org) and are cut by CI on `v*` tags
291
- (see [`CHANGELOG.md`](./CHANGELOG.md)).
312
+ Releases follow [SemVer](https://semver.org) and are cut automatically by
313
+ semantic-release on every push to `main` — Conventional Commits decide the
314
+ bump (see [`CHANGELOG.md`](./CHANGELOG.md)).
292
315
 
293
316
  ## Disclaimer
294
317
 
@@ -10,7 +10,7 @@ strings opencode sends as the API `model` field).
10
10
  A minimal consumer config: the `plugin` array entry plus the `lmstudio` provider
11
11
  block (`baseURL`, `apiKey`, and the recommended `headerTimeout` / `chunkTimeout`).
12
12
  Merge it into your own `opencode.json` — the repo
13
- [README's Install section](../README.md#install) has an idempotent `jq` one-liner
13
+ [README's Install section](../README.md#install-options) has an idempotent `jq` one-liner
14
14
  that does this non-destructively, or run `opencode plugin opencode-lmstudio-warm`
15
15
  to register the plugin and add only the provider block by hand.
16
16
 
@@ -1,6 +1,5 @@
1
1
  {
2
2
  "$schema": "https://opencode.ai/config.json",
3
- "//": "Consumer example: wire the plugin via npm. opencode auto-installs the package named in `plugin` and loads it before every request. Copy the `plugin` line and the `lmstudio` provider block into your own opencode.json, then set your model/small_model and LM_API_TOKEN.",
4
3
  "plugin": ["opencode-lmstudio-warm"],
5
4
  "model": "lmstudio/your-main-model-key",
6
5
  "small_model": "lmstudio/your-small-model-key",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "opencode-lmstudio-warm",
3
- "version": "0.1.1",
3
+ "version": "0.1.2",
4
4
  "description": "Deterministic LM Studio model pre-warm gate for opencode — loads and keeps the target model resident before every request, healing cold starts and mid-session TTL evictions.",
5
5
  "type": "module",
6
6
  "main": "./src/index.ts",
package/src/index.ts CHANGED
@@ -145,17 +145,49 @@ const OK: WarmResult = { ok: true, confirmed: false, reason: "" }
145
145
  // No per-process state — the plugin closure below composes these with the live
146
146
  // caches, child processes, and lock directory.
147
147
 
148
- /** Merge config in precedence order: DEFAULTS < file options < plugin options.
149
- * Also maps the legacy `failClosed` boolean onto `failMode` when the newer
150
- * key isn't set. */
151
- export function resolveOptions(
152
- fileOpts: Partial<WarmOptions>,
153
- pluginOpts?: (Partial<WarmOptions> & { failClosed?: boolean }) | null,
154
- ): WarmOptions {
155
- const raw = { ...fileOpts, ...(pluginOpts ?? {}) }
156
- // Legacy boolean from earlier revisions of this plugin.
157
- if (raw.failClosed !== undefined && raw.failMode === undefined) raw.failMode = raw.failClosed ? "closed" : "open"
158
- return { ...DEFAULTS, ...raw }
148
+ /** Merge config in precedence order: DEFAULTS < file options < plugin options. */
149
+ export function resolveOptions(fileOpts: Partial<WarmOptions>, pluginOpts?: Partial<WarmOptions> | null): WarmOptions {
150
+ return { ...DEFAULTS, ...fileOpts, ...(pluginOpts ?? {}) }
151
+ }
152
+
153
+ /** Keys in a raw options object that the plugin does not know. Surfaced as
154
+ * warnings at startup — a typo'd key would otherwise be silently ignored. */
155
+ export function unknownOptionKeys(raw: Record<string, unknown>): string[] {
156
+ return Object.keys(raw).filter((k) => !(k in DEFAULTS))
157
+ }
158
+
159
+ const NUMERIC_KEYS = [
160
+ "ttlSeconds",
161
+ "parallel",
162
+ "contextLength",
163
+ "verifyCacheMs",
164
+ "retryCooldownMs",
165
+ "loadTimeoutMs",
166
+ "serverStartTimeoutMs",
167
+ "lockWaitTimeoutMs",
168
+ ] as const
169
+ const BOOLEAN_KEYS = ["reconcileDuplicates", "launchAppFallback", "eager"] as const
170
+ const STRING_KEYS = ["lmsPath", "baseURL", "logFile", "lockDir"] as const
171
+
172
+ /** Repair invalid option VALUES back to their defaults, collecting one
173
+ * warning per repair. Notably an unrecognized failMode falls back to
174
+ * "hybrid" (the default): the exact-match checks downstream would otherwise
175
+ * make a typo silently behave like "open", the least safe mode. */
176
+ export function sanitizeOptions(o: WarmOptions): { opts: WarmOptions; warnings: string[] } {
177
+ const warnings: string[] = []
178
+ const out: WarmOptions = { ...o }
179
+ const fix = (key: keyof WarmOptions, why: string) => {
180
+ warnings.push(`${key} ${why} — using default ${JSON.stringify(DEFAULTS[key])}`)
181
+ ;(out as Record<string, unknown>)[key] = DEFAULTS[key]
182
+ }
183
+ if (!["open", "closed", "hybrid"].includes(out.failMode)) fix("failMode", `"${out.failMode}" is not open|closed|hybrid`)
184
+ if (!Array.isArray(out.providers) || out.providers.length === 0 || out.providers.some((p) => typeof p !== "string"))
185
+ fix("providers", "must be a non-empty string array")
186
+ for (const k of NUMERIC_KEYS) if (typeof out[k] !== "number" || !Number.isFinite(out[k]) || out[k] < 0) fix(k, "must be a non-negative number")
187
+ for (const k of BOOLEAN_KEYS) if (typeof out[k] !== "boolean") fix(k, "must be a boolean")
188
+ for (const k of STRING_KEYS) if (typeof out[k] !== "string" || out[k] === "") fix(k, "must be a non-empty string")
189
+ if (out.perModel === null || typeof out.perModel !== "object" || Array.isArray(out.perModel)) fix("perModel", "must be an object")
190
+ return { opts: out, warnings }
159
191
  }
160
192
 
161
193
  /** opencode addresses models by the UNSUFFIXED key; LM Studio routes the API
@@ -168,6 +200,26 @@ export function addressable(instances: LmsInstance[], key: string): boolean {
168
200
  return instances.some((i) => i.identifier === key)
169
201
  }
170
202
 
203
+ /** Classify `lms ps` output for a key. "unknown" (ps output unavailable) is a
204
+ * first-class state on purpose: it is AMBIGUOUS, never "absent" — loading
205
+ * blind onto a possibly-resident key is how duplicate instances are made,
206
+ * and a failed post-load probe must not be reported as a confirmed load
207
+ * failure (that would negative-cache a model that may well be loaded). */
208
+ export type PsCheck =
209
+ | { state: "unknown" }
210
+ | { state: "addressable" }
211
+ | { state: "absent" }
212
+ | { state: "duplicates"; dups: LmsInstance[]; busy: boolean }
213
+
214
+ export function classifyPs(instances: LmsInstance[] | null, key: string): PsCheck {
215
+ if (instances === null) return { state: "unknown" }
216
+ if (addressable(instances, key)) return { state: "addressable" }
217
+ const dups = instances.filter((i) => i.modelKey === key)
218
+ if (dups.length === 0) return { state: "absent" }
219
+ const busy = dups.some((i) => i.status === "generating" || (i.queued ?? 0) > 0)
220
+ return { state: "duplicates", dups, busy }
221
+ }
222
+
171
223
  /** Split an opencode model ref ("provider/key…") on the FIRST slash, so a key
172
224
  * that itself contains slashes (e.g. "qwen/qwen3") is preserved intact. */
173
225
  export function parseModelRef(ref: unknown): { providerID: string; key: string } | null {
@@ -202,11 +254,14 @@ export function pidAlive(pid: number): boolean {
202
254
  }
203
255
  }
204
256
 
205
- /** Parse a lock pid-file's contents to a pid, or null if absent/blank/garbage. */
257
+ /** Parse a lock pid-file's contents to a pid, or null if absent/blank/garbage
258
+ * or non-positive. Non-positive values are rejected because `kill(-1, 0)`
259
+ * probes ALL processes (always "alive") — a corrupted pid file must not make
260
+ * the lock unbreakable until the staleness backstop. */
206
261
  export function parseLockPid(content: string | null): number | null {
207
262
  if (content == null) return null
208
263
  const n = Number.parseInt(content.trim(), 10)
209
- return Number.isFinite(n) ? n : null
264
+ return Number.isFinite(n) && n > 0 ? n : null
210
265
  }
211
266
 
212
267
  /** Given a warm outcome and the configured failMode, should opencode's request
@@ -218,16 +273,23 @@ export function shouldFailRequest(failMode: WarmOptions["failMode"], result: War
218
273
  }
219
274
 
220
275
  export const LMStudioWarm: Plugin = async (_input, pluginOptions) => {
221
- const opts = resolveOptions(
222
- loadFileOptions(),
223
- pluginOptions as (Partial<WarmOptions> & { failClosed?: boolean }) | null,
224
- )
276
+ const fileOpts = loadFileOptions()
277
+ const plugOpts = (pluginOptions ?? {}) as Partial<WarmOptions>
278
+ const { opts, warnings: configWarnings } = sanitizeOptions(resolveOptions(fileOpts, plugOpts))
279
+ for (const k of unknownOptionKeys(fileOpts as Record<string, unknown>))
280
+ configWarnings.push(`unknown option "${k}" in lmstudio-warm.json`)
281
+ for (const k of unknownOptionKeys(plugOpts as Record<string, unknown>))
282
+ configWarnings.push(`unknown option "${k}" in plugin options`)
225
283
 
226
284
  // ---- state (per opencode process) ----
227
- const verifiedAt = new Map<string, number>() // model key -> last confirmed-addressable timestamp
285
+ // Warm caches are keyed by `${baseURL}::${modelKey}` — residency and failure
286
+ // are facts about one server+model pair, not about a model key in the
287
+ // abstract (two gated providers may serve the same key).
288
+ const verifiedAt = new Map<string, number>() // last confirmed-addressable timestamp
228
289
  const failedAt = new Map<string, { at: number; reason: string }>() // negative cache
229
290
  const inflight = new Map<string, Promise<WarmResult>>()
230
- let serverVerifiedAt = 0
291
+ const serverVerifiedAt = new Map<string, number>() // baseURL -> last confirmed-listening
292
+ const serverFailedAt = new Map<string, number>() // baseURL -> last failed bring-up
231
293
  // True only while THIS process holds the mkdir lock. Used by the exit handler
232
294
  // to release a lock that a fire-and-forget eager warm may still be holding
233
295
  // when the process tears down (otherwise the async finally never runs).
@@ -237,30 +299,56 @@ export const LMStudioWarm: Plugin = async (_input, pluginOptions) => {
237
299
  fs.mkdirSync(path.dirname(opts.logFile), { recursive: true })
238
300
  } catch {}
239
301
 
302
+ // Rotate the log once it outgrows ~5 MB (one previous generation kept at
303
+ // .old) so long-lived fleet hosts cannot grow it unbounded.
304
+ try {
305
+ if (fs.statSync(opts.logFile).size > 5 * 1024 * 1024) fs.renameSync(opts.logFile, `${opts.logFile}.old`)
306
+ } catch {}
307
+
240
308
  function log(msg: string) {
241
309
  try {
242
310
  fs.appendFileSync(opts.logFile, `${new Date().toISOString()} [pid ${process.pid}] ${msg}\n`)
243
311
  } catch {}
244
312
  }
245
313
 
246
- // Last-resort synchronous lock release. A one-shot `opencode run` can exit
247
- // while a background eager warm still holds the lock; process.on("exit") runs
248
- // sync only, so rmSync is the tool. Guard by the pid file so we never delete a
249
- // lock another process legitimately re-acquired in the meantime (TOCTOU), and
250
- // never throw from the handler. SIGKILL is uncatchable — the dead-holder
251
- // liveness check in acquireLock is the backstop for that.
252
- process.once("exit", () => {
253
- if (!holdingLock) return
314
+ const loggedOnce = new Set<string>()
315
+ function logOnce(msg: string) {
316
+ if (loggedOnce.has(msg)) return
317
+ loggedOnce.add(msg)
318
+ log(msg)
319
+ }
320
+
321
+ for (const w of configWarnings) log(`config warning: ${w}`)
322
+
323
+ // The ONLY lock-release path (also used by the exit handler below). Removes
324
+ // the lock dir only if the pid file still names this process, or is
325
+ // absent/blank (we mkdir'd but hadn't written it yet). Another process may
326
+ // have legitimately broken our lock (stale/dead-holder rules in acquireLock)
327
+ // and re-acquired it — deleting THEIR lock would reopen the duplicate-load
328
+ // race the lock exists to prevent. Synchronous on purpose: rmSync + flag
329
+ // clear run with no await between them, so a second in-process waiter cannot
330
+ // observe a removed dir with holdingLock still true, and it works inside the
331
+ // sync-only "exit" handler. Never throws.
332
+ function releaseLockIfOurs() {
254
333
  try {
255
334
  let ours = true
256
335
  try {
257
336
  const pidStr = fs.readFileSync(path.join(opts.lockDir, "pid"), "utf8").trim()
258
- ours = pidStr === "" || pidStr === String(process.pid) // absent pid ⇒ we mkdir'd but hadn't written it yet
337
+ ours = pidStr === "" || pidStr === String(process.pid)
259
338
  } catch {
260
339
  ours = true
261
340
  }
262
341
  if (ours) fs.rmSync(opts.lockDir, { recursive: true, force: true })
263
342
  } catch {}
343
+ holdingLock = false
344
+ }
345
+
346
+ // Last-resort release. A one-shot `opencode run` can exit while a background
347
+ // eager warm still holds the lock; process.on("exit") runs sync only.
348
+ // SIGKILL is uncatchable — the dead-holder liveness check in acquireLock is
349
+ // the backstop for that.
350
+ process.once("exit", () => {
351
+ if (holdingLock) releaseLockIfOurs()
264
352
  })
265
353
 
266
354
  function run(
@@ -318,26 +406,36 @@ export const LMStudioWarm: Plugin = async (_input, pluginOptions) => {
318
406
  return false
319
407
  }
320
408
 
321
- let serverInflight: Promise<boolean> | null = null
409
+ const serverInflight = new Map<string, Promise<boolean>>()
322
410
  function ensureServer(baseURL: string): Promise<boolean> {
323
- if (Date.now() - serverVerifiedAt < opts.verifyCacheMs) return Promise.resolve(true)
324
- if (serverInflight) return serverInflight
325
- serverInflight = ensureServerImpl(baseURL).finally(() => {
326
- serverInflight = null
327
- })
328
- return serverInflight
411
+ if (Date.now() - (serverVerifiedAt.get(baseURL) ?? 0) < opts.verifyCacheMs) return Promise.resolve(true)
412
+ // Server-level negative cache: a failed bring-up (start + poll + optional
413
+ // app launch) can take minutes — fail fast for retryCooldownMs instead of
414
+ // re-paying that stall on every request while the server stays down.
415
+ if (Date.now() - (serverFailedAt.get(baseURL) ?? 0) < opts.retryCooldownMs) return Promise.resolve(false)
416
+ const existing = serverInflight.get(baseURL)
417
+ if (existing) return existing
418
+ const p = ensureServerImpl(baseURL)
419
+ .then((up) => {
420
+ if (up) serverFailedAt.delete(baseURL)
421
+ else serverFailedAt.set(baseURL, Date.now())
422
+ return up
423
+ })
424
+ .finally(() => serverInflight.delete(baseURL))
425
+ serverInflight.set(baseURL, p)
426
+ return p
329
427
  }
330
428
 
331
429
  async function ensureServerImpl(baseURL: string): Promise<boolean> {
332
430
  if (await httpAlive(baseURL)) {
333
- serverVerifiedAt = Date.now()
431
+ serverVerifiedAt.set(baseURL, Date.now())
334
432
  return true
335
433
  }
336
434
  log(`HTTP server not reachable at ${baseURL} — running lms server start`)
337
435
  const started = await lms(["server", "start"], 30_000)
338
436
  if (!started.ok) log(`lms server start failed: ${started.stderr.trim().slice(0, 300)}`)
339
437
  if (await pollAlive(baseURL, opts.serverStartTimeoutMs)) {
340
- serverVerifiedAt = Date.now()
438
+ serverVerifiedAt.set(baseURL, Date.now())
341
439
  log(`HTTP server is up at ${baseURL}`)
342
440
  return true
343
441
  }
@@ -348,7 +446,7 @@ export const LMStudioWarm: Plugin = async (_input, pluginOptions) => {
348
446
  await new Promise((r) => setTimeout(r, 3_000))
349
447
  await lms(["server", "start"], 30_000)
350
448
  if (await pollAlive(baseURL, opts.serverStartTimeoutMs)) {
351
- serverVerifiedAt = Date.now()
449
+ serverVerifiedAt.set(baseURL, Date.now())
352
450
  log(`HTTP server is up at ${baseURL} (after app launch)`)
353
451
  return true
354
452
  }
@@ -367,15 +465,17 @@ export const LMStudioWarm: Plugin = async (_input, pluginOptions) => {
367
465
 
368
466
  // Cross-process mutex via atomic mkdir: parallel opencode workers must not
369
467
  // race lms load (it is not idempotent). A lock may be broken when (1) it is
370
- // older than staleMs (no live holder can run that long every command under
371
- // the lock is killed at a hard timeout), (2) its recorded holder pid is dead
372
- // (crash/abrupt exit before the finally released it the observed eager-warm
373
- // leak), or (3) the pid file is missing AND the dir has outlived a short grace
374
- // (a holder that crashed between mkdir and writeFile). A fresh, pid-less lock
375
- // is left alone: that is a live holder still mid-acquisition.
468
+ // older than staleMs — holders refresh the dir mtime before each long phase
469
+ // (touchLock in doWarm), so age measures the CURRENT phase and no live phase
470
+ // can outlast the load timeout, the longest hard cap; (2) its recorded
471
+ // holder pid is dead (crash/abrupt exit before the finally released it — the
472
+ // observed eager-warm leak); or (3) the pid file is missing AND the dir has
473
+ // outlived a short grace (a holder that crashed between mkdir and
474
+ // writeFile). A fresh, pid-less lock is left alone: that is a live holder
475
+ // still mid-acquisition.
376
476
  async function acquireLock(): Promise<(() => void) | null> {
377
477
  const deadline = Date.now() + opts.lockWaitTimeoutMs
378
- const staleMs = opts.loadTimeoutMs + opts.serverStartTimeoutMs + 120_000
478
+ const staleMs = opts.loadTimeoutMs + 120_000
379
479
  const pidGraceMs = 5_000
380
480
  for (;;) {
381
481
  try {
@@ -384,16 +484,7 @@ export const LMStudioWarm: Plugin = async (_input, pluginOptions) => {
384
484
  try {
385
485
  await fsp.writeFile(path.join(opts.lockDir, "pid"), String(process.pid))
386
486
  } catch {}
387
- // Synchronous release: rmSync + flag clear run with no await between them,
388
- // so a second in-process waiter (parallel eager warm) cannot observe a
389
- // removed dir with holdingLock still true, and the dir is gone even if
390
- // the process is mid-teardown when release fires.
391
- return () => {
392
- try {
393
- fs.rmSync(opts.lockDir, { recursive: true, force: true })
394
- } catch {}
395
- holdingLock = false
396
- }
487
+ return releaseLockIfOurs
397
488
  } catch (err: any) {
398
489
  if (err?.code !== "EEXIST") throw err
399
490
  try {
@@ -416,15 +507,35 @@ export const LMStudioWarm: Plugin = async (_input, pluginOptions) => {
416
507
  }
417
508
  }
418
509
 
510
+ // The lms CLI manages only the LOCAL LM Studio: with a non-loopback baseURL
511
+ // the gate would load models on this machine while requests go elsewhere.
512
+ // Warn once per URL instead of failing — a LAN hostname can still be an
513
+ // alias for this host, and generation may work regardless.
514
+ function warnIfNonLoopback(baseURL: string) {
515
+ try {
516
+ const host = new URL(baseURL).hostname
517
+ if (host === "127.0.0.1" || host === "localhost" || host === "[::1]" || host === "::1") return
518
+ logOnce(
519
+ `WARNING: baseURL ${baseURL} is not loopback — lms manages only the LOCAL LM Studio, so the warm gate cannot ensure models on a remote server`,
520
+ )
521
+ } catch {}
522
+ }
523
+
419
524
  async function doWarm(key: string, baseURL: string): Promise<WarmResult> {
525
+ const cacheKey = `${baseURL}::${key}`
526
+ warnIfNonLoopback(baseURL)
420
527
  if (!(await ensureServer(baseURL))) {
421
528
  return { ok: false, confirmed: true, reason: `LM Studio HTTP server is not reachable at ${baseURL}` }
422
529
  }
423
530
 
424
- // Fast path: no lock needed if already addressable.
425
- let instances = await psInstances()
426
- if (instances && addressable(instances, key)) {
427
- verifiedAt.set(key, Date.now())
531
+ // Fast path: no lock needed if already addressable. An "unknown" ps state
532
+ // is ambiguous — never proceed toward a load on it (see classifyPs).
533
+ let check = classifyPs(await psInstances(), key)
534
+ if (check.state === "unknown") {
535
+ return { ok: false, confirmed: false, reason: "lms ps failed — model state unknown" }
536
+ }
537
+ if (check.state === "addressable") {
538
+ verifiedAt.set(cacheKey, Date.now())
428
539
  return OK
429
540
  }
430
541
 
@@ -435,11 +546,17 @@ export const LMStudioWarm: Plugin = async (_input, pluginOptions) => {
435
546
  log(`lock contention timeout waiting to warm ${key} — proceeding (ambiguous)`)
436
547
  return { ok: false, confirmed: false, reason: "lock contention timeout" }
437
548
  }
549
+ // Refresh the lock dir mtime before each long phase so acquireLock's
550
+ // age-based stale check measures the current phase, never the total hold.
551
+ const touchLock = () => fsp.utimes(opts.lockDir, new Date(), new Date()).catch(() => {})
438
552
  try {
439
553
  // Double-checked: another process may have loaded it while we waited.
440
- instances = await psInstances()
441
- if (instances && addressable(instances, key)) {
442
- verifiedAt.set(key, Date.now())
554
+ check = classifyPs(await psInstances(), key)
555
+ if (check.state === "unknown") {
556
+ return { ok: false, confirmed: false, reason: "lms ps failed — model state unknown" }
557
+ }
558
+ if (check.state === "addressable") {
559
+ verifiedAt.set(cacheKey, Date.now())
443
560
  return OK
444
561
  }
445
562
 
@@ -447,16 +564,15 @@ export const LMStudioWarm: Plugin = async (_input, pluginOptions) => {
447
564
  // is addressable by the bare key (e.g. the original was unloaded and a
448
565
  // stray duplicate survived). Loading again would only stack key:3 —
449
566
  // reconcile by unloading idle duplicates first, then load fresh.
450
- const dups = (instances ?? []).filter((i) => i.modelKey === key)
451
- if (dups.length > 0) {
452
- const busy = dups.some((i) => i.status === "generating" || (i.queued ?? 0) > 0)
453
- if (!opts.reconcileDuplicates || busy) {
454
- const ids = dups.map((i) => i.identifier).join(", ")
455
- log(`WARNING: only non-addressable instances of ${key} exist (${ids}); busy=${busy} — cannot warm`)
567
+ if (check.state === "duplicates") {
568
+ if (!opts.reconcileDuplicates || check.busy) {
569
+ const ids = check.dups.map((i) => i.identifier).join(", ")
570
+ log(`WARNING: only non-addressable instances of ${key} exist (${ids}); busy=${check.busy} cannot warm`)
456
571
  return { ok: false, confirmed: true, reason: `only suffixed duplicates of ${key} are resident (${ids})` }
457
572
  }
458
- for (const d of dups) {
573
+ for (const d of check.dups) {
459
574
  if (!d.identifier) continue
575
+ await touchLock()
460
576
  log(`reconciling: unloading duplicate instance ${d.identifier}`)
461
577
  const un = await lms(["unload", d.identifier], 60_000)
462
578
  if (!un.ok) log(`unload ${d.identifier} failed: ${un.stderr.trim().slice(0, 200)}`)
@@ -465,6 +581,7 @@ export const LMStudioWarm: Plugin = async (_input, pluginOptions) => {
465
581
 
466
582
  const args = loadArgs(opts, key)
467
583
  log(`loading ${key} (${args.join(" ")}) ...`)
584
+ await touchLock()
468
585
  const t0 = Date.now()
469
586
  const res = await lms(args, opts.loadTimeoutMs)
470
587
  if (!res.ok) {
@@ -474,12 +591,19 @@ export const LMStudioWarm: Plugin = async (_input, pluginOptions) => {
474
591
  return { ok: false, confirmed: true, reason: `lms load failed (${kind}): ${detail.slice(0, 200)}` }
475
592
  }
476
593
 
477
- instances = await psInstances()
478
- if (instances && addressable(instances, key)) {
479
- verifiedAt.set(key, Date.now())
594
+ const after = classifyPs(await psInstances(), key)
595
+ if (after.state === "addressable") {
596
+ verifiedAt.set(cacheKey, Date.now())
480
597
  log(`loaded ${key} in ${Math.round((Date.now() - t0) / 1000)}s`)
481
598
  return OK
482
599
  }
600
+ if (after.state === "unknown") {
601
+ // The load exited 0; only the verification probe failed. Ambiguous —
602
+ // negative-caching this as confirmed would fail requests for up to
603
+ // retryCooldownMs against a model that is very likely loaded.
604
+ log(`lms load ${key} exited 0 but lms ps failed — cannot verify addressability`)
605
+ return { ok: false, confirmed: false, reason: "loaded but unverified (lms ps failed)" }
606
+ }
483
607
  log(`lms load ${key} exited 0 but ps does not show identifier === key`)
484
608
  return { ok: false, confirmed: true, reason: `loaded but not addressable as "${key}"` }
485
609
  } finally {
@@ -488,12 +612,13 @@ export const LMStudioWarm: Plugin = async (_input, pluginOptions) => {
488
612
  }
489
613
 
490
614
  function warm(key: string, baseURL: string): Promise<WarmResult> {
491
- if (Date.now() - (verifiedAt.get(key) ?? 0) < opts.verifyCacheMs) return Promise.resolve(OK)
492
- const failed = failedAt.get(key)
615
+ const cacheKey = `${baseURL}::${key}`
616
+ if (Date.now() - (verifiedAt.get(cacheKey) ?? 0) < opts.verifyCacheMs) return Promise.resolve(OK)
617
+ const failed = failedAt.get(cacheKey)
493
618
  if (failed && Date.now() - failed.at < opts.retryCooldownMs) {
494
619
  return Promise.resolve({ ok: false, confirmed: true, reason: `${failed.reason} (cooldown)` })
495
620
  }
496
- const existing = inflight.get(key)
621
+ const existing = inflight.get(cacheKey)
497
622
  if (existing) return existing
498
623
  const p = doWarm(key, baseURL)
499
624
  .catch((err): WarmResult => {
@@ -501,12 +626,12 @@ export const LMStudioWarm: Plugin = async (_input, pluginOptions) => {
501
626
  return { ok: false, confirmed: false, reason: "internal error (see log)" }
502
627
  })
503
628
  .then((r) => {
504
- if (r.ok) failedAt.delete(key)
505
- else if (r.confirmed) failedAt.set(key, { at: Date.now(), reason: r.reason })
629
+ if (r.ok) failedAt.delete(cacheKey)
630
+ else if (r.confirmed) failedAt.set(cacheKey, { at: Date.now(), reason: r.reason })
506
631
  return r
507
632
  })
508
- .finally(() => inflight.delete(key))
509
- inflight.set(key, p)
633
+ .finally(() => inflight.delete(cacheKey))
634
+ inflight.set(cacheKey, p)
510
635
  return p
511
636
  }
512
637
 
@@ -539,12 +664,23 @@ export const LMStudioWarm: Plugin = async (_input, pluginOptions) => {
539
664
  let result: WarmResult = OK
540
665
  let key: string | undefined
541
666
  try {
667
+ // Contract-drift canaries: this plugin depends on undocumented input
668
+ // shapes verified against opencode v1.17.10. If an upgrade changes
669
+ // them the gate silently no-ops — these one-time log lines are the
670
+ // only signal that would remain.
542
671
  const providerID: string | undefined = input?.provider?.info?.id ?? input?.model?.providerID
543
- if (!providerID || !opts.providers.includes(providerID)) return
672
+ if (!providerID) {
673
+ logOnce("chat.params input carries no provider id — opencode hook shape may have changed; gate skipped")
674
+ return
675
+ }
676
+ if (!opts.providers.includes(providerID)) return
544
677
  // model.api.id is the exact string opencode sends as the API `model`
545
678
  // field (== LM Studio model key for config-defined models).
546
679
  key = input?.model?.api?.id ?? input?.model?.id
547
- if (!key) return
680
+ if (!key) {
681
+ logOnce(`chat.params for gated provider "${providerID}" carries no model key — opencode hook shape may have changed; gate skipped`)
682
+ return
683
+ }
548
684
  const configured = input?.provider?.options?.baseURL
549
685
  const baseURL = typeof configured === "string" && configured.startsWith("http") ? configured : opts.baseURL
550
686
  result = await warm(key, baseURL)