opencode-lmstudio-warm 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -12
- package/README.md +36 -13
- package/examples/README.md +1 -1
- package/examples/opencode.json +0 -1
- package/package.json +1 -1
- package/src/index.ts +217 -81
package/CHANGELOG.md
CHANGED
|
@@ -1,17 +1,19 @@
|
|
|
1
|
-
## [0.1.1](https://github.com/diegomarino/opencode-lmstudio-warm/compare/v0.1.0...v0.1.1) (2026-07-04)
|
|
2
|
-
|
|
3
1
|
# Changelog
|
|
4
2
|
|
|
5
|
-
|
|
3
|
+
## [0.1.2](https://github.com/diegomarino/opencode-lmstudio-warm/compare/v0.1.1...v0.1.2) (2026-07-04)
|
|
4
|
+
|
|
5
|
+
All notable changes are documented here. From v0.1.1 onward, entries are
|
|
6
|
+
generated automatically by [semantic-release](https://github.com/semantic-release/semantic-release) from
|
|
7
|
+
[Conventional Commits](https://www.conventionalcommits.org). While the version
|
|
8
|
+
is `0.x`, a MINOR bump may include breaking changes (SemVer 0.x rule).
|
|
6
9
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
SemVer 0.x rule); such changes are called out explicitly below.
|
|
10
|
+
## [0.1.1](https://github.com/diegomarino/opencode-lmstudio-warm/compare/v0.1.0...v0.1.1) (2026-07-04)
|
|
11
|
+
|
|
12
|
+
### Bug Fixes
|
|
11
13
|
|
|
12
|
-
|
|
14
|
+
- avoid redundant eager warm when `model` and `small_model` are identical ([0f16e8f](https://github.com/diegomarino/opencode-lmstudio-warm/commit/0f16e8f))
|
|
13
15
|
|
|
14
|
-
##
|
|
16
|
+
## 0.1.0 - 2026-07-04
|
|
15
17
|
|
|
16
18
|
Initial public release.
|
|
17
19
|
|
|
@@ -44,6 +46,3 @@ Initial public release.
|
|
|
44
46
|
lock immediately when its holder pid is dead (or the pid file is absent past a
|
|
45
47
|
grace window), the release is synchronous, and a `process.on("exit")` handler
|
|
46
48
|
is a last-resort cleanup. Verified 9/9 against a live LM Studio fleet.
|
|
47
|
-
|
|
48
|
-
[Unreleased]: https://github.com/diegomarino/opencode-lmstudio-warm/compare/v0.1.0...HEAD
|
|
49
|
-
[0.1.0]: https://github.com/diegomarino/opencode-lmstudio-warm/releases/tag/v0.1.0
|
package/README.md
CHANGED
|
@@ -20,8 +20,12 @@ Per request, the plugin checks that the model is actually loaded and, when it
|
|
|
20
20
|
isn't, performs exactly one `lms load` (even across parallel sessions) before
|
|
21
21
|
letting the request through.
|
|
22
22
|
|
|
23
|
-
Verified against opencode **v1.17.10** and
|
|
24
|
-
macOS/Apple Silicon (see
|
|
23
|
+
Verified against opencode **v1.17.10** and **LM Studio 0.4.18** (`lms` CLI
|
|
24
|
+
commit `6041ae0`) on macOS/Apple Silicon (see
|
|
25
|
+
[`test/e2e/verify.sh`](./test/e2e/verify.sh), 9/9 passing). The LM Studio
|
|
26
|
+
behaviors the plugin depends on are the `lms ps --json` field names
|
|
27
|
+
(`modelKey` / `identifier` / `status` / `queued`) and the fact that
|
|
28
|
+
`lms load` is not idempotent.
|
|
25
29
|
|
|
26
30
|
## Quick start
|
|
27
31
|
|
|
@@ -128,15 +132,33 @@ the JSON/`jq` above.
|
|
|
128
132
|
|
|
129
133
|
The plugin works with zero configuration. Optional tuning lives in
|
|
130
134
|
`~/.config/opencode/lmstudio-warm.json` (or inline as
|
|
131
|
-
`"plugin": [["opencode-lmstudio-warm", {...}]]`)
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
`
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
135
|
+
`"plugin": [["opencode-lmstudio-warm", {...}]]`).
|
|
136
|
+
|
|
137
|
+
> **Scope:** the plugin manages the **local** LM Studio through the `lms` CLI.
|
|
138
|
+
> `baseURL` (and any gated provider's `baseURL`) must point at this same
|
|
139
|
+
> machine — a non-loopback URL is logged as a warning, and the gate can
|
|
140
|
+
> neither verify nor load models on a remote server.
|
|
141
|
+
|
|
142
|
+
| Option | Default | What it does |
|
|
143
|
+
|--------|---------|--------------|
|
|
144
|
+
| `providers` | `["lmstudio"]` | Provider IDs to gate; requests on other providers are ignored. All listed providers must address the local LM Studio. |
|
|
145
|
+
| `lmsPath` | `~/.lmstudio/bin/lms` if present, else `lms` | Path to the `lms` CLI. |
|
|
146
|
+
| `baseURL` | `http://127.0.0.1:1234/v1` | Fallback base URL when the provider config doesn't carry one. Must be loopback. |
|
|
147
|
+
| `ttlSeconds` | `0` | `--ttl` for `lms load`; `0` omits the flag (resident until unloaded). |
|
|
148
|
+
| `parallel` | `0` | `--parallel` for `lms load`; `0` omits it (LM Studio default, currently 4). Size ≈ concurrent fleet width; overflow queues server-side. |
|
|
149
|
+
| `contextLength` | `0` | `--context-length` for `lms load`; `0` omits it (model default). |
|
|
150
|
+
| `perModel` | `{}` | Per-model-key overrides of `ttlSeconds` / `parallel` / `contextLength`. |
|
|
151
|
+
| `verifyCacheMs` | `30000` | How long a positive residency verdict is trusted before re-checking. |
|
|
152
|
+
| `retryCooldownMs` | `60000` | After a confirmed load failure, don't retry the same key for this long (prevents load storms). |
|
|
153
|
+
| `loadTimeoutMs` | `900000` | Hard cap on a single `lms load` (a cold big-model load can take minutes). |
|
|
154
|
+
| `serverStartTimeoutMs` | `90000` | Hard cap on bringing the HTTP server up. |
|
|
155
|
+
| `lockWaitTimeoutMs` | `1200000` | Max wait for another process's in-flight load before proceeding fail-open. |
|
|
156
|
+
| `failMode` | `"hybrid"` | `hybrid`: confirmed failures fail the request with a clear error, ambiguous ones proceed fail-open. `open`: never fail. `closed`: any warm failure fails the request. |
|
|
157
|
+
| `reconcileDuplicates` | `true` | Unload idle suffixed duplicates (`key:2` …) and load fresh when the bare key isn't addressable. |
|
|
158
|
+
| `launchAppFallback` | `true` | If the server won't start, try `open -ga "LM Studio"` once (macOS only). |
|
|
159
|
+
| `eager` | `true` | Background-warm `model` + `small_model` at instance start. |
|
|
160
|
+
| `logFile` | `~/.cache/opencode/lmstudio-warm.log` | Plugin log file; rotated to `<logFile>.old` once it grows past ~5 MB. |
|
|
161
|
+
| `lockDir` | `~/.cache/opencode/lmstudio-warm.lock` | Cross-process lock directory. |
|
|
140
162
|
|
|
141
163
|
See `examples/lmstudio-warm.json` for a fleet-tuned starting point
|
|
142
164
|
(`cp examples/lmstudio-warm.json ~/.config/opencode/lmstudio-warm.json`).
|
|
@@ -287,8 +309,9 @@ building, addressability, pid liveness, fail-mode decisions) is exported from
|
|
|
287
309
|
`src/index.ts` and unit-tested under `test/`; the live system behavior is covered
|
|
288
310
|
by the E2E fixture under [`test/e2e/`](./test/e2e/).
|
|
289
311
|
|
|
290
|
-
Releases follow [SemVer](https://semver.org) and are cut by
|
|
291
|
-
|
|
312
|
+
Releases follow [SemVer](https://semver.org) and are cut automatically by
|
|
313
|
+
semantic-release on every push to `main` — Conventional Commits decide the
|
|
314
|
+
bump (see [`CHANGELOG.md`](./CHANGELOG.md)).
|
|
292
315
|
|
|
293
316
|
## Disclaimer
|
|
294
317
|
|
package/examples/README.md
CHANGED
|
@@ -10,7 +10,7 @@ strings opencode sends as the API `model` field).
|
|
|
10
10
|
A minimal consumer config: the `plugin` array entry plus the `lmstudio` provider
|
|
11
11
|
block (`baseURL`, `apiKey`, and the recommended `headerTimeout` / `chunkTimeout`).
|
|
12
12
|
Merge it into your own `opencode.json` — the repo
|
|
13
|
-
[README's Install section](../README.md#install) has an idempotent `jq` one-liner
|
|
13
|
+
[README's Install section](../README.md#install-options) has an idempotent `jq` one-liner
|
|
14
14
|
that does this non-destructively, or run `opencode plugin opencode-lmstudio-warm`
|
|
15
15
|
to register the plugin and add only the provider block by hand.
|
|
16
16
|
|
package/examples/opencode.json
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
{
|
|
2
2
|
"$schema": "https://opencode.ai/config.json",
|
|
3
|
-
"//": "Consumer example: wire the plugin via npm. opencode auto-installs the package named in `plugin` and loads it before every request. Copy the `plugin` line and the `lmstudio` provider block into your own opencode.json, then set your model/small_model and LM_API_TOKEN.",
|
|
4
3
|
"plugin": ["opencode-lmstudio-warm"],
|
|
5
4
|
"model": "lmstudio/your-main-model-key",
|
|
6
5
|
"small_model": "lmstudio/your-small-model-key",
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "opencode-lmstudio-warm",
|
|
3
|
-
"version": "0.1.1",
|
|
3
|
+
"version": "0.1.2",
|
|
4
4
|
"description": "Deterministic LM Studio model pre-warm gate for opencode — loads and keeps the target model resident before every request, healing cold starts and mid-session TTL evictions.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./src/index.ts",
|
package/src/index.ts
CHANGED
|
@@ -145,17 +145,49 @@ const OK: WarmResult = { ok: true, confirmed: false, reason: "" }
|
|
|
145
145
|
// No per-process state — the plugin closure below composes these with the live
|
|
146
146
|
// caches, child processes, and lock directory.
|
|
147
147
|
|
|
148
|
-
/** Merge config in precedence order: DEFAULTS < file options < plugin options.
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
148
|
+
/** Merge config in precedence order: DEFAULTS < file options < plugin options. */
|
|
149
|
+
export function resolveOptions(fileOpts: Partial<WarmOptions>, pluginOpts?: Partial<WarmOptions> | null): WarmOptions {
|
|
150
|
+
return { ...DEFAULTS, ...fileOpts, ...(pluginOpts ?? {}) }
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
/** Keys in a raw options object that the plugin does not know. Surfaced as
|
|
154
|
+
* warnings at startup — a typo'd key would otherwise be silently ignored. */
|
|
155
|
+
export function unknownOptionKeys(raw: Record<string, unknown>): string[] {
|
|
156
|
+
return Object.keys(raw).filter((k) => !(k in DEFAULTS))
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
const NUMERIC_KEYS = [
|
|
160
|
+
"ttlSeconds",
|
|
161
|
+
"parallel",
|
|
162
|
+
"contextLength",
|
|
163
|
+
"verifyCacheMs",
|
|
164
|
+
"retryCooldownMs",
|
|
165
|
+
"loadTimeoutMs",
|
|
166
|
+
"serverStartTimeoutMs",
|
|
167
|
+
"lockWaitTimeoutMs",
|
|
168
|
+
] as const
|
|
169
|
+
const BOOLEAN_KEYS = ["reconcileDuplicates", "launchAppFallback", "eager"] as const
|
|
170
|
+
const STRING_KEYS = ["lmsPath", "baseURL", "logFile", "lockDir"] as const
|
|
171
|
+
|
|
172
|
+
/** Repair invalid option VALUES back to their defaults, collecting one
|
|
173
|
+
* warning per repair. Notably an unrecognized failMode falls back to
|
|
174
|
+
* "hybrid" (the default): the exact-match checks downstream would otherwise
|
|
175
|
+
* make a typo silently behave like "open", the least safe mode. */
|
|
176
|
+
export function sanitizeOptions(o: WarmOptions): { opts: WarmOptions; warnings: string[] } {
|
|
177
|
+
const warnings: string[] = []
|
|
178
|
+
const out: WarmOptions = { ...o }
|
|
179
|
+
const fix = (key: keyof WarmOptions, why: string) => {
|
|
180
|
+
warnings.push(`${key} ${why} — using default ${JSON.stringify(DEFAULTS[key])}`)
|
|
181
|
+
;(out as Record<string, unknown>)[key] = DEFAULTS[key]
|
|
182
|
+
}
|
|
183
|
+
if (!["open", "closed", "hybrid"].includes(out.failMode)) fix("failMode", `"${out.failMode}" is not open|closed|hybrid`)
|
|
184
|
+
if (!Array.isArray(out.providers) || out.providers.length === 0 || out.providers.some((p) => typeof p !== "string"))
|
|
185
|
+
fix("providers", "must be a non-empty string array")
|
|
186
|
+
for (const k of NUMERIC_KEYS) if (typeof out[k] !== "number" || !Number.isFinite(out[k]) || out[k] < 0) fix(k, "must be a non-negative number")
|
|
187
|
+
for (const k of BOOLEAN_KEYS) if (typeof out[k] !== "boolean") fix(k, "must be a boolean")
|
|
188
|
+
for (const k of STRING_KEYS) if (typeof out[k] !== "string" || out[k] === "") fix(k, "must be a non-empty string")
|
|
189
|
+
if (out.perModel === null || typeof out.perModel !== "object" || Array.isArray(out.perModel)) fix("perModel", "must be an object")
|
|
190
|
+
return { opts: out, warnings }
|
|
159
191
|
}
|
|
160
192
|
|
|
161
193
|
/** opencode addresses models by the UNSUFFIXED key; LM Studio routes the API
|
|
@@ -168,6 +200,26 @@ export function addressable(instances: LmsInstance[], key: string): boolean {
|
|
|
168
200
|
return instances.some((i) => i.identifier === key)
|
|
169
201
|
}
|
|
170
202
|
|
|
203
|
+
/** Classify `lms ps` output for a key. "unknown" (ps output unavailable) is a
|
|
204
|
+
* first-class state on purpose: it is AMBIGUOUS, never "absent" — loading
|
|
205
|
+
* blind onto a possibly-resident key is how duplicate instances are made,
|
|
206
|
+
* and a failed post-load probe must not be reported as a confirmed load
|
|
207
|
+
* failure (that would negative-cache a model that may well be loaded). */
|
|
208
|
+
export type PsCheck =
|
|
209
|
+
| { state: "unknown" }
|
|
210
|
+
| { state: "addressable" }
|
|
211
|
+
| { state: "absent" }
|
|
212
|
+
| { state: "duplicates"; dups: LmsInstance[]; busy: boolean }
|
|
213
|
+
|
|
214
|
+
export function classifyPs(instances: LmsInstance[] | null, key: string): PsCheck {
|
|
215
|
+
if (instances === null) return { state: "unknown" }
|
|
216
|
+
if (addressable(instances, key)) return { state: "addressable" }
|
|
217
|
+
const dups = instances.filter((i) => i.modelKey === key)
|
|
218
|
+
if (dups.length === 0) return { state: "absent" }
|
|
219
|
+
const busy = dups.some((i) => i.status === "generating" || (i.queued ?? 0) > 0)
|
|
220
|
+
return { state: "duplicates", dups, busy }
|
|
221
|
+
}
|
|
222
|
+
|
|
171
223
|
/** Split an opencode model ref ("provider/key…") on the FIRST slash, so a key
|
|
172
224
|
* that itself contains slashes (e.g. "qwen/qwen3") is preserved intact. */
|
|
173
225
|
export function parseModelRef(ref: unknown): { providerID: string; key: string } | null {
|
|
@@ -202,11 +254,14 @@ export function pidAlive(pid: number): boolean {
|
|
|
202
254
|
}
|
|
203
255
|
}
|
|
204
256
|
|
|
205
|
-
/** Parse a lock pid-file's contents to a pid, or null if absent/blank/garbage
|
|
257
|
+
/** Parse a lock pid-file's contents to a pid, or null if absent/blank/garbage
|
|
258
|
+
* or non-positive. Non-positive values are rejected because `kill(-1, 0)`
|
|
259
|
+
* probes ALL processes (always "alive") — a corrupted pid file must not make
|
|
260
|
+
* the lock unbreakable until the staleness backstop. */
|
|
206
261
|
export function parseLockPid(content: string | null): number | null {
|
|
207
262
|
if (content == null) return null
|
|
208
263
|
const n = Number.parseInt(content.trim(), 10)
|
|
209
|
-
return Number.isFinite(n) ? n : null
|
|
264
|
+
return Number.isFinite(n) && n > 0 ? n : null
|
|
210
265
|
}
|
|
211
266
|
|
|
212
267
|
/** Given a warm outcome and the configured failMode, should opencode's request
|
|
@@ -218,16 +273,23 @@ export function shouldFailRequest(failMode: WarmOptions["failMode"], result: War
|
|
|
218
273
|
}
|
|
219
274
|
|
|
220
275
|
export const LMStudioWarm: Plugin = async (_input, pluginOptions) => {
|
|
221
|
-
const
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
)
|
|
276
|
+
const fileOpts = loadFileOptions()
|
|
277
|
+
const plugOpts = (pluginOptions ?? {}) as Partial<WarmOptions>
|
|
278
|
+
const { opts, warnings: configWarnings } = sanitizeOptions(resolveOptions(fileOpts, plugOpts))
|
|
279
|
+
for (const k of unknownOptionKeys(fileOpts as Record<string, unknown>))
|
|
280
|
+
configWarnings.push(`unknown option "${k}" in lmstudio-warm.json`)
|
|
281
|
+
for (const k of unknownOptionKeys(plugOpts as Record<string, unknown>))
|
|
282
|
+
configWarnings.push(`unknown option "${k}" in plugin options`)
|
|
225
283
|
|
|
226
284
|
// ---- state (per opencode process) ----
|
|
227
|
-
|
|
285
|
+
// Warm caches are keyed by `${baseURL}::${modelKey}` — residency and failure
|
|
286
|
+
// are facts about one server+model pair, not about a model key in the
|
|
287
|
+
// abstract (two gated providers may serve the same key).
|
|
288
|
+
const verifiedAt = new Map<string, number>() // last confirmed-addressable timestamp
|
|
228
289
|
const failedAt = new Map<string, { at: number; reason: string }>() // negative cache
|
|
229
290
|
const inflight = new Map<string, Promise<WarmResult>>()
|
|
230
|
-
|
|
291
|
+
const serverVerifiedAt = new Map<string, number>() // baseURL -> last confirmed-listening
|
|
292
|
+
const serverFailedAt = new Map<string, number>() // baseURL -> last failed bring-up
|
|
231
293
|
// True only while THIS process holds the mkdir lock. Used by the exit handler
|
|
232
294
|
// to release a lock that a fire-and-forget eager warm may still be holding
|
|
233
295
|
// when the process tears down (otherwise the async finally never runs).
|
|
@@ -237,30 +299,56 @@ export const LMStudioWarm: Plugin = async (_input, pluginOptions) => {
|
|
|
237
299
|
fs.mkdirSync(path.dirname(opts.logFile), { recursive: true })
|
|
238
300
|
} catch {}
|
|
239
301
|
|
|
302
|
+
// Rotate the log once it outgrows ~5 MB (one previous generation kept at
|
|
303
|
+
// .old) so long-lived fleet hosts cannot grow it unbounded.
|
|
304
|
+
try {
|
|
305
|
+
if (fs.statSync(opts.logFile).size > 5 * 1024 * 1024) fs.renameSync(opts.logFile, `${opts.logFile}.old`)
|
|
306
|
+
} catch {}
|
|
307
|
+
|
|
240
308
|
function log(msg: string) {
|
|
241
309
|
try {
|
|
242
310
|
fs.appendFileSync(opts.logFile, `${new Date().toISOString()} [pid ${process.pid}] ${msg}\n`)
|
|
243
311
|
} catch {}
|
|
244
312
|
}
|
|
245
313
|
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
314
|
+
const loggedOnce = new Set<string>()
|
|
315
|
+
function logOnce(msg: string) {
|
|
316
|
+
if (loggedOnce.has(msg)) return
|
|
317
|
+
loggedOnce.add(msg)
|
|
318
|
+
log(msg)
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
for (const w of configWarnings) log(`config warning: ${w}`)
|
|
322
|
+
|
|
323
|
+
// The ONLY lock-release path (also used by the exit handler below). Removes
|
|
324
|
+
// the lock dir only if the pid file still names this process, or is
|
|
325
|
+
// absent/blank (we mkdir'd but hadn't written it yet). Another process may
|
|
326
|
+
// have legitimately broken our lock (stale/dead-holder rules in acquireLock)
|
|
327
|
+
// and re-acquired it — deleting THEIR lock would reopen the duplicate-load
|
|
328
|
+
// race the lock exists to prevent. Synchronous on purpose: rmSync + flag
|
|
329
|
+
// clear run with no await between them, so a second in-process waiter cannot
|
|
330
|
+
// observe a removed dir with holdingLock still true, and it works inside the
|
|
331
|
+
// sync-only "exit" handler. Never throws.
|
|
332
|
+
function releaseLockIfOurs() {
|
|
254
333
|
try {
|
|
255
334
|
let ours = true
|
|
256
335
|
try {
|
|
257
336
|
const pidStr = fs.readFileSync(path.join(opts.lockDir, "pid"), "utf8").trim()
|
|
258
|
-
ours = pidStr === "" || pidStr === String(process.pid)
|
|
337
|
+
ours = pidStr === "" || pidStr === String(process.pid)
|
|
259
338
|
} catch {
|
|
260
339
|
ours = true
|
|
261
340
|
}
|
|
262
341
|
if (ours) fs.rmSync(opts.lockDir, { recursive: true, force: true })
|
|
263
342
|
} catch {}
|
|
343
|
+
holdingLock = false
|
|
344
|
+
}
|
|
345
|
+
|
|
346
|
+
// Last-resort release. A one-shot `opencode run` can exit while a background
|
|
347
|
+
// eager warm still holds the lock; process.on("exit") runs sync only.
|
|
348
|
+
// SIGKILL is uncatchable — the dead-holder liveness check in acquireLock is
|
|
349
|
+
// the backstop for that.
|
|
350
|
+
process.once("exit", () => {
|
|
351
|
+
if (holdingLock) releaseLockIfOurs()
|
|
264
352
|
})
|
|
265
353
|
|
|
266
354
|
function run(
|
|
@@ -318,26 +406,36 @@ export const LMStudioWarm: Plugin = async (_input, pluginOptions) => {
|
|
|
318
406
|
return false
|
|
319
407
|
}
|
|
320
408
|
|
|
321
|
-
|
|
409
|
+
const serverInflight = new Map<string, Promise<boolean>>()
|
|
322
410
|
function ensureServer(baseURL: string): Promise<boolean> {
|
|
323
|
-
if (Date.now() - serverVerifiedAt < opts.verifyCacheMs) return Promise.resolve(true)
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
411
|
+
if (Date.now() - (serverVerifiedAt.get(baseURL) ?? 0) < opts.verifyCacheMs) return Promise.resolve(true)
|
|
412
|
+
// Server-level negative cache: a failed bring-up (start + poll + optional
|
|
413
|
+
// app launch) can take minutes — fail fast for retryCooldownMs instead of
|
|
414
|
+
// re-paying that stall on every request while the server stays down.
|
|
415
|
+
if (Date.now() - (serverFailedAt.get(baseURL) ?? 0) < opts.retryCooldownMs) return Promise.resolve(false)
|
|
416
|
+
const existing = serverInflight.get(baseURL)
|
|
417
|
+
if (existing) return existing
|
|
418
|
+
const p = ensureServerImpl(baseURL)
|
|
419
|
+
.then((up) => {
|
|
420
|
+
if (up) serverFailedAt.delete(baseURL)
|
|
421
|
+
else serverFailedAt.set(baseURL, Date.now())
|
|
422
|
+
return up
|
|
423
|
+
})
|
|
424
|
+
.finally(() => serverInflight.delete(baseURL))
|
|
425
|
+
serverInflight.set(baseURL, p)
|
|
426
|
+
return p
|
|
329
427
|
}
|
|
330
428
|
|
|
331
429
|
async function ensureServerImpl(baseURL: string): Promise<boolean> {
|
|
332
430
|
if (await httpAlive(baseURL)) {
|
|
333
|
-
serverVerifiedAt
|
|
431
|
+
serverVerifiedAt.set(baseURL, Date.now())
|
|
334
432
|
return true
|
|
335
433
|
}
|
|
336
434
|
log(`HTTP server not reachable at ${baseURL} — running lms server start`)
|
|
337
435
|
const started = await lms(["server", "start"], 30_000)
|
|
338
436
|
if (!started.ok) log(`lms server start failed: ${started.stderr.trim().slice(0, 300)}`)
|
|
339
437
|
if (await pollAlive(baseURL, opts.serverStartTimeoutMs)) {
|
|
340
|
-
serverVerifiedAt
|
|
438
|
+
serverVerifiedAt.set(baseURL, Date.now())
|
|
341
439
|
log(`HTTP server is up at ${baseURL}`)
|
|
342
440
|
return true
|
|
343
441
|
}
|
|
@@ -348,7 +446,7 @@ export const LMStudioWarm: Plugin = async (_input, pluginOptions) => {
|
|
|
348
446
|
await new Promise((r) => setTimeout(r, 3_000))
|
|
349
447
|
await lms(["server", "start"], 30_000)
|
|
350
448
|
if (await pollAlive(baseURL, opts.serverStartTimeoutMs)) {
|
|
351
|
-
serverVerifiedAt
|
|
449
|
+
serverVerifiedAt.set(baseURL, Date.now())
|
|
352
450
|
log(`HTTP server is up at ${baseURL} (after app launch)`)
|
|
353
451
|
return true
|
|
354
452
|
}
|
|
@@ -367,15 +465,17 @@ export const LMStudioWarm: Plugin = async (_input, pluginOptions) => {
|
|
|
367
465
|
|
|
368
466
|
// Cross-process mutex via atomic mkdir: parallel opencode workers must not
|
|
369
467
|
// race lms load (it is not idempotent). A lock may be broken when (1) it is
|
|
370
|
-
// older than staleMs
|
|
371
|
-
//
|
|
372
|
-
//
|
|
373
|
-
//
|
|
374
|
-
//
|
|
375
|
-
//
|
|
468
|
+
// older than staleMs — holders refresh the dir mtime before each long phase
|
|
469
|
+
// (touchLock in doWarm), so age measures the CURRENT phase and no live phase
|
|
470
|
+
// can outlast the load timeout, the longest hard cap; (2) its recorded
|
|
471
|
+
// holder pid is dead (crash/abrupt exit before the finally released it — the
|
|
472
|
+
// observed eager-warm leak); or (3) the pid file is missing AND the dir has
|
|
473
|
+
// outlived a short grace (a holder that crashed between mkdir and
|
|
474
|
+
// writeFile). A fresh, pid-less lock is left alone: that is a live holder
|
|
475
|
+
// still mid-acquisition.
|
|
376
476
|
async function acquireLock(): Promise<(() => void) | null> {
|
|
377
477
|
const deadline = Date.now() + opts.lockWaitTimeoutMs
|
|
378
|
-
const staleMs = opts.loadTimeoutMs +
|
|
478
|
+
const staleMs = opts.loadTimeoutMs + 120_000
|
|
379
479
|
const pidGraceMs = 5_000
|
|
380
480
|
for (;;) {
|
|
381
481
|
try {
|
|
@@ -384,16 +484,7 @@ export const LMStudioWarm: Plugin = async (_input, pluginOptions) => {
|
|
|
384
484
|
try {
|
|
385
485
|
await fsp.writeFile(path.join(opts.lockDir, "pid"), String(process.pid))
|
|
386
486
|
} catch {}
|
|
387
|
-
|
|
388
|
-
// so a second in-process waiter (parallel eager warm) cannot observe a
|
|
389
|
-
// removed dir with holdingLock still true, and the dir is gone even if
|
|
390
|
-
// the process is mid-teardown when release fires.
|
|
391
|
-
return () => {
|
|
392
|
-
try {
|
|
393
|
-
fs.rmSync(opts.lockDir, { recursive: true, force: true })
|
|
394
|
-
} catch {}
|
|
395
|
-
holdingLock = false
|
|
396
|
-
}
|
|
487
|
+
return releaseLockIfOurs
|
|
397
488
|
} catch (err: any) {
|
|
398
489
|
if (err?.code !== "EEXIST") throw err
|
|
399
490
|
try {
|
|
@@ -416,15 +507,35 @@ export const LMStudioWarm: Plugin = async (_input, pluginOptions) => {
|
|
|
416
507
|
}
|
|
417
508
|
}
|
|
418
509
|
|
|
510
|
+
// The lms CLI manages only the LOCAL LM Studio: with a non-loopback baseURL
|
|
511
|
+
// the gate would load models on this machine while requests go elsewhere.
|
|
512
|
+
// Warn once per URL instead of failing — a LAN hostname can still be an
|
|
513
|
+
// alias for this host, and generation may work regardless.
|
|
514
|
+
function warnIfNonLoopback(baseURL: string) {
|
|
515
|
+
try {
|
|
516
|
+
const host = new URL(baseURL).hostname
|
|
517
|
+
if (host === "127.0.0.1" || host === "localhost" || host === "[::1]" || host === "::1") return
|
|
518
|
+
logOnce(
|
|
519
|
+
`WARNING: baseURL ${baseURL} is not loopback — lms manages only the LOCAL LM Studio, so the warm gate cannot ensure models on a remote server`,
|
|
520
|
+
)
|
|
521
|
+
} catch {}
|
|
522
|
+
}
|
|
523
|
+
|
|
419
524
|
async function doWarm(key: string, baseURL: string): Promise<WarmResult> {
|
|
525
|
+
const cacheKey = `${baseURL}::${key}`
|
|
526
|
+
warnIfNonLoopback(baseURL)
|
|
420
527
|
if (!(await ensureServer(baseURL))) {
|
|
421
528
|
return { ok: false, confirmed: true, reason: `LM Studio HTTP server is not reachable at ${baseURL}` }
|
|
422
529
|
}
|
|
423
530
|
|
|
424
|
-
// Fast path: no lock needed if already addressable.
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
531
|
+
// Fast path: no lock needed if already addressable. An "unknown" ps state
|
|
532
|
+
// is ambiguous — never proceed toward a load on it (see classifyPs).
|
|
533
|
+
let check = classifyPs(await psInstances(), key)
|
|
534
|
+
if (check.state === "unknown") {
|
|
535
|
+
return { ok: false, confirmed: false, reason: "lms ps failed — model state unknown" }
|
|
536
|
+
}
|
|
537
|
+
if (check.state === "addressable") {
|
|
538
|
+
verifiedAt.set(cacheKey, Date.now())
|
|
428
539
|
return OK
|
|
429
540
|
}
|
|
430
541
|
|
|
@@ -435,11 +546,17 @@ export const LMStudioWarm: Plugin = async (_input, pluginOptions) => {
|
|
|
435
546
|
log(`lock contention timeout waiting to warm ${key} — proceeding (ambiguous)`)
|
|
436
547
|
return { ok: false, confirmed: false, reason: "lock contention timeout" }
|
|
437
548
|
}
|
|
549
|
+
// Refresh the lock dir mtime before each long phase so acquireLock's
|
|
550
|
+
// age-based stale check measures the current phase, never the total hold.
|
|
551
|
+
const touchLock = () => fsp.utimes(opts.lockDir, new Date(), new Date()).catch(() => {})
|
|
438
552
|
try {
|
|
439
553
|
// Double-checked: another process may have loaded it while we waited.
|
|
440
|
-
|
|
441
|
-
if (
|
|
442
|
-
|
|
554
|
+
check = classifyPs(await psInstances(), key)
|
|
555
|
+
if (check.state === "unknown") {
|
|
556
|
+
return { ok: false, confirmed: false, reason: "lms ps failed — model state unknown" }
|
|
557
|
+
}
|
|
558
|
+
if (check.state === "addressable") {
|
|
559
|
+
verifiedAt.set(cacheKey, Date.now())
|
|
443
560
|
return OK
|
|
444
561
|
}
|
|
445
562
|
|
|
@@ -447,16 +564,15 @@ export const LMStudioWarm: Plugin = async (_input, pluginOptions) => {
|
|
|
447
564
|
// is addressable by the bare key (e.g. the original was unloaded and a
|
|
448
565
|
// stray duplicate survived). Loading again would only stack key:3 —
|
|
449
566
|
// reconcile by unloading idle duplicates first, then load fresh.
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
const ids = dups.map((i) => i.identifier).join(", ")
|
|
455
|
-
log(`WARNING: only non-addressable instances of ${key} exist (${ids}); busy=${busy} — cannot warm`)
|
|
567
|
+
if (check.state === "duplicates") {
|
|
568
|
+
if (!opts.reconcileDuplicates || check.busy) {
|
|
569
|
+
const ids = check.dups.map((i) => i.identifier).join(", ")
|
|
570
|
+
log(`WARNING: only non-addressable instances of ${key} exist (${ids}); busy=${check.busy} — cannot warm`)
|
|
456
571
|
return { ok: false, confirmed: true, reason: `only suffixed duplicates of ${key} are resident (${ids})` }
|
|
457
572
|
}
|
|
458
|
-
for (const d of dups) {
|
|
573
|
+
for (const d of check.dups) {
|
|
459
574
|
if (!d.identifier) continue
|
|
575
|
+
await touchLock()
|
|
460
576
|
log(`reconciling: unloading duplicate instance ${d.identifier}`)
|
|
461
577
|
const un = await lms(["unload", d.identifier], 60_000)
|
|
462
578
|
if (!un.ok) log(`unload ${d.identifier} failed: ${un.stderr.trim().slice(0, 200)}`)
|
|
@@ -465,6 +581,7 @@ export const LMStudioWarm: Plugin = async (_input, pluginOptions) => {
|
|
|
465
581
|
|
|
466
582
|
const args = loadArgs(opts, key)
|
|
467
583
|
log(`loading ${key} (${args.join(" ")}) ...`)
|
|
584
|
+
await touchLock()
|
|
468
585
|
const t0 = Date.now()
|
|
469
586
|
const res = await lms(args, opts.loadTimeoutMs)
|
|
470
587
|
if (!res.ok) {
|
|
@@ -474,12 +591,19 @@ export const LMStudioWarm: Plugin = async (_input, pluginOptions) => {
|
|
|
474
591
|
return { ok: false, confirmed: true, reason: `lms load failed (${kind}): ${detail.slice(0, 200)}` }
|
|
475
592
|
}
|
|
476
593
|
|
|
477
|
-
|
|
478
|
-
if (
|
|
479
|
-
verifiedAt.set(
|
|
594
|
+
const after = classifyPs(await psInstances(), key)
|
|
595
|
+
if (after.state === "addressable") {
|
|
596
|
+
verifiedAt.set(cacheKey, Date.now())
|
|
480
597
|
log(`loaded ${key} in ${Math.round((Date.now() - t0) / 1000)}s`)
|
|
481
598
|
return OK
|
|
482
599
|
}
|
|
600
|
+
if (after.state === "unknown") {
|
|
601
|
+
// The load exited 0; only the verification probe failed. Ambiguous —
|
|
602
|
+
// negative-caching this as confirmed would fail requests for up to
|
|
603
|
+
// retryCooldownMs against a model that is very likely loaded.
|
|
604
|
+
log(`lms load ${key} exited 0 but lms ps failed — cannot verify addressability`)
|
|
605
|
+
return { ok: false, confirmed: false, reason: "loaded but unverified (lms ps failed)" }
|
|
606
|
+
}
|
|
483
607
|
log(`lms load ${key} exited 0 but ps does not show identifier === key`)
|
|
484
608
|
return { ok: false, confirmed: true, reason: `loaded but not addressable as "${key}"` }
|
|
485
609
|
} finally {
|
|
@@ -488,12 +612,13 @@ export const LMStudioWarm: Plugin = async (_input, pluginOptions) => {
|
|
|
488
612
|
}
|
|
489
613
|
|
|
490
614
|
function warm(key: string, baseURL: string): Promise<WarmResult> {
|
|
491
|
-
|
|
492
|
-
|
|
615
|
+
const cacheKey = `${baseURL}::${key}`
|
|
616
|
+
if (Date.now() - (verifiedAt.get(cacheKey) ?? 0) < opts.verifyCacheMs) return Promise.resolve(OK)
|
|
617
|
+
const failed = failedAt.get(cacheKey)
|
|
493
618
|
if (failed && Date.now() - failed.at < opts.retryCooldownMs) {
|
|
494
619
|
return Promise.resolve({ ok: false, confirmed: true, reason: `${failed.reason} (cooldown)` })
|
|
495
620
|
}
|
|
496
|
-
const existing = inflight.get(
|
|
621
|
+
const existing = inflight.get(cacheKey)
|
|
497
622
|
if (existing) return existing
|
|
498
623
|
const p = doWarm(key, baseURL)
|
|
499
624
|
.catch((err): WarmResult => {
|
|
@@ -501,12 +626,12 @@ export const LMStudioWarm: Plugin = async (_input, pluginOptions) => {
|
|
|
501
626
|
return { ok: false, confirmed: false, reason: "internal error (see log)" }
|
|
502
627
|
})
|
|
503
628
|
.then((r) => {
|
|
504
|
-
if (r.ok) failedAt.delete(
|
|
505
|
-
else if (r.confirmed) failedAt.set(
|
|
629
|
+
if (r.ok) failedAt.delete(cacheKey)
|
|
630
|
+
else if (r.confirmed) failedAt.set(cacheKey, { at: Date.now(), reason: r.reason })
|
|
506
631
|
return r
|
|
507
632
|
})
|
|
508
|
-
.finally(() => inflight.delete(
|
|
509
|
-
inflight.set(
|
|
633
|
+
.finally(() => inflight.delete(cacheKey))
|
|
634
|
+
inflight.set(cacheKey, p)
|
|
510
635
|
return p
|
|
511
636
|
}
|
|
512
637
|
|
|
@@ -539,12 +664,23 @@ export const LMStudioWarm: Plugin = async (_input, pluginOptions) => {
|
|
|
539
664
|
let result: WarmResult = OK
|
|
540
665
|
let key: string | undefined
|
|
541
666
|
try {
|
|
667
|
+
// Contract-drift canaries: this plugin depends on undocumented input
|
|
668
|
+
// shapes verified against opencode v1.17.10. If an upgrade changes
|
|
669
|
+
// them the gate silently no-ops — these one-time log lines are the
|
|
670
|
+
// only signal that would remain.
|
|
542
671
|
const providerID: string | undefined = input?.provider?.info?.id ?? input?.model?.providerID
|
|
543
|
-
if (!providerID
|
|
672
|
+
if (!providerID) {
|
|
673
|
+
logOnce("chat.params input carries no provider id — opencode hook shape may have changed; gate skipped")
|
|
674
|
+
return
|
|
675
|
+
}
|
|
676
|
+
if (!opts.providers.includes(providerID)) return
|
|
544
677
|
// model.api.id is the exact string opencode sends as the API `model`
|
|
545
678
|
// field (== LM Studio model key for config-defined models).
|
|
546
679
|
key = input?.model?.api?.id ?? input?.model?.id
|
|
547
|
-
if (!key)
|
|
680
|
+
if (!key) {
|
|
681
|
+
logOnce(`chat.params for gated provider "${providerID}" carries no model key — opencode hook shape may have changed; gate skipped`)
|
|
682
|
+
return
|
|
683
|
+
}
|
|
548
684
|
const configured = input?.provider?.options?.baseURL
|
|
549
685
|
const baseURL = typeof configured === "string" && configured.startsWith("http") ? configured : opts.baseURL
|
|
550
686
|
result = await warm(key, baseURL)
|