little-coder 1.7.0 → 1.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.pi/extensions/extra-tools/glob.test.ts +89 -0
- package/.pi/extensions/extra-tools/glob.ts +102 -0
- package/.pi/extensions/extra-tools/index.ts +8 -11
- package/.pi/extensions/llama-cpp-provider/config.test.ts +72 -1
- package/.pi/extensions/llama-cpp-provider/config.ts +51 -0
- package/.pi/extensions/llama-cpp-provider/index.ts +23 -3
- package/CHANGELOG.md +25 -0
- package/package.json +1 -1
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import { describe, it, expect, beforeAll, afterAll } from "vitest";
|
|
2
|
+
import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from "node:fs";
|
|
3
|
+
import { tmpdir } from "node:os";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import { globFiles, renderGlobOutcome, DEFAULT_HEAVY_DIRS } from "./glob.ts";
|
|
6
|
+
|
|
7
|
+
// Root of the on-disk fixture tree shared by the globFiles tests below.
let dir: string;

beforeAll(() => {
  dir = mkdtempSync(join(tmpdir(), "glob-test-"));

  // Each entry is the path (as segments under `dir`) of an empty file to create.
  const fixtureFiles: string[][] = [
    // real source we want to find
    ["src", "a.py"],
    ["src", "sub", "b.py"],
    ["README.md"],
    // heavy dirs that must be pruned (with files matching the pattern inside)
    ["node_modules", "pkg", "deep", "x.py"],
    [".git", "objects", "y.py"],
    ["dist", "z.py"],
  ];

  for (const segments of fixtureFiles) {
    // Make sure the parent directory exists, then drop an empty file into it.
    mkdirSync(join(dir, ...segments.slice(0, -1)), { recursive: true });
    writeFileSync(join(dir, ...segments), "");
  }
});

afterAll(() => {
  // Remove the whole fixture tree; `force` keeps cleanup quiet if it is already gone.
  rmSync(dir, { recursive: true, force: true });
});
|
|
26
|
+
|
|
27
|
+
// Behavioural tests for globFiles: pruning, the match cap and the scan budget.
describe("globFiles", () => {
  it("matches real files and prunes heavy dirs (node_modules/.git/dist)", async () => {
    const outcome = await globFiles("**/*.py", { base: dir });

    // Compare paths relative to the fixture root so the temp dir name drops out.
    const relative = outcome.matches.map((p) => p.slice(dir.length + 1)).sort();
    expect(relative).toEqual(["src/a.py", "src/sub/b.py"]);

    // Nothing from a pruned directory may leak into the results.
    for (const fragment of ["node_modules", ".git", "/dist/"]) {
      expect(outcome.matches.some((p) => p.includes(fragment))).toBe(false);
    }

    expect(outcome.scanTruncated).toBe(false);
    expect(outcome.matchTruncated).toBe(false);
  });

  it("caps matches at maxMatches and flags matchTruncated", async () => {
    const many = mkdtempSync(join(tmpdir(), "glob-many-"));
    // 50 candidates against a cap of 10.
    Array.from({ length: 50 }, (_, i) => i).forEach((i) => {
      writeFileSync(join(many, `f${i}.txt`), "");
    });

    try {
      const outcome = await globFiles("*.txt", { base: many, maxMatches: 10 });
      expect(outcome.matches.length).toBe(10);
      expect(outcome.matchTruncated).toBe(true);
    } finally {
      rmSync(many, { recursive: true, force: true });
    }
  });

  it("stops the walk at maxScan and flags scanTruncated (memory bound)", async () => {
    // A low budget must halt the walk regardless of how many entries exist.
    const outcome = await globFiles("**/*", { base: dir, maxScan: 3 });
    expect(outcome.scanTruncated).toBe(true);
    // a couple over the budget, not unbounded
    expect(outcome.scanned).toBeLessThanOrEqual(5);
  });

  it("the heavy-dir set covers the usual offenders", () => {
    const usualOffenders = ["node_modules", ".git", "dist", ".cache", "Library", "venv", "target"];
    for (const name of usualOffenders) {
      expect(DEFAULT_HEAVY_DIRS.has(name)).toBe(true);
    }
  });
});
|
|
64
|
+
|
|
65
|
+
// Text-rendering tests: every truncation state must produce its own note.
describe("renderGlobOutcome", () => {
  it("reports no matches plainly", () => {
    const rendered = renderGlobOutcome({
      matches: [],
      scanned: 5,
      scanTruncated: false,
      matchTruncated: false,
    });
    expect(rendered).toBe("No files matched");
  });

  it("notes scan truncation when nothing matched", () => {
    const rendered = renderGlobOutcome(
      { matches: [], scanned: 9, scanTruncated: true, matchTruncated: false },
      200000,
    );
    expect(rendered).toMatch(/stopped after scanning 200000 entries/);
  });

  it("appends a match-cap note", () => {
    const rendered = renderGlobOutcome(
      { matches: ["/a", "/b"], scanned: 2, scanTruncated: false, matchTruncated: true },
      200000,
      500,
    );
    expect(rendered).toMatch(/stopped at 500 matches/);
  });

  it("appends a scan-cap note when there were partial matches", () => {
    const rendered = renderGlobOutcome(
      { matches: ["/a"], scanned: 9, scanTruncated: true, matchTruncated: false },
      200000,
    );
    expect(rendered).toMatch(/results may be incomplete/);
  });
});
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import { glob as fsGlob } from "node:fs/promises";
|
|
2
|
+
|
|
3
|
+
// Bounded file globbing. The naive `for await (…glob…) { if (len>=500) break }`
|
|
4
|
+
// only caps MATCHES — it does nothing about the WALK. Run from a huge root
|
|
5
|
+
// (e.g. a home directory with macOS Library / caches / node_modules), fs.glob
|
|
6
|
+
// recursively descends everything, and its internal traversal state grows until
|
|
7
|
+
// the Node process OOMs (heap, not the model's context) — long before 500
|
|
8
|
+
// matches are found if matches are sparse. fs.glob exposes no signal/abort and
|
|
9
|
+
// no depth/scan cap, so we bound it through the one hook it does call for every
|
|
10
|
+
// entry: `exclude`. We use it to (a) prune heavy/irrelevant directories so they
|
|
11
|
+
// are never descended, and (b) meter total entries scanned — once the budget is
|
|
12
|
+
// hit, exclude everything, which winds the walk down.
|
|
13
|
+
|
|
14
|
+
/**
 * Directory names that a file search should never descend into.
 *
 * `globFiles` checks every path segment of a visited entry against this set
 * and excludes the entry on a hit, so these trees are pruned at the directory
 * level instead of being walked. That pruning is what keeps a home-directory
 * glob from exhausting memory.
 */
export const DEFAULT_HEAVY_DIRS: ReadonlySet<string> = new Set<string>([
  // version control
  ".git",
  ".hg",
  ".svn",
  // dependencies / language caches
  "node_modules",
  ".venv",
  "venv",
  "__pycache__",
  ".tox",
  ".mypy_cache",
  ".pytest_cache",
  ".gradle",
  ".cargo",
  "vendor",
  "Pods",
  // build output
  "dist",
  "build",
  "out",
  "target",
  ".next",
  ".nuxt",
  ".output",
  ".svelte-kit",
  // tool caches
  ".cache",
  ".npm",
  ".pnpm-store",
  ".yarn",
  ".turbo",
  // macOS / system heavies that blow up a home-dir walk
  "Library",
  "Applications",
  ".Trash",
  "Photos Library.photoslibrary",
]);
|
|
30
|
+
|
|
31
|
+
/** Options accepted by `globFiles`. */
export interface GlobOptions {
  /** Directory the pattern is resolved against; also prefixed onto every returned match. */
  base: string;
  /** Budget of entries the walk may visit before it is wound down (defaults to DEFAULT_MAX_SCAN). */
  maxScan?: number;
  /** Cap on the number of matches returned (defaults to DEFAULT_MAX_MATCHES). */
  maxMatches?: number;
  /** Directory names to prune instead of descending into (defaults to DEFAULT_HEAVY_DIRS). */
  heavyDirs?: ReadonlySet<string>;
}
|
|
37
|
+
|
|
38
|
+
/** Result of a bounded glob, including whether either bound cut it short. */
export interface GlobOutcome {
  /** Matching paths, each prefixed with the search base, in sorted order. */
  matches: string[];
  /** Number of entries the walk reported through its per-entry hook. */
  scanned: number;
  /** True when the walk was cut short at maxScan entries, so results may be incomplete. */
  scanTruncated: boolean;
  /** True when the matches were capped at maxMatches. */
  matchTruncated: boolean;
}
|
|
46
|
+
|
|
47
|
+
/** Default budget of entries a single glob walk may visit before it is halted. */
export const DEFAULT_MAX_SCAN = 200000;

/** Default cap on the number of matches a single glob returns. */
export const DEFAULT_MAX_MATCHES = 500;
|
|
49
|
+
|
|
50
|
+
/**
 * Glob for files under `opts.base` while bounding both the result size and the
 * directory walk itself.
 *
 * @param pattern Glob pattern, resolved relative to `opts.base`.
 * @param opts    Base directory plus optional scan budget, match cap and the
 *                set of directory names to prune.
 * @returns The sorted matches (each prefixed with `opts.base`), the number of
 *          entries reported by the walk, and two flags saying whether the scan
 *          budget or the match cap cut the search short.
 */
export async function globFiles(pattern: string, opts: GlobOptions): Promise<GlobOutcome> {
  const maxScan = opts.maxScan ?? DEFAULT_MAX_SCAN;
  const maxMatches = opts.maxMatches ?? DEFAULT_MAX_MATCHES;
  const heavy = opts.heavyDirs ?? DEFAULT_HEAVY_DIRS;

  const matches: string[] = [];
  let scanned = 0;
  let scanTruncated = false;
  let matchTruncated = false;

  // Called for every entry the walk visits (files AND directories). Pruning a
  // directory here stops descent into it. Also our scan meter: once the budget
  // is spent, exclude everything so fs.glob stops adding work and ends.
  const exclude = (entry: unknown): boolean => {
    scanned++;
    if (scanned > maxScan) {
      scanTruncated = true;
      return true;
    }
    // The hook may hand us a plain name/path string or a Dirent-like object.
    const name = typeof entry === "string" ? entry : String((entry as { name?: string })?.name ?? entry);
    // Prune when ANY path segment is a heavy directory name.
    return name.split(/[\\/]/).some((seg) => heavy.has(seg));
  };

  // `exclude` as a predicate isn't in every @types/node version's fs.glob
  // signature, but it's supported at runtime (Node 22+); cast to pass it through.
  for await (const m of fsGlob(pattern, { cwd: opts.base, exclude } as Parameters<typeof fsGlob>[1])) {
    // Check the cap BEFORE recording the match: truncation is only reported
    // when a match beyond the cap really exists. A tree with exactly
    // `maxMatches` hits is therefore not mislabelled as cut short, and a cap
    // of 0 yields no matches at all. Seeing that one extra match costs a
    // little more walking, which the scan budget above still bounds.
    if (matches.length >= maxMatches) {
      matchTruncated = true;
      break;
    }
    matches.push(`${opts.base}/${m}`);
  }

  matches.sort();
  return { matches, scanned, scanTruncated, matchTruncated };
}
|
|
86
|
+
|
|
87
|
+
/**
 * Turn a globFiles outcome into the text the tool returns.
 *
 * Matches are listed one per line. When the search was cut short, a single
 * trailing note says which bound was hit so the model knows to narrow its
 * search; the match-cap note takes precedence over the scan-cap note.
 *
 * @param o          Outcome to render.
 * @param maxScan    Scan budget quoted in the scan-truncation note.
 * @param maxMatches Match cap quoted in the match-truncation note.
 * @returns The rendered text.
 */
export function renderGlobOutcome(o: GlobOutcome, maxScan = DEFAULT_MAX_SCAN, maxMatches = DEFAULT_MAX_MATCHES): string {
  const { matches, scanTruncated, matchTruncated } = o;

  // Nothing found: say so, and mention the scan budget if that is why.
  if (matches.length === 0) {
    if (!scanTruncated) {
      return "No files matched";
    }
    return `No files matched (search stopped after scanning ${maxScan} entries — narrow the base path; build/dependency/cache dirs are skipped automatically).`;
  }

  const lines = [...matches];
  if (matchTruncated) {
    lines.push(`… (stopped at ${maxMatches} matches — narrow the pattern for the rest)`);
  } else if (scanTruncated) {
    lines.push(`… (search stopped after scanning ${maxScan} entries — results may be incomplete; narrow the base path)`);
  }
  return lines.join("\n");
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
2
2
|
import { Type } from "@sinclair/typebox";
|
|
3
|
-
import {
|
|
3
|
+
import { globFiles, renderGlobOutcome } from "./glob.ts";
|
|
4
4
|
|
|
5
5
|
// Ports of tools.py::_glob, _webfetch, _websearch. Pi ships its own grep/find,
|
|
6
6
|
// so those are not re-registered here.
|
|
@@ -10,7 +10,9 @@ export default function (pi: ExtensionAPI) {
|
|
|
10
10
|
name: "glob",
|
|
11
11
|
label: "Glob",
|
|
12
12
|
description:
|
|
13
|
-
"Find files matching a glob pattern. Returns a sorted list of matching paths (up to 500)."
|
|
13
|
+
"Find files matching a glob pattern. Returns a sorted list of matching paths (up to 500). " +
|
|
14
|
+
"Common dependency/build/cache dirs (node_modules, .git, dist, …) are skipped, and the walk " +
|
|
15
|
+
"is bounded — for a focused search, pass a `path` rather than globbing a whole home directory.",
|
|
14
16
|
parameters: Type.Object({
|
|
15
17
|
pattern: Type.String({ description: "Glob pattern e.g. **/*.py" }),
|
|
16
18
|
path: Type.Optional(Type.String({ description: "Base directory (default: cwd)" })),
|
|
@@ -18,16 +20,11 @@ export default function (pi: ExtensionAPI) {
|
|
|
18
20
|
async execute(_id, { pattern, path }) {
|
|
19
21
|
try {
|
|
20
22
|
const base = path || process.cwd();
|
|
21
|
-
|
|
22
|
-
//
|
|
23
|
-
|
|
24
|
-
matches.push(`${base}/${m}`);
|
|
25
|
-
if (matches.length >= 500) break;
|
|
26
|
-
}
|
|
27
|
-
matches.sort();
|
|
28
|
-
const text = matches.length === 0 ? "No files matched" : matches.join("\n");
|
|
23
|
+
// Bounded walk: prunes heavy dirs and caps total entries scanned so a
|
|
24
|
+
// recursive glob from a huge root can't exhaust the process heap.
|
|
25
|
+
const outcome = await globFiles(pattern, { base });
|
|
29
26
|
return {
|
|
30
|
-
content: [{ type: "text", text }],
|
|
27
|
+
content: [{ type: "text", text: renderGlobOutcome(outcome) }],
|
|
31
28
|
details: {},
|
|
32
29
|
};
|
|
33
30
|
} catch (e) {
|
|
@@ -3,7 +3,16 @@ import { mkdtempSync, rmSync, writeFileSync, mkdirSync } from "node:fs";
|
|
|
3
3
|
import { tmpdir } from "node:os";
|
|
4
4
|
import { dirname, join, resolve } from "node:path";
|
|
5
5
|
import { fileURLToPath } from "node:url";
|
|
6
|
-
import {
|
|
6
|
+
import {
|
|
7
|
+
applyEnvOverrides,
|
|
8
|
+
loadProviders,
|
|
9
|
+
mergeProviders,
|
|
10
|
+
resolveOverridePath,
|
|
11
|
+
propsUrlFor,
|
|
12
|
+
contextWindowFromProps,
|
|
13
|
+
probeContextWindow,
|
|
14
|
+
type ProviderEntry,
|
|
15
|
+
} from "./config.ts";
|
|
7
16
|
|
|
8
17
|
const sampleProvider = (baseUrl: string, modelId: string): ProviderEntry => ({
|
|
9
18
|
api: "openai-completions",
|
|
@@ -185,3 +194,65 @@ describe("shipped models.json", () => {
|
|
|
185
194
|
expect(Object.keys(result.providers).sort()).toEqual(["llamacpp", "lmstudio", "ollama"]);
|
|
186
195
|
});
|
|
187
196
|
});
|
|
197
|
+
|
|
198
|
+
// propsUrlFor must land on <server root>/props for every baseUrl spelling.
describe("propsUrlFor", () => {
  it("strips a trailing /v1 and points at the server root /props", () => {
    const cases: Array<[string, string]> = [
      ["http://127.0.0.1:8888/v1", "http://127.0.0.1:8888/props"],
      ["http://host:8888/v1/", "http://host:8888/props"],
      ["http://host:8888", "http://host:8888/props"],
      ["http://host:8888/", "http://host:8888/props"],
    ];
    for (const [baseUrl, expected] of cases) {
      expect(propsUrlFor(baseUrl)).toBe(expected);
    }
  });
});
|
|
206
|
+
|
|
207
|
+
// contextWindowFromProps: where n_ctx is read from, and which values are rejected.
describe("contextWindowFromProps", () => {
  it("reads default_generation_settings.n_ctx (real llama.cpp shape)", () => {
    const props = { default_generation_settings: { n_ctx: 131072 } };
    expect(contextWindowFromProps(props)).toBe(131072);
  });

  it("falls back to a top-level n_ctx", () => {
    const props = { n_ctx: 65536 };
    expect(contextWindowFromProps(props)).toBe(65536);
  });

  it("returns undefined when absent or non-positive", () => {
    const unusable: unknown[] = [
      {},
      { default_generation_settings: { n_ctx: 0 } },
      { default_generation_settings: { n_ctx: "lots" } },
      null,
    ];
    for (const props of unusable) {
      expect(contextWindowFromProps(props)).toBeUndefined();
    }
  });
});
|
|
221
|
+
|
|
222
|
+
// probeContextWindow is exercised through an injected fetch, so no server is needed.
describe("probeContextWindow", () => {
  // Minimal stand-in for a successful fetch Response carrying `body` as JSON.
  const okRes = (body: unknown) => ({ ok: true, json: async () => body }) as Response;
  const base = "http://x:8888/v1";

  it("returns the server's n_ctx on success", async () => {
    const fetchImpl = (async () =>
      okRes({ default_generation_settings: { n_ctx: 131072 } })) as unknown as typeof fetch;
    const probed = await probeContextWindow(base, { fetchImpl });
    expect(probed).toBe(131072);
  });

  it("returns undefined on a non-OK response", async () => {
    const fetchImpl = (async () => ({ ok: false }) as Response) as unknown as typeof fetch;
    const probed = await probeContextWindow(base, { fetchImpl });
    expect(probed).toBeUndefined();
  });

  it("returns undefined when fetch throws (server down / unreachable)", async () => {
    const fetchImpl = (async () => {
      throw new Error("ECONNREFUSED");
    }) as unknown as typeof fetch;
    const probed = await probeContextWindow(base, { fetchImpl });
    expect(probed).toBeUndefined();
  });

  it("returns undefined when the response lacks n_ctx", async () => {
    const fetchImpl = (async () => okRes({ total_slots: 1 })) as unknown as typeof fetch;
    const probed = await probeContextWindow(base, { fetchImpl });
    expect(probed).toBeUndefined();
  });

  it("honors an explicit props url override", async () => {
    // Record the URL the probe actually requests.
    let seen = "";
    const fetchImpl = (async (u: string) => {
      seen = u;
      return okRes({ default_generation_settings: { n_ctx: 40960 } });
    }) as unknown as typeof fetch;

    const got = await probeContextWindow(base, { fetchImpl, url: "http://other/props" });

    expect(seen).toBe("http://other/props");
    expect(got).toBe(40960);
  });
});
|
|
@@ -146,3 +146,54 @@ export function loadProviders(pkgRoot: string, env: NodeJS.ProcessEnv = process.
|
|
|
146
146
|
const withEnv = applyEnvOverrides(merged, env);
|
|
147
147
|
return { providers: withEnv, sources };
|
|
148
148
|
}
|
|
149
|
+
|
|
150
|
+
// ── live context-window detection (llama.cpp /props) ────────────────────────
|
|
151
|
+
// little-coder budgets against the model's registered contextWindow. Rather than
|
|
152
|
+
// trust the static value in models.json, we ask a running llama.cpp server for
|
|
153
|
+
// its actual n_ctx at startup, so a `-c 131072` server shows 128k instead of the
|
|
154
|
+
// declared default. Best-effort: any failure falls back to the declared window.
|
|
155
|
+
|
|
156
|
+
/**
 * Build the llama.cpp `/props` URL for an OpenAI-style `baseUrl`.
 *
 * llama-server serves `/props` at the server ROOT, not under `/v1` (which
 * 404s), so any trailing slashes and then a trailing `/v1` are removed before
 * `/props` is appended.
 *
 * @param baseUrl Provider base URL, with or without a trailing `/v1` or slash.
 * @returns The URL of the server's `/props` endpoint.
 */
export function propsUrlFor(baseUrl: string): string {
  const withoutTrailingSlashes = baseUrl.replace(/\/+$/, "");
  const serverRoot = withoutTrailingSlashes.replace(/\/v1$/, "");
  return serverRoot + "/props";
}
|
|
163
|
+
|
|
164
|
+
/**
 * Extract the context window (`n_ctx`) from a llama.cpp `/props` response.
 *
 * The value normally lives at `default_generation_settings.n_ctx` (the
 * per-slot window — exactly what one conversation can use); some builds also
 * expose a top-level `n_ctx`. The two locations are validated independently,
 * so an unusable nested value (0, junk text) no longer hides a usable
 * top-level one.
 *
 * Only numbers and numeric strings are accepted. Other types that `Number()`
 * would silently coerce (e.g. `true` → 1, `[4096]` → 4096) are rejected, so
 * they can never register as a bogus window.
 *
 * @param json Parsed `/props` body; may be any value, including `null`.
 * @returns The first finite, positive `n_ctx` found, or `undefined`.
 */
export function contextWindowFromProps(json: unknown): number | undefined {
  if (typeof json !== "object" || json === null) return undefined;
  const j = json as { default_generation_settings?: { n_ctx?: unknown } | null; n_ctx?: unknown };

  // Coerce one candidate to a finite positive number, or undefined if it is unusable.
  const asPositive = (value: unknown): number | undefined => {
    const n = typeof value === "number" ? value : typeof value === "string" ? Number(value) : NaN;
    return Number.isFinite(n) && n > 0 ? n : undefined;
  };

  // Prefer the per-slot value; fall back to the top-level one.
  return asPositive(j.default_generation_settings?.n_ctx) ?? asPositive(j.n_ctx);
}
|
|
173
|
+
|
|
174
|
+
/** Optional overrides for `probeContextWindow`. */
export interface ProbeDeps {
  /** Fetch implementation to use in place of the global `fetch`. */
  fetchImpl?: typeof fetch;
  /** Milliseconds before the request is aborted (1500 when omitted). */
  timeoutMs?: number;
  /** Exact URL to request, used instead of the one derived from the base URL. */
  url?: string;
}
|
|
179
|
+
|
|
180
|
+
/**
 * Query a llama.cpp server's `/props` endpoint for its live context window.
 *
 * Best-effort by design: a non-OK response, a thrown or aborted fetch, or a
 * body that cannot be parsed all resolve to `undefined`, so the caller can
 * fall back to the declared window. The function never throws. The request is
 * aborted through an `AbortController` once `timeoutMs` has elapsed.
 *
 * @param baseUrl Provider base URL; used to derive the `/props` URL unless `deps.url` is given.
 * @param deps    Optional fetch implementation, timeout and URL overrides.
 * @returns The server's context window, or `undefined` on any failure.
 */
export async function probeContextWindow(baseUrl: string, deps: ProbeDeps = {}): Promise<number | undefined> {
  const doFetch = deps.fetchImpl ?? fetch;
  const target = deps.url ?? propsUrlFor(baseUrl);
  const budgetMs = deps.timeoutMs ?? 1500;

  // Abort the in-flight request when the time budget runs out.
  const controller = new AbortController();
  const abortTimer = setTimeout(() => controller.abort(), budgetMs);

  try {
    const response = await doFetch(target, { signal: controller.signal });
    return response.ok ? contextWindowFromProps(await response.json()) : undefined;
  } catch {
    // Unreachable server, abort, or non-JSON body: report "unknown".
    return undefined;
  } finally {
    // Always release the timer so it cannot fire after we have returned.
    clearTimeout(abortTimer);
  }
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { dirname, resolve } from "node:path";
|
|
2
2
|
import { fileURLToPath } from "node:url";
|
|
3
3
|
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
4
|
-
import { loadProviders } from "./config.ts";
|
|
4
|
+
import { loadProviders, probeContextWindow } from "./config.ts";
|
|
5
5
|
|
|
6
6
|
// Data-driven provider registration. Reads:
|
|
7
7
|
// 1. <pkgRoot>/models.json (shipped default)
|
|
@@ -16,7 +16,7 @@ import { loadProviders } from "./config.ts";
|
|
|
16
16
|
const here = dirname(fileURLToPath(import.meta.url));
|
|
17
17
|
const pkgRoot = resolve(here, "..", "..", "..");
|
|
18
18
|
|
|
19
|
-
export default function (pi: ExtensionAPI) {
|
|
19
|
+
export default async function (pi: ExtensionAPI) {
|
|
20
20
|
const result = loadProviders(pkgRoot);
|
|
21
21
|
|
|
22
22
|
for (const src of result.sources) {
|
|
@@ -33,12 +33,32 @@ export default function (pi: ExtensionAPI) {
|
|
|
33
33
|
return;
|
|
34
34
|
}
|
|
35
35
|
|
|
36
|
+
// Opt-out for offline / CI / no-server launches that don't want a startup probe.
|
|
37
|
+
const probeDisabled = process.env.LITTLE_CODER_NO_CTX_PROBE === "1";
|
|
38
|
+
|
|
36
39
|
for (const [name, entry] of Object.entries(result.providers)) {
|
|
40
|
+
let models = entry.models;
|
|
41
|
+
|
|
42
|
+
// Auto-detect the server's live context window so the model registers with
|
|
43
|
+
// the real n_ctx (e.g. a `-c 131072` server) instead of models.json's
|
|
44
|
+
// declared default — the TUI readout, read-guard, and context budget all
|
|
45
|
+
// follow the registered window. llama.cpp-only (the /props endpoint); any
|
|
46
|
+
// failure silently keeps the declared window, so this never breaks startup.
|
|
47
|
+
if (!probeDisabled && name === "llamacpp" && entry.models.length > 0) {
|
|
48
|
+
const probed = await probeContextWindow(entry.baseUrl, {
|
|
49
|
+
url: process.env.LITTLE_CODER_LLAMACPP_PROPS_URL || undefined,
|
|
50
|
+
timeoutMs: Number(process.env.LITTLE_CODER_CTX_PROBE_TIMEOUT_MS) || undefined,
|
|
51
|
+
});
|
|
52
|
+
if (probed) {
|
|
53
|
+
models = entry.models.map((m) => ({ ...m, contextWindow: probed }));
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
|
|
37
57
|
pi.registerProvider(name, {
|
|
38
58
|
baseUrl: entry.baseUrl,
|
|
39
59
|
apiKey: entry.apiKey,
|
|
40
60
|
api: entry.api,
|
|
41
|
-
models
|
|
61
|
+
models,
|
|
42
62
|
});
|
|
43
63
|
}
|
|
44
64
|
}
|
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,31 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to little-coder are documented here. The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and little-coder's public interface (CLI, providers, tools, skills) follows semver starting at `v0.0.1` post-rename.
|
|
4
4
|
|
|
5
|
+
## [v1.8.1] — 2026-05-23
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
- **`glob` no longer exhausts memory on a recursive search from a huge root.** The tool capped *matches* at 500 but never bounded the *walk*: run from a home directory (or any tree with macOS `Library`, caches, or `node_modules`), `fs.glob` recursively descended everything and its internal traversal state grew until the Node **process** ran out of heap — a host-memory crash (`Ineffective mark-compacts near heap limit`), entirely distinct from the model's *context window* (the read-guard / window machinery operates on tool *results* in tokens; this died mid-walk, before any result existed). The walk is now bounded two ways: heavy/irrelevant directories (`node_modules`, `.git`, `dist`, `.cache`, `Library`, `venv`, `target`, …) are **pruned** — never descended — and a hard scan budget (200 000 entries) halts the walk through the one hook `fs.glob` calls per entry (`exclude`), since it exposes no signal/abort. When results are cut short the output says so, so the model narrows its search. New unit-tested `globFiles` / `renderGlobOutcome` helpers (`.pi/extensions/extra-tools/glob.ts`), verified to prune `node_modules` (0 descent) and to halt at the scan budget.
|
|
9
|
+
|
|
10
|
+
### Notes for upgraders
|
|
11
|
+
- For a focused search, pass a `path` (a project subdirectory) instead of globbing from a home directory. Hidden directories continue to be skipped by `fs.glob` as before.
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## [v1.8.0] — 2026-05-23
|
|
16
|
+
|
|
17
|
+
little-coder now **auto-detects the llama.cpp server's live context window** at startup and registers the model with it, so a `llama-server -c 131072` shows 128k instead of the declared default — no config edit. This completes [v1.7.0](#v170--2026-05-23): the budget already *followed* the registered window; now the registered window itself comes from the running server.
|
|
18
|
+
|
|
19
|
+
### Added
|
|
20
|
+
- **Live context-window detection for llama.cpp.** On startup `llama-cpp-provider` GETs the server's `/props` endpoint, reads its actual `n_ctx`, and registers the model with that window in place of the static `contextWindow` in `models.json`. The TUI context readout, read-guard's overflow trim, and the skill/knowledge budgets all then track the server's real window — bump `llama-server -c` and little-coder follows, no `models.json` or settings edit. The `/props` URL is derived from the provider baseUrl by stripping `/v1` (llama-server serves it at the root); the value is read from `default_generation_settings.n_ctx`. New tested helpers `propsUrlFor` / `contextWindowFromProps` / `probeContextWindow`, validated end-to-end against a live `-c 131072` server (→ 131072).
|
|
21
|
+
- **Best-effort and safe:** 1.5 s timeout, `llamacpp` provider only, and ANY failure (server down, no `/props`, non-JSON, timeout) silently falls back to the declared window — startup is never blocked or broken.
|
|
22
|
+
- **Env knobs:** `LITTLE_CODER_NO_CTX_PROBE=1` disables the probe (offline / CI); `LITTLE_CODER_LLAMACPP_PROPS_URL` overrides the `/props` URL for non-standard setups; `LITTLE_CODER_CTX_PROBE_TIMEOUT_MS` tunes the timeout.
|
|
23
|
+
|
|
24
|
+
### Notes for upgraders
|
|
25
|
+
- This adds one best-effort HTTP GET to the llama.cpp `/props` endpoint at launch (only for the `llamacpp` provider). If your server/proxy doesn't expose `/props`, behaviour is unchanged — the declared `models.json` `contextWindow` (default 32768) is used. Set `LITTLE_CODER_NO_CTX_PROBE=1` to skip the probe entirely.
|
|
26
|
+
- No CLI-flag or public-API changes.
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
5
30
|
## [v1.7.0] — 2026-05-23
|
|
6
31
|
|
|
7
32
|
little-coder's context budget now follows the model's **live registered context window** instead of a hardcoded 32 768. Whatever window your provider declares for the active model (`contextWindow` in `models.json`, user-overridable) is what the whole harness budgets against — bump the model once and the TUI's context readout, read-guard's overflow trim, and the skill/knowledge-injection budgets all move together. This closes the common report: *"I bumped llama.cpp to 128k but little-coder still says 33k."*
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "little-coder",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.8.1",
|
|
4
4
|
"description": "A pi-based coding agent optimized for small local language models. Reproduces the whitepaper's scaffold-model-fit adaptations as pi extensions.",
|
|
5
5
|
"homepage": "https://github.com/itayinbarr/little-coder",
|
|
6
6
|
"repository": {
|