@a13xu/lucid 1.16.1 → 1.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/build/compression/semantic.js +5 -1
- package/build/database.d.ts +51 -0
- package/build/database.js +86 -0
- package/build/guardian/checklist.d.ts +2 -1
- package/build/guardian/checklist.js +20 -1
- package/build/guardian/coding-rules.d.ts +2 -1
- package/build/guardian/coding-rules.js +20 -1
- package/build/guardian/session-tracker.d.ts +34 -0
- package/build/guardian/session-tracker.js +105 -0
- package/build/guardian/truncate-guard.d.ts +54 -0
- package/build/guardian/truncate-guard.js +136 -0
- package/build/index.js +745 -742
- package/build/local-llm/client.d.ts +20 -0
- package/build/local-llm/client.js +140 -0
- package/build/local-llm/config.d.ts +11 -0
- package/build/local-llm/config.js +50 -0
- package/build/local-llm/runtimes.d.ts +16 -0
- package/build/local-llm/runtimes.js +82 -0
- package/build/local-llm/setup-cli.d.ts +5 -0
- package/build/local-llm/setup-cli.js +298 -0
- package/build/local-llm/types.d.ts +34 -0
- package/build/local-llm/types.js +5 -0
- package/build/tools/backup.d.ts +47 -0
- package/build/tools/backup.js +107 -0
- package/build/tools/delegate-local.d.ts +23 -0
- package/build/tools/delegate-local.js +75 -0
- package/build/tools/init.js +124 -2
- package/build/tools/plan.js +2 -2
- package/build/tools/session.d.ts +13 -0
- package/build/tools/session.js +59 -0
- package/package.json +3 -1
- package/skills/lucid-audit/SKILL.md +11 -0
- package/skills/lucid-context/SKILL.md +9 -0
- package/skills/lucid-plan/SKILL.md +9 -0
- package/skills/lucid-security/SKILL.md +9 -0
- package/skills/lucid-start/SKILL.md +9 -0
- package/skills/lucid-webdev/SKILL.md +14 -0
package/build/local-llm/client.d.ts
@@ -0,0 +1,20 @@
+/**
+ * HTTP client wrapping Ollama and OpenAI-compatible /v1/chat/completions
+ * endpoints behind a single normalized interface.
+ *
+ * Inputs are validated; the configured endpoint is registered with the SSRF
+ * allowlist by the caller (see src/index.ts) so remote endpoints work after
+ * explicit user opt-in via `lucid local init`.
+ */
+import type { GenerateRequest, GenerateResponse, LocalLlmConfig } from "./types.js";
+export declare class LocalLlmError extends Error {
+    readonly statusCode?: number | undefined;
+    constructor(message: string, statusCode?: number | undefined);
+}
+export declare function generate(cfg: LocalLlmConfig, req: GenerateRequest): Promise<GenerateResponse>;
+/** Lightweight reachability check — just probes /api/tags or /v1/models. */
+export declare function ping(cfg: LocalLlmConfig): Promise<{
+    ok: boolean;
+    latency_ms: number;
+    detail?: string;
+}>;

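A minimal consumer sketch of this declared API, assuming a config shaped like LocalLlmConfig (e.g. returned by loadLocalConfig() from config.js below); the prompt, system text, and logging are illustrative, not taken from the package:

import { generate, ping, LocalLlmError } from "./client.js";
import { loadLocalConfig } from "./config.js";

const cfg = loadLocalConfig();                 // null until `lucid local init` has run
if (cfg && (await ping(cfg)).ok) {
    try {
        const out = await generate(cfg, {
            prompt: "Summarize this module in one sentence.",  // hypothetical prompt
            system: "You are a terse assistant.",
            max_tokens: 128,
            temperature: 0.2,
        });
        console.log(`${out.model} responded in ${out.latency_ms}ms:`, out.text);
    }
    catch (e) {
        if (e instanceof LocalLlmError)
            console.error(e.statusCode, e.message);
    }
}
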
package/build/local-llm/client.js
@@ -0,0 +1,140 @@
+/**
+ * HTTP client wrapping Ollama and OpenAI-compatible /v1/chat/completions
+ * endpoints behind a single normalized interface.
+ *
+ * Inputs are validated; the configured endpoint is registered with the SSRF
+ * allowlist by the caller (see src/index.ts) so remote endpoints work after
+ * explicit user opt-in via `lucid local init`.
+ */
+export class LocalLlmError extends Error {
+    statusCode;
+    constructor(message, statusCode) {
+        super(message);
+        this.statusCode = statusCode;
+        this.name = "LocalLlmError";
+    }
+}
+export async function generate(cfg, req) {
+    if (!cfg.enabled) {
+        throw new LocalLlmError("Local LLM is disabled. Run `lucid local init` to set it up.");
+    }
+    return cfg.runtime === "ollama"
+        ? generateOllama(cfg, req)
+        : generateOpenAi(cfg, req);
+}
+// ---------------------------------------------------------------------------
+// Ollama POST /api/chat (preferred — gives system role)
+// ---------------------------------------------------------------------------
+async function generateOllama(cfg, req) {
+    const url = `${cfg.endpoint.replace(/\/+$/, "")}/api/chat`;
+    const messages = [];
+    if (req.system)
+        messages.push({ role: "system", content: req.system });
+    messages.push({ role: "user", content: req.prompt });
+    const body = {
+        model: cfg.model,
+        messages,
+        stream: false,
+        options: {
+            temperature: req.temperature ?? 0.2,
+            num_predict: req.max_tokens ?? 2048,
+            ...(req.stop ? { stop: req.stop } : {}),
+        },
+    };
+    const start = Date.now();
+    const res = await postJson(url, body, cfg);
+    const latency = Date.now() - start;
+    const content = res.message?.content ?? "";
+    return {
+        text: content,
+        model: cfg.model,
+        latency_ms: latency,
+        prompt_tokens: res.prompt_eval_count,
+        completion_tokens: res.eval_count,
+    };
+}
+// ---------------------------------------------------------------------------
+// OpenAI-compatible POST /v1/chat/completions
+// ---------------------------------------------------------------------------
+async function generateOpenAi(cfg, req) {
+    const url = `${cfg.endpoint.replace(/\/+$/, "")}/v1/chat/completions`;
+    const messages = [];
+    if (req.system)
+        messages.push({ role: "system", content: req.system });
+    messages.push({ role: "user", content: req.prompt });
+    const body = {
+        model: cfg.model,
+        messages,
+        temperature: req.temperature ?? 0.2,
+        max_tokens: req.max_tokens ?? 2048,
+        stream: false,
+    };
+    if (req.stop)
+        body["stop"] = req.stop;
+    const start = Date.now();
+    const res = await postJson(url, body, cfg);
+    const latency = Date.now() - start;
+    const choice = res.choices?.[0];
+    const usage = res.usage;
+    return {
+        text: choice?.message?.content ?? "",
+        model: cfg.model,
+        latency_ms: latency,
+        prompt_tokens: usage?.prompt_tokens,
+        completion_tokens: usage?.completion_tokens,
+    };
+}
+// ---------------------------------------------------------------------------
+// Shared transport
+// ---------------------------------------------------------------------------
+async function postJson(url, body, cfg) {
+    const ac = new AbortController();
+    const timer = setTimeout(() => ac.abort(), cfg.timeout_ms);
+    const headers = { "Content-Type": "application/json" };
+    if (cfg.api_key)
+        headers["Authorization"] = `Bearer ${cfg.api_key}`;
+    try {
+        const res = await fetch(url, {
+            method: "POST",
+            headers,
+            body: JSON.stringify(body),
+            signal: ac.signal,
+        });
+        if (!res.ok) {
+            const text = await res.text().catch(() => "");
+            throw new LocalLlmError(`Local LLM request failed: ${res.status} ${res.statusText}${text ? " — " + text.slice(0, 200) : ""}`, res.status);
+        }
+        return await res.json();
+    }
+    catch (e) {
+        if (e instanceof LocalLlmError)
+            throw e;
+        const msg = e instanceof Error ? e.message : String(e);
+        if (msg.includes("aborted")) {
+            throw new LocalLlmError(`Local LLM request timed out after ${cfg.timeout_ms}ms`);
+        }
+        throw new LocalLlmError(`Local LLM request failed: ${msg}`);
+    }
+    finally {
+        clearTimeout(timer);
+    }
+}
+/** Lightweight reachability check — just probes /api/tags or /v1/models. */
+export async function ping(cfg) {
+    const url = cfg.runtime === "ollama"
+        ? `${cfg.endpoint.replace(/\/+$/, "")}/api/tags`
+        : `${cfg.endpoint.replace(/\/+$/, "")}/v1/models`;
+    const start = Date.now();
+    const ac = new AbortController();
+    const timer = setTimeout(() => ac.abort(), 3_000);
+    try {
+        const res = await fetch(url, { signal: ac.signal });
+        return { ok: res.ok, latency_ms: Date.now() - start, detail: res.ok ? undefined : `HTTP ${res.status}` };
+    }
+    catch (e) {
+        return { ok: false, latency_ms: Date.now() - start, detail: e instanceof Error ? e.message : "unreachable" };
+    }
+    finally {
+        clearTimeout(timer);
+    }
+}

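For reference, the two request bodies the client builds differ mainly in where the sampling options live; a sketch with placeholder model and prompt values (not from the package):

// Ollama POST /api/chat: options nested under `options`, output capped by `num_predict`
const ollamaBody = {
    model: "qwen2.5-coder:1.5b",                        // placeholder
    messages: [{ role: "user", content: "Say OK." }],
    stream: false,
    options: { temperature: 0.2, num_predict: 2048 },
};
// OpenAI-compatible POST /v1/chat/completions: flat `temperature` / `max_tokens`
const openAiBody = {
    model: "qwen2.5-coder:1.5b",                        // placeholder
    messages: [{ role: "user", content: "Say OK." }],
    temperature: 0.2,
    max_tokens: 2048,
    stream: false,
};
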
package/build/local-llm/config.d.ts
@@ -0,0 +1,11 @@
+/**
+ * Global config for the local-LLM endpoint, persisted at ~/.lucid/local.json.
+ * Project lucid.config.json may override (read by loadConfig in src/config.ts —
+ * here we only handle the global file so all projects share one setup by default).
+ */
+import type { LocalLlmConfig } from "./types.js";
+export declare function getConfigPath(): string;
+export declare function loadLocalConfig(): LocalLlmConfig | null;
+export declare function saveLocalConfig(cfg: LocalLlmConfig): void;
+export declare function disableLocalConfig(): boolean;
+export declare function isConfigured(): boolean;

package/build/local-llm/config.js
@@ -0,0 +1,50 @@
+/**
+ * Global config for the local-LLM endpoint, persisted at ~/.lucid/local.json.
+ * Project lucid.config.json may override (read by loadConfig in src/config.ts —
+ * here we only handle the global file so all projects share one setup by default).
+ */
+import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
+import { homedir } from "os";
+import { dirname, join } from "path";
+const CONFIG_DIR = join(homedir(), ".lucid");
+const CONFIG_PATH = join(CONFIG_DIR, "local.json");
+export function getConfigPath() {
+    return CONFIG_PATH;
+}
+export function loadLocalConfig() {
+    if (!existsSync(CONFIG_PATH))
+        return null;
+    try {
+        const raw = readFileSync(CONFIG_PATH, "utf-8");
+        const parsed = JSON.parse(raw);
+        if (typeof parsed.endpoint !== "string" || typeof parsed.model !== "string")
+            return null;
+        return {
+            enabled: parsed.enabled !== false,
+            runtime: parsed.runtime ?? "ollama",
+            endpoint: parsed.endpoint,
+            model: parsed.model,
+            api_key: parsed.api_key,
+            timeout_ms: parsed.timeout_ms ?? 60_000,
+            configured_at: parsed.configured_at ?? new Date().toISOString(),
+        };
+    }
+    catch {
+        return null;
+    }
+}
+export function saveLocalConfig(cfg) {
+    mkdirSync(dirname(CONFIG_PATH), { recursive: true });
+    writeFileSync(CONFIG_PATH, JSON.stringify(cfg, null, 2) + "\n", "utf-8");
+}
+export function disableLocalConfig() {
+    const cfg = loadLocalConfig();
+    if (!cfg)
+        return false;
+    saveLocalConfig({ ...cfg, enabled: false });
+    return true;
+}
+export function isConfigured() {
+    const cfg = loadLocalConfig();
+    return cfg !== null && cfg.enabled;
+}

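A short sketch of the config round-trip through this module, using the LocalLlmConfig shape from types.d.ts; the endpoint and model values below are placeholders, not defaults shipped by the package:

import { saveLocalConfig, loadLocalConfig, isConfigured, getConfigPath } from "./config.js";

saveLocalConfig({
    enabled: true,
    runtime: "ollama",
    endpoint: "http://localhost:11434",   // placeholder endpoint
    model: "qwen2.5-coder:1.5b",          // placeholder model
    timeout_ms: 60_000,
    configured_at: new Date().toISOString(),
});
// Reads ~/.lucid/local.json back; returns null if the file is missing or malformed.
console.log(isConfigured(), loadLocalConfig()?.model, getConfigPath());
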
package/build/local-llm/runtimes.d.ts
@@ -0,0 +1,16 @@
+/**
+ * Probe known local-LLM runtimes on localhost (and any user-supplied endpoint).
+ *
+ * Detection rules:
+ *   GET /api/tags  → Ollama (lists pulled models)
+ *   GET /v1/models → OpenAI-compat (LM Studio, llama.cpp server, vLLM, …)
+ *
+ * Probes are short-timeout (1.5s) so they don't block setup.
+ */
+import type { DetectedRuntime, RuntimeKind } from "./types.js";
+/** Probe one specific endpoint. Returns null if nothing answers. */
+export declare function probeEndpoint(endpoint: string, headers?: Record<string, string>): Promise<DetectedRuntime | null>;
+/** Probe all known local ports — used for first-run auto-detect. */
+export declare function autoDetectLocal(): Promise<DetectedRuntime[]>;
+/** Human-readable runtime label, never throws. */
+export declare function describeRuntime(kind: RuntimeKind): string;

package/build/local-llm/runtimes.js
@@ -0,0 +1,82 @@
+/**
+ * Probe known local-LLM runtimes on localhost (and any user-supplied endpoint).
+ *
+ * Detection rules:
+ *   GET /api/tags  → Ollama (lists pulled models)
+ *   GET /v1/models → OpenAI-compat (LM Studio, llama.cpp server, vLLM, …)
+ *
+ * Probes are short-timeout (1.5s) so they don't block setup.
+ */
+const KNOWN_LOCAL_PORTS = [
+    { port: 11434, hint: "Ollama default" },
+    { port: 1234, hint: "LM Studio default" },
+    { port: 8080, hint: "llama.cpp server default" },
+    { port: 8000, hint: "vLLM / generic" },
+];
+const PROBE_TIMEOUT_MS = 1_500;
+async function probeUrl(url, headers) {
+    const ac = new AbortController();
+    const timer = setTimeout(() => ac.abort(), PROBE_TIMEOUT_MS);
+    const start = Date.now();
+    try {
+        const res = await fetch(url, { signal: ac.signal, headers });
+        if (!res.ok)
+            return { ok: false, latency: Date.now() - start };
+        const body = await res.json().catch(() => null);
+        return { ok: true, latency: Date.now() - start, body: body ?? undefined };
+    }
+    catch {
+        return { ok: false, latency: Date.now() - start };
+    }
+    finally {
+        clearTimeout(timer);
+    }
+}
+/** Probe one specific endpoint. Returns null if nothing answers. */
+export async function probeEndpoint(endpoint, headers) {
+    const base = endpoint.replace(/\/+$/, "");
+    // Try Ollama first (cheap & specific) — body MUST have `.models` array,
+    // otherwise it's just a random service that happens to 200 on /api/tags.
+    const ollama = await probeUrl(`${base}/api/tags`, headers);
+    if (ollama.ok && hasOllamaShape(ollama.body)) {
+        return { kind: "ollama", endpoint: base, models: extractOllamaModels(ollama.body), latency_ms: ollama.latency };
+    }
+    // Then try OpenAI-compatible — body MUST have `.data` array of model entries.
+    const oai = await probeUrl(`${base}/v1/models`, headers);
+    if (oai.ok && hasOpenAiShape(oai.body)) {
+        return { kind: "openai-compat", endpoint: base, models: extractOpenAiModels(oai.body), latency_ms: oai.latency };
+    }
+    return null;
+}
+function hasOllamaShape(body) {
+    return !!body && typeof body === "object" && Array.isArray(body.models);
+}
+function hasOpenAiShape(body) {
+    return !!body && typeof body === "object" && Array.isArray(body.data);
+}
+/** Probe all known local ports — used for first-run auto-detect. */
+export async function autoDetectLocal() {
+    const probes = KNOWN_LOCAL_PORTS.map((p) => probeEndpoint(`http://localhost:${p.port}`));
+    const results = await Promise.all(probes);
+    return results.filter((r) => r !== null);
+}
+function extractOllamaModels(body) {
+    if (!body || typeof body !== "object")
+        return [];
+    const list = body.models;
+    return Array.isArray(list) ? list.map((m) => m.name ?? "").filter(Boolean) : [];
+}
+function extractOpenAiModels(body) {
+    if (!body || typeof body !== "object")
+        return [];
+    const data = body.data;
+    return Array.isArray(data) ? data.map((m) => m.id ?? "").filter(Boolean) : [];
+}
+/** Human-readable runtime label, never throws. */
+export function describeRuntime(kind) {
+    switch (kind) {
+        case "ollama": return "Ollama";
+        case "openai-compat": return "OpenAI-compatible (LM Studio / llama.cpp / vLLM)";
+        default: return "unknown";
+    }
+}

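A minimal sketch of first-run detection with these helpers; the remote host and the log formatting are illustrative assumptions:

import { autoDetectLocal, probeEndpoint, describeRuntime } from "./runtimes.js";

const found = await autoDetectLocal();   // probes localhost ports 11434, 1234, 8080, 8000
for (const rt of found) {
    console.log(`${describeRuntime(rt.kind)} at ${rt.endpoint} (${rt.models?.length ?? 0} models)`);
}
// Or probe one explicit (possibly remote) endpoint; hypothetical host:
const remote = await probeEndpoint("http://gpu.lan:11434");
if (!remote)
    console.log("nothing answered");
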
package/build/local-llm/setup-cli.js
@@ -0,0 +1,298 @@
+/**
+ * Interactive `lucid local <subcmd>` CLI.
+ * Subcommands: init | status | test | disable | pull
+ */
+import { createInterface } from "readline";
+import { spawn } from "child_process";
+import { loadLocalConfig, saveLocalConfig, disableLocalConfig, getConfigPath, } from "./config.js";
+import { autoDetectLocal, probeEndpoint, describeRuntime } from "./runtimes.js";
+import { generate, ping } from "./client.js";
+const RECOMMENDED_MODELS = [
+    { name: "qwen2.5-coder:1.5b", size: "~1 GB", note: "fast on CPU (~30 tok/s) — recommended for brief synthesis" },
+    { name: "qwen2.5-coder:3b", size: "~3 GB", note: "balanced (~15 tok/s on CPU)" },
+    { name: "qwen2.5-coder:7b", size: "~7 GB", note: "best quality (~7 tok/s on CPU; 60+ on GPU)" },
+];
+// ---------------------------------------------------------------------------
+// Entrypoint
+// ---------------------------------------------------------------------------
+export async function runLocalLlmCli(args) {
+    const sub = args[0];
+    if (sub === "init")
+        return await cmdInit(args.slice(1));
+    if (sub === "status")
+        return await cmdStatus();
+    if (sub === "test")
+        return await cmdTest();
+    if (sub === "disable")
+        return cmdDisable();
+    if (sub === "pull")
+        return await cmdPull(args.slice(1));
+    process.stderr.write(`Usage: lucid local <init|status|test|disable|pull <model>>\n`);
+    return 64;
+}
+// ---------------------------------------------------------------------------
+// init — guided 5-step setup
+// ---------------------------------------------------------------------------
+async function cmdInit(_args) {
+    const rl = createInterface({ input: process.stdin, output: process.stdout });
+    try {
+        process.stdout.write("\n🤖 Lucid Local LLM — interactive setup\n");
+        process.stdout.write(` Config will be saved to ${getConfigPath()}\n\n`);
+        // ── Step 1: detect or accept remote endpoint ──────────────────────────
+        process.stdout.write("Step 1/5 Detecting local runtimes…\n");
+        const detected = await autoDetectLocal();
+        let chosen = null;
+        if (detected.length > 0) {
+            process.stdout.write(` Found ${detected.length} runtime(s):\n`);
+            detected.forEach((d, i) => {
+                process.stdout.write(` [${i + 1}] ${describeRuntime(d.kind)} ${d.endpoint} (${d.latency_ms}ms, ${d.models?.length ?? 0} models)\n`);
+            });
+            process.stdout.write(` [r] Enter remote endpoint URL\n`);
+            process.stdout.write(` [s] Skip — show install instructions\n`);
+            const ans = (await ask(rl, " Choice [1]: ")).trim().toLowerCase() || "1";
+            if (ans === "s") {
+                showInstallInstructions();
+                return 0;
+            }
+            if (ans === "r") {
+                chosen = await promptRemoteEndpoint(rl);
+                if (!chosen)
+                    return 1;
+            }
+            else {
+                const idx = Number(ans) - 1;
+                if (Number.isFinite(idx) && idx >= 0 && idx < detected.length) {
+                    chosen = detected[idx];
+                }
+                else {
+                    process.stderr.write(" Invalid choice.\n");
+                    return 64;
+                }
+            }
+        }
+        else {
+            process.stdout.write(" No local runtime detected on common ports (11434, 1234, 8080, 8000).\n");
+            process.stdout.write(" [r] Enter remote endpoint URL\n");
+            process.stdout.write(" [s] Show install instructions and exit\n");
+            const ans = (await ask(rl, " Choice [r]: ")).trim().toLowerCase() || "r";
+            if (ans === "s") {
+                showInstallInstructions();
+                return 0;
+            }
+            chosen = await promptRemoteEndpoint(rl);
+            if (!chosen)
+                return 1;
+        }
+        // ── Step 2: choose model ──────────────────────────────────────────────
+        process.stdout.write("\nStep 2/5 Choose model\n");
+        if (chosen.models && chosen.models.length > 0) {
+            process.stdout.write(" Already pulled on this runtime:\n");
+            chosen.models.slice(0, 10).forEach((m, i) => process.stdout.write(` [${i + 1}] ${m}\n`));
+            process.stdout.write(" [n] None of these — show recommended downloads\n");
+            const ans = (await ask(rl, " Choice [n]: ")).trim().toLowerCase() || "n";
+            if (ans !== "n") {
+                const models = chosen.models;
+                const idx = Number(ans) - 1;
+                if (Number.isFinite(idx) && idx >= 0 && idx < models.length) {
+                    const model = models[idx];
+                    return await finalizeSetup(rl, chosen.kind, chosen.endpoint, model, false);
+                }
+            }
+        }
+        process.stdout.write("\n Recommended (Python-specialized coders):\n");
+        RECOMMENDED_MODELS.forEach((m, i) => {
+            process.stdout.write(` [${i + 1}] ${m.name.padEnd(24)} ${m.size.padEnd(7)} ${m.note}\n`);
+        });
+        process.stdout.write(" [c] Custom model name (already pulled or to pull)\n");
+        const mAns = (await ask(rl, " Choice [1]: ")).trim().toLowerCase() || "1";
+        let modelName;
+        if (mAns === "c") {
+            modelName = (await ask(rl, " Model name (e.g. qwen2.5-coder:7b): ")).trim();
+            if (!modelName) {
+                process.stderr.write(" Empty name.\n");
+                return 64;
+            }
+        }
+        else {
+            const idx = Number(mAns) - 1;
+            if (!Number.isFinite(idx) || idx < 0 || idx >= RECOMMENDED_MODELS.length) {
+                process.stderr.write(" Invalid choice.\n");
+                return 64;
+            }
+            modelName = RECOMMENDED_MODELS[idx].name;
+        }
+        return await finalizeSetup(rl, chosen.kind, chosen.endpoint, modelName, true);
+    }
+    finally {
+        rl.close();
+    }
+}
+async function finalizeSetup(rl, kind, endpoint, model, mayPull) {
+    // ── Step 3: optional pull ─────────────────────────────────────────────
+    if (mayPull && kind === "ollama") {
+        const pullAns = (await ask(rl, `\nStep 3/5 Pull "${model}" via ollama now? [Y/n]: `)).trim().toLowerCase();
+        if (pullAns === "" || pullAns === "y" || pullAns === "yes") {
+            const code = await streamPull(model);
+            if (code !== 0) {
+                process.stderr.write(` ⚠️ ollama pull exited with code ${code}. You can rerun: ollama pull ${model}\n`);
+            }
+        }
+    }
+    else {
+        process.stdout.write("\nStep 3/5 Skipping model pull (handled by runtime).\n");
+    }
+    // ── Step 4: test ──────────────────────────────────────────────────────
+    process.stdout.write("\nStep 4/5 Testing endpoint…\n");
+    const probeCfg = {
+        enabled: true, runtime: kind, endpoint, model,
+        timeout_ms: 30_000,
+        configured_at: new Date().toISOString(),
+    };
+    const reach = await ping(probeCfg);
+    if (!reach.ok) {
+        process.stderr.write(` ❌ Endpoint not reachable: ${reach.detail ?? "?"}\n`);
+        const cont = (await ask(rl, " Save config anyway? [y/N]: ")).trim().toLowerCase();
+        if (cont !== "y" && cont !== "yes")
+            return 1;
+    }
+    else {
+        process.stdout.write(` ✓ Endpoint reachable (${reach.latency_ms}ms). Running 1-token generate…\n`);
+        try {
+            const out = await generate(probeCfg, { prompt: "Say OK.", max_tokens: 8, temperature: 0 });
+            const preview = out.text.replace(/\s+/g, " ").trim().slice(0, 60);
+            process.stdout.write(` ✓ Model responded in ${out.latency_ms}ms: "${preview}"\n`);
+        }
+        catch (e) {
+            process.stderr.write(` ⚠️ Generation failed: ${e.message}\n`);
+            const cont = (await ask(rl, " Save config anyway? [y/N]: ")).trim().toLowerCase();
+            if (cont !== "y" && cont !== "yes")
+                return 1;
+        }
+    }
+    // ── Step 5: save ──────────────────────────────────────────────────────
+    saveLocalConfig(probeCfg);
+    process.stdout.write(`\nStep 5/5 ✅ Saved → ${getConfigPath()}\n`);
+    process.stdout.write(`\nRestart Claude Code to activate. delegate_local() will then be available.\n\n`);
+    return 0;
+}
+async function promptRemoteEndpoint(rl) {
+    const url = (await ask(rl, " Endpoint URL (e.g. http://gpu.lan:11434): ")).trim();
+    if (!url)
+        return null;
+    const apiKey = (await ask(rl, " Bearer token (optional, press Enter to skip): ")).trim();
+    process.stdout.write(` Probing ${url}…\n`);
+    const headers = apiKey ? { Authorization: `Bearer ${apiKey}` } : undefined;
+    const det = await probeEndpoint(url, headers);
+    if (!det) {
+        process.stderr.write(` ❌ No Ollama or OpenAI-compatible endpoint found at ${url}\n`);
+        return null;
+    }
+    process.stdout.write(` ✓ ${describeRuntime(det.kind)} detected (${det.latency_ms}ms, ${det.models?.length ?? 0} models)\n`);
+    // Stash the api key on the returned struct via a side channel (set on cfg later).
+    if (apiKey)
+        det.api_key = apiKey;
+    return det;
+}
+// ---------------------------------------------------------------------------
+// status / test / disable / pull
+// ---------------------------------------------------------------------------
+async function cmdStatus() {
+    const cfg = loadLocalConfig();
+    if (!cfg) {
+        process.stdout.write("Local LLM: not configured. Run `lucid local init`.\n");
+        return 0;
+    }
+    process.stdout.write([
+        `Local LLM: ${cfg.enabled ? "enabled" : "disabled"}`,
+        ` runtime: ${describeRuntime(cfg.runtime)}`,
+        ` endpoint: ${cfg.endpoint}`,
+        ` model: ${cfg.model}`,
+        ` api_key: ${cfg.api_key ? "(set)" : "(none)"}`,
+        ` config: ${getConfigPath()}`,
+        ` saved at: ${cfg.configured_at}`,
+    ].join("\n") + "\n");
+    const reach = await ping(cfg);
+    process.stdout.write(` reachable: ${reach.ok ? `✓ ${reach.latency_ms}ms` : `✗ ${reach.detail ?? "?"}`}\n`);
+    return 0;
+}
+async function cmdTest() {
+    const cfg = loadLocalConfig();
+    if (!cfg) {
+        process.stderr.write("Not configured. Run `lucid local init` first.\n");
+        return 1;
+    }
+    process.stdout.write(`Testing ${cfg.model} on ${cfg.endpoint}…\n`);
+    try {
+        const out = await generate(cfg, {
+            prompt: "Write a one-line Python function that returns the square of its argument.",
+            max_tokens: 64, temperature: 0.1,
+        });
+        process.stdout.write(`✓ ${out.latency_ms}ms (prompt=${out.prompt_tokens ?? "?"}, completion=${out.completion_tokens ?? "?"})\n`);
+        process.stdout.write(`---\n${out.text.trim()}\n---\n`);
+        return 0;
+    }
+    catch (e) {
+        process.stderr.write(`✗ ${e.message}\n`);
+        return 1;
+    }
+}
+function cmdDisable() {
+    const ok = disableLocalConfig();
+    process.stdout.write(ok ? "Local LLM disabled.\n" : "Nothing to disable (not configured).\n");
+    return 0;
+}
+async function cmdPull(args) {
+    const cfg = loadLocalConfig();
+    const model = args[0] ?? cfg?.model;
+    if (!model) {
+        process.stderr.write("Usage: lucid local pull <model>\n");
+        return 64;
+    }
+    if (cfg && cfg.runtime !== "ollama") {
+        process.stderr.write(`pull is only supported for Ollama runtimes. For ${describeRuntime(cfg.runtime)}, fetch the model via its own UI.\n`);
+        return 64;
+    }
+    return await streamPull(model);
+}
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+function ask(rl, prompt) {
+    return new Promise((resolveAns) => rl.question(prompt, resolveAns));
+}
+async function streamPull(model) {
+    return new Promise((resolveCode) => {
+        const proc = spawn("ollama", ["pull", model], { stdio: "inherit" });
+        proc.on("error", (e) => {
+            process.stderr.write(` ⚠️ Could not run ollama: ${e.message}\n`);
+            process.stderr.write(` Install Ollama first (see \`lucid local init\` step 1 instructions).\n`);
+            resolveCode(127);
+        });
+        proc.on("exit", (code) => resolveCode(code ?? 0));
+    });
+}
+function showInstallInstructions() {
+    process.stdout.write([
+        "",
+        "──────────────────────────────────────────────────────────────",
+        "Install a local LLM runtime, then re-run `lucid local init`.",
+        "",
+        " Ollama (recommended, simplest):",
+        " Windows: winget install Ollama.Ollama",
+        " macOS: brew install ollama (or download from ollama.com/download)",
+        " Linux: curl -fsSL https://ollama.com/install.sh | sh",
+        "",
+        " LM Studio (GUI, OpenAI-compatible server):",
+        " Download: https://lmstudio.ai/",
+        " Start the local server in the GUI before re-running setup.",
+        "",
+        " llama.cpp server (advanced):",
+        " https://github.com/ggerganov/llama.cpp → ./server -m model.gguf",
+        "",
+        " Remote endpoint:",
+        " Any of the above hosted on another machine — re-run init and",
+        " pick `[r] Enter remote endpoint URL`. Bearer auth supported.",
+        "──────────────────────────────────────────────────────────────",
+        "",
+    ].join("\n"));
+}

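The actual command wiring lives in build/index.js (only summarized in this diff); a plausible dispatch from a bin entry would look like the sketch below, where the argv position of `local` is an assumption, not confirmed by the package:

import { runLocalLlmCli } from "./local-llm/setup-cli.js";

// e.g. `lucid local init` → process.argv = [node, lucid, "local", "init"]  (assumed layout)
if (process.argv[2] === "local") {
    const exitCode = await runLocalLlmCli(process.argv.slice(3));
    process.exit(exitCode);
}
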
package/build/local-llm/types.d.ts
@@ -0,0 +1,34 @@
+/**
+ * Shared types for the local-LLM subsystem (Ollama / LM Studio / llama.cpp /
+ * any OpenAI-compatible self-hosted endpoint).
+ */
+export type RuntimeKind = "ollama" | "openai-compat" | "unknown";
+export interface LocalLlmConfig {
+    enabled: boolean;
+    runtime: RuntimeKind;
+    endpoint: string;
+    model: string;
+    api_key?: string;
+    timeout_ms: number;
+    configured_at: string;
+}
+export interface DetectedRuntime {
+    kind: RuntimeKind;
+    endpoint: string;
+    models?: string[];
+    latency_ms?: number;
+}
+export interface GenerateRequest {
+    prompt: string;
+    system?: string;
+    max_tokens?: number;
+    temperature?: number;
+    stop?: string[];
+}
+export interface GenerateResponse {
+    text: string;
+    model: string;
+    latency_ms: number;
+    prompt_tokens?: number;
+    completion_tokens?: number;
+}