@shrkcrft/ai 0.1.0-alpha.2 → 0.1.0-alpha.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai-request.d.ts +23 -0
- package/dist/ai-request.d.ts.map +1 -1
- package/dist/delegate/delegate-edit-schema.d.ts +44 -0
- package/dist/delegate/delegate-edit-schema.d.ts.map +1 -0
- package/dist/delegate/delegate-edit-schema.js +77 -0
- package/dist/delegate/parse-delegate-edit.d.ts +46 -0
- package/dist/delegate/parse-delegate-edit.d.ts.map +1 -0
- package/dist/delegate/parse-delegate-edit.js +128 -0
- package/dist/gemini/gemini-provider.d.ts +24 -0
- package/dist/gemini/gemini-provider.d.ts.map +1 -0
- package/dist/gemini/gemini-provider.js +97 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +9 -0
- package/dist/llamacpp/llama-cpp-provider.d.ts +56 -0
- package/dist/llamacpp/llama-cpp-provider.d.ts.map +1 -0
- package/dist/llamacpp/llama-cpp-provider.js +296 -0
- package/dist/llm-hints.d.ts +36 -0
- package/dist/llm-hints.d.ts.map +1 -0
- package/dist/llm-hints.js +92 -0
- package/dist/llm-recommendations.d.ts +72 -0
- package/dist/llm-recommendations.d.ts.map +1 -0
- package/dist/llm-recommendations.js +188 -0
- package/dist/ollama/ollama-provider.d.ts +47 -0
- package/dist/ollama/ollama-provider.d.ts.map +1 -0
- package/dist/ollama/ollama-provider.js +190 -0
- package/dist/pipeline/enhancement-pipeline.d.ts +151 -0
- package/dist/pipeline/enhancement-pipeline.d.ts.map +1 -0
- package/dist/pipeline/enhancement-pipeline.js +339 -0
- package/dist/provider-resolver.d.ts +28 -0
- package/dist/provider-resolver.d.ts.map +1 -0
- package/dist/provider-resolver.js +80 -0
- package/package.json +6 -5
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
import { existsSync } from 'node:fs';
|
|
2
|
+
import * as nodePath from 'node:path';
|
|
3
|
+
import { AppErrorImpl, ERROR_CODES, err, ok } from '@shrkcrft/core';
|
|
4
|
+
import { AbstractAiProvider } from "../ai-provider.js";
|
|
5
|
+
import { AiMessageRole } from "../ai-request.js";
|
|
6
|
+
const DEFAULT_CONTEXT_SIZE = 8192;
|
|
7
|
+
const DEFAULT_MAX_TOKENS = 1024;
|
|
8
|
+
/**
 * In-process generative provider backed by `node-llama-cpp` (a Node
 * binding for llama.cpp). No HTTP. No daemon. The model is loaded
 * once into process memory and reused across requests.
 *
 * Configuration (env or `IAiProviderConfig`):
 *   - `LLAMACPP_MODEL_PATH`   — absolute or cwd-relative path to a local
 *                               `.gguf` file. Used when neither the request
 *                               nor the provider config names a model. If no
 *                               existing file can be resolved, `isReady()`
 *                               is false and `send()` returns INVALID_INPUT.
 *   - `LLAMACPP_CONTEXT_SIZE` — context window in tokens (default 8192).
 *                               Values that are not positive integers fall
 *                               back to the default.
 *   - `LLAMACPP_GPU`          — `auto` (default) | `metal` | `cuda` | `off`.
 *
 * The first `send()` call pays the model-load cost. Subsequent calls reuse
 * the same `LlamaModel` + `LlamaContext` from the module-level cache. A
 * fresh `LlamaChatSession` is created per request so context isn't leaked
 * between unrelated tasks, and the session and its sequence are released
 * again whether the request succeeds, fails, or times out.
 *
 * Tests can inject a fake generator via `_overrideForTests` to avoid
 * pulling in the native binding and a model file.
 */
export class LlamaCppProvider extends AbstractAiProvider {
    id = 'llamacpp';
    name = 'llama.cpp (in-process)';
    /** Test hook — bypasses the native binding when set. Called as `(request, modelPath)`. */
    static _overrideForTests = null;
    /**
     * Reads the module-level cache to expose the active model path for
     * tools that need it (mostly the disposer).
     *
     * @returns the resolved path of the currently loaded model, or null when
     *          no model has been loaded in this process.
     */
    static activeModelPath() {
        return sharedLlamaState?.modelPath ?? null;
    }
    /**
     * @returns true when the configured model (or `LLAMACPP_MODEL_PATH`)
     *          resolves to an existing path. Does not load the model.
     */
    isReady() {
        return resolveModelPath(this.config.model) !== null;
    }
    /**
     * Runs one chat completion against the local model.
     *
     * @param request chat request; `messages` supplies system/user/assistant
     *                turns, with optional `model`, `maxTokens`, `temperature`,
     *                `responseFormat`, `onTokenStream` and `timeoutMs`.
     * @returns `ok(...)` with the generated text, or `err(...)` carrying
     *          INVALID_INPUT (no model file), TIMEOUT (per-call budget
     *          exceeded) or IO_ERROR (anything else). Never throws.
     */
    async send(request) {
        const modelPath = resolveModelPath(request.model ?? this.config.model);
        if (modelPath === null) {
            return err(new AppErrorImpl(ERROR_CODES.INVALID_INPUT, 'LLAMACPP_MODEL_PATH is not set or the file does not exist.', {
                suggestion: 'Set LLAMACPP_MODEL_PATH=/path/to/qwen2.5-coder-3b.gguf in .env, or pass --model <path> on the CLI.',
            }));
        }
        // Anything can be thrown in JS; only Error instances are guaranteed a `.message`.
        const describeFailure = (e) => (e instanceof Error ? e.message : String(e));
        if (LlamaCppProvider._overrideForTests) {
            try {
                const value = await LlamaCppProvider._overrideForTests(request, modelPath);
                return ok(value);
            }
            catch (e) {
                return err(new AppErrorImpl(ERROR_CODES.IO_ERROR, `Test override failed: ${describeFailure(e)}`, {
                    cause: e,
                }));
            }
        }
        // Single place that builds the timeout result; used both when the
        // aborted prompt throws and when it resolves with partial text.
        const timeoutFailure = () => err(new AppErrorImpl(ERROR_CODES.TIMEOUT, `node-llama-cpp decode exceeded the per-call timeout and was aborted.`, {
            suggestion: 'The model is too slow for the budget. Try a smaller model, fewer --enhance-passes, or raise the budget.',
        }));
        let promptAbort;
        let promptTimer;
        let promptTimedOut = false;
        // Declared out here so the `finally` block can release them on every path.
        let sequence;
        let session;
        try {
            const tf = (await import('node-llama-cpp'));
            const { LlamaChatSession } = tf;
            const { context } = await this.ensureLoaded(modelPath);
            sequence = context.getSequence();
            session = new LlamaChatSession({
                contextSequence: sequence,
                systemPrompt: collectSystemPrompt(request.messages),
            });
            // Prior assistant/user turns get fed into the session in order so
            // the model sees the conversation history. The trailing user turn
            // is what we ask `prompt()` to respond to.
            const turns = nonSystemTurns(request.messages);
            for (let i = 0; i < turns.length - 1; i += 1) {
                const turn = turns[i];
                if (turn.role === AiMessageRole.Assistant) {
                    // node-llama-cpp 3.x exposes session.addAssistantMessage in some
                    // versions; older versions don't. Best effort: skip silently.
                    const fn = session.addAssistantMessage;
                    if (typeof fn === 'function')
                        fn.call(session, turn.content);
                    continue;
                }
                // For user turns that aren't the trailing one, prime them so the
                // assistant response gets folded back into the context too.
                await session.prompt(turn.content, {
                    maxTokens: 1,
                    stopOnAbortSignal: true,
                });
            }
            const lastUser = turns[turns.length - 1];
            const userPrompt = lastUser && lastUser.role === AiMessageRole.User ? lastUser.content : '';
            const maxTokens = request.maxTokens ?? DEFAULT_MAX_TOKENS;
            const wantsJson = !!request.responseFormat;
            // When the caller wants JSON, ask llama.cpp to enforce it at
            // sample time via a grammar. This eliminates a whole class of
            // parse failures (preamble prose, trailing markdown, runaway
            // continuation) that small models routinely produce. Best effort:
            // if the grammar constructor isn't available in this version we
            // fall back to plain prompting + trim.
            let grammar = undefined;
            if (wantsJson) {
                try {
                    const Ctor = tf.LlamaJsonSchemaGrammar;
                    // CRITICAL: pass the *same* Llama instance the model was
                    // loaded with. node-llama-cpp rejects mixing grammars from
                    // one instance with a session from another ("The
                    // LlamaGrammar … was created with a different Llama
                    // instance"). Calling getLlama() again would also leak a
                    // second native Metal device, which then crashes the
                    // process on exit (`ggml_metal_device_free`).
                    const sharedLlama = sharedLlamaState?.llama;
                    if (Ctor && request.responseFormat?.schema && sharedLlama) {
                        grammar = new Ctor(sharedLlama, request.responseFormat.schema);
                    }
                }
                catch {
                    grammar = undefined;
                }
            }
            const start = Date.now();
            const onChunk = request.onTokenStream;
            // Per-call wall-clock timeout: abort the decode if it overruns so a
            // slow model can't hang the command.
            const timeoutMs = request.timeoutMs ?? this.config.timeoutMs;
            if (timeoutMs && timeoutMs > 0) {
                promptAbort = new AbortController();
                promptTimer = setTimeout(() => {
                    promptTimedOut = true;
                    promptAbort?.abort();
                }, timeoutMs);
            }
            const text = await session.prompt(userPrompt, {
                maxTokens,
                ...(request.temperature !== undefined ? { temperature: request.temperature } : {}),
                ...(wantsJson ? { trimWhitespaceSuffix: true } : {}),
                ...(grammar ? { grammar: grammar } : {}),
                ...(promptAbort ? { signal: promptAbort.signal, stopOnAbortSignal: true } : {}),
                ...(onChunk
                    ? {
                        onTextChunk: (chunk) => {
                            try {
                                onChunk(chunk);
                            }
                            catch {
                                // never let a callback failure break inference
                            }
                        },
                    }
                    : {}),
            });
            // With `stopOnAbortSignal`, an abort that lands mid-generation makes
            // `prompt()` resolve with whatever was produced so far instead of
            // throwing. That text is truncated (and, for JSON, unparseable), so
            // report the timeout rather than passing it off as a finished answer.
            if (promptTimedOut) {
                return timeoutFailure();
            }
            const elapsedMs = Date.now() - start;
            return ok({
                content: text,
                model: nodePath.basename(modelPath),
                finishReason: 'stop',
                usage: {
                // node-llama-cpp does not surface input/output token counts in a
                // stable v3 API path; we leave usage empty and let callers
                // approximate from char count if needed.
                },
                raw: { backend: 'node-llama-cpp', modelPath, elapsedMs },
            });
        }
        catch (e) {
            if (promptTimedOut) {
                return timeoutFailure();
            }
            return err(new AppErrorImpl(ERROR_CODES.IO_ERROR, `node-llama-cpp call failed: ${describeFailure(e)}`, {
                cause: e,
                suggestion: 'Verify LLAMACPP_MODEL_PATH points to a valid .gguf file readable by llama.cpp.',
            }));
        }
        finally {
            if (promptTimer)
                clearTimeout(promptTimer);
            // Release the session and its LlamaContext sequence on EVERY exit
            // path so the next send() can take a sequence. Without this we hit
            // "No sequences left" after a failed or timed-out call. The
            // LlamaModel + LlamaContext themselves stay loaded across calls.
            // `callMaybeDispose` tolerates undefined targets and swallows errors.
            await callMaybeDispose(session);
            await callMaybeDispose(sequence);
        }
    }
    /**
     * Returns the cached model + context for `modelPath`, loading them (and
     * tearing down a previously loaded, different model) when needed.
     *
     * @param modelPath resolved path of the `.gguf` file to load.
     * @returns `{ model, context }` from the module-level cache.
     * @throws whatever `node-llama-cpp` throws while importing, loading the
     *         model or creating the context; partially created native
     *         handles are released before rethrowing.
     */
    async ensureLoaded(modelPath) {
        // Cached at MODULE scope so the disposer can find it on process
        // exit. (Per-instance caching used to live here, but the disposer
        // doesn't know which provider instance to ask.)
        if (sharedLlamaState && sharedLlamaState.modelPath === modelPath) {
            return { model: sharedLlamaState.model, context: sharedLlamaState.context };
        }
        if (sharedLlamaState) {
            // Different model requested — tear down the old one before
            // loading a new one. Best-effort; failures are tolerated.
            await disposeLlamaCppRuntime();
        }
        const { getLlama } = (await import('node-llama-cpp'));
        const llama = await getLlama({
            gpu: resolveGpuChoice(this.config.baseUrl),
        });
        // Honour LLAMACPP_CONTEXT_SIZE only when it is a usable positive
        // integer; unset, empty or garbage values fall back to the default
        // instead of handing NaN/0 to the native layer.
        const requestedContextSize = Number(process.env.LLAMACPP_CONTEXT_SIZE);
        const contextSize = Number.isInteger(requestedContextSize) && requestedContextSize > 0
            ? requestedContextSize
            : DEFAULT_CONTEXT_SIZE;
        let model;
        let context;
        try {
            model = await llama.loadModel({ modelPath });
            context = await model.createContext({ contextSize });
        }
        catch (e) {
            // Nothing was cached yet, so the disposer cannot see these handles;
            // release them here rather than leaking a native runtime.
            await callMaybeDispose(model);
            await callMaybeDispose(llama);
            throw e;
        }
        sharedLlamaState = { llama, model, context, modelPath };
        return { model, context };
    }
}
|
|
221
|
+
// Module-level cache of the loaded runtime as `{ llama, model, context, modelPath }`,
// or null when nothing is loaded. Set by `ensureLoaded`, cleared by `disposeLlamaCppRuntime`.
let sharedLlamaState = null;
|
|
222
|
+
/**
 * Tears down the cached llama.cpp runtime so the process can exit cleanly.
 *
 * The cache slot is cleared first, then the cached handles are disposed one
 * at a time in dependency order: the context (it holds the sequence pool
 * that depends on the model), the model (it depends on the llama runtime),
 * and finally the Llama instance itself (releases the Metal device).
 *
 * Calling it repeatedly, or when nothing was ever loaded, is harmless.
 * Dispose errors are swallowed by `callMaybeDispose`, because surfacing them
 * would bring back the abort this function exists to prevent.
 *
 * Note: libggml/Metal was loaded — even after disposing, the native static
 * destructor can still run during `exit()` and abort with a GGML backtrace.
 * The caller redirects fd 2 to a log file to contain it.
 *
 * @returns true when a cached runtime existed and was released, false when
 *          there was nothing to dispose.
 */
export async function disposeLlamaCppRuntime() {
    const previous = sharedLlamaState;
    // Clear the slot before any await so a concurrent caller sees "nothing loaded".
    sharedLlamaState = null;
    if (!previous) {
        return false;
    }
    const teardownOrder = [previous.context, previous.model, previous.llama];
    for (const handle of teardownOrder) {
        await callMaybeDispose(handle);
    }
    return true;
}
|
|
253
|
+
/**
 * Best-effort disposer: invokes `target.dispose()` when `target` is an object
 * that has such a method, waits for the result if it is thenable, and never
 * lets a failure escape.
 *
 * @param target any value; non-objects and objects without a `dispose`
 *               function are ignored.
 * @returns a promise that always resolves to undefined.
 */
async function callMaybeDispose(target) {
    const isObject = Boolean(target) && typeof target === 'object';
    if (!isObject || typeof target.dispose !== 'function') {
        return;
    }
    try {
        const outcome = target.dispose();
        const isThenable = Boolean(outcome) && typeof outcome.then === 'function';
        if (isThenable) {
            // Swallow asynchronous rejections too.
            await outcome.catch(() => undefined);
        }
    }
    catch {
        // Synchronous throw from dispose(): deliberately ignored.
    }
}
|
|
269
|
+
/**
 * Picks the model path to use and checks that it exists on disk.
 *
 * A non-empty `explicit` value wins; otherwise `LLAMACPP_MODEL_PATH` is used.
 * Absolute paths are returned unchanged, relative ones are resolved against
 * the current working directory.
 *
 * @param explicit model path from the request or provider config (may be
 *                 undefined or empty).
 * @returns the path to load, or null when no candidate is configured or the
 *          candidate does not exist.
 */
function resolveModelPath(explicit) {
    const hasExplicit = Boolean(explicit) && explicit.length > 0;
    const requested = hasExplicit ? explicit : process.env.LLAMACPP_MODEL_PATH;
    if (!requested) {
        return null;
    }
    const located = nodePath.isAbsolute(requested)
        ? requested
        : nodePath.resolve(process.cwd(), requested);
    if (!existsSync(located)) {
        return null;
    }
    return located;
}
|
|
280
|
+
function resolveGpuChoice(_baseUrl) {
|
|
281
|
+
const choice = (process.env.LLAMACPP_GPU ?? 'auto').trim().toLowerCase();
|
|
282
|
+
if (choice === 'metal')
|
|
283
|
+
return 'metal';
|
|
284
|
+
if (choice === 'cuda')
|
|
285
|
+
return 'cuda';
|
|
286
|
+
if (choice === 'off' || choice === 'false' || choice === 'no' || choice === 'cpu')
|
|
287
|
+
return false;
|
|
288
|
+
return 'auto';
|
|
289
|
+
}
|
|
290
|
+
/**
 * Builds the session's system prompt from a message list.
 *
 * @param messages chat messages, each with a `role` and `content`.
 * @returns the `content` of every system-role message, in order, separated
 *          by a blank line; an empty string when there are none.
 */
function collectSystemPrompt(messages) {
    const systemTexts = messages.reduce((collected, message) => {
        if (message.role === AiMessageRole.System) {
            collected.push(message.content);
        }
        return collected;
    }, []);
    return systemTexts.join('\n\n');
}
|
|
294
|
+
/**
 * Selects the conversational turns of a message list.
 *
 * @param messages chat messages, each with a `role`.
 * @returns a new array holding every message whose role is not the system
 *          role, in the original order.
 */
function nonSystemTurns(messages) {
    const isConversationTurn = (message) => message.role !== AiMessageRole.System;
    return messages.filter(isConversationTurn);
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import type { AiProviderKind } from './provider-resolver.js';
|
|
2
|
+
import type { IAiProvider } from './ai-provider.js';
|
|
3
|
+
/** Category of an AI hint: `setup` (no provider reachable), `upgrade` (tuning tips) or `info` (status notes). */
export type AiHintLevel = 'setup' | 'upgrade' | 'info';
|
|
4
|
+
/** One piece of guidance carried by an AI block: a level, a heading and ordered steps. */
export interface IAiHint {
    /** Which kind of hint this is. */
    level: AiHintLevel;
    /** Heading text for the hint. */
    title: string;
    /** Step texts in display order; callers must not mutate the array. */
    steps: ReadonlyArray<string>;
}
|
|
9
|
+
/** Structured description of the AI/LLM state attached to a report. */
export interface IAiBlock {
    /** True when a provider was selected (the selection's provider is not null). */
    reachable: boolean;
    /** The provider kind that was asked for. */
    requestedProvider: AiProviderKind;
    /** `id` of the selected provider, or null when there is none. */
    providerId: string | null;
    /** True when the user opted out of enhancement. */
    enhancementSkipped: boolean;
    /** Guidance entries for the reader; callers must not mutate the array. */
    hints: ReadonlyArray<IAiHint>;
}
|
|
16
|
+
/** Options accepted by `buildAiBlock`; every field is optional. */
export interface IBuildAiBlockInput {
    /**
     * Result of a prior `selectAiProvider` call. Leave it out (or pass null)
     * when the caller did not attempt a selection.
     */
    selection?: null | {
        requested: AiProviderKind;
        provider: IAiProvider | null;
    };
    /** Set when `--no-enhance` was passed, i.e. the user opted out and should not be nagged. */
    userOptedOut?: boolean;
}
|
|
25
|
+
/**
|
|
26
|
+
* Produces the structured `ai` block that lives on every audit report
|
|
27
|
+
* and any command using `enrichWithLlmRecommendations`. Without the
|
|
28
|
+
* AI block, `--no-enhance` and "no provider reachable" look the same
|
|
29
|
+
* to a downstream agent. The block disambiguates.
|
|
30
|
+
*
|
|
31
|
+
* Lives in `@shrkcrft/ai` so any package (CLI, packs, MCP server's
|
|
32
|
+
* read-only surfaces) can construct the same shape.
|
|
33
|
+
*/
|
|
34
|
+
/**
 * @param input optional selection result and opt-out flag; may be omitted entirely.
 * @returns the `IAiBlock` describing reachability, the requested/selected provider and hints.
 */
export declare function buildAiBlock(input?: IBuildAiBlockInput): IAiBlock;
|
|
35
|
+
/**
 * Renders an AI block as Markdown text.
 *
 * @param block the block to render.
 * @returns the rendered string.
 */
export declare function renderAiBlockMarkdown(block: IAiBlock): string;
|
|
36
|
+
//# sourceMappingURL=llm-hints.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llm-hints.d.ts","sourceRoot":"","sources":["../src/llm-hints.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AAC7D,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAEpD,MAAM,MAAM,WAAW,GAAG,OAAO,GAAG,SAAS,GAAG,MAAM,CAAC;AAEvD,MAAM,WAAW,OAAO;IACtB,KAAK,EAAE,WAAW,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,SAAS,MAAM,EAAE,CAAC;CAC1B;AAED,MAAM,WAAW,QAAQ;IACvB,SAAS,EAAE,OAAO,CAAC;IACnB,iBAAiB,EAAE,cAAc,CAAC;IAClC,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,kBAAkB,EAAE,OAAO,CAAC;IAC5B,KAAK,EAAE,SAAS,OAAO,EAAE,CAAC;CAC3B;AAED,MAAM,WAAW,kBAAkB;IACjC,0EAA0E;IAC1E,SAAS,CAAC,EAAE;QAAE,SAAS,EAAE,cAAc,CAAC;QAAC,QAAQ,EAAE,WAAW,GAAG,IAAI,CAAA;KAAE,GAAG,IAAI,CAAC;IAC/E,sEAAsE;IACtE,YAAY,CAAC,EAAE,OAAO,CAAC;CACxB;AAED;;;;;;;;GAQG;AACH,wBAAgB,YAAY,CAAC,KAAK,GAAE,kBAAuB,GAAG,QAAQ,CAiErE;AAED,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,QAAQ,GAAG,MAAM,CAiB7D"}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import { selectAiProvider } from "./provider-resolver.js";
|
|
2
|
+
/**
|
|
3
|
+
* Produces the structured `ai` block that lives on every audit report
|
|
4
|
+
* and any command using `enrichWithLlmRecommendations`. Without the
|
|
5
|
+
* AI block, `--no-enhance` and "no provider reachable" look the same
|
|
6
|
+
* to a downstream agent. The block disambiguates.
|
|
7
|
+
*
|
|
8
|
+
* Lives in `@shrkcrft/ai` so any package (CLI, packs, MCP server's
|
|
9
|
+
* read-only surfaces) can construct the same shape.
|
|
10
|
+
*/
|
|
11
|
+
export function buildAiBlock(input = {}) {
|
|
12
|
+
// Honour an explicitly-passed selection (including {provider: null} when
|
|
13
|
+
// --no-enhance is in play) without re-probing the auto chain. Only fall
|
|
14
|
+
// back to live probing when the caller didn't supply a selection at all.
|
|
15
|
+
const selection = input.selection !== undefined && input.selection !== null
|
|
16
|
+
? input.selection
|
|
17
|
+
: input.userOptedOut
|
|
18
|
+
? { requested: 'auto', provider: null }
|
|
19
|
+
: selectAiProvider(undefined);
|
|
20
|
+
const reachable = selection.provider !== null;
|
|
21
|
+
const providerId = selection.provider?.id ?? null;
|
|
22
|
+
const requested = selection.requested;
|
|
23
|
+
const userOptedOut = Boolean(input.userOptedOut);
|
|
24
|
+
const hints = [];
|
|
25
|
+
if (!reachable && !userOptedOut) {
|
|
26
|
+
hints.push({
|
|
27
|
+
level: 'setup',
|
|
28
|
+
title: 'Enable LLM enrichment for deeper analysis',
|
|
29
|
+
steps: [
|
|
30
|
+
'Local-first: install Ollama (https://ollama.com/download) or set LLAMACPP_MODEL_PATH for in-process inference.',
|
|
31
|
+
'Pull a model that fits your machine — e.g. `ollama pull llama3.2` (good general-purpose) or `ollama pull qwen2.5-coder:7b` (code-aware).',
|
|
32
|
+
'Optional: export OLLAMA_HOST=http://localhost:11434 (default) or point at a remote daemon.',
|
|
33
|
+
'Optional: export OLLAMA_MODEL=<id> to pin the model used by shrk.',
|
|
34
|
+
'Re-run without --no-enhance. The deterministic findings are unchanged; LLM critique appears under `llmFindings`.',
|
|
35
|
+
],
|
|
36
|
+
});
|
|
37
|
+
}
|
|
38
|
+
else if (!reachable && userOptedOut) {
|
|
39
|
+
hints.push({
|
|
40
|
+
level: 'info',
|
|
41
|
+
title: 'LLM enrichment disabled by --no-enhance',
|
|
42
|
+
steps: [
|
|
43
|
+
'Deterministic findings are first-class; LLM is purely additive.',
|
|
44
|
+
'Drop --no-enhance to layer LLM critique on top when a provider is available.',
|
|
45
|
+
],
|
|
46
|
+
});
|
|
47
|
+
}
|
|
48
|
+
else {
|
|
49
|
+
hints.push({
|
|
50
|
+
level: 'info',
|
|
51
|
+
title: `LLM enrichment active via ${providerId}`,
|
|
52
|
+
steps: [
|
|
53
|
+
'LLM-derived findings appear with `[llm]` tags and a confidence score.',
|
|
54
|
+
'Tune behavior: --provider ollama|llamacpp, --model <id>, AI_PROVIDER env var (overrides --provider when unset).',
|
|
55
|
+
],
|
|
56
|
+
});
|
|
57
|
+
hints.push({
|
|
58
|
+
level: 'upgrade',
|
|
59
|
+
title: 'Sharpen LLM output if findings feel thin',
|
|
60
|
+
steps: [
|
|
61
|
+
'Prefer a code-aware model for technical staleness checks (e.g. qwen2.5-coder:7b, deepseek-coder-v2).',
|
|
62
|
+
'Larger models notice more drift but cost latency — try 7B for code, 14B+ for nuanced doc-content review.',
|
|
63
|
+
'For fix-plan enrichment, the same provider is reused; no separate config needed.',
|
|
64
|
+
],
|
|
65
|
+
});
|
|
66
|
+
}
|
|
67
|
+
return {
|
|
68
|
+
reachable,
|
|
69
|
+
requestedProvider: requested,
|
|
70
|
+
providerId,
|
|
71
|
+
enhancementSkipped: userOptedOut,
|
|
72
|
+
hints,
|
|
73
|
+
};
|
|
74
|
+
}
|
|
75
|
+
export function renderAiBlockMarkdown(block) {
|
|
76
|
+
const out = [];
|
|
77
|
+
const status = block.reachable
|
|
78
|
+
? `active via \`${block.providerId}\``
|
|
79
|
+
: block.enhancementSkipped
|
|
80
|
+
? 'disabled by `--no-enhance`'
|
|
81
|
+
: 'unavailable (no local LLM detected)';
|
|
82
|
+
out.push(`## AI configuration — ${status}`);
|
|
83
|
+
out.push('');
|
|
84
|
+
for (const hint of block.hints) {
|
|
85
|
+
out.push(`### [${hint.level}] ${hint.title}`);
|
|
86
|
+
for (const step of hint.steps) {
|
|
87
|
+
out.push(`- ${step}`);
|
|
88
|
+
}
|
|
89
|
+
out.push('');
|
|
90
|
+
}
|
|
91
|
+
return out.join('\n');
|
|
92
|
+
}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import type { IAiProvider } from './ai-provider.js';
|
|
2
|
+
import { type IAiBlock } from './llm-hints.js';
|
|
3
|
+
/** Severity label attached to a single LLM recommendation. */
export type RecommendationSeverity = 'info' | 'warn' | 'error';
|
|
4
|
+
/** A single recommendation produced on top of a deterministic surface. */
export interface ILlmRecommendation {
    /** How serious the recommendation is. */
    severity: RecommendationSeverity;
    /** Free-form grouping label for the recommendation. */
    category: string;
    /** One short sentence stating what is recommended. */
    title: string;
    /** Longer prose, typically 1-3 sentences, with concrete next steps. */
    detail: string;
    /** Identifier of what the recommendation applies to (rule id, template id, file path), when there is one. */
    target?: string;
    /** Confidence within [0, 1]; fuzzier judgments get lower values. */
    confidence: number;
}
|
|
16
|
+
/** Result shape of `enrichWithLlmRecommendations`: the AI block plus the recommendations. */
export interface IRecommendationEnvelope {
    /** AI block; always present, even when no LLM is available. */
    ai: IAiBlock;
    /** The recommendations; callers must not mutate the array. */
    recommendations: ReadonlyArray<ILlmRecommendation>;
}
|
|
21
|
+
/** Input accepted by `enrichWithLlmRecommendations`. */
export interface IEnrichWithLlmRecommendationsInput {
    /**
     * Name of the deterministic surface being enriched (e.g. 'doctor',
     * 'templates-drift'). It goes into the LLM prompt so the model knows
     * what it is looking at.
     */
    surface: string;
    /**
     * Human-readable summary of the deterministic findings, i.e. what is
     * already known. Keep it tight: the prompt has to fit one LLM call.
     */
    deterministicSummary: string;
    /** Provider kind to request; 'auto' (local-first walk) when omitted. */
    providerKind?: string;
    /**
     * An already-resolved provider that replaces auto-selection
     * (useful for tests).
     */
    providerOverride?: IAiProvider | null;
    /**
     * Set when the caller's `--no-enhance` equivalent was passed. No LLM
     * call is made in that case and the AI block records the opt-out.
     */
    userOptedOut?: boolean;
    /**
     * The per-surface request: what the LLM should produce on top of the
     * deterministic summary. E.g. "for each warning, produce one concrete
     * next-step the user can run from the CLI."
     */
    ask: string;
    /** Overrides the model used by the provider. */
    model?: string;
    /** Optional token limit — presumably forwarded to the provider request; confirm against the implementation. */
    maxTokens?: number;
}
|
|
58
|
+
/**
|
|
59
|
+
* Shared utility for layering LLM recommendations onto any deterministic
|
|
60
|
+
* surface. The deterministic portion is the caller's responsibility; this
|
|
61
|
+
* helper only adds the `ai` block and a structured `recommendations` array.
|
|
62
|
+
*
|
|
63
|
+
* Hard guarantee: if no LLM is reachable (or `userOptedOut` is true), the
|
|
64
|
+
* call is a no-op apart from emitting the `ai` block with setup hints.
|
|
65
|
+
*
|
|
66
|
+
* Lives in `@shrkcrft/ai` so any callable surface (CLI commands, packs,
|
|
67
|
+
* read-only MCP tools that want recommendations alongside their data)
|
|
68
|
+
* can reuse the same envelope shape.
|
|
69
|
+
*/
|
|
70
|
+
/**
 * @param input surface name, deterministic summary, the ask, and optional provider/model overrides.
 * @returns a promise of the envelope holding the `ai` block and the `recommendations` array.
 */
export declare function enrichWithLlmRecommendations(input: IEnrichWithLlmRecommendationsInput): Promise<IRecommendationEnvelope>;
|
|
71
|
+
/**
 * Renders a recommendation envelope to text — Markdown, judging by the name; the implementation is not visible in this declaration file.
 *
 * @param envelope the envelope to render.
 * @returns the rendered string.
 */
export declare function renderRecommendationsMarkdown(envelope: IRecommendationEnvelope): string;
|
|
72
|
+
//# sourceMappingURL=llm-recommendations.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llm-recommendations.d.ts","sourceRoot":"","sources":["../src/llm-recommendations.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAEpD,OAAO,EAAgB,KAAK,QAAQ,EAAE,MAAM,gBAAgB,CAAC;AAE7D,MAAM,MAAM,sBAAsB,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC;AAE/D,MAAM,WAAW,kBAAkB;IACjC,QAAQ,EAAE,sBAAsB,CAAC;IACjC,QAAQ,EAAE,MAAM,CAAC;IACjB,6DAA6D;IAC7D,KAAK,EAAE,MAAM,CAAC;IACd,wEAAwE;IACxE,MAAM,EAAE,MAAM,CAAC;IACf,kGAAkG;IAClG,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,yDAAyD;IACzD,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,uBAAuB;IACtC,oDAAoD;IACpD,EAAE,EAAE,QAAQ,CAAC;IACb,eAAe,EAAE,SAAS,kBAAkB,EAAE,CAAC;CAChD;AAED,MAAM,WAAW,kCAAkC;IACjD;;;OAGG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;;OAGG;IACH,oBAAoB,EAAE,MAAM,CAAC;IAC7B;;OAEG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB;;;OAGG;IACH,gBAAgB,CAAC,EAAE,WAAW,GAAG,IAAI,CAAC;IACtC;;;OAGG;IACH,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB;;;;OAIG;IACH,GAAG,EAAE,MAAM,CAAC;IACZ;;OAEG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;;;;;;;;;;GAWG;AACH,wBAAsB,4BAA4B,CAChD,KAAK,EAAE,kCAAkC,GACxC,OAAO,CAAC,uBAAuB,CAAC,CAoClC;AA2FD,wBAAgB,6BAA6B,CAAC,QAAQ,EAAE,uBAAuB,GAAG,MAAM,CA+BvF"}
|