@shrkcrft/ai 0.1.0-alpha.13 → 0.1.0-alpha.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai-request.d.ts +8 -0
- package/dist/ai-request.d.ts.map +1 -1
- package/dist/llamacpp/llama-cpp-provider.d.ts +1 -1
- package/dist/llamacpp/llama-cpp-provider.d.ts.map +1 -1
- package/dist/llamacpp/llama-cpp-provider.js +29 -1
- package/dist/ollama/ollama-provider.d.ts.map +1 -1
- package/dist/ollama/ollama-provider.js +24 -0
- package/dist/pipeline/enhancement-pipeline.d.ts +28 -0
- package/dist/pipeline/enhancement-pipeline.d.ts.map +1 -1
- package/dist/pipeline/enhancement-pipeline.js +44 -0
- package/package.json +3 -3
package/dist/ai-request.d.ts
CHANGED
|
@@ -23,6 +23,14 @@ export interface IAiRequest {
|
|
|
23
23
|
* progress without the synchronous wait.
|
|
24
24
|
*/
|
|
25
25
|
onTokenStream?: (chunk: string) => void;
|
|
26
|
+
/**
|
|
27
|
+
* Per-call wall-clock timeout in milliseconds. When set and exceeded, the
|
|
28
|
+
* provider aborts the in-flight request and returns an `AppError` with code
|
|
29
|
+
* `TIMEOUT`. Bounds slow local models so a single call can never hang the
|
|
30
|
+
* command. Takes precedence over the provider's `config.timeoutMs`; when
|
|
31
|
+
* neither is set, no timeout is applied.
|
|
32
|
+
*/
|
|
33
|
+
timeoutMs?: number;
|
|
26
34
|
}
|
|
27
35
|
export interface IAiResponse {
|
|
28
36
|
content: string;
|
package/dist/ai-request.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-request.d.ts","sourceRoot":"","sources":["../src/ai-request.ts"],"names":[],"mappings":"AAAA,oBAAY,aAAa;IACvB,MAAM,WAAW;IACjB,IAAI,SAAS;IACb,SAAS,cAAc;CACxB;AAED,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,aAAa,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,UAAU;IACzB,QAAQ,EAAE,SAAS,UAAU,EAAE,CAAC;IAChC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,cAAc,CAAC,EAAE,iBAAiB,CAAC;IACnC;;;;;;;OAOG;IACH,aAAa,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;
|
|
1
|
+
{"version":3,"file":"ai-request.d.ts","sourceRoot":"","sources":["../src/ai-request.ts"],"names":[],"mappings":"AAAA,oBAAY,aAAa;IACvB,MAAM,WAAW;IACjB,IAAI,SAAS;IACb,SAAS,cAAc;CACxB;AAED,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,aAAa,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,UAAU;IACzB,QAAQ,EAAE,SAAS,UAAU,EAAE,CAAC;IAChC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,cAAc,CAAC,EAAE,iBAAiB,CAAC;IACnC;;;;;;;OAOG;IACH,aAAa,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;IACxC;;;;;;OAMG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE;QAAE,WAAW,CAAC,EAAE,MAAM,CAAC;QAAC,YAAY,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IACxD,GAAG,CAAC,EAAE,OAAO,CAAC;CACf;AAED,MAAM,WAAW,iBAAiB;IAChC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,aAAa,GAAG,aAAa,CAAC;IACpC,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACjC,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB"}
|
|
@@ -52,5 +52,5 @@ export declare class LlamaCppProvider extends AbstractAiProvider {
|
|
|
52
52
|
* loaded. Errors during dispose are swallowed (the alternative is
|
|
53
53
|
* the abort we're trying to prevent).
|
|
54
54
|
*/
|
|
55
|
-
export declare function disposeLlamaCppRuntime(): Promise<void>;
|
|
55
|
+
export declare function disposeLlamaCppRuntime(): Promise<boolean>;
|
|
56
56
|
//# sourceMappingURL=llama-cpp-provider.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"llama-cpp-provider.d.ts","sourceRoot":"","sources":["../../src/llamacpp/llama-cpp-provider.ts"],"names":[],"mappings":"AAEA,OAAO,EAAsC,KAAK,QAAQ,EAAE,KAAK,MAAM,EAAE,MAAM,gBAAgB,CAAC;AAChG,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AACvD,OAAO,EAAkC,KAAK,UAAU,EAAE,KAAK,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAKrG;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,qBAAa,gBAAiB,SAAQ,kBAAkB;IACtD,QAAQ,CAAC,EAAE,cAAc;IACzB,QAAQ,CAAC,IAAI,4BAA4B;IAEzC,wDAAwD;IACxD,MAAM,CAAC,iBAAiB,EACpB,CAAC,CAAC,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,KAAK,OAAO,CAAC,WAAW,CAAC,CAAC,GAClE,IAAI,CAAQ;IAEhB;;;;OAIG;IACH,MAAM,CAAC,eAAe,IAAI,MAAM,GAAG,IAAI;IAIvC,OAAO,IAAI,OAAO;IAIZ,IAAI,CAAC,OAAO,EAAE,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;
|
|
1
|
+
{"version":3,"file":"llama-cpp-provider.d.ts","sourceRoot":"","sources":["../../src/llamacpp/llama-cpp-provider.ts"],"names":[],"mappings":"AAEA,OAAO,EAAsC,KAAK,QAAQ,EAAE,KAAK,MAAM,EAAE,MAAM,gBAAgB,CAAC;AAChG,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AACvD,OAAO,EAAkC,KAAK,UAAU,EAAE,KAAK,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAKrG;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,qBAAa,gBAAiB,SAAQ,kBAAkB;IACtD,QAAQ,CAAC,EAAE,cAAc;IACzB,QAAQ,CAAC,IAAI,4BAA4B;IAEzC,wDAAwD;IACxD,MAAM,CAAC,iBAAiB,EACpB,CAAC,CAAC,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,KAAK,OAAO,CAAC,WAAW,CAAC,CAAC,GAClE,IAAI,CAAQ;IAEhB;;;;OAIG;IACH,MAAM,CAAC,eAAe,IAAI,MAAM,GAAG,IAAI;IAIvC,OAAO,IAAI,OAAO;IAIZ,IAAI,CAAC,OAAO,EAAE,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;YAwKzD,YAAY;CAwB3B;AAWD;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,sBAAsB,IAAI,OAAO,CAAC,OAAO,CAAC,CAc/D"}
|
|
@@ -60,6 +60,9 @@ export class LlamaCppProvider extends AbstractAiProvider {
|
|
|
60
60
|
}));
|
|
61
61
|
}
|
|
62
62
|
}
|
|
63
|
+
let promptAbort;
|
|
64
|
+
let promptTimer;
|
|
65
|
+
let promptTimedOut = false;
|
|
63
66
|
try {
|
|
64
67
|
const tf = (await import('node-llama-cpp'));
|
|
65
68
|
const { LlamaChatSession } = tf;
|
|
@@ -122,11 +125,23 @@ export class LlamaCppProvider extends AbstractAiProvider {
|
|
|
122
125
|
}
|
|
123
126
|
const start = Date.now();
|
|
124
127
|
const onChunk = request.onTokenStream;
|
|
128
|
+
// Per-call wall-clock timeout: abort the decode if it overruns so a
|
|
129
|
+
// slow model can't hang the command. node-llama-cpp honours an
|
|
130
|
+
// AbortSignal when `stopOnAbortSignal` is set.
|
|
131
|
+
const timeoutMs = request.timeoutMs ?? this.config.timeoutMs;
|
|
132
|
+
if (timeoutMs && timeoutMs > 0) {
|
|
133
|
+
promptAbort = new AbortController();
|
|
134
|
+
promptTimer = setTimeout(() => {
|
|
135
|
+
promptTimedOut = true;
|
|
136
|
+
promptAbort?.abort();
|
|
137
|
+
}, timeoutMs);
|
|
138
|
+
}
|
|
125
139
|
const text = await session.prompt(userPrompt, {
|
|
126
140
|
maxTokens,
|
|
127
141
|
...(request.temperature !== undefined ? { temperature: request.temperature } : {}),
|
|
128
142
|
...(wantsJson ? { trimWhitespaceSuffix: true } : {}),
|
|
129
143
|
...(grammar ? { grammar: grammar } : {}),
|
|
144
|
+
...(promptAbort ? { signal: promptAbort.signal, stopOnAbortSignal: true } : {}),
|
|
130
145
|
...(onChunk
|
|
131
146
|
? {
|
|
132
147
|
onTextChunk: (chunk) => {
|
|
@@ -163,11 +178,20 @@ export class LlamaCppProvider extends AbstractAiProvider {
|
|
|
163
178
|
});
|
|
164
179
|
}
|
|
165
180
|
catch (e) {
|
|
181
|
+
if (promptTimedOut) {
|
|
182
|
+
return err(new AppErrorImpl(ERROR_CODES.TIMEOUT, `node-llama-cpp decode exceeded the per-call timeout and was aborted.`, {
|
|
183
|
+
suggestion: 'The model is too slow for the budget. Try a smaller model, fewer --enhance-passes, or raise the budget.',
|
|
184
|
+
}));
|
|
185
|
+
}
|
|
166
186
|
return err(new AppErrorImpl(ERROR_CODES.IO_ERROR, `node-llama-cpp call failed: ${e.message}`, {
|
|
167
187
|
cause: e,
|
|
168
188
|
suggestion: 'Verify LLAMACPP_MODEL_PATH points to a valid .gguf file readable by llama.cpp.',
|
|
169
189
|
}));
|
|
170
190
|
}
|
|
191
|
+
finally {
|
|
192
|
+
if (promptTimer)
|
|
193
|
+
clearTimeout(promptTimer);
|
|
194
|
+
}
|
|
171
195
|
}
|
|
172
196
|
async ensureLoaded(modelPath) {
|
|
173
197
|
// Cached at MODULE scope so the disposer can find it on process
|
|
@@ -214,13 +238,17 @@ export async function disposeLlamaCppRuntime() {
|
|
|
214
238
|
const state = sharedLlamaState;
|
|
215
239
|
sharedLlamaState = null;
|
|
216
240
|
if (!state)
|
|
217
|
-
return;
|
|
241
|
+
return false;
|
|
218
242
|
// Context first — it holds the sequence pool that depends on the model.
|
|
219
243
|
await callMaybeDispose(state.context);
|
|
220
244
|
// Then the model, which depends on the llama runtime.
|
|
221
245
|
await callMaybeDispose(state.model);
|
|
222
246
|
// Finally the Llama instance itself (releases the Metal device).
|
|
223
247
|
await callMaybeDispose(state.llama);
|
|
248
|
+
// libggml/Metal was loaded — even after disposing, this Node version still
|
|
249
|
+
// runs the native static destructor during `exit()` and it can abort with a
|
|
250
|
+
// GGML backtrace. The caller redirects fd 2 to a log file to contain it.
|
|
251
|
+
return true;
|
|
224
252
|
}
|
|
225
253
|
async function callMaybeDispose(target) {
|
|
226
254
|
if (!target || typeof target !== 'object')
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ollama-provider.d.ts","sourceRoot":"","sources":["../../src/ollama/ollama-provider.ts"],"names":[],"mappings":"AAAA,OAAO,EAAsC,KAAK,QAAQ,EAAE,KAAK,MAAM,EAAE,MAAM,gBAAgB,CAAC;AAChG,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AACvD,OAAO,EAAiB,KAAK,UAAU,EAAE,KAAK,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAMpF;;;;;;;;;;;;;;;;;;;GAmBG;AACH,qBAAa,cAAe,SAAQ,kBAAkB;IACpD,QAAQ,CAAC,EAAE,YAAY;IACvB,QAAQ,CAAC,IAAI,yBAAyB;IAEtC,OAAO,IAAI,OAAO;IAIlB;;;;;;;;;;;OAWG;IACG,WAAW,CACf,YAAY,CAAC,EAAE,MAAM,GACpB,OAAO,CAAC,MAAM,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,EAAE,CAAC;QAAC,YAAY,EAAE,OAAO,GAAG,IAAI,CAAA;KAAE,EAAE,QAAQ,CAAC,CAAC;IA+BxF,IAAI,CAAC,OAAO,EAAE,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;
|
|
1
|
+
{"version":3,"file":"ollama-provider.d.ts","sourceRoot":"","sources":["../../src/ollama/ollama-provider.ts"],"names":[],"mappings":"AAAA,OAAO,EAAsC,KAAK,QAAQ,EAAE,KAAK,MAAM,EAAE,MAAM,gBAAgB,CAAC;AAChG,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AACvD,OAAO,EAAiB,KAAK,UAAU,EAAE,KAAK,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAMpF;;;;;;;;;;;;;;;;;;;GAmBG;AACH,qBAAa,cAAe,SAAQ,kBAAkB;IACpD,QAAQ,CAAC,EAAE,YAAY;IACvB,QAAQ,CAAC,IAAI,yBAAyB;IAEtC,OAAO,IAAI,OAAO;IAIlB;;;;;;;;;;;OAWG;IACG,WAAW,CACf,YAAY,CAAC,EAAE,MAAM,GACpB,OAAO,CAAC,MAAM,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,EAAE,CAAC;QAAC,YAAY,EAAE,OAAO,GAAG,IAAI,CAAA;KAAE,EAAE,QAAQ,CAAC,CAAC;IA+BxF,IAAI,CAAC,OAAO,EAAE,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;CAgGxE"}
|
|
@@ -81,11 +81,26 @@ export class OllamaProvider extends AbstractAiProvider {
|
|
|
81
81
|
const format = formatFor(request.responseFormat);
|
|
82
82
|
if (format !== undefined)
|
|
83
83
|
body.format = format;
|
|
84
|
+
// Per-call wall-clock timeout. Without this a slow local model (a large
|
|
85
|
+
// 20B+ model, or one still loading) hangs the request indefinitely — the
|
|
86
|
+
// root cause of `smart-context` "running too long". Manual controller +
|
|
87
|
+
// timer (rather than AbortSignal.timeout) so the catch can distinguish a
|
|
88
|
+
// timeout from an unrelated network error.
|
|
89
|
+
const timeoutMs = request.timeoutMs ?? this.config.timeoutMs;
|
|
90
|
+
const controller = timeoutMs && timeoutMs > 0 ? new AbortController() : undefined;
|
|
91
|
+
let timedOut = false;
|
|
92
|
+
const timer = controller && timeoutMs
|
|
93
|
+
? setTimeout(() => {
|
|
94
|
+
timedOut = true;
|
|
95
|
+
controller.abort();
|
|
96
|
+
}, timeoutMs)
|
|
97
|
+
: undefined;
|
|
84
98
|
try {
|
|
85
99
|
const res = await fetch(`${baseUrl}/api/chat`, {
|
|
86
100
|
method: 'POST',
|
|
87
101
|
headers: { 'content-type': 'application/json' },
|
|
88
102
|
body: JSON.stringify(body),
|
|
103
|
+
...(controller ? { signal: controller.signal } : {}),
|
|
89
104
|
});
|
|
90
105
|
if (!res.ok) {
|
|
91
106
|
const text = await res.text();
|
|
@@ -107,11 +122,20 @@ export class OllamaProvider extends AbstractAiProvider {
|
|
|
107
122
|
});
|
|
108
123
|
}
|
|
109
124
|
catch (e) {
|
|
125
|
+
if (timedOut) {
|
|
126
|
+
return err(new AppErrorImpl(ERROR_CODES.TIMEOUT, `Ollama call exceeded ${timeoutMs}ms and was aborted (model "${model}").`, {
|
|
127
|
+
suggestion: `The model is too slow for the budget. Try a smaller --model, fewer --enhance-passes, or raise the budget.`,
|
|
128
|
+
}));
|
|
129
|
+
}
|
|
110
130
|
return err(new AppErrorImpl(ERROR_CODES.IO_ERROR, `Failed to call Ollama at ${baseUrl}: ${e.message}`, {
|
|
111
131
|
cause: e,
|
|
112
132
|
suggestion: `Is Ollama running? Try \`ollama serve\` or set OLLAMA_HOST to a reachable instance.`,
|
|
113
133
|
}));
|
|
114
134
|
}
|
|
135
|
+
finally {
|
|
136
|
+
if (timer)
|
|
137
|
+
clearTimeout(timer);
|
|
138
|
+
}
|
|
115
139
|
}
|
|
116
140
|
}
|
|
117
141
|
function roleFor(role) {
|
|
@@ -53,6 +53,19 @@ export interface IEnhancementPipelineOptions {
|
|
|
53
53
|
temperature?: number;
|
|
54
54
|
/** Override the model selection (forwarded to the provider per call). */
|
|
55
55
|
model?: string;
|
|
56
|
+
/**
|
|
57
|
+
* Total wall-clock budget (ms) for the whole pipeline. Before each stage the
|
|
58
|
+
* elapsed time is checked; once the budget is spent the pipeline stops and
|
|
59
|
+
* returns the best output so far (degrading to the deterministic seed if not
|
|
60
|
+
* even the draft finished). Undefined = no budget (legacy behaviour).
|
|
61
|
+
*/
|
|
62
|
+
budgetMs?: number;
|
|
63
|
+
/**
|
|
64
|
+
* Per-call timeout (ms) handed to the provider for each stage. Effective
|
|
65
|
+
* timeout is `min(perStageTimeoutMs, remaining budget)`. Bounds a single
|
|
66
|
+
* slow call so it can't blow the whole budget.
|
|
67
|
+
*/
|
|
68
|
+
perStageTimeoutMs?: number;
|
|
56
69
|
/** Optional progress hook — called once per stage. */
|
|
57
70
|
onStage?: (event: {
|
|
58
71
|
kind: EnhancementStageKind;
|
|
@@ -77,6 +90,11 @@ export interface IEnhancementPipelineRun {
|
|
|
77
90
|
* the deterministic seed unchanged.
|
|
78
91
|
*/
|
|
79
92
|
deterministicFallback: boolean;
|
|
93
|
+
/**
|
|
94
|
+
* True when the wall-clock `budgetMs` was reached before every planned
|
|
95
|
+
* stage ran. `finalOutput` still holds the best result produced so far.
|
|
96
|
+
*/
|
|
97
|
+
budgetExhausted: boolean;
|
|
80
98
|
}
|
|
81
99
|
/**
|
|
82
100
|
* Multi-pass refinement pipeline that turns a deterministic brief into
|
|
@@ -120,4 +138,14 @@ export declare class EnhancementPipeline {
|
|
|
120
138
|
* grounding.
|
|
121
139
|
*/
|
|
122
140
|
export declare function buildDefaultEnhancementStages(): IEnhancementStage[];
|
|
141
|
+
/**
|
|
142
|
+
* The fast default for interactive use: `draft → polish` (2 calls). Skips the
|
|
143
|
+
* slow critique + refine round-trip (the two passes small/large local models
|
|
144
|
+
* spend the most wall-clock on) while still applying the polish pass that
|
|
145
|
+
* gives the agent file:line refs and terse imperative bullets. Materially
|
|
146
|
+
* better than a single shot, ~half the calls of the full pipeline. Callers who
|
|
147
|
+
* want maximal density opt into `buildDefaultEnhancementStages()` (the
|
|
148
|
+
* `--plus` path).
|
|
149
|
+
*/
|
|
150
|
+
export declare function buildFastEnhancementStages(): IEnhancementStage[];
|
|
123
151
|
//# sourceMappingURL=enhancement-pipeline.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"enhancement-pipeline.d.ts","sourceRoot":"","sources":["../../src/pipeline/enhancement-pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,EAAsC,KAAK,QAAQ,EAAE,KAAK,MAAM,EAAE,MAAM,gBAAgB,CAAC;AAChG,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAiB,KAAK,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAElE;;;;;;GAMG;AACH,oBAAY,oBAAoB;IAC9B,KAAK,UAAU;IACf,QAAQ,aAAa;IACrB,MAAM,WAAW;IACjB,MAAM,WAAW;CAClB;AAED,MAAM,WAAW,sBAAsB;IACrC,8DAA8D;IAC9D,eAAe,EAAE,MAAM,CAAC;IACxB,yCAAyC;IACzC,IAAI,EAAE,MAAM,CAAC;IACb,+DAA+D;IAC/D,QAAQ,EAAE,MAAM,CAAC;IACjB,iEAAiE;IACjE,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,oBAAoB,CAAC;IAC3B;;;OAGG;IACH,aAAa,CAAC,KAAK,EAAE,sBAAsB,GAAG,UAAU,EAAE,CAAC;CAC5D;AAED,MAAM,WAAW,uBAAuB;IACtC,IAAI,EAAE,oBAAoB,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,uEAAuE;IACvE,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE;QAAE,WAAW,CAAC,EAAE,MAAM,CAAC;QAAC,YAAY,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;CACzD;AAED,MAAM,WAAW,2BAA2B;IAC1C,6EAA6E;IAC7E,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,4CAA4C;IAC5C,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,iEAAiE;IACjE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,yEAAyE;IACzE,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,sDAAsD;IACtD,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE;QAAE,IAAI,EAAE,oBAAoB,CAAC;QAAC,EAAE,EAAE,OAAO,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,KAAK,IAAI,CAAC;CACrG;AAED,MAAM,WAAW,uBAAuB;IACtC,uGAAuG;IACvG,WAAW,EAAE,MAAM,CAAC;IACpB,mCAAmC;IACnC,MAAM,EAAE,uBAAuB,EAAE,CAAC;IAClC,4EAA4E;IAC5E,UAAU,EAAE;QAAE,WAAW,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAA;KAAE,CAAC;IAC1D;;;;OAIG;IACH,qBAAqB,EAAE,OAAO,CAAC;
|
|
1
|
+
{"version":3,"file":"enhancement-pipeline.d.ts","sourceRoot":"","sources":["../../src/pipeline/enhancement-pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,EAAsC,KAAK,QAAQ,EAAE,KAAK,MAAM,EAAE,MAAM,gBAAgB,CAAC;AAChG,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAiB,KAAK,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAElE;;;;;;GAMG;AACH,oBAAY,oBAAoB;IAC9B,KAAK,UAAU;IACf,QAAQ,aAAa;IACrB,MAAM,WAAW;IACjB,MAAM,WAAW;CAClB;AAED,MAAM,WAAW,sBAAsB;IACrC,8DAA8D;IAC9D,eAAe,EAAE,MAAM,CAAC;IACxB,yCAAyC;IACzC,IAAI,EAAE,MAAM,CAAC;IACb,+DAA+D;IAC/D,QAAQ,EAAE,MAAM,CAAC;IACjB,iEAAiE;IACjE,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,oBAAoB,CAAC;IAC3B;;;OAGG;IACH,aAAa,CAAC,KAAK,EAAE,sBAAsB,GAAG,UAAU,EAAE,CAAC;CAC5D;AAED,MAAM,WAAW,uBAAuB;IACtC,IAAI,EAAE,oBAAoB,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,uEAAuE;IACvE,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE;QAAE,WAAW,CAAC,EAAE,MAAM,CAAC;QAAC,YAAY,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;CACzD;AAED,MAAM,WAAW,2BAA2B;IAC1C,6EAA6E;IAC7E,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,4CAA4C;IAC5C,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,iEAAiE;IACjE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,yEAAyE;IACzE,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;;;;OAKG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;OAIG;IACH,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,sDAAsD;IACtD,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE;QAAE,IAAI,EAAE,oBAAoB,CAAC;QAAC,EAAE,EAAE,OAAO,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,KAAK,IAAI,CAAC;CACrG;AAED,MAAM,WAAW,uBAAuB;IACtC,uGAAuG;IACvG,WAAW,EAAE,MAAM,CAAC;IACpB,mCAAmC;IACnC,MAAM,EAAE,uBAAuB,EAAE,CAAC;IAClC,4EAA4E;IAC5E,UAAU,EAAE;QAAE,WAAW,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAA;KAAE,CAAC;IAC1D;;;;OAIG;IACH,qBAAqB,EAAE,OAAO,CAAC;IAC/B;;;OAGG;IACH,eAAe,EAAE,OAAO,CAAC;CAC1B;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,qBAAa,mBAAmB;IAC9B,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAmC;gBAE9C,MAAM,EAAE,aAAa,CAAC,iBAAiB,CAAC;IAI9C,GAAG,CACP,KAAK,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,eAAe,EAAE,MAAM,CAAA;KAAE,EAChD,QAAQ,EAAE,WAAW,GAAG,IAAI,EAC5B,OAAO,GAAE,2BAAgC,G
ACxC,OAAO,CAAC,MAAM,CAAC,uBAAuB,EAAE,QAAQ,CAAC,CAAC;CAkGtD;AAgBD;;;;;;;;GAQG;AACH,wBAAgB,6BAA6B,IAAI,iBAAiB,EAAE,CAOnE;AAED;;;;;;;;GAQG;AACH,wBAAgB,0BAA0B,IAAI,iBAAiB,EAAE,CAEhE"}
|
|
@@ -50,6 +50,7 @@ export class EnhancementPipeline {
|
|
|
50
50
|
stages: [],
|
|
51
51
|
totalUsage: { inputTokens: 0, outputTokens: 0 },
|
|
52
52
|
deterministicFallback: true,
|
|
53
|
+
budgetExhausted: false,
|
|
53
54
|
});
|
|
54
55
|
}
|
|
55
56
|
const cap = options.maxPasses ?? this.stages.length;
|
|
@@ -59,7 +60,16 @@ export class EnhancementPipeline {
|
|
|
59
60
|
let previous = '';
|
|
60
61
|
let lastCritique;
|
|
61
62
|
let lastGood = input.originalContext;
|
|
63
|
+
const startedAt = Date.now();
|
|
64
|
+
let budgetExhausted = false;
|
|
62
65
|
for (let i = 0; i < plan.length; i += 1) {
|
|
66
|
+
// Wall-clock budget guard: stop before starting a stage we have no time
|
|
67
|
+
// for, and keep the best output produced so far.
|
|
68
|
+
const remaining = options.budgetMs !== undefined ? options.budgetMs - (Date.now() - startedAt) : undefined;
|
|
69
|
+
if (remaining !== undefined && remaining <= MIN_STAGE_BUDGET_MS) {
|
|
70
|
+
budgetExhausted = true;
|
|
71
|
+
break;
|
|
72
|
+
}
|
|
63
73
|
const stage = plan[i];
|
|
64
74
|
const messages = stage.buildMessages({
|
|
65
75
|
originalContext: input.originalContext,
|
|
@@ -67,11 +77,14 @@ export class EnhancementPipeline {
|
|
|
67
77
|
previous,
|
|
68
78
|
lastCritique,
|
|
69
79
|
});
|
|
80
|
+
// Effective per-call timeout = min(configured per-stage, remaining budget).
|
|
81
|
+
const perStageTimeout = effectiveTimeout(options.perStageTimeoutMs, remaining);
|
|
70
82
|
const stageResult = await callOnceWithRetry(provider, {
|
|
71
83
|
messages,
|
|
72
84
|
maxTokens: options.maxTokensPerStage ?? 4096,
|
|
73
85
|
temperature: options.temperature ?? 0.2,
|
|
74
86
|
...(options.model ? { model: options.model } : {}),
|
|
87
|
+
...(perStageTimeout !== undefined ? { timeoutMs: perStageTimeout } : {}),
|
|
75
88
|
});
|
|
76
89
|
const onStage = options.onStage;
|
|
77
90
|
if (!stageResult.ok) {
|
|
@@ -120,9 +133,23 @@ export class EnhancementPipeline {
|
|
|
120
133
|
stages: stagesOut,
|
|
121
134
|
totalUsage,
|
|
122
135
|
deterministicFallback: false,
|
|
136
|
+
budgetExhausted,
|
|
123
137
|
});
|
|
124
138
|
}
|
|
125
139
|
}
|
|
140
|
+
/** Don't start a stage with less than this much budget left (a call needs at
|
|
141
|
+
* least this long to have any chance of returning). */
|
|
142
|
+
const MIN_STAGE_BUDGET_MS = 250;
|
|
143
|
+
/**
|
|
144
|
+
* Effective per-call timeout: the tighter of an explicit per-stage cap and the
|
|
145
|
+
* remaining wall-clock budget. Returns undefined when neither is set.
|
|
146
|
+
*/
|
|
147
|
+
function effectiveTimeout(perStage, remaining) {
|
|
148
|
+
const candidates = [perStage, remaining].filter((n) => typeof n === 'number' && n > 0);
|
|
149
|
+
if (candidates.length === 0)
|
|
150
|
+
return undefined;
|
|
151
|
+
return Math.min(...candidates);
|
|
152
|
+
}
|
|
126
153
|
/**
|
|
127
154
|
* The default stage set for "make this brief more useful to the Claude
|
|
128
155
|
* agent". Tuned for small local models (Qwen2.5-Coder-3B, Llama-3.1-8B).
|
|
@@ -140,6 +167,18 @@ export function buildDefaultEnhancementStages() {
|
|
|
140
167
|
new PolishStage(),
|
|
141
168
|
];
|
|
142
169
|
}
|
|
170
|
+
/**
|
|
171
|
+
* The fast default for interactive use: `draft → polish` (2 calls). Skips the
|
|
172
|
+
* slow critique + refine round-trip (the two passes small/large local models
|
|
173
|
+
* spend the most wall-clock on) while still applying the polish pass that
|
|
174
|
+
* gives the agent file:line refs and terse imperative bullets. Materially
|
|
175
|
+
* better than a single shot, ~half the calls of the full pipeline. Callers who
|
|
176
|
+
* want maximal density opt into `buildDefaultEnhancementStages()` (the
|
|
177
|
+
* `--plus` path).
|
|
178
|
+
*/
|
|
179
|
+
export function buildFastEnhancementStages() {
|
|
180
|
+
return [new DraftStage(), new PolishStage()];
|
|
181
|
+
}
|
|
143
182
|
class DraftStage {
|
|
144
183
|
kind = EnhancementStageKind.Draft;
|
|
145
184
|
buildMessages(input) {
|
|
@@ -285,6 +324,11 @@ async function callOnceWithRetry(provider, request) {
|
|
|
285
324
|
if (first.ok) {
|
|
286
325
|
return ok({ content: first.value.content, model: first.value.model, usage: first.value.usage });
|
|
287
326
|
}
|
|
327
|
+
// Don't retry a timeout — the model is too slow for the budget, so a second
|
|
328
|
+
// attempt just burns another timeout period. Surface the timeout immediately.
|
|
329
|
+
if (first.error.code === ERROR_CODES.TIMEOUT) {
|
|
330
|
+
return first;
|
|
331
|
+
}
|
|
288
332
|
// One retry — small local models routinely 500 on the first request
|
|
289
333
|
// after a daemon start. Idempotent reissue is safe.
|
|
290
334
|
const second = await provider.send(request);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@shrkcrft/ai",
|
|
3
|
-
"version": "0.1.0-alpha.
|
|
3
|
+
"version": "0.1.0-alpha.15",
|
|
4
4
|
"description": "SharkCraft local LLM provider abstraction: Ollama (HTTP) + llama.cpp (in-process) + multi-pass enhancement pipeline.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "SharkCraft contributors",
|
|
@@ -43,8 +43,8 @@
|
|
|
43
43
|
"typecheck": "tsc --noEmit -p tsconfig.json"
|
|
44
44
|
},
|
|
45
45
|
"dependencies": {
|
|
46
|
-
"@shrkcrft/core": "^0.1.0-alpha.
|
|
47
|
-
"@shrkcrft/context": "^0.1.0-alpha.
|
|
46
|
+
"@shrkcrft/core": "^0.1.0-alpha.15",
|
|
47
|
+
"@shrkcrft/context": "^0.1.0-alpha.15",
|
|
48
48
|
"node-llama-cpp": "^3.16.0"
|
|
49
49
|
},
|
|
50
50
|
"publishConfig": {
|