@shrkcrft/ai 0.1.0-alpha.14 → 0.1.0-alpha.15

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -23,6 +23,14 @@ export interface IAiRequest {
23
23
  * progress without the synchronous wait.
24
24
  */
25
25
  onTokenStream?: (chunk: string) => void;
26
+ /**
27
+ * Per-call wall-clock timeout in milliseconds. When set and exceeded, the
28
+ * provider aborts the in-flight request and returns an `AppError` with code
29
+ * `TIMEOUT`. Bounds slow local models so a single call can never hang the
30
+ * command. Takes precedence over the provider's `config.timeoutMs`; when
31
+ * neither is set — or the resolved value is 0 or negative — no timeout is applied.
32
+ */
33
+ timeoutMs?: number;
26
34
  }
27
35
  export interface IAiResponse {
28
36
  content: string;
@@ -1 +1 @@
1
- {"version":3,"file":"ai-request.d.ts","sourceRoot":"","sources":["../src/ai-request.ts"],"names":[],"mappings":"AAAA,oBAAY,aAAa;IACvB,MAAM,WAAW;IACjB,IAAI,SAAS;IACb,SAAS,cAAc;CACxB;AAED,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,aAAa,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,UAAU;IACzB,QAAQ,EAAE,SAAS,UAAU,EAAE,CAAC;IAChC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,cAAc,CAAC,EAAE,iBAAiB,CAAC;IACnC;;;;;;;OAOG;IACH,aAAa,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;CACzC;AAED,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE;QAAE,WAAW,CAAC,EAAE,MAAM,CAAC;QAAC,YAAY,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IACxD,GAAG,CAAC,EAAE,OAAO,CAAC;CACf;AAED,MAAM,WAAW,iBAAiB;IAChC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,aAAa,GAAG,aAAa,CAAC;IACpC,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACjC,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB"}
1
+ {"version":3,"file":"ai-request.d.ts","sourceRoot":"","sources":["../src/ai-request.ts"],"names":[],"mappings":"AAAA,oBAAY,aAAa;IACvB,MAAM,WAAW;IACjB,IAAI,SAAS;IACb,SAAS,cAAc;CACxB;AAED,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,aAAa,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,UAAU;IACzB,QAAQ,EAAE,SAAS,UAAU,EAAE,CAAC;IAChC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,cAAc,CAAC,EAAE,iBAAiB,CAAC;IACnC;;;;;;;OAOG;IACH,aAAa,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;IACxC;;;;;;OAMG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE;QAAE,WAAW,CAAC,EAAE,MAAM,CAAC;QAAC,YAAY,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IACxD,GAAG,CAAC,EAAE,OAAO,CAAC;CACf;AAED,MAAM,WAAW,iBAAiB;IAChC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,aAAa,GAAG,aAAa,CAAC;IACpC,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACjC,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB"}
@@ -52,5 +52,5 @@ export declare class LlamaCppProvider extends AbstractAiProvider {
52
52
  * loaded. Errors during dispose are swallowed (the alternative is
53
53
  * the abort we're trying to prevent).
54
54
  */
55
- export declare function disposeLlamaCppRuntime(): Promise<void>;
55
+ export declare function disposeLlamaCppRuntime(): Promise<boolean>;
56
56
  //# sourceMappingURL=llama-cpp-provider.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"llama-cpp-provider.d.ts","sourceRoot":"","sources":["../../src/llamacpp/llama-cpp-provider.ts"],"names":[],"mappings":"AAEA,OAAO,EAAsC,KAAK,QAAQ,EAAE,KAAK,MAAM,EAAE,MAAM,gBAAgB,CAAC;AAChG,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AACvD,OAAO,EAAkC,KAAK,UAAU,EAAE,KAAK,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAKrG;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,qBAAa,gBAAiB,SAAQ,kBAAkB;IACtD,QAAQ,CAAC,EAAE,cAAc;IACzB,QAAQ,CAAC,IAAI,4BAA4B;IAEzC,wDAAwD;IACxD,MAAM,CAAC,iBAAiB,EACpB,CAAC,CAAC,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,KAAK,OAAO,CAAC,WAAW,CAAC,CAAC,GAClE,IAAI,CAAQ;IAEhB;;;;OAIG;IACH,MAAM,CAAC,eAAe,IAAI,MAAM,GAAG,IAAI;IAIvC,OAAO,IAAI,OAAO;IAIZ,IAAI,CAAC,OAAO,EAAE,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;YA4IzD,YAAY;CAwB3B;AAWD;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,sBAAsB,IAAI,OAAO,CAAC,IAAI,CAAC,CAU5D"}
1
+ {"version":3,"file":"llama-cpp-provider.d.ts","sourceRoot":"","sources":["../../src/llamacpp/llama-cpp-provider.ts"],"names":[],"mappings":"AAEA,OAAO,EAAsC,KAAK,QAAQ,EAAE,KAAK,MAAM,EAAE,MAAM,gBAAgB,CAAC;AAChG,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AACvD,OAAO,EAAkC,KAAK,UAAU,EAAE,KAAK,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAKrG;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,qBAAa,gBAAiB,SAAQ,kBAAkB;IACtD,QAAQ,CAAC,EAAE,cAAc;IACzB,QAAQ,CAAC,IAAI,4BAA4B;IAEzC,wDAAwD;IACxD,MAAM,CAAC,iBAAiB,EACpB,CAAC,CAAC,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,KAAK,OAAO,CAAC,WAAW,CAAC,CAAC,GAClE,IAAI,CAAQ;IAEhB;;;;OAIG;IACH,MAAM,CAAC,eAAe,IAAI,MAAM,GAAG,IAAI;IAIvC,OAAO,IAAI,OAAO;IAIZ,IAAI,CAAC,OAAO,EAAE,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;YAwKzD,YAAY;CAwB3B;AAWD;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,sBAAsB,IAAI,OAAO,CAAC,OAAO,CAAC,CAc/D"}
@@ -60,6 +60,9 @@ export class LlamaCppProvider extends AbstractAiProvider {
60
60
  }));
61
61
  }
62
62
  }
63
+ let promptAbort;
64
+ let promptTimer;
65
+ let promptTimedOut = false;
63
66
  try {
64
67
  const tf = (await import('node-llama-cpp'));
65
68
  const { LlamaChatSession } = tf;
@@ -122,11 +125,23 @@ export class LlamaCppProvider extends AbstractAiProvider {
122
125
  }
123
126
  const start = Date.now();
124
127
  const onChunk = request.onTokenStream;
128
+ // Per-call wall-clock timeout: abort the decode if it overruns so a
129
+ // slow model can't hang the command. node-llama-cpp takes the AbortSignal via
130
+ // `signal`; `stopOnAbortSignal` makes a mid-decode abort return the partial text instead of throwing.
131
+ const timeoutMs = request.timeoutMs ?? this.config.timeoutMs;
132
+ if (timeoutMs && timeoutMs > 0) {
133
+ promptAbort = new AbortController();
134
+ promptTimer = setTimeout(() => {
135
+ promptTimedOut = true;
136
+ promptAbort?.abort();
137
+ }, timeoutMs);
138
+ }
125
139
  const text = await session.prompt(userPrompt, {
126
140
  maxTokens,
127
141
  ...(request.temperature !== undefined ? { temperature: request.temperature } : {}),
128
142
  ...(wantsJson ? { trimWhitespaceSuffix: true } : {}),
129
143
  ...(grammar ? { grammar: grammar } : {}),
144
+ ...(promptAbort ? { signal: promptAbort.signal, stopOnAbortSignal: true } : {}),
130
145
  ...(onChunk
131
146
  ? {
132
147
  onTextChunk: (chunk) => {
@@ -163,11 +178,20 @@ export class LlamaCppProvider extends AbstractAiProvider {
163
178
  });
164
179
  }
165
180
  catch (e) {
181
+ if (promptTimedOut) {
182
+ return err(new AppErrorImpl(ERROR_CODES.TIMEOUT, `node-llama-cpp decode exceeded the per-call timeout and was aborted.`, {
183
+ suggestion: 'The model is too slow for the budget. Try a smaller model, fewer --enhance-passes, or raise the budget.',
184
+ }));
185
+ }
166
186
  return err(new AppErrorImpl(ERROR_CODES.IO_ERROR, `node-llama-cpp call failed: ${e.message}`, {
167
187
  cause: e,
168
188
  suggestion: 'Verify LLAMACPP_MODEL_PATH points to a valid .gguf file readable by llama.cpp.',
169
189
  }));
170
190
  }
191
+ finally {
192
+ if (promptTimer)
193
+ clearTimeout(promptTimer);
194
+ }
171
195
  }
172
196
  async ensureLoaded(modelPath) {
173
197
  // Cached at MODULE scope so the disposer can find it on process
@@ -214,13 +238,17 @@ export async function disposeLlamaCppRuntime() {
214
238
  const state = sharedLlamaState;
215
239
  sharedLlamaState = null;
216
240
  if (!state)
217
- return;
241
+ return false;
218
242
  // Context first — it holds the sequence pool that depends on the model.
219
243
  await callMaybeDispose(state.context);
220
244
  // Then the model, which depends on the llama runtime.
221
245
  await callMaybeDispose(state.model);
222
246
  // Finally the Llama instance itself (releases the Metal device).
223
247
  await callMaybeDispose(state.llama);
248
+ // libggml/Metal was loaded — even after disposing, this Node version still
249
+ // runs the native static destructor during `exit()` and it can abort with a
250
+ // GGML backtrace. The caller redirects fd 2 to a log file to contain it.
251
+ return true;
224
252
  }
225
253
  async function callMaybeDispose(target) {
226
254
  if (!target || typeof target !== 'object')
@@ -1 +1 @@
1
- {"version":3,"file":"ollama-provider.d.ts","sourceRoot":"","sources":["../../src/ollama/ollama-provider.ts"],"names":[],"mappings":"AAAA,OAAO,EAAsC,KAAK,QAAQ,EAAE,KAAK,MAAM,EAAE,MAAM,gBAAgB,CAAC;AAChG,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AACvD,OAAO,EAAiB,KAAK,UAAU,EAAE,KAAK,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAMpF;;;;;;;;;;;;;;;;;;;GAmBG;AACH,qBAAa,cAAe,SAAQ,kBAAkB;IACpD,QAAQ,CAAC,EAAE,YAAY;IACvB,QAAQ,CAAC,IAAI,yBAAyB;IAEtC,OAAO,IAAI,OAAO;IAIlB;;;;;;;;;;;OAWG;IACG,WAAW,CACf,YAAY,CAAC,EAAE,MAAM,GACpB,OAAO,CAAC,MAAM,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,EAAE,CAAC;QAAC,YAAY,EAAE,OAAO,GAAG,IAAI,CAAA;KAAE,EAAE,QAAQ,CAAC,CAAC;IA+BxF,IAAI,CAAC,OAAO,EAAE,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;CAkExE"}
1
+ {"version":3,"file":"ollama-provider.d.ts","sourceRoot":"","sources":["../../src/ollama/ollama-provider.ts"],"names":[],"mappings":"AAAA,OAAO,EAAsC,KAAK,QAAQ,EAAE,KAAK,MAAM,EAAE,MAAM,gBAAgB,CAAC;AAChG,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AACvD,OAAO,EAAiB,KAAK,UAAU,EAAE,KAAK,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAMpF;;;;;;;;;;;;;;;;;;;GAmBG;AACH,qBAAa,cAAe,SAAQ,kBAAkB;IACpD,QAAQ,CAAC,EAAE,YAAY;IACvB,QAAQ,CAAC,IAAI,yBAAyB;IAEtC,OAAO,IAAI,OAAO;IAIlB;;;;;;;;;;;OAWG;IACG,WAAW,CACf,YAAY,CAAC,EAAE,MAAM,GACpB,OAAO,CAAC,MAAM,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,EAAE,CAAC;QAAC,YAAY,EAAE,OAAO,GAAG,IAAI,CAAA;KAAE,EAAE,QAAQ,CAAC,CAAC;IA+BxF,IAAI,CAAC,OAAO,EAAE,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;CAgGxE"}
@@ -81,11 +81,26 @@ export class OllamaProvider extends AbstractAiProvider {
81
81
  const format = formatFor(request.responseFormat);
82
82
  if (format !== undefined)
83
83
  body.format = format;
84
+ // Per-call wall-clock timeout. Without this a slow local model (a large
85
+ // 20B+ model, or one still loading) hangs the request indefinitely — the
86
+ // root cause of `smart-context` "running too long". Manual controller +
87
+ // timer (rather than AbortSignal.timeout) so the catch can distinguish a
88
+ // timeout from an unrelated network error.
89
+ const timeoutMs = request.timeoutMs ?? this.config.timeoutMs;
90
+ const controller = timeoutMs && timeoutMs > 0 ? new AbortController() : undefined;
91
+ let timedOut = false;
92
+ const timer = controller && timeoutMs
93
+ ? setTimeout(() => {
94
+ timedOut = true;
95
+ controller.abort();
96
+ }, timeoutMs)
97
+ : undefined;
84
98
  try {
85
99
  const res = await fetch(`${baseUrl}/api/chat`, {
86
100
  method: 'POST',
87
101
  headers: { 'content-type': 'application/json' },
88
102
  body: JSON.stringify(body),
103
+ ...(controller ? { signal: controller.signal } : {}),
89
104
  });
90
105
  if (!res.ok) {
91
106
  const text = await res.text();
@@ -107,11 +122,20 @@ export class OllamaProvider extends AbstractAiProvider {
107
122
  });
108
123
  }
109
124
  catch (e) {
125
+ if (timedOut) {
126
+ return err(new AppErrorImpl(ERROR_CODES.TIMEOUT, `Ollama call exceeded ${timeoutMs}ms and was aborted (model "${model}").`, {
127
+ suggestion: `The model is too slow for the budget. Try a smaller --model, fewer --enhance-passes, or raise the budget.`,
128
+ }));
129
+ }
110
130
  return err(new AppErrorImpl(ERROR_CODES.IO_ERROR, `Failed to call Ollama at ${baseUrl}: ${e.message}`, {
111
131
  cause: e,
112
132
  suggestion: `Is Ollama running? Try \`ollama serve\` or set OLLAMA_HOST to a reachable instance.`,
113
133
  }));
114
134
  }
135
+ finally {
136
+ if (timer)
137
+ clearTimeout(timer);
138
+ }
115
139
  }
116
140
  }
117
141
  function roleFor(role) {
@@ -53,6 +53,19 @@ export interface IEnhancementPipelineOptions {
53
53
  temperature?: number;
54
54
  /** Override the model selection (forwarded to the provider per call). */
55
55
  model?: string;
56
+ /**
57
+ * Total wall-clock budget (ms) for the whole pipeline. Before each stage the
58
+ * elapsed time is checked; once the budget is spent the pipeline stops and
59
+ * returns the best output so far (degrading to the deterministic seed if not
60
+ * even the draft finished). Undefined = no budget (legacy behaviour).
61
+ */
62
+ budgetMs?: number;
63
+ /**
64
+ * Per-call timeout (ms) handed to the provider for each stage. Effective
65
+ * timeout is `min(perStageTimeoutMs, remaining budget)`. Bounds a single
66
+ * slow call so it can't blow the whole budget.
67
+ */
68
+ perStageTimeoutMs?: number;
56
69
  /** Optional progress hook — called once per stage. */
57
70
  onStage?: (event: {
58
71
  kind: EnhancementStageKind;
@@ -77,6 +90,11 @@ export interface IEnhancementPipelineRun {
77
90
  * the deterministic seed unchanged.
78
91
  */
79
92
  deterministicFallback: boolean;
93
+ /**
94
+ * True when the wall-clock `budgetMs` was reached before every planned
95
+ * stage ran. `finalOutput` still holds the best result produced so far.
96
+ */
97
+ budgetExhausted: boolean;
80
98
  }
81
99
  /**
82
100
  * Multi-pass refinement pipeline that turns a deterministic brief into
@@ -120,4 +138,14 @@ export declare class EnhancementPipeline {
120
138
  * grounding.
121
139
  */
122
140
  export declare function buildDefaultEnhancementStages(): IEnhancementStage[];
141
+ /**
142
+ * The fast default for interactive use: `draft → polish` (2 calls). Skips the
143
+ * slow critique + refine round-trip (the two passes small/large local models
144
+ * spend the most wall-clock on) while still applying the polish pass that
145
+ * gives the agent file:line refs and terse imperative bullets. Materially
146
+ * better than a single shot, ~half the calls of the full pipeline. Callers who
147
+ * want maximal density opt into `buildDefaultEnhancementStages()` (the
148
+ * `--plus` path).
149
+ */
150
+ export declare function buildFastEnhancementStages(): IEnhancementStage[];
123
151
  //# sourceMappingURL=enhancement-pipeline.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"enhancement-pipeline.d.ts","sourceRoot":"","sources":["../../src/pipeline/enhancement-pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,EAAsC,KAAK,QAAQ,EAAE,KAAK,MAAM,EAAE,MAAM,gBAAgB,CAAC;AAChG,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAiB,KAAK,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAElE;;;;;;GAMG;AACH,oBAAY,oBAAoB;IAC9B,KAAK,UAAU;IACf,QAAQ,aAAa;IACrB,MAAM,WAAW;IACjB,MAAM,WAAW;CAClB;AAED,MAAM,WAAW,sBAAsB;IACrC,8DAA8D;IAC9D,eAAe,EAAE,MAAM,CAAC;IACxB,yCAAyC;IACzC,IAAI,EAAE,MAAM,CAAC;IACb,+DAA+D;IAC/D,QAAQ,EAAE,MAAM,CAAC;IACjB,iEAAiE;IACjE,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,oBAAoB,CAAC;IAC3B;;;OAGG;IACH,aAAa,CAAC,KAAK,EAAE,sBAAsB,GAAG,UAAU,EAAE,CAAC;CAC5D;AAED,MAAM,WAAW,uBAAuB;IACtC,IAAI,EAAE,oBAAoB,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,uEAAuE;IACvE,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE;QAAE,WAAW,CAAC,EAAE,MAAM,CAAC;QAAC,YAAY,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;CACzD;AAED,MAAM,WAAW,2BAA2B;IAC1C,6EAA6E;IAC7E,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,4CAA4C;IAC5C,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,iEAAiE;IACjE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,yEAAyE;IACzE,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,sDAAsD;IACtD,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE;QAAE,IAAI,EAAE,oBAAoB,CAAC;QAAC,EAAE,EAAE,OAAO,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,KAAK,IAAI,CAAC;CACrG;AAED,MAAM,WAAW,uBAAuB;IACtC,uGAAuG;IACvG,WAAW,EAAE,MAAM,CAAC;IACpB,mCAAmC;IACnC,MAAM,EAAE,uBAAuB,EAAE,CAAC;IAClC,4EAA4E;IAC5E,UAAU,EAAE;QAAE,WAAW,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAA;KAAE,CAAC;IAC1D;;;;OAIG;IACH,qBAAqB,EAAE,OAAO,CAAC;CAChC;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,qBAAa,mBAAmB;IAC9B,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAmC;gBAE9C,MAAM,EAAE,aAAa,CAAC,iBAAiB,CAAC;IAI9C,GAAG,CACP,KAAK,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,eAAe,EAAE,MAAM,CAAA;KAAE,EAChD,QAAQ,EAAE,WAAW,GAAG,IAAI,EAC5B,OAAO,GAAE,2BAAgC,GACxC,OAAO,CAAC,MAAM,CAAC,uBAAuB,EAAE,QAAQ,CAAC,CAAC;CAmFtD;AAED;;;;;;;;GAQG;AACH,wBAAgB,6BAA6B,IAAI,iBAAiB,EAAE,CAOnE"}
1
+ {"version":3,"file":"enhancement-pipeline.d.ts","sourceRoot":"","sources":["../../src/pipeline/enhancement-pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,EAAsC,KAAK,QAAQ,EAAE,KAAK,MAAM,EAAE,MAAM,gBAAgB,CAAC;AAChG,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAiB,KAAK,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAElE;;;;;;GAMG;AACH,oBAAY,oBAAoB;IAC9B,KAAK,UAAU;IACf,QAAQ,aAAa;IACrB,MAAM,WAAW;IACjB,MAAM,WAAW;CAClB;AAED,MAAM,WAAW,sBAAsB;IACrC,8DAA8D;IAC9D,eAAe,EAAE,MAAM,CAAC;IACxB,yCAAyC;IACzC,IAAI,EAAE,MAAM,CAAC;IACb,+DAA+D;IAC/D,QAAQ,EAAE,MAAM,CAAC;IACjB,iEAAiE;IACjE,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,oBAAoB,CAAC;IAC3B;;;OAGG;IACH,aAAa,CAAC,KAAK,EAAE,sBAAsB,GAAG,UAAU,EAAE,CAAC;CAC5D;AAED,MAAM,WAAW,uBAAuB;IACtC,IAAI,EAAE,oBAAoB,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,uEAAuE;IACvE,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE;QAAE,WAAW,CAAC,EAAE,MAAM,CAAC;QAAC,YAAY,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;CACzD;AAED,MAAM,WAAW,2BAA2B;IAC1C,6EAA6E;IAC7E,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,4CAA4C;IAC5C,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,iEAAiE;IACjE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,yEAAyE;IACzE,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;;;;OAKG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;OAIG;IACH,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,sDAAsD;IACtD,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE;QAAE,IAAI,EAAE,oBAAoB,CAAC;QAAC,EAAE,EAAE,OAAO,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,KAAK,IAAI,CAAC;CACrG;AAED,MAAM,WAAW,uBAAuB;IACtC,uGAAuG;IACvG,WAAW,EAAE,MAAM,CAAC;IACpB,mCAAmC;IACnC,MAAM,EAAE,uBAAuB,EAAE,CAAC;IAClC,4EAA4E;IAC5E,UAAU,EAAE;QAAE,WAAW,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAA;KAAE,CAAC;IAC1D;;;;OAIG;IACH,qBAAqB,EAAE,OAAO,CAAC;IAC/B;;;OAGG;IACH,eAAe,EAAE,OAAO,CAAC;CAC1B;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,qBAAa,mBAAmB;IAC9B,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAmC;gBAE9C,MAAM,EAAE,aAAa,CAAC,iBAAiB,CAAC;IAI9C,GAAG,CACP,KAAK,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,eAAe,EAAE,MAAM,CAAA;KAAE,EAChD,QAAQ,EAAE,WAAW,GAAG,IAAI,EAC5B,OAAO,GAAE,2BAAgC
,GACxC,OAAO,CAAC,MAAM,CAAC,uBAAuB,EAAE,QAAQ,CAAC,CAAC;CAkGtD;AAgBD;;;;;;;;GAQG;AACH,wBAAgB,6BAA6B,IAAI,iBAAiB,EAAE,CAOnE;AAED;;;;;;;;GAQG;AACH,wBAAgB,0BAA0B,IAAI,iBAAiB,EAAE,CAEhE"}
@@ -50,6 +50,7 @@ export class EnhancementPipeline {
50
50
  stages: [],
51
51
  totalUsage: { inputTokens: 0, outputTokens: 0 },
52
52
  deterministicFallback: true,
53
+ budgetExhausted: false,
53
54
  });
54
55
  }
55
56
  const cap = options.maxPasses ?? this.stages.length;
@@ -59,7 +60,16 @@ export class EnhancementPipeline {
59
60
  let previous = '';
60
61
  let lastCritique;
61
62
  let lastGood = input.originalContext;
63
+ const startedAt = Date.now();
64
+ let budgetExhausted = false;
62
65
  for (let i = 0; i < plan.length; i += 1) {
66
+ // Wall-clock budget guard: stop before starting a stage we have no time
67
+ // for, and keep the best output produced so far.
68
+ const remaining = options.budgetMs !== undefined ? options.budgetMs - (Date.now() - startedAt) : undefined;
69
+ if (remaining !== undefined && remaining <= MIN_STAGE_BUDGET_MS) {
70
+ budgetExhausted = true;
71
+ break;
72
+ }
63
73
  const stage = plan[i];
64
74
  const messages = stage.buildMessages({
65
75
  originalContext: input.originalContext,
@@ -67,11 +77,14 @@ export class EnhancementPipeline {
67
77
  previous,
68
78
  lastCritique,
69
79
  });
80
+ // Effective per-call timeout = min(configured per-stage, remaining budget).
81
+ const perStageTimeout = effectiveTimeout(options.perStageTimeoutMs, remaining);
70
82
  const stageResult = await callOnceWithRetry(provider, {
71
83
  messages,
72
84
  maxTokens: options.maxTokensPerStage ?? 4096,
73
85
  temperature: options.temperature ?? 0.2,
74
86
  ...(options.model ? { model: options.model } : {}),
87
+ ...(perStageTimeout !== undefined ? { timeoutMs: perStageTimeout } : {}),
75
88
  });
76
89
  const onStage = options.onStage;
77
90
  if (!stageResult.ok) {
@@ -120,9 +133,23 @@ export class EnhancementPipeline {
120
133
  stages: stagesOut,
121
134
  totalUsage,
122
135
  deterministicFallback: false,
136
+ budgetExhausted,
123
137
  });
124
138
  }
125
139
  }
140
+ /** Don't start a stage with less than this much budget left (a call needs at
141
+ * least this long to have any chance of returning). */
142
+ const MIN_STAGE_BUDGET_MS = 250;
143
+ /**
144
+ * Effective per-call timeout: the tighter of an explicit per-stage cap and the
145
+ * remaining wall-clock budget. Returns undefined when neither is set.
146
+ */
147
+ function effectiveTimeout(perStage, remaining) {
148
+ const candidates = [perStage, remaining].filter((n) => typeof n === 'number' && n > 0);
149
+ if (candidates.length === 0)
150
+ return undefined;
151
+ return Math.min(...candidates);
152
+ }
126
153
  /**
127
154
  * The default stage set for "make this brief more useful to the Claude
128
155
  * agent". Tuned for small local models (Qwen2.5-Coder-3B, Llama-3.1-8B).
@@ -140,6 +167,18 @@ export function buildDefaultEnhancementStages() {
140
167
  new PolishStage(),
141
168
  ];
142
169
  }
170
+ /**
171
+ * The fast default for interactive use: `draft → polish` (2 calls). Skips the
172
+ * slow critique + refine round-trip (the two passes small/large local models
173
+ * spend the most wall-clock on) while still applying the polish pass that
174
+ * gives the agent file:line refs and terse imperative bullets. Materially
175
+ * better than a single shot, ~half the calls of the full pipeline. Callers who
176
+ * want maximal density opt into `buildDefaultEnhancementStages()` (the
177
+ * `--plus` path).
178
+ */
179
+ export function buildFastEnhancementStages() {
180
+ return [new DraftStage(), new PolishStage()];
181
+ }
143
182
  class DraftStage {
144
183
  kind = EnhancementStageKind.Draft;
145
184
  buildMessages(input) {
@@ -285,6 +324,11 @@ async function callOnceWithRetry(provider, request) {
285
324
  if (first.ok) {
286
325
  return ok({ content: first.value.content, model: first.value.model, usage: first.value.usage });
287
326
  }
327
+ // Don't retry a timeout — the model is too slow for the budget, so a second
328
+ // attempt just burns another timeout period. Surface the timeout immediately.
329
+ if (first.error.code === ERROR_CODES.TIMEOUT) {
330
+ return first;
331
+ }
288
332
  // One retry — small local models routinely 500 on the first request
289
333
  // after a daemon start. Idempotent reissue is safe.
290
334
  const second = await provider.send(request);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@shrkcrft/ai",
3
- "version": "0.1.0-alpha.14",
3
+ "version": "0.1.0-alpha.15",
4
4
  "description": "SharkCraft local LLM provider abstraction: Ollama (HTTP) + llama.cpp (in-process) + multi-pass enhancement pipeline.",
5
5
  "license": "MIT",
6
6
  "author": "SharkCraft contributors",
@@ -43,8 +43,8 @@
43
43
  "typecheck": "tsc --noEmit -p tsconfig.json"
44
44
  },
45
45
  "dependencies": {
46
- "@shrkcrft/core": "^0.1.0-alpha.14",
47
- "@shrkcrft/context": "^0.1.0-alpha.14",
46
+ "@shrkcrft/core": "^0.1.0-alpha.15",
47
+ "@shrkcrft/context": "^0.1.0-alpha.15",
48
48
  "node-llama-cpp": "^3.16.0"
49
49
  },
50
50
  "publishConfig": {