@shrkcrft/ai 0.1.0-alpha.2 → 0.1.0-alpha.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai-request.d.ts +23 -0
- package/dist/ai-request.d.ts.map +1 -1
- package/dist/delegate/delegate-edit-schema.d.ts +44 -0
- package/dist/delegate/delegate-edit-schema.d.ts.map +1 -0
- package/dist/delegate/delegate-edit-schema.js +77 -0
- package/dist/delegate/parse-delegate-edit.d.ts +46 -0
- package/dist/delegate/parse-delegate-edit.d.ts.map +1 -0
- package/dist/delegate/parse-delegate-edit.js +128 -0
- package/dist/gemini/gemini-provider.d.ts +24 -0
- package/dist/gemini/gemini-provider.d.ts.map +1 -0
- package/dist/gemini/gemini-provider.js +97 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +9 -0
- package/dist/llamacpp/llama-cpp-provider.d.ts +56 -0
- package/dist/llamacpp/llama-cpp-provider.d.ts.map +1 -0
- package/dist/llamacpp/llama-cpp-provider.js +296 -0
- package/dist/llm-hints.d.ts +36 -0
- package/dist/llm-hints.d.ts.map +1 -0
- package/dist/llm-hints.js +92 -0
- package/dist/llm-recommendations.d.ts +72 -0
- package/dist/llm-recommendations.d.ts.map +1 -0
- package/dist/llm-recommendations.js +188 -0
- package/dist/ollama/ollama-provider.d.ts +47 -0
- package/dist/ollama/ollama-provider.d.ts.map +1 -0
- package/dist/ollama/ollama-provider.js +190 -0
- package/dist/pipeline/enhancement-pipeline.d.ts +151 -0
- package/dist/pipeline/enhancement-pipeline.d.ts.map +1 -0
- package/dist/pipeline/enhancement-pipeline.js +339 -0
- package/dist/provider-resolver.d.ts +28 -0
- package/dist/provider-resolver.d.ts.map +1 -0
- package/dist/provider-resolver.js +80 -0
- package/package.json +6 -5
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
import { AiMessageRole } from "./ai-request.js";
|
|
2
|
+
import { selectAiProvider } from "./provider-resolver.js";
|
|
3
|
+
import { buildAiBlock } from "./llm-hints.js";
|
|
4
|
+
/**
 * Layers LLM-generated recommendations onto a deterministic report.
 *
 * The caller owns the deterministic output; this helper contributes only the
 * `ai` block (built by `buildAiBlock`) and a structured `recommendations`
 * array.
 *
 * Outcomes:
 * - `input.userOptedOut` is truthy: no provider is resolved or called. The
 *   envelope carries an empty list and an `ai` block built with
 *   `userOptedOut: true`.
 * - The selection has no provider: same empty list, with the `ai` block
 *   built from that provider-less selection.
 * - A provider is available: exactly one `send()` call is made. A non-ok
 *   result, a thrown error, or unparseable content all leave the list empty.
 *
 * Fields read from `input`: `userOptedOut`, `providerKind`,
 * `providerOverride`, `model`, `maxTokens` (defaults to 1024), plus the
 * prompt fields consumed by `buildRecommendationMessages`.
 *
 * Lives in `@shrkcrft/ai` so any callable surface (CLI commands, packs,
 * read-only MCP tools that want recommendations alongside their data) can
 * reuse the same envelope shape.
 *
 * Resolves to `{ ai, recommendations }`.
 */
export async function enrichWithLlmRecommendations(input) {
    // Opt-out short-circuits before any provider resolution happens.
    if (input.userOptedOut) {
        const optedOutSelection = { requested: normaliseKind(input.providerKind), provider: null };
        return {
            ai: buildAiBlock({ selection: optedOutSelection, userOptedOut: true }),
            recommendations: [],
        };
    }

    // An explicit override (even `null`) wins over registry-based selection.
    let selection;
    if (input.providerOverride !== undefined) {
        selection = { requested: normaliseKind(input.providerKind), provider: input.providerOverride };
    }
    else {
        selection = selectAiProvider(input.providerKind);
    }

    const toEnvelope = (items) => ({
        ai: buildAiBlock({ selection, userOptedOut: false }),
        recommendations: items,
    });

    const { provider } = selection;
    if (!provider) {
        return toEnvelope([]);
    }
    if (input.model) {
        provider.configure({ model: input.model });
    }

    const messages = buildRecommendationMessages(input);
    let recommendations = [];
    try {
        const response = await provider.send({ messages, maxTokens: input.maxTokens ?? 1024 });
        if (response.ok) {
            recommendations = parseRecommendations(response.value.content);
        }
    }
    catch {
        // Best effort: the list stays empty and the ai block still reports the provider.
    }
    return toEnvelope(recommendations);
}
|
|
50
|
+
/**
 * Builds the two-message prompt (system + user) for the recommendation call.
 *
 * The system message states the critic role, embeds `input.ask`, and spells
 * out the JSON shape the reply must follow. The user message carries
 * `input.deterministicSummary` under a heading that names `input.surface`.
 *
 * Returns `[system, user]`.
 */
function buildRecommendationMessages(input) {
    const preamble = [
        `You are a critic layering concrete next-step recommendations on top of a deterministic SharkCraft "${input.surface}" report.`,
        'The deterministic report is supplied verbatim — treat its findings as facts. Your job is to translate them into actions a developer (or an AI coding agent) can take immediately.',
        '',
        'The user-specified ask is:',
        input.ask,
        '',
    ];
    // The reply contract, shown to the model literally.
    const replyContract = [
        'Return ONLY a JSON object with this exact shape, no preface, no fences:',
        '{',
        ' "recommendations": [',
        ' {',
        ' "severity": "info" | "warn" | "error",',
        ' "category": "<short kebab-case category>",',
        ' "title": "<one-sentence summary>",',
        ' "detail": "<one to three sentences with concrete next-steps; name files, commands, or symbols when possible>",',
        ' "target": "<optional id or path>",',
        ' "confidence": 0.0',
        ' }',
        ' ]',
        '}',
        'Skip the bullet entirely if you cannot say anything specific. Better silence than ceremony.',
    ];
    const systemMessage = {
        role: AiMessageRole.System,
        content: [...preamble, ...replyContract].join('\n'),
    };

    const userLines = [`# Deterministic ${input.surface} summary`, '', input.deterministicSummary];
    const userMessage = {
        role: AiMessageRole.User,
        content: userLines.join('\n'),
    };

    return [systemMessage, userMessage];
}
|
|
82
|
+
/**
 * Extracts a clean recommendation list from raw model output.
 *
 * Parsing is forgiving: a Markdown code fence is unwrapped first, and when
 * the text still is not valid JSON the span from the first `{` to the last
 * `}` is tried. Anything that does not yield an object holding a
 * `recommendations` array produces an empty list.
 *
 * Each kept entry has a coerced `severity`, a `category` (default `'other'`),
 * a non-empty `title` and `detail`, a `confidence` (default `0.5` unless a
 * number within [0, 1] was given), and a `target` only when a non-blank
 * string was supplied. Entries without a title or detail are dropped.
 */
function parseRecommendations(raw) {
    const text = raw.trim();
    const fenceMatch = /```(?:json)?\s*([\s\S]*?)```/.exec(text);
    const candidate = fenceMatch ? fenceMatch[1].trim() : text;

    let payload;
    try {
        payload = JSON.parse(candidate);
    }
    catch {
        // Second chance: the model may have wrapped the object in prose.
        const open = candidate.indexOf('{');
        const close = candidate.lastIndexOf('}');
        if (open < 0 || close <= open) {
            return [];
        }
        try {
            payload = JSON.parse(candidate.slice(open, close + 1));
        }
        catch {
            return [];
        }
    }

    if (!payload || typeof payload !== 'object') {
        return [];
    }
    const entries = payload.recommendations;
    if (!Array.isArray(entries)) {
        return [];
    }

    const trimmedOr = (value, fallback) => (typeof value === 'string' ? value.trim() : fallback);

    const cleaned = [];
    for (const entry of entries) {
        if (!entry || typeof entry !== 'object') {
            continue;
        }
        const severity = coerceSeverity(entry.severity);
        const category = trimmedOr(entry.category, '') || 'other';
        const title = trimmedOr(entry.title, '');
        const detail = trimmedOr(entry.detail, '');
        if (!title || !detail) {
            continue;
        }
        const score = entry.confidence;
        const inRange = typeof score === 'number' && score >= 0 && score <= 1;
        const record = { severity, category, title, detail, confidence: inRange ? score : 0.5 };
        const target = trimmedOr(entry.target, '');
        if (target) {
            record.target = target;
        }
        cleaned.push(record);
    }
    return cleaned;
}
|
|
130
|
+
/**
 * Maps a model-supplied severity onto `'error' | 'warn' | 'info'`.
 *
 * Matching ignores case and surrounding whitespace, because model output
 * frequently arrives as `"ERROR"` or `"Warning"`; an exact-match check would
 * silently downgrade those to `'info'`. `'warning'` is accepted as an alias
 * of `'warn'`. Non-strings and unrecognised labels fall back to `'info'`.
 */
function coerceSeverity(value) {
    if (typeof value !== 'string') {
        return 'info';
    }
    const label = value.trim().toLowerCase();
    if (label === 'error' || label === 'warn' || label === 'info') {
        return label;
    }
    if (label === 'warning') {
        return 'warn';
    }
    return 'info';
}
|
|
137
|
+
/**
 * Lower-cases a requested provider kind and checks it against the known
 * kinds (`claude`, `gemini`, `ollama`, `llamacpp`). A missing, empty, or
 * unknown kind becomes `'auto'`.
 */
function normaliseKind(kind) {
    if (!kind) {
        return 'auto';
    }
    const lowered = kind.toLowerCase();
    const recognised = ['claude', 'gemini', 'ollama', 'llamacpp'];
    return recognised.includes(lowered) ? lowered : 'auto';
}
|
|
143
|
+
/**
 * Renders a recommendation envelope (`{ ai, recommendations }`) as Markdown.
 *
 * With no recommendations, a short note explains whether the LLM was
 * reachable. Otherwise the entries are listed grouped by severity in the
 * order error, warn, info, each followed by its detail line. A rule and the
 * compact AI-configuration section always close the document.
 */
export function renderRecommendationsMarkdown(envelope) {
    const { recommendations } = envelope;
    const lines = [];

    if (recommendations.length > 0) {
        lines.push(`## LLM recommendations (${recommendations.length})`, '');
        for (const sev of ['error', 'warn', 'info']) {
            for (const rec of recommendations) {
                if (rec.severity !== sev) {
                    continue;
                }
                const targetSuffix = rec.target ? ` (${rec.target})` : '';
                lines.push(`- **[${sev}]** \`${rec.category}\`${targetSuffix} — ${rec.title} _(confidence ${rec.confidence.toFixed(2)})_`);
                lines.push(` - ${rec.detail}`);
            }
        }
        lines.push('');
    }
    else {
        const emptyNote = envelope.ai.reachable
            ? '(LLM returned no actionable recommendations — the deterministic output already covers the surface.)'
            : '(LLM unavailable — see the AI configuration block below to enable.)';
        lines.push('## LLM recommendations', '', emptyNote, '');
    }

    lines.push('---', '');
    lines.push(renderAiHintsCompact(envelope.ai));
    return lines.join('\n');
}
|
|
173
|
+
/**
 * Renders the `ai` block as a compact Markdown section: a heading carrying
 * the provider status, then one bullet per hint with its steps nested
 * underneath.
 *
 * Status is "active via `<providerId>`" when `ai.reachable`, "disabled by
 * user" when `ai.enhancementSkipped`, and "unavailable" otherwise.
 */
function renderAiHintsCompact(ai) {
    let status;
    if (ai.reachable) {
        status = `active via \`${ai.providerId}\``;
    }
    else if (ai.enhancementSkipped) {
        status = 'disabled by user';
    }
    else {
        status = 'unavailable';
    }

    const lines = [`### AI configuration — ${status}`];
    for (const { level, title, steps } of ai.hints) {
        lines.push(`- [${level}] **${title}**`);
        for (const step of steps) {
            lines.push(` - ${step}`);
        }
    }
    return lines.join('\n');
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { type AppError, type Result } from '@shrkcrft/core';
|
|
2
|
+
import { AbstractAiProvider } from '../ai-provider.js';
|
|
3
|
+
import { type IAiRequest, type IAiResponse } from '../ai-request.js';
|
|
4
|
+
/**
 * HTTP adapter for a local Ollama instance (https://ollama.com).
 *
 * Unlike Gemini/Claude, Ollama is host-based and does not need an API
 * key — `isReady()` is always true; the actual reachability check is
 * deferred to `send()` (or an explicit `healthCheck()`). The host is picked
 * from `OLLAMA_HOST` (or the provider config). Two forms are accepted:
 * - A full URL, e.g. `OLLAMA_HOST=http://my-box:11434`.
 * - A bare hostname (or IP) when paired with `OLLAMA_PORT`, e.g.
 *   `OLLAMA_HOST=my-box` + `OLLAMA_PORT=11434`. The URL is assembled
 *   as `http://<host>:<port>`.
 * Falls back to `http://localhost:11434`. The default model comes from
 * `OLLAMA_MODEL` and may be overridden per request.
 *
 * Wire format: `POST /api/chat` with `{model, messages, stream:false,
 * format?, options}`. The provider-neutral `IAiMessage` roles map
 * directly onto Ollama roles. When `responseFormat` is supplied we ask
 * Ollama for structured output: a `json_schema` format that carries a
 * schema is forwarded as the `format` object (accepted by newer servers);
 * any other `responseFormat` is sent as `format: "json"`.
 */
export declare class OllamaProvider extends AbstractAiProvider {
    /** Provider identifier. */
    readonly id = "ollama";
    /** Human-readable provider label. */
    readonly name = "Ollama (local HTTP)";
    /**
     * Readiness probe. Always `true` for this provider (see the class
     * notes); it does not contact the server.
     */
    isReady(): boolean;
    /**
     * One-shot preflight against `GET /api/tags`.
     *
     * Why this exists: Ollama is the one provider whose readiness is
     * decoupled from env (the daemon may be down, the model may not be
     * pulled). The two-stage planner calls this *before* stage 1 so it
     * can fail with `ollama serve` / `ollama pull <model>` hints instead
     * of a confusing network error mid-call.
     *
     * `requireModel` (optional) is checked against the server's tag list
     * and reported separately so the caller can build a precise hint.
     */
    healthCheck(requireModel?: string): Promise<Result<{
        /** Base URL the check was issued against. */
        host: string;
        /** Model names reported by the server's tag list. */
        models: string[];
        /** `null` when `requireModel` was not supplied; otherwise whether that model was found in `models`. */
        modelPresent: boolean | null;
    }, AppError>>;
    /**
     * Sends one chat request (`POST /api/chat`, per the class notes).
     * Failures are reported through the `Result` error side.
     */
    send(request: IAiRequest): Promise<Result<IAiResponse, AppError>>;
}
|
|
47
|
+
//# sourceMappingURL=ollama-provider.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ollama-provider.d.ts","sourceRoot":"","sources":["../../src/ollama/ollama-provider.ts"],"names":[],"mappings":"AAAA,OAAO,EAAsC,KAAK,QAAQ,EAAE,KAAK,MAAM,EAAE,MAAM,gBAAgB,CAAC;AAChG,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AACvD,OAAO,EAAiB,KAAK,UAAU,EAAE,KAAK,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAMpF;;;;;;;;;;;;;;;;;;;GAmBG;AACH,qBAAa,cAAe,SAAQ,kBAAkB;IACpD,QAAQ,CAAC,EAAE,YAAY;IACvB,QAAQ,CAAC,IAAI,yBAAyB;IAEtC,OAAO,IAAI,OAAO;IAIlB;;;;;;;;;;;OAWG;IACG,WAAW,CACf,YAAY,CAAC,EAAE,MAAM,GACpB,OAAO,CAAC,MAAM,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,EAAE,CAAC;QAAC,YAAY,EAAE,OAAO,GAAG,IAAI,CAAA;KAAE,EAAE,QAAQ,CAAC,CAAC;IA+BxF,IAAI,CAAC,OAAO,EAAE,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;CAgGxE"}
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
import { AppErrorImpl, ERROR_CODES, err, ok } from '@shrkcrft/core';
|
|
2
|
+
import { AbstractAiProvider } from "../ai-provider.js";
|
|
3
|
+
import { AiMessageRole } from "../ai-request.js";
|
|
4
|
+
const DEFAULT_OLLAMA_HOST = 'http://localhost:11434';
|
|
5
|
+
const DEFAULT_OLLAMA_MODEL = 'llama3.1';
|
|
6
|
+
const DEFAULT_OLLAMA_PORT = 11434;
|
|
7
|
+
/**
 * HTTP adapter for a local Ollama instance (https://ollama.com).
 *
 * Unlike Gemini/Claude, Ollama is host-based and does not need an API
 * key — `isReady()` is always true; the actual reachability check is
 * deferred to `send()` (or an explicit `healthCheck()`). The host is picked
 * from `OLLAMA_HOST` (or the provider config). Two forms are accepted:
 * - A full URL, e.g. `OLLAMA_HOST=http://my-box:11434`.
 * - A bare hostname (or IP) when paired with `OLLAMA_PORT`, e.g.
 *   `OLLAMA_HOST=my-box` + `OLLAMA_PORT=11434`. The URL is assembled
 *   as `http://<host>:<port>`.
 * Falls back to `http://localhost:11434`. The default model comes from
 * `OLLAMA_MODEL` and may be overridden per request.
 *
 * Wire format: `POST /api/chat` with `{model, messages, stream:false,
 * format?, options}`. The provider-neutral `IAiMessage` roles map
 * directly onto Ollama roles. When `responseFormat` is supplied we ask
 * Ollama for structured output: a `json_schema` format that carries a
 * schema is forwarded as the `format` object (accepted by newer servers);
 * any other `responseFormat` is sent as `format: "json"`.
 */
export class OllamaProvider extends AbstractAiProvider {
    id = 'ollama';
    name = 'Ollama (local HTTP)';
    /** Always `true`: no API key is required, and the server is not contacted here. */
    isReady() {
        return true;
    }
    /**
     * One-shot preflight against `GET /api/tags`.
     *
     * Why this exists: Ollama is the one provider whose readiness is
     * decoupled from env (the daemon may be down, the model may not be
     * pulled). The two-stage planner calls this *before* stage 1 so it
     * can fail with `ollama serve` / `ollama pull <model>` hints instead
     * of a confusing network error mid-call.
     *
     * `requireModel` (optional) is checked against the server's tag list
     * and reported separately so the caller can build a precise hint.
     * The tag list names models with their tag (`llama3.1:latest`), so an
     * untagged `requireModel` such as `llama3.1` also matches its `:latest`
     * entry; a tagged name must match exactly.
     *
     * Resolves to `ok({ host, models, modelPresent })`, where `modelPresent`
     * is `null` when `requireModel` is not supplied, or to an `IO_ERROR`
     * when the server answers with a non-2xx status or cannot be reached.
     */
    async healthCheck(requireModel) {
        const baseUrl = resolveBaseUrl(this.config.baseUrl);
        try {
            const res = await fetch(`${baseUrl}/api/tags`, { method: 'GET' });
            if (!res.ok) {
                return err(new AppErrorImpl(ERROR_CODES.IO_ERROR, `Ollama health-check failed at ${baseUrl}/api/tags (HTTP ${res.status})`, { suggestion: `Is OLLAMA_HOST correct? Currently ${baseUrl}.` }));
            }
            const json = (await res.json());
            const models = (json.models ?? []).map((m) => m.name ?? '').filter((n) => n.length > 0);
            let modelPresent = null;
            if (requireModel) {
                // An untagged request means the `latest` tag.
                const taggedName = requireModel.includes(':') ? requireModel : `${requireModel}:latest`;
                modelPresent = models.some((n) => n === requireModel || n === taggedName);
            }
            return ok({ host: baseUrl, models, modelPresent });
        }
        catch (e) {
            return err(new AppErrorImpl(ERROR_CODES.IO_ERROR, `Cannot reach Ollama at ${baseUrl}: ${e.message}`, {
                cause: e,
                suggestion: `Start the daemon (\`ollama serve\`) or set OLLAMA_HOST to a reachable instance.`,
            }));
        }
    }
    /**
     * Sends one non-streaming chat request to `POST /api/chat`.
     *
     * Model precedence: `request.model`, then the provider config, then
     * `OLLAMA_MODEL`, then the built-in default. `request.maxTokens`
     * (default 4096) is sent as `options.num_predict`; `temperature` is
     * included only when set.
     *
     * Resolves to `ok({ content, model, finishReason, usage, raw })`, to a
     * `TIMEOUT` error when the per-call timeout fired, or to an `IO_ERROR`
     * for non-2xx responses and network failures.
     */
    async send(request) {
        const baseUrl = resolveBaseUrl(this.config.baseUrl);
        const model = request.model ?? this.config.model ?? process.env.OLLAMA_MODEL ?? DEFAULT_OLLAMA_MODEL;
        const maxTokens = request.maxTokens ?? 4096;
        const messages = request.messages.map((m) => ({
            role: roleFor(m.role),
            content: m.content,
        }));
        const body = {
            model,
            messages,
            stream: false,
            options: {
                num_predict: maxTokens,
                ...(request.temperature !== undefined ? { temperature: request.temperature } : {}),
            },
        };
        const format = formatFor(request.responseFormat);
        if (format !== undefined)
            body.format = format;
        // Per-call wall-clock timeout. Without this a slow local model (a large
        // 20B+ model, or one still loading) hangs the request indefinitely — the
        // root cause of `smart-context` "running too long". Manual controller +
        // timer (rather than AbortSignal.timeout) so the catch can distinguish a
        // timeout from an unrelated network error.
        const timeoutMs = request.timeoutMs ?? this.config.timeoutMs;
        const controller = timeoutMs && timeoutMs > 0 ? new AbortController() : undefined;
        let timedOut = false;
        const timer = controller && timeoutMs
            ? setTimeout(() => {
                timedOut = true;
                controller.abort();
            }, timeoutMs)
            : undefined;
        try {
            const res = await fetch(`${baseUrl}/api/chat`, {
                method: 'POST',
                headers: { 'content-type': 'application/json' },
                body: JSON.stringify(body),
                ...(controller ? { signal: controller.signal } : {}),
            });
            if (!res.ok) {
                const text = await res.text();
                return err(new AppErrorImpl(ERROR_CODES.IO_ERROR, `Ollama API ${res.status}: ${text.slice(0, 500)}`, {
                    suggestion: `Check OLLAMA_HOST (currently ${baseUrl}) and that the model "${model}" is pulled (\`ollama pull ${model}\`).`,
                }));
            }
            const json = (await res.json());
            const content = json.message?.content ?? '';
            return ok({
                content,
                model: json.model ?? model,
                finishReason: json.done_reason,
                usage: {
                    inputTokens: json.prompt_eval_count,
                    outputTokens: json.eval_count,
                },
                raw: json,
            });
        }
        catch (e) {
            if (timedOut) {
                return err(new AppErrorImpl(ERROR_CODES.TIMEOUT, `Ollama call exceeded ${timeoutMs}ms and was aborted (model "${model}").`, {
                    suggestion: `The model is too slow for the budget. Try a smaller --model, fewer --enhance-passes, or raise the budget.`,
                }));
            }
            return err(new AppErrorImpl(ERROR_CODES.IO_ERROR, `Failed to call Ollama at ${baseUrl}: ${e.message}`, {
                cause: e,
                suggestion: `Is Ollama running? Try \`ollama serve\` or set OLLAMA_HOST to a reachable instance.`,
            }));
        }
        finally {
            // Always release the timer so a finished call cannot fire a late abort.
            if (timer)
                clearTimeout(timer);
        }
    }
}
|
|
141
|
+
/**
 * Translates a provider-neutral message role into the role string sent to
 * Ollama. System and assistant roles map to their namesakes; every other
 * role is sent as `'user'`.
 */
function roleFor(role) {
    switch (role) {
        case AiMessageRole.System:
            return 'system';
        case AiMessageRole.Assistant:
            return 'assistant';
        default:
            return 'user';
    }
}
|
|
148
|
+
/**
 * Picks the value for the request's `format` field.
 *
 * Returns `undefined` when no `responseFormat` is given, the schema object
 * when the format is `json_schema` and carries a schema, and the plain
 * string `'json'` in every other case.
 */
function formatFor(responseFormat) {
    if (!responseFormat) {
        return undefined;
    }
    const carriesSchema = responseFormat.type === 'json_schema' && responseFormat.schema;
    return carriesSchema ? responseFormat.schema : 'json';
}
|
|
156
|
+
/** Removes a single trailing `/` from `url`, if there is one. */
function stripTrailingSlash(url) {
    if (url.endsWith('/')) {
        return url.slice(0, -1);
    }
    return url;
}
|
|
159
|
+
/**
 * Resolve the Ollama base URL from config + env. Accepts:
 * - An explicit base URL on the provider config (`baseUrl`).
 * - `OLLAMA_HOST` as a full URL (`http://my-box:11434`).
 * - `OLLAMA_HOST` as `host:port` (`127.0.0.1:11434`, `[::1]:11434`), the
 *   form Ollama itself documents for this variable.
 * - `OLLAMA_HOST` as a bare host (`my-box`) paired with
 *   `OLLAMA_PORT` (default 11434 if only host is given).
 * - Falls back to `http://localhost:11434`.
 *
 * Why split host/port: lets the user point at a remote Ollama with two
 * dotenv entries instead of having to remember the URL form. Both
 * styles coexist; if `OLLAMA_HOST` already contains a scheme we keep
 * it verbatim and ignore `OLLAMA_PORT` (the URL is authoritative). The
 * same applies when the host already carries a port: appending
 * `OLLAMA_PORT` or the default would produce `http://host:port:port`.
 *
 * Returns the base URL without a trailing slash for the config and
 * full-URL forms.
 */
function resolveBaseUrl(configBaseUrl) {
    if (configBaseUrl && configBaseUrl.length > 0) {
        return stripTrailingSlash(configBaseUrl);
    }
    const rawHost = (process.env.OLLAMA_HOST ?? '').trim();
    const rawPort = (process.env.OLLAMA_PORT ?? '').trim();
    if (rawHost.length === 0 && rawPort.length === 0) {
        return DEFAULT_OLLAMA_HOST;
    }
    if (rawHost.length > 0 && /^https?:\/\//i.test(rawHost)) {
        // Full URL form takes precedence — OLLAMA_PORT is intentionally
        // ignored so users can't end up with two conflicting sources of
        // truth.
        return stripTrailingSlash(rawHost);
    }
    const host = rawHost.length > 0 ? rawHost : 'localhost';
    // `host:port` or `[v6]:port`. An unbracketed IPv6 literal has several
    // colons and is deliberately not treated as carrying a port.
    const hostHasPort = host.startsWith('[')
        ? /\]:\d+$/.test(host)
        : /^[^:]+:\d+$/.test(host);
    if (hostHasPort) {
        return `http://${host}`;
    }
    const port = rawPort.length > 0 ? rawPort : String(DEFAULT_OLLAMA_PORT);
    return `http://${host}:${port}`;
}
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
import { type AppError, type Result } from '@shrkcrft/core';
|
|
2
|
+
import type { IAiProvider } from '../ai-provider.js';
|
|
3
|
+
import { type IAiMessage } from '../ai-request.js';
|
|
4
|
+
/**
 * Identifier for a stage in the multi-pass enhancement pipeline.
 *
 * The default Claude-agent-oriented pipeline runs `draft → critique →
 * refine → polish`. Callers may pass a custom stage list to truncate,
 * extend, or rearrange the flow.
 */
export declare enum EnhancementStageKind {
    /** First pass in the default order. */
    Draft = "draft",
    /** Surfaces vague claims and missing evidence in the current output. */
    Critique = "critique",
    /** Fixes what the critique pass surfaced. */
    Refine = "refine",
    /** Enforces agent ergonomics: file:line refs, explicit next commands, terse bullets. */
    Polish = "polish"
}
|
|
17
|
+
/** Everything a stage receives when asked to build its messages (see `IEnhancementStage.buildMessages`). */
export interface IEnhancementStageInput {
    /** The deterministic ground truth assembled by the engine. */
    originalContext: string;
    /** The original user task / question. */
    task: string;
    /** Output of the previous stage (empty on the first stage). */
    previous: string;
    /** Output of the most recent `critique` stage, when relevant. */
    lastCritique?: string;
}
|
|
27
|
+
/** One step of the enhancement pipeline: a stage kind plus a prompt builder. */
export interface IEnhancementStage {
    /** Which stage this is; echoed on the matching `IEnhancementStageResult`. */
    kind: EnhancementStageKind;
    /**
     * Build the messages the LLM should see for this stage. Stages stay
     * pure — the orchestrator owns the provider, retries, and bookkeeping.
     */
    buildMessages(input: IEnhancementStageInput): IAiMessage[];
}
|
|
35
|
+
/** Outcome recorded for a single stage; collected in order on `IEnhancementPipelineRun.stages`. */
export interface IEnhancementStageResult {
    /** Kind of the stage that produced this entry. */
    kind: EnhancementStageKind;
    /**
     * Text carried forward from this stage.
     * NOTE(review): for a degraded stage this is presumably the previous
     * stage's output — confirm against the implementation.
     */
    content: string;
    /** Model name recorded for the stage. NOTE(review): value on degraded stages not visible here — confirm. */
    model: string;
    /** Set when the stage failed and we kept the previous-stage output. */
    degraded?: boolean;
    /** Failure description. NOTE(review): presumably populated only together with `degraded` — confirm. */
    errorMessage?: string;
    /** Token usage for this stage, when reported by the provider. */
    usage?: {
        inputTokens?: number;
        outputTokens?: number;
    };
}
|
|
47
|
+
/** Optional tuning knobs for `EnhancementPipeline.run`; every field may be omitted. */
export interface IEnhancementPipelineOptions {
    /** Cap the pipeline depth — useful for cheap models. Default: all stages. */
    maxPasses?: number;
    /** Per-stage `maxTokens`. Default: 4096. */
    maxTokensPerStage?: number;
    /** Per-stage `temperature`. Default: 0.2 (deterministic-ish). */
    temperature?: number;
    /** Override the model selection (forwarded to the provider per call). */
    model?: string;
    /**
     * Total wall-clock budget (ms) for the whole pipeline. Before each stage the
     * elapsed time is checked; once the budget is spent the pipeline stops and
     * returns the best output so far (degrading to the deterministic seed if not
     * even the draft finished). Undefined = no budget (legacy behaviour).
     */
    budgetMs?: number;
    /**
     * Per-call timeout (ms) handed to the provider for each stage. Effective
     * timeout is `min(perStageTimeoutMs, remaining budget)`. Bounds a single
     * slow call so it can't blow the whole budget.
     */
    perStageTimeoutMs?: number;
    /**
     * Optional progress hook — called once per stage.
     *
     * The event carries the stage `kind`, whether it succeeded (`ok`), and
     * the `pass` / `total` counters.
     * NOTE(review): whether `pass` is 0- or 1-based, and whether `total`
     * reflects `maxPasses`, is not visible from this declaration — confirm.
     */
    onStage?: (event: {
        kind: EnhancementStageKind;
        ok: boolean;
        pass: number;
        total: number;
    }) => void;
}
|
|
77
|
+
/** Summary of one pipeline run: the final text, per-stage history, token totals and fallback flags. */
export interface IEnhancementPipelineRun {
    /** Final enriched output. Always defined — falls back to `originalContext` when every stage failed. */
    finalOutput: string;
    /** Per-stage history (ordered). */
    stages: IEnhancementStageResult[];
    /** Aggregated token usage across stages (when reported by the provider). */
    totalUsage: {
        inputTokens: number;
        outputTokens: number;
    };
    /**
     * True when the pipeline could not call the LLM at all (no provider
     * passed). The caller is expected to handle this case by returning
     * the deterministic seed unchanged.
     */
    deterministicFallback: boolean;
    /**
     * True when the wall-clock `budgetMs` was reached before every planned
     * stage ran. `finalOutput` still holds the best result produced so far.
     */
    budgetExhausted: boolean;
}
|
|
99
|
+
/**
 * Multi-pass refinement pipeline that turns a deterministic brief into
 * a denser, more agent-ready artefact by making the LLM critique and
 * rewrite its own work.
 *
 * Design contract:
 * - When no provider is supplied, the pipeline returns the
 *   `originalContext` unchanged and flags `deterministicFallback`.
 *   The deterministic engine remains the source of truth.
 * - When a provider is supplied, every stage call is retried-once on
 *   failure; a permanently-failed stage degrades to the previous
 *   stage's output (the pipeline never throws and never produces
 *   less than the deterministic input).
 * - Stages compose: a caller can pass a 2-stage `[draft, polish]`
 *   pipeline for fast paths, or extend with custom critique prompts
 *   for project-specific quality bars.
 *
 * Why a pipeline (vs. a single rich prompt): small local models behave
 * dramatically better when asked to "find the gaps in this draft" than
 * when asked to "write the perfect brief in one shot". The critique
 * pass surfaces vague claims and missing evidence; the refine pass
 * fixes them; the polish pass enforces Claude-agent ergonomics
 * (file:line refs, explicit next commands, terse bullets).
 */
export declare class EnhancementPipeline {
    /** Stage list supplied to the constructor. */
    private readonly stages;
    /**
     * @param stages Stages to use, e.g. from `buildDefaultEnhancementStages()`
     *   or `buildFastEnhancementStages()`.
     */
    constructor(stages: ReadonlyArray<IEnhancementStage>);
    /**
     * Runs the pipeline over the given task and deterministic context.
     *
     * @param input The user `task` and the deterministic `originalContext` seed.
     * @param provider Provider used for every stage call; pass `null` to get
     *   the deterministic fallback described in the class notes.
     * @param options Depth, token, temperature, model, budget and progress settings.
     * @returns A `Result` wrapping the run summary (`IEnhancementPipelineRun`).
     */
    run(input: {
        task: string;
        originalContext: string;
    }, provider: IAiProvider | null, options?: IEnhancementPipelineOptions): Promise<Result<IEnhancementPipelineRun, AppError>>;
}
|
|
131
|
+
/**
 * The default stage set for "make this brief more useful to the Claude
 * agent". Tuned for small local models (Qwen2.5-Coder-3B, Llama-3.1-8B).
 *
 * Each stage's user message is intentionally short and concrete; the
 * heavy lifting (the deterministic seed) lives in the system role
 * and is reused verbatim across stages so the model never loses
 * grounding.
 *
 * Returns a stage list suitable for the `EnhancementPipeline` constructor.
 * NOTE(review): presumably the four stages in the documented default order
 * (`draft → critique → refine → polish`) — the body is not visible here; confirm.
 */
export declare function buildDefaultEnhancementStages(): IEnhancementStage[];
|
|
141
|
+
/**
 * The fast default for interactive use: `draft → polish` (2 calls). Skips the
 * slow critique + refine round-trip (the two passes small/large local models
 * spend the most wall-clock on) while still applying the polish pass that
 * gives the agent file:line refs and terse imperative bullets. Materially
 * better than a single shot, ~half the calls of the full pipeline. Callers who
 * want maximal density opt into `buildDefaultEnhancementStages()` (the
 * `--plus` path).
 *
 * Returns the two-stage list, suitable for the `EnhancementPipeline` constructor.
 */
export declare function buildFastEnhancementStages(): IEnhancementStage[];
|
|
151
|
+
//# sourceMappingURL=enhancement-pipeline.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"enhancement-pipeline.d.ts","sourceRoot":"","sources":["../../src/pipeline/enhancement-pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,EAAsC,KAAK,QAAQ,EAAE,KAAK,MAAM,EAAE,MAAM,gBAAgB,CAAC;AAChG,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAiB,KAAK,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAElE;;;;;;GAMG;AACH,oBAAY,oBAAoB;IAC9B,KAAK,UAAU;IACf,QAAQ,aAAa;IACrB,MAAM,WAAW;IACjB,MAAM,WAAW;CAClB;AAED,MAAM,WAAW,sBAAsB;IACrC,8DAA8D;IAC9D,eAAe,EAAE,MAAM,CAAC;IACxB,yCAAyC;IACzC,IAAI,EAAE,MAAM,CAAC;IACb,+DAA+D;IAC/D,QAAQ,EAAE,MAAM,CAAC;IACjB,iEAAiE;IACjE,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,oBAAoB,CAAC;IAC3B;;;OAGG;IACH,aAAa,CAAC,KAAK,EAAE,sBAAsB,GAAG,UAAU,EAAE,CAAC;CAC5D;AAED,MAAM,WAAW,uBAAuB;IACtC,IAAI,EAAE,oBAAoB,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,uEAAuE;IACvE,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE;QAAE,WAAW,CAAC,EAAE,MAAM,CAAC;QAAC,YAAY,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;CACzD;AAED,MAAM,WAAW,2BAA2B;IAC1C,6EAA6E;IAC7E,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,4CAA4C;IAC5C,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,iEAAiE;IACjE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,yEAAyE;IACzE,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;;;;OAKG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;OAIG;IACH,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,sDAAsD;IACtD,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE;QAAE,IAAI,EAAE,oBAAoB,CAAC;QAAC,EAAE,EAAE,OAAO,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,KAAK,IAAI,CAAC;CACrG;AAED,MAAM,WAAW,uBAAuB;IACtC,uGAAuG;IACvG,WAAW,EAAE,MAAM,CAAC;IACpB,mCAAmC;IACnC,MAAM,EAAE,uBAAuB,EAAE,CAAC;IAClC,4EAA4E;IAC5E,UAAU,EAAE;QAAE,WAAW,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAA;KAAE,CAAC;IAC1D;;;;OAIG;IACH,qBAAqB,EAAE,OAAO,CAAC;IAC/B;;;OAGG;IACH,eAAe,EAAE,OAAO,CAAC;CAC1B;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,qBAAa,mBAAmB;IAC9B,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAmC;gBAE9C,MAAM,EAAE,aAAa,CAAC,iBAAiB,CAAC;IAI9C,GAAG,CACP,KAAK,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,eAAe,EAAE,MAAM,CAAA;KAAE,EAChD,QAAQ,EAAE,WAAW,GAAG,IAAI,EAC5B,OAAO,GAAE,2BAAgC,G
ACxC,OAAO,CAAC,MAAM,CAAC,uBAAuB,EAAE,QAAQ,CAAC,CAAC;CAkGtD;AAgBD;;;;;;;;GAQG;AACH,wBAAgB,6BAA6B,IAAI,iBAAiB,EAAE,CAOnE;AAED;;;;;;;;GAQG;AACH,wBAAgB,0BAA0B,IAAI,iBAAiB,EAAE,CAEhE"}
|