@hevmind/ask 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +116 -0
- package/bin/ask-launcher.mjs +110 -0
- package/bin/ask.mjs +4 -0
- package/openapi.yaml +363 -0
- package/package.json +61 -0
- package/skills/build-digest/SKILL.md +164 -0
- package/src/components/SearchOverlay.astro +1375 -0
- package/src/components/markdown.ts +107 -0
- package/src/digest/build.ts +432 -0
- package/src/digest/cli.ts +148 -0
- package/src/digest/expand.ts +24 -0
- package/src/digest/facts.ts +77 -0
- package/src/digest/frontmatter.ts +41 -0
- package/src/digest/read.ts +63 -0
- package/src/digest/schema.ts +185 -0
- package/src/digest/verify.ts +116 -0
- package/src/endpoint.ts +247 -0
- package/src/index.ts +2 -0
- package/src/integration.ts +146 -0
- package/src/llm.ts +239 -0
- package/src/observability.ts +213 -0
- package/src/search/chunk.ts +137 -0
- package/src/search/index.ts +44 -0
- package/src/search/loop.ts +525 -0
- package/src/search/prefilter.ts +93 -0
- package/src/types.ts +99 -0
package/src/endpoint.ts
ADDED
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
import type { APIRoute } from 'astro';
|
|
2
|
+
import config from 'virtual:hev-ask/config';
|
|
3
|
+
import digest from 'virtual:hev-ask/digest';
|
|
4
|
+
import {
|
|
5
|
+
decodePathValue,
|
|
6
|
+
getGlossaryEntry,
|
|
7
|
+
getOverview,
|
|
8
|
+
getSection,
|
|
9
|
+
listGlossary,
|
|
10
|
+
listSectionSummaries,
|
|
11
|
+
} from './digest/read.ts';
|
|
12
|
+
import { makeTelemetry, telemetryFromEnv } from './observability';
|
|
13
|
+
import { hashableChunkText } from './search/chunk';
|
|
14
|
+
import { buildIndex, prefilter, type Candidate, type Chunk } from './search/index';
|
|
15
|
+
import { runAgenticAnswerLoop, type AgenticEvent } from './search/loop';
|
|
16
|
+
|
|
17
|
+
export const prerender = false;

// Memoised search index, shared by every request handled by this module instance.
let indexPromise: Promise<Chunk[]> | null = null;
// Ensures the stale-digest warning is logged at most once per module instance.
let staleWarningIssued = false;

/**
 * Returns the memoised chunk index, building it on first use.
 *
 * A failed build is not kept in the cache: the rejected promise is still
 * returned to the caller that triggered it, but the next call starts a fresh
 * build instead of replaying the same rejection forever.
 *
 * @returns A promise for the chunk index built from the configured collections.
 */
function getIndex(): Promise<Chunk[]> {
  if (!indexPromise) {
    const pending = buildIndex(config.collections, config.basePath, config.chunkHeadingDepth);
    indexPromise = pending;
    // Only clear the cache if it still holds this attempt.
    pending.catch(() => {
      if (indexPromise === pending) indexPromise = null;
    });
  }
  return indexPromise;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Builds a single environment map from every source the endpoint may run
 * under. Precedence, highest first: the per-request `locals.runtime.env`
 * (Cloudflare), then `process.env` (Node adapters), then the build-time
 * `import.meta.env`.
 *
 * @param locals The request's `locals` object; may lack a `runtime` entry.
 * @returns The merged environment; later sources overwrite earlier ones.
 */
function resolveEnv(locals: unknown): Record<string, string | undefined> {
  const buildTimeEnv = (import.meta as { env?: Record<string, string> }).env ?? {};
  // `process` does not exist on every runtime, so probe before touching it.
  const processEnv = (typeof process !== 'undefined' ? process.env : undefined) ?? {};
  const requestEnv = (locals as { runtime?: { env?: Record<string, string> } })?.runtime?.env ?? {};

  return {
    ...buildTimeEnv,
    ...processEnv,
    ...requestEnv,
  };
}
|
|
36
|
+
|
|
37
|
+
/**
 * Looks up `ANTHROPIC_API_KEY` in the merged runtime environment.
 *
 * @param locals The request's `locals` object.
 * @returns The key, or `undefined` when no environment source defines it.
 */
function resolveApiKey(locals: unknown): string | undefined {
  const { ANTHROPIC_API_KEY: apiKey } = resolveEnv(locals);
  return apiKey;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Creates the PostHog LLM-tracing sink for the answer loop.
 *
 * On Cloudflare, capture promises have to be passed to `ctx.waitUntil`,
 * otherwise they are cancelled when the SSE stream closes. Without a
 * POSTHOG_KEY in the environment the sink is a no-op.
 *
 * @param locals The request's `locals` object; `runtime.ctx` is optional.
 */
function resolveTelemetry(locals: unknown) {
  type WaitUntil = (promise: Promise<unknown>) => void;
  const runtimeCtx = (locals as { runtime?: { ctx?: { waitUntil?: WaitUntil } } })?.runtime?.ctx;

  let waitUntil: WaitUntil | undefined;
  if (runtimeCtx?.waitUntil) {
    // Invoke through the context object so `this` stays bound to it.
    waitUntil = (promise) => runtimeCtx.waitUntil!(promise);
  }

  const settings = telemetryFromEnv(resolveEnv(locals), { waitUntil });
  return makeTelemetry(settings);
}
|
|
50
|
+
|
|
51
|
+
/**
 * GET handler. The base route returns the suggested questions used by the
 * overlay together with the configured model. Any sub-route is a keyless read
 * over the committed digest (for CLI, MCP, and generated clients).
 */
export const GET: APIRoute = ({ params, request }) => {
  const segments = resourceSegments(params.resource);
  if (segments.length > 0) return readResource(segments, request);

  return json({ suggestions: digest.suggestions ?? [], model: config.model });
};
|
|
58
|
+
|
|
59
|
+
/**
 * POST handler for search queries on the base route.
 *
 * Body: JSON `{ query, mode }`. Responses:
 * - 404 for any sub-route.
 * - 400 when the body is not parseable JSON (or is JSON `null`).
 * - An empty keyword result when `query` is missing, blank, or not a string.
 * - 500 when the search index cannot be built.
 * - A JSON keyword result when `mode` is `'keyword'` or no API key is
 *   configured (with a `warning` when `'agentic'` was explicitly requested).
 * - Otherwise an SSE stream of the agentic answer loop's events.
 */
export const POST: APIRoute = async ({ request, locals, params }) => {
  if (resourceSegments(params.resource).length) return notFound();

  let body: unknown;
  try {
    body = await request.json();
  } catch {
    return json({ error: 'Invalid JSON body.' }, 400);
  }
  // `null` parses as JSON but has no fields to read; it has always been rejected.
  if (body === null) return json({ error: 'Invalid JSON body.' }, 400);

  // The body is untrusted: only accept string fields so `.trim()` below cannot
  // throw on e.g. `{ "query": 5 }`.
  const fields = (typeof body === 'object' ? body : {}) as { query?: unknown; mode?: unknown };
  const query = typeof fields.query === 'string' ? fields.query : undefined;
  const mode = typeof fields.mode === 'string' ? fields.mode : undefined;
  if (!query || !query.trim()) return json({ results: [], query: '', model: config.model, mode: 'keyword' });

  const describeError = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  let chunks: Chunk[];
  try {
    chunks = await getIndex();
    void warnIfStale(chunks);
  } catch (err) {
    return json({ error: describeError(err) }, 500);
  }

  const keywordCandidates = prefilter(
    chunks,
    query,
    digest.glossary,
    Math.max(config.maxResults, config.candidatePerSearch),
    config.perDocCap,
    digest.nodes,
  );

  const apiKey = resolveApiKey(locals);
  const keywordResults = () =>
    toResults(keywordCandidates, new Map(chunks.map((chunk) => [chunk.id, chunk])), config.maxResults);

  if (mode === 'agentic' && !apiKey) {
    return json({
      results: keywordResults(),
      query,
      model: config.model,
      mode: 'keyword',
      warning: 'AI search is unavailable because ANTHROPIC_API_KEY is not configured.',
    });
  }

  if (mode === 'keyword' || !apiKey) {
    return json({
      results: keywordResults(),
      query,
      model: config.model,
      mode: 'keyword',
    });
  }

  // One signal that fires when either the request is aborted or the response
  // stream is cancelled by its consumer, so the upstream loop always stops.
  const abort = new AbortController();
  if (request.signal.aborted) abort.abort();
  else request.signal.addEventListener('abort', () => abort.abort(), { once: true });

  const encoder = new TextEncoder();
  const stream = new ReadableStream<Uint8Array>({
    async start(controller) {
      let open = true;
      const send = (event: string, data: unknown) => {
        if (!open) return;
        const frame = encoder.encode(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`);
        try {
          controller.enqueue(frame);
        } catch {
          // The consumer went away; drop this and any further frames.
          open = false;
        }
      };
      try {
        for await (const ev of runAgenticAnswerLoop({
          apiKey,
          query,
          chunks,
          digest,
          telemetry: resolveTelemetry(locals),
          config: {
            model: config.model,
            maxIterations: config.maxIterations,
            candidatePerSearch: config.candidatePerSearch,
            perDocCap: config.perDocCap,
            maxResults: config.maxResults,
            answerMaxTokens: config.answerMaxTokens,
          },
          signal: abort.signal,
        })) {
          if (abort.signal.aborted || !open) break;
          forward(send, ev, config.model);
        }
      } catch (err) {
        // The HTTP status is already committed once streaming starts, so surface
        // failures as an SSE error event rather than a status change.
        send('error', { error: describeError(err) });
      } finally {
        open = false;
        try {
          controller.close();
        } catch {
          // Already closed or cancelled by the consumer; nothing left to do.
        }
      }
    },
    cancel() {
      abort.abort();
    },
  });

  return new Response(stream, {
    status: 200,
    headers: {
      'content-type': 'text/event-stream; charset=utf-8',
      'cache-control': 'no-cache, no-transform',
      connection: 'keep-alive',
    },
  });
};
|
|
156
|
+
|
|
157
|
+
/**
 * Serves the keyless digest reads behind the endpoint's sub-routes.
 *
 * Routes, keyed on the decoded first segment:
 * - `glossary` lists terms; `glossary/<term>` returns one entry.
 * - `sections` lists summaries (optionally filtered by the `group` query
 *   parameter); `sections/<id>` returns one section.
 * - `overview` (no further segments) returns the digest overview.
 * Anything else is a 404.
 *
 * @param resource Non-empty list of path segments after the endpoint base.
 * @param request The incoming request; only its URL query string is read.
 */
function readResource(resource: string[], request: Request): Response {
  const [rawRoot, ...rest] = resource;
  const root = decodePathValue(rawRoot).trim();
  const hasTail = rest.length > 0;

  switch (root) {
    case 'glossary': {
      if (!hasTail) return json({ terms: listGlossary(digest) });
      const entry = getGlossaryEntry(digest, rest.join('/'));
      if (entry) return json(entry);
      return notFound('No glossary entry matched that term or alias.');
    }
    case 'sections': {
      if (!hasTail) {
        const group = new URL(request.url).searchParams.get('group');
        return json({ sections: listSectionSummaries(digest, group) });
      }
      const node = getSection(digest, rest.join('/'));
      if (node) return json(node);
      return notFound('No section matched that id.');
    }
    case 'overview':
      return hasTail ? notFound() : json(getOverview(digest));
    default:
      return notFound();
  }
}
|
|
180
|
+
|
|
181
|
+
/**
 * Splits the catch-all route parameter into trimmed, non-empty path segments.
 *
 * @param value The raw `resource` parameter; `undefined` on the base route.
 * @returns The segments in order; an empty array for the base route.
 */
function resourceSegments(value: string | undefined): string[] {
  const segments: string[] = [];
  for (const raw of (value ?? '').split('/')) {
    const part = raw.trim();
    if (part) segments.push(part);
  }
  return segments;
}
|
|
187
|
+
|
|
188
|
+
/**
 * Translates one answer-loop event into the SSE frame sent to the client.
 * Event types other than `sources`, `token`, `search`, and `done` are dropped.
 *
 * @param send Emits a named SSE event with a JSON-serialisable payload.
 * @param ev The event produced by the agentic answer loop.
 * @param model Model name attached to the `sources` payload.
 */
function forward(send: (event: string, data: unknown) => void, ev: AgenticEvent, model: string): void {
  switch (ev.type) {
    case 'sources':
      send('sources', { sources: ev.sources, model, mode: 'agentic' });
      break;
    case 'token':
      send('token', { text: ev.text });
      break;
    case 'search':
      send('search', { query: ev.query });
      break;
    case 'done':
      send('done', {});
      break;
  }
}
|
|
194
|
+
|
|
195
|
+
/** One keyword-search hit as returned in the JSON `results` array. */
interface KeywordResult {
  title: string;
  heading?: string;
  url: string;
  group?: string;
  snippet: string;
}

/**
 * Resolves prefilter candidates to result rows, preserving candidate order.
 * Candidates whose id is not in `byId` are skipped; the list is then cut to
 * `maxResults` entries.
 *
 * @param candidates Ranked candidates carrying a chunk id and a snippet.
 * @param byId Lookup from chunk id to chunk.
 * @param maxResults Upper bound on the number of rows returned.
 */
function toResults(candidates: Candidate[], byId: Map<string, Chunk>, maxResults: number): KeywordResult[] {
  const resolved: KeywordResult[] = [];
  for (const candidate of candidates) {
    const chunk = byId.get(candidate.id);
    if (chunk) resolved.push(chunkToResult(chunk, candidate.snippet));
  }
  return resolved.slice(0, maxResults);
}

/**
 * Projects a chunk onto the public result shape, pairing it with its snippet.
 */
function chunkToResult(chunk: Chunk, snippet: string): KeywordResult {
  const { docTitle: title, heading, url, group } = chunk;
  return { title, heading, url, group, snippet };
}
|
|
222
|
+
|
|
223
|
+
/**
 * Best-effort, once-per-instance check that the committed digest still matches
 * the indexed content. Logs a warning when the SHA-256 of the hashable chunk
 * text differs from `digest.contentHash`.
 *
 * The caller invokes this fire-and-forget, so it must never reject: any
 * failure while hashing (including a synchronous throw while building the
 * hashable text) simply skips the check.
 *
 * @param chunks The freshly built search index.
 */
async function warnIfStale(chunks: Chunk[]): Promise<void> {
  if (staleWarningIssued || !digest.contentHash || typeof crypto === 'undefined' || !crypto.subtle) return;
  // Flip the flag before awaiting so concurrent requests do not all hash.
  staleWarningIssued = true;

  let hash = '';
  try {
    hash = await sha256Hex(hashableChunkText(chunks));
  } catch {
    return;
  }

  if (hash && hash !== digest.contentHash) {
    console.warn('[hev-ask] Digest content hash is stale; run `ask digest build` to refresh it.');
  }
}
|
|
231
|
+
|
|
232
|
+
/**
 * Hashes a string with SHA-256 via Web Crypto.
 *
 * @param text Input text; encoded as UTF-8 before hashing.
 * @returns The digest as lowercase hexadecimal.
 */
async function sha256Hex(text: string): Promise<string> {
  const hashBytes = new Uint8Array(await crypto.subtle.digest('SHA-256', new TextEncoder().encode(text)));
  let hex = '';
  for (const byte of hashBytes) {
    hex += byte.toString(16).padStart(2, '0');
  }
  return hex;
}
|
|
237
|
+
|
|
238
|
+
/**
 * Builds a JSON response.
 *
 * @param data Value serialised with `JSON.stringify` as the body.
 * @param status HTTP status code; defaults to 200.
 */
function json(data: unknown, status = 200): Response {
  const body = JSON.stringify(data);
  const headers = { 'content-type': 'application/json' };
  return new Response(body, { status, headers });
}
|
|
244
|
+
|
|
245
|
+
/**
 * Builds a 404 JSON response of the form `{ error }`.
 *
 * @param error Message placed in the body; defaults to `'Not found.'`.
 */
function notFound(error = 'Not found.'): Response {
  const payload = { error };
  return json(payload, 404);
}
|
package/src/integration.ts
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import type { AstroIntegration } from 'astro';
|
|
2
|
+
import { execFile } from 'node:child_process';
|
|
3
|
+
import { readFileSync } from 'node:fs';
|
|
4
|
+
import path from 'node:path';
|
|
5
|
+
import { promisify } from 'node:util';
|
|
6
|
+
import { fileURLToPath } from 'node:url';
|
|
7
|
+
import { EMPTY_DIGEST, normalizeDigest } from './digest/schema';
|
|
8
|
+
import type { HevAskOptions, ResolvedConfig } from './types';
|
|
9
|
+
|
|
10
|
+
const CONFIG_VIRTUAL_ID = 'virtual:hev-ask/config';
|
|
11
|
+
const DIGEST_VIRTUAL_ID = 'virtual:hev-ask/digest';
|
|
12
|
+
const execFileAsync = promisify(execFile);
|
|
13
|
+
|
|
14
|
+
/**
 * Astro integration for hev ask.
 *
 * During `astro:config:setup` it registers two Vite plugins that expose the
 * resolved configuration and the committed digest as virtual modules, injects
 * the catch-all endpoint route, and watches the digest file. During
 * `astro:build:start` it tries to rebuild the digest when collections are
 * configured and `ANTHROPIC_API_KEY` is set; a failed rebuild only logs a
 * warning.
 *
 * @param options User options; every field is optional and falls back to a default.
 */
export default function hevAsk(options: HevAskOptions = {}): AstroIntegration {
  const config = withDefaults(options);

  // Placeholder until Astro reports the real project root in config:setup.
  let siteRoot = process.cwd();

  return {
    name: '@hevmind/ask',
    hooks: {
      'astro:config:setup': ({ config: astroConfig, injectRoute, updateConfig, logger, addWatchFile }) => {
        siteRoot = fileURLToPath(astroConfig.root);

        updateConfig({
          vite: {
            plugins: [virtualConfigPlugin(config), virtualDigestPlugin(config, siteRoot)],
          },
        });
        injectRoute({
          pattern: resourceRoutePattern(config.endpoint),
          entrypoint: '@hevmind/ask/endpoint',
          prerender: false,
        });
        addWatchFile(new URL(config.digestPath, astroConfig.root));

        const hasCollections = Boolean(config.collections?.length);
        if (!hasCollections) {
          logger.warn('No `collections` configured; search will error until you set e.g. collections: ["docs"].');
        }
        logger.info(`search endpoint at ${config.endpoint} (model: ${config.model})`);
      },

      'astro:build:start': async ({ logger }) => {
        // Nothing to digest without collections.
        if (!config.collections?.length) return;

        if (!process.env.ANTHROPIC_API_KEY) {
          logger.warn(`ANTHROPIC_API_KEY is not set; using committed ${config.digestPath} if present.`);
          return;
        }

        try {
          const output = await runDigestBuild(siteRoot, config);
          if (output) logger.info(output);
        } catch (err) {
          logger.warn(`digest build failed; using committed artifact if present. ${(err as Error).message}`);
        }
      },
    },
  };
}

/** Fills every unset option with its default to produce the resolved config. */
function withDefaults(options: HevAskOptions): ResolvedConfig {
  const {
    collections,
    model,
    digestModel,
    endpoint,
    basePath,
    maxResults,
    answerMaxTokens,
    maxIterations,
    chunkHeadingDepth,
    candidatePerSearch,
    perDocCap,
    digestPath,
    digestContentGlobs,
  } = options;

  return {
    collections: collections ?? null,
    model: model ?? 'claude-haiku-4-5',
    digestModel: digestModel ?? 'claude-opus-4-8',
    endpoint: endpoint ?? '/api/ask',
    basePath: basePath ?? '/docs/',
    maxResults: maxResults ?? 6,
    answerMaxTokens: answerMaxTokens ?? 1024,
    maxIterations: maxIterations ?? 4,
    chunkHeadingDepth: chunkHeadingDepth ?? 3,
    candidatePerSearch: candidatePerSearch ?? 8,
    perDocCap: perDocCap ?? 2,
    digestPath: digestPath ?? '.hev-ask/digest.json',
    digestContentGlobs,
  };
}
|
|
77
|
+
|
|
78
|
+
/**
 * Vite plugin that serves the resolved config as the default export of the
 * `virtual:hev-ask/config` module, which the endpoint imports.
 */
function virtualConfigPlugin(config: ResolvedConfig) {
  // Internal id for the virtual module: the public id prefixed with a NUL character.
  const resolvedId = `\0${CONFIG_VIRTUAL_ID}`;

  return {
    name: 'hev-ask:config',
    resolveId(id: string) {
      if (id === CONFIG_VIRTUAL_ID) return resolvedId;
      return undefined;
    },
    load(id: string) {
      if (id !== resolvedId) return undefined;
      return `export default ${JSON.stringify(config)};`;
    },
  };
}
|
|
91
|
+
|
|
92
|
+
/**
 * Vite plugin that serves the digest as the default export of the
 * `virtual:hev-ask/digest` module. The digest file is read each time the
 * module is loaded.
 *
 * @param config Resolved config; only `digestPath` is used.
 * @param siteRoot Directory that `digestPath` is resolved against.
 */
function virtualDigestPlugin(config: ResolvedConfig, siteRoot: string) {
  const resolvedId = `\0${DIGEST_VIRTUAL_ID}`;

  return {
    name: 'hev-ask:digest',
    resolveId(id: string) {
      if (id === DIGEST_VIRTUAL_ID) return resolvedId;
      return undefined;
    },
    load(id: string) {
      if (id === resolvedId) {
        const serialized = JSON.stringify(readDigest(siteRoot, config.digestPath));
        return `export default ${serialized};`;
      }
      return undefined;
    },
  };
}
|
|
106
|
+
|
|
107
|
+
/**
 * Loads and normalises the committed digest, falling back to `EMPTY_DIGEST`.
 *
 * A missing file is the expected "no digest yet" case and stays silent. Any
 * other read failure, malformed JSON, or a normalisation error still falls
 * back to the empty digest but is reported on the console, so a corrupt
 * artifact is not mistaken for an absent one.
 *
 * @param siteRoot Directory that `digestPath` is resolved against.
 * @param digestPath Path of the digest file, relative to `siteRoot` or absolute.
 */
function readDigest(siteRoot: string, digestPath: string) {
  const file = path.resolve(siteRoot, digestPath);
  const reason = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  let raw: string;
  try {
    raw = readFileSync(file, 'utf8');
  } catch (err) {
    if ((err as { code?: string } | null)?.code !== 'ENOENT') {
      console.warn(`[hev-ask] Could not read digest at ${file}; using an empty digest. ${reason(err)}`);
    }
    return EMPTY_DIGEST;
  }

  try {
    return normalizeDigest(JSON.parse(raw));
  } catch (err) {
    console.warn(`[hev-ask] Ignoring invalid digest at ${file}; using an empty digest. ${reason(err)}`);
    return EMPTY_DIGEST;
  }
}
|
|
114
|
+
|
|
115
|
+
/**
 * Runs `ask digest build` in a child process using the current Node binary and
 * the package's `bin/ask.mjs`, forwarding the relevant config as CLI flags.
 *
 * @param siteRoot Working directory for the child process.
 * @param config Resolved config supplying paths, model, collections, and globs.
 * @returns The child's trimmed stdout followed by stderr, with any leading
 *   `[hev-ask]` prefix removed from each line. Errors from the child process
 *   call propagate to the caller.
 */
async function runDigestBuild(siteRoot: string, config: ResolvedConfig): Promise<string> {
  const askBin = fileURLToPath(new URL('../bin/ask.mjs', import.meta.url));

  const collectionFlags = (config.collections ?? []).flatMap((collection) => ['--collection', collection]);
  const globFlags = (config.digestContentGlobs ?? []).flatMap((glob) => ['--content-glob', glob]);
  const args = [
    askBin,
    'digest',
    'build',
    '--digest-path',
    config.digestPath,
    '--base-path',
    config.basePath,
    '--chunk-heading-depth',
    String(config.chunkHeadingDepth),
    '--digest-model',
    config.digestModel,
    ...collectionFlags,
    ...globFlags,
  ];

  const { stdout, stderr } = await execFileAsync(process.execPath, args, {
    cwd: siteRoot,
    env: process.env,
    maxBuffer: 1024 * 1024 * 8,
  });

  const sections: string[] = [];
  for (const text of [stdout, stderr]) {
    const trimmed = text.trim();
    if (trimmed) sections.push(trimmed);
  }
  return sections.join('\n').replace(/^\[hev-ask\]\s*/gm, '');
}
|
|
141
|
+
|
|
142
|
+
/**
 * Derives the catch-all route pattern for the endpoint, so the base path and
 * every sub-path are served by the same handler.
 *
 * @param endpoint Configured endpoint path, e.g. `/api/ask`.
 * @returns The endpoint (minus one trailing slash, if any) followed by
 *   `/[...resource]`.
 */
function resourceRoutePattern(endpoint: string): string {
  const catchAll = '/[...resource]';
  if (endpoint === '/') return catchAll;

  const hasTrailingSlash = endpoint.length > 1 && endpoint.endsWith('/');
  const base = hasTrailingSlash ? endpoint.slice(0, -1) : endpoint;
  return base + catchAll;
}
|
package/src/llm.ts
ADDED
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
// Minimal Anthropic Messages API client over fetch — keeps the package free of
|
|
2
|
+
// runtime dependencies and edge-runtime friendly.
|
|
3
|
+
|
|
4
|
+
const API_URL = 'https://api.anthropic.com/v1/messages';
|
|
5
|
+
const API_VERSION = '2023-06-01';
|
|
6
|
+
|
|
7
|
+
/** One conversation turn sent to the Messages API. */
export interface AnthropicMessage {
  role: 'user' | 'assistant';
  /** Left untyped here; passed through to the request body as-is. */
  content: unknown;
}

/** A text content block, optionally flagged with an ephemeral cache marker. */
export interface AnthropicTextBlock {
  type: 'text';
  text: string;
  cache_control?: { type: 'ephemeral' };
}

/** A tool result tied to the `tool_use` block it answers. */
export interface AnthropicToolResultBlock {
  type: 'tool_result';
  tool_use_id: string;
  content: string;
}

/** A tool definition: name, description, and an input schema object. */
export interface AnthropicTool {
  name: string;
  description: string;
  input_schema: Record<string, unknown>;
}

/** Arguments shared by the blocking and streaming Claude calls. */
export interface CallClaudeOptions {
  apiKey: string;
  model: string;
  system: string | AnthropicTextBlock[];
  messages: AnthropicMessage[];
  tools?: AnthropicTool[];
  toolChoice?: { type: 'tool'; name: string } | { type: 'auto' };
  maxTokens?: number;
  /** Aborts the upstream request when the client disconnects. */
  signal?: AbortSignal;
}

/** Input and output token counts for one response. */
export interface AnthropicUsage {
  input_tokens: number;
  output_tokens: number;
}

/** The subset of a non-streaming Messages API response that this package reads. */
export interface AnthropicResponse {
  content: (
    | { type: 'text'; text: string }
    | { type: 'tool_use'; id: string; name: string; input: unknown }
  )[];
  stop_reason: string | null;
  /** Token counts returned by the Messages API; used for observability. */
  usage?: AnthropicUsage;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Serialises the Messages API request payload.
 *
 * @param opts Call options; `maxTokens` defaults to 2048 when unset.
 * @param stream When true the payload carries `stream: true`; when false the
 *   `stream` key is left out of the JSON entirely.
 * @returns The JSON string used as the request body.
 */
function requestBody(opts: CallClaudeOptions, stream: boolean) {
  const { model, maxTokens, system, messages, tools, toolChoice } = opts;
  const payload = {
    model,
    max_tokens: maxTokens ?? 2048,
    system,
    messages,
    tools,
    tool_choice: toolChoice,
    // `undefined` values are dropped by JSON.stringify.
    stream: stream ? true : undefined,
  };
  return JSON.stringify(payload);
}
|
|
68
|
+
|
|
69
|
+
/**
 * Builds the HTTP headers for a Messages API request.
 *
 * @param apiKey Sent as the `x-api-key` header.
 */
function headers(apiKey: string) {
  const requestHeaders = {
    'content-type': 'application/json',
    'x-api-key': apiKey,
    'anthropic-version': API_VERSION,
  };
  return requestHeaders;
}
|
|
76
|
+
|
|
77
|
+
/**
 * Sends one non-streaming Messages API request and returns the parsed response.
 *
 * @param opts Request options, including the API key and optional abort signal.
 * @returns The response body parsed as JSON.
 * @throws Error carrying the HTTP status and up to 500 characters of the
 *   response body when the status is not OK.
 */
export async function callClaude(opts: CallClaudeOptions): Promise<AnthropicResponse> {
  const { apiKey, signal } = opts;
  const response = await fetch(API_URL, {
    method: 'POST',
    headers: headers(apiKey),
    body: requestBody(opts, false),
    signal,
  });

  if (response.ok) {
    return (await response.json()) as AnthropicResponse;
  }

  // A body that cannot be read is reported as an empty detail string.
  const detail = await response.text().catch(() => '');
  throw new Error(`Anthropic API ${response.status}: ${detail.slice(0, 500)}`);
}
|
|
92
|
+
|
|
93
|
+
/** High-level events surfaced from the Anthropic SSE stream. */
export type StreamEvent =
  | { type: 'text'; text: string }
  | { type: 'tool_use'; id: string; name: string; input: unknown }
  | { type: 'stop'; stopReason: string | null; usage?: AnthropicUsage };

/**
 * Streams a Messages API response, yielding text deltas as they arrive and
 * fully-reconstructed tool_use blocks once their streamed JSON input completes.
 *
 * The response body reader is always cancelled when the generator finishes,
 * including when the consumer stops iterating early or an error is thrown, so
 * the upstream body is not left open.
 *
 * @param opts Request options, including the API key and optional abort signal.
 * @throws Error carrying the HTTP status when the response is not OK or has no body.
 */
export async function* streamClaude(opts: CallClaudeOptions): AsyncGenerator<StreamEvent> {
  const res = await fetch(API_URL, {
    method: 'POST',
    headers: headers(opts.apiKey),
    body: requestBody(opts, true),
    signal: opts.signal,
  });

  if (!res.ok || !res.body) {
    const detail = res.ok ? 'no response body' : await res.text().catch(() => '');
    throw new Error(`Anthropic API ${res.status}: ${detail.slice(0, 500)}`);
  }

  const reader = res.body.getReader();
  // `{ stream: true }` so multibyte tokens split across network chunks decode
  // without producing replacement characters.
  const decoder = new TextDecoder('utf-8');
  const state = newSseState();

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      const { events, state: next } = parseSseChunk(decoder.decode(value, { stream: true }), state);
      Object.assign(state, next);
      for (const event of events) yield event;
    }
  } finally {
    // Runs on normal completion, on errors, and when the consumer breaks out
    // of the loop. Cancelling an already-finished stream is harmless, and a
    // rejection here must not mask the original outcome.
    void reader.cancel().catch(() => undefined);
  }
}
|
|
130
|
+
|
|
131
|
+
/** A tool_use content block whose JSON input is still being accumulated. */
interface SseToolBlock {
  kind: 'tool_use';
  id: string;
  name: string;
  /** Concatenated JSON input fragments received so far. */
  json: string;
}

/** Marker for any content block that is not a tool_use block. */
interface SseTextBlock {
  kind: 'text';
}

type SseBlock = SseToolBlock | SseTextBlock;

/** Parser state carried from one decoded chunk to the next. */
export interface SseParseState {
  /** Bytes not yet terminated by a blank line. */
  buffer: string;
  /** Content blocks indexed by their position in the message. */
  blocks: Record<number, SseBlock>;
  /** Token usage accumulated from `message_start` / `message_delta` frames. */
  usage: AnthropicUsage;
}

/** Creates an empty parser state: no buffered text, no blocks, zeroed usage. */
export function newSseState(): SseParseState {
  const initial: SseParseState = {
    buffer: '',
    blocks: {},
    usage: { input_tokens: 0, output_tokens: 0 },
  };
  return initial;
}
|
|
154
|
+
|
|
155
|
+
/**
 * Pure, network-free SSE frame parser. Feeds on decoded text chunks and returns
 * any high-level events that completed, plus the carried-over parse state.
 *
 * `prev` is never modified: blocks and usage are copied before being updated,
 * so the same state can safely be parsed against more than once.
 *
 * Frames are separated by a blank line. Both LF and CRLF line endings are
 * accepted, including a CRLF pair split across two chunks. Frames without a
 * `data:` payload, `[DONE]` markers, and payloads that are not valid JSON are
 * skipped.
 *
 * @param chunk Newly decoded text from the stream.
 * @param prev State returned by the previous call, or a fresh state.
 * @returns The events completed by this chunk and the state for the next call.
 */
export function parseSseChunk(
  chunk: string,
  prev: SseParseState,
): { events: StreamEvent[]; state: SseParseState } {
  const events: StreamEvent[] = [];
  // Shallow copies keep the caller's state object untouched.
  const blocks = { ...prev.blocks };
  const usage = { ...(prev.usage ?? { input_tokens: 0, output_tokens: 0 }) };
  // Normalise CRLF to LF. A trailing lone '\r' stays in the buffer and is
  // paired with its '\n' when the next chunk arrives.
  let buffer = (prev.buffer + chunk).replace(/\r\n/g, '\n');

  let sep: number;
  while ((sep = buffer.indexOf('\n\n')) !== -1) {
    const frame = buffer.slice(0, sep);
    buffer = buffer.slice(sep + 2);

    const data = frame
      .split('\n')
      .filter((line) => line.startsWith('data:'))
      .map((line) => line.slice(5).trim())
      .join('');
    if (!data || data === '[DONE]') continue;

    let payload: Record<string, unknown>;
    try {
      payload = JSON.parse(data) as Record<string, unknown>;
    } catch {
      continue;
    }

    const type = payload.type;
    if (type === 'content_block_start') {
      const index = payload.index as number;
      const block = payload.content_block as { type?: string; id?: string; name?: string };
      if (block?.type === 'tool_use') {
        blocks[index] = { kind: 'tool_use', id: block.id ?? '', name: block.name ?? '', json: '' };
      } else {
        blocks[index] = { kind: 'text' };
      }
    } else if (type === 'content_block_delta') {
      const index = payload.index as number;
      const delta = payload.delta as { type?: string; text?: string; partial_json?: string };
      if (delta?.type === 'text_delta' && typeof delta.text === 'string') {
        events.push({ type: 'text', text: delta.text });
      } else if (delta?.type === 'input_json_delta') {
        const block = blocks[index];
        if (block?.kind === 'tool_use') {
          // Replace rather than mutate: the block object may be shared with `prev`.
          blocks[index] = { ...block, json: block.json + (delta.partial_json ?? '') };
        }
      }
    } else if (type === 'content_block_stop') {
      const block = blocks[payload.index as number];
      if (block?.kind === 'tool_use') {
        // Missing or malformed tool input is surfaced as an empty object.
        let input: unknown = {};
        try {
          input = JSON.parse(block.json || '{}');
        } catch {
          input = {};
        }
        events.push({ type: 'tool_use', id: block.id, name: block.name, input });
      }
    } else if (type === 'message_start') {
      // Carries the prompt token count; no event, just accumulate usage.
      const message = payload.message as { usage?: { input_tokens?: number } } | undefined;
      if (typeof message?.usage?.input_tokens === 'number') {
        usage.input_tokens = message.usage.input_tokens;
      }
    } else if (type === 'message_delta') {
      const delta = payload.delta as { stop_reason?: string | null };
      const deltaUsage = payload.usage as { output_tokens?: number } | undefined;
      if (typeof deltaUsage?.output_tokens === 'number') {
        usage.output_tokens = deltaUsage.output_tokens;
      }
      const hasUsage = usage.input_tokens > 0 || usage.output_tokens > 0;
      events.push({
        type: 'stop',
        stopReason: delta?.stop_reason ?? null,
        ...(hasUsage ? { usage: { ...usage } } : {}),
      });
    }
    // `ping` and `message_stop` need no surfaced event.
  }

  return { events, state: { buffer, blocks, usage } };
}
|