@hevmind/ask 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +33 -13
- package/bin/ask-launcher.mjs +19 -2
- package/openapi.yaml +53 -7
- package/package.json +6 -6
- package/skills/build-digest/SKILL.md +7 -7
- package/src/digest/build.ts +54 -16
- package/src/digest/cli.ts +19 -7
- package/src/digest/frontmatter.ts +7 -0
- package/src/digest/schema.ts +3 -0
- package/src/digest/tree.ts +259 -0
- package/src/digest/verify.ts +2 -11
- package/src/endpoint.ts +121 -5
- package/src/index.ts +1 -1
- package/src/integration.ts +16 -14
- package/src/llm-openai.ts +330 -0
- package/src/observability.ts +3 -1
- package/src/providers.ts +81 -0
- package/src/types.ts +34 -6
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
import { readFileSync, readdirSync, statSync } from 'node:fs';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import { parseFrontmatter } from './frontmatter.ts';
|
|
4
|
+
import { EMPTY_DIGEST, normalizeDigest, type Digest, type DigestNode, type Fact, type SourceRef } from './schema.ts';
|
|
5
|
+
|
|
6
|
+
/** File name, relative to the tree root, that holds digest-level frontmatter plus the context/overview body. */
const META_FILE = '_meta.md';
/** Directory, relative to the tree root, whose markdown files are read as glossary entries. */
const GLOSSARY_DIR = '_glossary';
/** Separator written between the context text and the overview text in the meta file body. */
const META_OVERVIEW = '\n## Overview\n\n';
|
|
9
|
+
|
|
10
|
+
/** One file of the on-disk digest tree, as produced by `digestTreeFiles`. */
export interface DigestTreeFile {
  /** Path of the file relative to the tree root (e.g. `_meta.md`). */
  path: string;
  /** Complete text content of the file. */
  body: string;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Loads the digest stored at `digestPath` (resolved against `siteRoot`),
 * accepting either a digest tree directory or a legacy JSON file.
 *
 * Sources are tried in order and the first one that loads wins:
 *   1. the resolved path itself: a tree when it is a directory, otherwise a
 *      legacy JSON file;
 *   2. `digest.json` inside the resolved path.
 *
 * Reading is best-effort by design: every failure (missing path, unreadable
 * file, invalid content) falls through to the next source, and `EMPTY_DIGEST`
 * is returned when none can be read. Each source is attempted once.
 *
 * @param siteRoot - Directory that `digestPath` is resolved against.
 * @param digestPath - Path of the digest directory or legacy JSON file.
 * @returns The loaded digest, or `EMPTY_DIGEST` when nothing could be read.
 */
export function readDigestArtifact(siteRoot: string, digestPath: string): Digest {
  const resolved = path.resolve(siteRoot, digestPath);
  const attempts: Array<() => Digest> = [
    () => (statSync(resolved).isDirectory() ? readDigestTree(resolved) : readLegacyJson(resolved)),
    () => readLegacyJson(path.join(resolved, 'digest.json')),
  ];
  for (const attempt of attempts) {
    try {
      return attempt();
    } catch {
      // Best-effort read: fall through to the next candidate source.
    }
  }
  return EMPTY_DIGEST;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Reads a digest tree rooted at `root` and assembles it into a `Digest`.
 *
 * The meta file supplies `generatedAt`, `contentHash`, `suggestions` and the
 * context/overview text. Every other `.md` file found by `walk` becomes either
 * a glossary entry (files under the glossary directory) or a node. Glossary
 * entries are sorted by term and nodes by id before normalization.
 *
 * @param root - Directory containing the digest tree.
 * @returns The normalized digest.
 * @throws If the meta file or any tree file cannot be read.
 */
export function readDigestTree(root: string): Digest {
  const metaSource = readFileSync(path.join(root, META_FILE), 'utf8');
  const meta = parseFrontmatter(metaSource);
  const sections = parseMetaBody(meta.body);

  const digest: Digest = {
    version: 2,
    generatedAt: stringField(meta.data, 'generatedAt'),
    contentHash: stringField(meta.data, 'contentHash'),
    context: sections.context,
    glossary: [],
    overview: sections.overview,
    suggestions: stringArrayField(meta.data, 'suggestions'),
    nodes: [],
    edges: [],
  };

  const glossaryPrefix = `${GLOSSARY_DIR}/`;
  for (const absolute of walk(root)) {
    // Normalize to forward slashes so the checks below work on every platform.
    const rel = path.relative(root, absolute).replace(/\\/g, '/');
    const isTreeEntry = rel !== META_FILE && rel.endsWith('.md');
    if (!isTreeEntry) continue;

    const parsed = parseFrontmatter(readFileSync(absolute, 'utf8'));
    if (!rel.startsWith(glossaryPrefix)) {
      const node = nodeFromTreeFile(rel, parsed);
      if (node) digest.nodes.push(node);
      continue;
    }

    // Glossary entry: the term falls back to a title derived from the file name.
    const term = stringField(parsed.data, 'term') || titleFromPath(rel);
    if (term) {
      digest.glossary.push({
        term,
        aliases: stringArrayField(parsed.data, 'aliases'),
        definition: parsed.body.trim(),
      });
    }
  }

  digest.glossary.sort((left, right) => left.term.localeCompare(right.term));
  digest.nodes.sort((left, right) => left.id.localeCompare(right.id));
  return normalizeDigest(digest);
}
|
|
73
|
+
|
|
74
|
+
/**
 * Renders a digest as the list of files that make up its tree form: the meta
 * file, one file per glossary entry and one file per node.
 *
 * Paths are guaranteed unique (compared case-insensitively). When two entries
 * would map to the same path, for example glossary terms that slug
 * identically or a node id that maps onto the meta file, later ones get a
 * `-2`, `-3`, … suffix instead of silently replacing the earlier file.
 *
 * @param digest - Digest to serialize.
 * @returns The tree files, sorted by path.
 */
export function digestTreeFiles(digest: Digest): DigestTreeFile[] {
  const taken = new Set<string>([META_FILE.toLowerCase()]);
  // Reserves `<stem>.md`, or the first free `<stem>-N.md` when it is already used.
  const claim = (stem: string): string => {
    let candidate = `${stem}.md`;
    for (let n = 2; taken.has(candidate.toLowerCase()); n += 1) candidate = `${stem}-${n}.md`;
    taken.add(candidate.toLowerCase());
    return candidate;
  };

  const files: DigestTreeFile[] = [{ path: META_FILE, body: renderMetaFile(digest) }];
  for (const entry of digest.glossary) {
    files.push({
      path: claim(`${GLOSSARY_DIR}/${glossaryPath(entry.term)}`),
      body: markdownWithFrontmatter(
        [
          ['term', entry.term],
          ['aliases', entry.aliases],
        ],
        entry.definition,
      ),
    });
  }
  digest.nodes.forEach((node, order) => {
    files.push({ path: claim(nodePath(node)), body: renderNodeFile(node, order) });
  });
  return files.sort((a, b) => a.path.localeCompare(b.path));
}
|
|
93
|
+
|
|
94
|
+
/**
 * Reads a legacy single-file JSON digest and normalizes it.
 *
 * @param file - Path of the JSON file.
 * @throws If the file cannot be read or does not contain valid JSON.
 */
function readLegacyJson(file: string): Digest {
  const raw = readFileSync(file, 'utf8');
  const parsed = JSON.parse(raw);
  return normalizeDigest(parsed);
}
|
|
97
|
+
|
|
98
|
+
/**
 * Renders the meta file: digest-level fields as frontmatter, followed by a
 * `## Context` section and an overview section in the body.
 *
 * `generatedAt` falls back to the current time when the digest has none.
 */
function renderMetaFile(digest: Digest): string {
  const stamp = digest.generatedAt || new Date().toISOString();
  const context = digest.context.trim();
  const overview = digest.overview.trim();
  const body = `## Context\n\n${context}${META_OVERVIEW}${overview}`.trim();
  const frontmatter: Array<[string, unknown]> = [
    ['version', digest.version],
    ['generatedAt', stamp],
    ['contentHash', digest.contentHash],
    ['suggestions', digest.suggestions],
  ];
  return markdownWithFrontmatter(frontmatter, body);
}
|
|
109
|
+
|
|
110
|
+
/**
 * Renders one digest node as a tree file: structured fields go into the
 * frontmatter and the node summary becomes the markdown body.
 *
 * @param node - Node to serialize.
 * @param order - Value recorded in the `order` frontmatter field.
 */
function renderNodeFile(node: DigestNode, order: number): string {
  // The top-level `anchor` mirrors the first source's anchor, when there is one.
  const primaryAnchor = node.sources[0]?.anchor ?? null;
  const frontmatter: Array<[string, unknown]> = [
    ['id', node.id],
    ['title', node.title],
    ['heading', node.heading],
    ['group', node.group],
    ['order', order],
    ['url', node.url],
    ['anchor', primaryAnchor],
    ['terms', node.terms],
    ['hash', node.hash ?? null],
    ['mode', node.mode],
    ['facts', node.facts],
    ['sources', node.sources],
  ];
  return markdownWithFrontmatter(frontmatter, node.summary);
}
|
|
129
|
+
|
|
130
|
+
/**
 * Builds a markdown document with a `---`-delimited frontmatter block.
 *
 * @param fields - Ordered `[key, value]` pairs, written one per line as `key: value`.
 * @param body - Markdown body; it is trimmed and terminated with a single newline.
 */
function markdownWithFrontmatter(fields: Array<[string, unknown]>, body: string): string {
  const lines: string[] = [];
  for (const [key, value] of fields) {
    lines.push(`${key}: ${formatFrontmatterValue(value)}`);
  }
  return ['---', lines.join('\n'), '---', '', `${body.trim()}\n`].join('\n');
}
|
|
134
|
+
|
|
135
|
+
/**
 * Formats a single frontmatter value.
 *
 * `null`, `undefined` and the empty string are written as `null`; numbers are
 * written as-is; everything else is written as its JSON encoding.
 */
function formatFrontmatterValue(value: unknown): string {
  const isBlank = value === null || value === undefined || value === '';
  if (isBlank) return 'null';
  return typeof value === 'number' ? String(value) : JSON.stringify(value);
}
|
|
140
|
+
|
|
141
|
+
/**
 * Maps a node id to its extension-less path inside the digest tree.
 *
 * `#` (and `\`) are treated as path separators. Empty, `.` and `..` segments
 * are dropped so that an id can never produce a path that escapes the tree
 * root when the files are written to disk or packed into an archive.
 *
 * @param node - Node whose id is converted.
 * @returns A relative, forward-slash path with no leading or trailing slash.
 */
function nodePath(node: DigestNode): string {
  return node.id
    .trim()
    .replace(/[#\\]/g, '/')
    .split('/')
    .filter((segment) => segment !== '' && segment !== '.' && segment !== '..')
    .join('/');
}
|
|
144
|
+
|
|
145
|
+
/**
 * Derives the file-name slug for a glossary term: lower-cased, restricted to
 * `a-z`, `0-9`, space, `_` and `-`, with runs of spaces collapsed to `-`.
 * Falls back to `term` when nothing usable remains.
 */
function glossaryPath(term: string): string {
  const lowered = term.toLowerCase();
  const allowedOnly = lowered.replace(/[^a-z0-9 _-]+/g, '');
  const slug = allowedOnly.trim().replace(/\s+/g, '-');
  return slug.length > 0 ? slug : 'term';
}
|
|
153
|
+
|
|
154
|
+
/**
 * Builds a digest node from one parsed tree file.
 *
 * The id comes from the `id` frontmatter field, falling back to the relative
 * path with `.md` removed and `/` replaced by `#`. The summary is the first
 * paragraph of the body, falling back to the `summary` frontmatter field.
 * When no valid `sources` are listed, a single source is synthesized from the
 * node's own id, url and anchor.
 *
 * @param rel - Path of the file relative to the tree root, using `/` separators.
 * @param parsed - Result of parsing the file's frontmatter.
 * @returns The node, or `null` when the file has no usable id or no `url`.
 */
function nodeFromTreeFile(rel: string, parsed: ReturnType<typeof parseFrontmatter>): DigestNode | null {
  const { data, body } = parsed;
  const pathDerivedId = rel.replace(/\.md$/, '').replace(/\//g, '#');
  const id = stringField(data, 'id') || pathDerivedId;
  const url = stringField(data, 'url');
  if (!id || !url) return null;

  const fallbackSource: SourceRef = { chunkId: id, url, anchor: nullableStringField(data, 'anchor') };
  const listedSources = sourceArrayField(data, 'sources');
  const isSourcePrimary = stringField(data, 'mode') === 'source-primary';

  return {
    id,
    kind: 'section',
    title: stringField(data, 'title') || id,
    heading: nullableStringField(data, 'heading'),
    group: nullableStringField(data, 'group'),
    url,
    summary: firstParagraph(body) || stringField(data, 'summary'),
    hash: stringField(data, 'hash') || undefined,
    facts: factArrayField(data, 'facts'),
    sources: listedSources.length ? listedSources : [fallbackSource],
    mode: isSourcePrimary ? 'source-primary' : 'agent-primary',
    terms: stringArrayField(data, 'terms'),
  };
}
|
|
175
|
+
|
|
176
|
+
/**
 * Splits the meta file body into its context and overview sections.
 *
 * The body is expected to look like `## Context … ## Overview …`. Headings are
 * matched as whole lines, so the split also works when the context section is
 * empty (the overview heading directly follows the context heading), when the
 * overview section is empty (the overview heading is the last line), and with
 * CRLF line endings.
 *
 * @param body - Markdown body of the meta file.
 * @returns Trimmed `context` and `overview`; `overview` is `''` when the body
 *   has no overview heading, in which case the whole body is the context.
 */
function parseMetaBody(body: string): { context: string; overview: string } {
  // No `m` flag: `^`/`$` anchor to the start/end of the whole string.
  const contextHeading = /^## Context[ \t]*(?=\r?\n|$)/;
  const overviewHeading = /(?:^|\r?\n)## Overview[ \t]*(?=\r?\n|$)/;

  const text = body.trim().replace(contextHeading, '').trim();
  const match = overviewHeading.exec(text);
  if (!match) return { context: text, overview: '' };
  return {
    context: text.slice(0, match.index).trim(),
    overview: text.slice(match.index + match[0].length).trim(),
  };
}
|
|
185
|
+
|
|
186
|
+
/**
 * Recursively lists every non-directory entry under `dir`, depth-first in
 * directory-listing order. Directories named `shards` are not descended into.
 *
 * @param dir - Directory to list.
 * @returns Paths of the entries found, each prefixed with `dir`.
 */
function walk(dir: string): string[] {
  return readdirSync(dir, { withFileTypes: true }).flatMap((entry) => {
    const full = path.join(dir, entry.name);
    if (!entry.isDirectory()) return [full];
    return entry.name === 'shards' ? [] : walk(full);
  });
}
|
|
199
|
+
|
|
200
|
+
/**
 * Returns the first non-empty paragraph of `body`, trimmed. Paragraphs are
 * separated by two or more consecutive newlines. Returns `''` when there is none.
 */
function firstParagraph(body: string): string {
  for (const block of body.trim().split(/\n{2,}/)) {
    const paragraph = block.trim();
    if (paragraph) return paragraph;
  }
  return '';
}
|
|
207
|
+
|
|
208
|
+
/**
 * Derives a display title from a file path: the base name without its `.md`
 * extension, with every `-` replaced by a space.
 */
function titleFromPath(value: string): string {
  const stem = path.basename(value, '.md');
  return stem.split('-').join(' ');
}
|
|
211
|
+
|
|
212
|
+
/** Returns `data[key]` when it is a string, otherwise `''`. */
function stringField(data: Record<string, unknown>, key: string): string {
  const candidate = data[key];
  if (typeof candidate !== 'string') return '';
  return candidate;
}
|
|
216
|
+
|
|
217
|
+
/** Returns `data[key]` when it is a non-empty string, otherwise `null`. */
function nullableStringField(data: Record<string, unknown>, key: string): string | null {
  const text = stringField(data, key);
  return text === '' ? null : text;
}
|
|
220
|
+
|
|
221
|
+
/**
 * Reads `data[key]` as a list of non-empty strings.
 *
 * An array keeps only its non-empty string items; a single non-empty string
 * becomes a one-element list; anything else yields `[]`.
 */
function stringArrayField(data: Record<string, unknown>, key: string): string[] {
  const raw = data[key];
  if (typeof raw === 'string') return raw ? [raw] : [];
  if (!Array.isArray(raw)) return [];

  const strings: string[] = [];
  for (const item of raw) {
    if (typeof item === 'string' && item.length > 0) strings.push(item);
  }
  return strings;
}
|
|
226
|
+
|
|
227
|
+
/**
 * Reads `data[key]` as a list of facts.
 *
 * Non-array values yield `[]`. Items that are not objects, or that lack a
 * non-empty string `literal`, are skipped. `kind` defaults to `'value'` and a
 * non-string `chunkId` becomes `''`.
 */
function factArrayField(data: Record<string, unknown>, key: string): Fact[] {
  const raw = data[key];
  const facts: Fact[] = [];
  if (!Array.isArray(raw)) return facts;

  for (const entry of raw) {
    if (!entry || typeof entry !== 'object') continue;
    const { kind, literal, chunkId } = entry as Partial<Fact>;
    if (typeof literal !== 'string' || !literal) continue;
    facts.push({
      kind: kind ?? 'value',
      literal,
      chunkId: typeof chunkId === 'string' ? chunkId : '',
    });
  }
  return facts;
}
|
|
243
|
+
|
|
244
|
+
/**
 * Reads `data[key]` as a list of source references.
 *
 * Non-array values yield `[]`. Items that are not objects, or whose `chunkId`
 * or `url` is not a string, are skipped. A non-string `anchor` becomes `null`.
 */
function sourceArrayField(data: Record<string, unknown>, key: string): SourceRef[] {
  const raw = data[key];
  const sources: SourceRef[] = [];
  if (!Array.isArray(raw)) return sources;

  for (const entry of raw) {
    if (!entry || typeof entry !== 'object') continue;
    const { chunkId, url, anchor } = entry as Partial<SourceRef>;
    if (typeof chunkId !== 'string' || typeof url !== 'string') continue;
    sources.push({
      chunkId,
      url,
      anchor: typeof anchor === 'string' ? anchor : null,
    });
  }
  return sources;
}
|
package/src/digest/verify.ts
CHANGED
|
@@ -4,7 +4,7 @@ import path from 'node:path';
|
|
|
4
4
|
import { promisify } from 'node:util';
|
|
5
5
|
import { buildCorpus } from './build.ts';
|
|
6
6
|
import { extractFacts } from './facts.ts';
|
|
7
|
-
import {
|
|
7
|
+
import { readDigestArtifact } from './tree.ts';
|
|
8
8
|
|
|
9
9
|
const execFileAsync = promisify(execFile);
|
|
10
10
|
|
|
@@ -65,8 +65,7 @@ async function verifyFidelity(
|
|
|
65
65
|
options: VerifyAnchorsOptions,
|
|
66
66
|
chunks: Awaited<ReturnType<typeof buildCorpus>>['chunks'],
|
|
67
67
|
): Promise<{ dropped: VerifyAnchorsResult['dropped']; uncovered: string[] }> {
|
|
68
|
-
const
|
|
69
|
-
const digest = normalizeDigest(await readJson(digestPath));
|
|
68
|
+
const digest = readDigestArtifact(options.siteRoot, options.digestPath ?? '.hev-ask');
|
|
70
69
|
if (!digest.nodes.length) return { dropped: [], uncovered: [] }; // v1 / degraded digest — nothing to check
|
|
71
70
|
|
|
72
71
|
const nodeById = new Map(digest.nodes.map((node) => [node.id, node]));
|
|
@@ -89,14 +88,6 @@ async function verifyFidelity(
|
|
|
89
88
|
return { dropped, uncovered };
|
|
90
89
|
}
|
|
91
90
|
|
|
92
|
-
async function readJson(file: string): Promise<unknown> {
|
|
93
|
-
try {
|
|
94
|
-
return JSON.parse(await readFile(file, 'utf8'));
|
|
95
|
-
} catch {
|
|
96
|
-
return null;
|
|
97
|
-
}
|
|
98
|
-
}
|
|
99
|
-
|
|
100
91
|
async function findHtmlWithId(files: string[], id: string): Promise<boolean> {
|
|
101
92
|
for (const file of files) {
|
|
102
93
|
const html = await readFile(file, 'utf8').catch(() => '');
|
package/src/endpoint.ts
CHANGED
|
@@ -9,7 +9,9 @@ import {
|
|
|
9
9
|
listGlossary,
|
|
10
10
|
listSectionSummaries,
|
|
11
11
|
} from './digest/read.ts';
|
|
12
|
+
import { digestTreeFiles } from './digest/tree.ts';
|
|
12
13
|
import { makeTelemetry, telemetryFromEnv } from './observability';
|
|
14
|
+
import { PROVIDERS, clientFor, resolveProviderName } from './providers';
|
|
13
15
|
import { hashableChunkText } from './search/chunk';
|
|
14
16
|
import { buildIndex, prefilter, type Candidate, type Chunk } from './search/index';
|
|
15
17
|
import { runAgenticAnswerLoop, type AgenticEvent } from './search/loop';
|
|
@@ -34,8 +36,13 @@ function resolveEnv(locals: unknown): Record<string, string | undefined> {
|
|
|
34
36
|
return { ...fromImportMeta, ...fromProcess, ...fromRuntime };
|
|
35
37
|
}
|
|
36
38
|
|
|
39
|
+
// `config.provider` is baked at build time; only the key is read per-request.
|
|
40
|
+
const provider = resolveProviderName(config.provider);
|
|
41
|
+
const providerEnvKey = PROVIDERS[provider].envKey;
|
|
42
|
+
const llm = clientFor(provider, config.providerBaseUrl);
|
|
43
|
+
|
|
37
44
|
function resolveApiKey(locals: unknown): string | undefined {
|
|
38
|
-
return resolveEnv(locals)
|
|
45
|
+
return resolveEnv(locals)[providerEnvKey];
|
|
39
46
|
}
|
|
40
47
|
|
|
41
48
|
// PostHog LLM tracing for the answer loop. On Cloudflare, capture promises
|
|
@@ -45,17 +52,25 @@ function resolveTelemetry(locals: unknown) {
|
|
|
45
52
|
const ctx = (locals as { runtime?: { ctx?: { waitUntil?: (promise: Promise<unknown>) => void } } })
|
|
46
53
|
?.runtime?.ctx;
|
|
47
54
|
const waitUntil = ctx?.waitUntil ? (promise: Promise<unknown>) => ctx.waitUntil!(promise) : undefined;
|
|
48
|
-
return makeTelemetry(telemetryFromEnv(resolveEnv(locals), { waitUntil }));
|
|
55
|
+
return makeTelemetry(telemetryFromEnv(resolveEnv(locals), { waitUntil, provider }));
|
|
49
56
|
}
|
|
50
57
|
|
|
51
58
|
// The overlay fetches suggested questions from the base route. Sub-routes expose
|
|
52
59
|
// keyless reads over the committed digest for CLI, MCP, and generated clients.
|
|
53
|
-
export const GET: APIRoute = ({ params, request }) => {
|
|
60
|
+
export const GET: APIRoute = async ({ params, request }) => {
|
|
54
61
|
const resource = resourceSegments(params.resource);
|
|
55
62
|
if (!resource.length) return json({ suggestions: digest.suggestions ?? [], model: config.model });
|
|
56
63
|
return readResource(resource, request);
|
|
57
64
|
};
|
|
58
65
|
|
|
66
|
+
// HEAD is answered only for the `archive` resource: it returns the archive
// headers with an empty body. Every other resource gets the not-found response.
export const HEAD: APIRoute = ({ params }) => {
  const segments = resourceSegments(params.resource);
  const isArchive = segments.length === 1 && decodePathValue(segments[0]).trim() === 'archive';
  return isArchive ? new Response(null, { status: 200, headers: archiveHeaders() }) : notFound();
};
|
|
73
|
+
|
|
59
74
|
export const POST: APIRoute = async ({ request, locals, params }) => {
|
|
60
75
|
if (resourceSegments(params.resource).length) return notFound();
|
|
61
76
|
|
|
@@ -95,7 +110,7 @@ export const POST: APIRoute = async ({ request, locals, params }) => {
|
|
|
95
110
|
query,
|
|
96
111
|
model: config.model,
|
|
97
112
|
mode: 'keyword',
|
|
98
|
-
warning:
|
|
113
|
+
warning: `AI search is unavailable because ${providerEnvKey} is not configured.`,
|
|
99
114
|
});
|
|
100
115
|
}
|
|
101
116
|
|
|
@@ -121,6 +136,8 @@ export const POST: APIRoute = async ({ request, locals, params }) => {
|
|
|
121
136
|
chunks,
|
|
122
137
|
digest,
|
|
123
138
|
telemetry: resolveTelemetry(locals),
|
|
139
|
+
call: llm.call,
|
|
140
|
+
stream: llm.stream,
|
|
124
141
|
config: {
|
|
125
142
|
model: config.model,
|
|
126
143
|
maxIterations: config.maxIterations,
|
|
@@ -154,10 +171,12 @@ export const POST: APIRoute = async ({ request, locals, params }) => {
|
|
|
154
171
|
});
|
|
155
172
|
};
|
|
156
173
|
|
|
157
|
-
function readResource(resource: string[], request: Request): Response {
|
|
174
|
+
async function readResource(resource: string[], request: Request): Promise<Response> {
|
|
158
175
|
const [rawRoot, ...rest] = resource;
|
|
159
176
|
const root = decodePathValue(rawRoot).trim();
|
|
160
177
|
|
|
178
|
+
if (root === 'archive' && rest.length === 0) return archiveResponse();
|
|
179
|
+
|
|
161
180
|
if (root === 'glossary') {
|
|
162
181
|
if (!rest.length) return json({ terms: listGlossary(digest) });
|
|
163
182
|
const entry = getGlossaryEntry(digest, rest.join('/'));
|
|
@@ -178,6 +197,103 @@ function readResource(resource: string[], request: Request): Response {
|
|
|
178
197
|
return notFound();
|
|
179
198
|
}
|
|
180
199
|
|
|
200
|
+
/**
 * Builds the archive download: the digest tree files packed into a tar
 * archive, gzip-compressed, and returned with the archive headers.
 */
async function archiveResponse(): Promise<Response> {
  const files = digestTreeFiles(digest);
  const compressed = await gzip(writeTar(files));
  return new Response(arrayBufferFor(compressed), { status: 200, headers: archiveHeaders() });
}
|
|
205
|
+
|
|
206
|
+
/**
 * Response headers for the digest archive: gzip content type, an attachment
 * file name, a 60-second public cache lifetime and the digest content hash
 * (empty when the digest has none).
 */
function archiveHeaders(): HeadersInit {
  const contentHash = digest.contentHash ?? '';
  const headers: HeadersInit = {
    'content-type': 'application/gzip',
    'content-disposition': 'attachment; filename="hev-ask-digest.tar.gz"',
    'cache-control': 'public, max-age=60',
    'x-hev-ask-content-hash': contentHash,
  };
  return headers;
}
|
|
214
|
+
|
|
215
|
+
/** One entry to pack into the tar archive. */
interface ArchiveFile {
  /** Path of the entry inside the archive. */
  path: string;
  /** Text content of the entry; it is UTF-8 encoded when packed. */
  body: string;
}
|
|
219
|
+
|
|
220
|
+
/**
 * Packs `files` into an in-memory tar archive.
 *
 * Each entry is a 512-byte header followed by the UTF-8 encoded body, padded
 * with zero bytes to a multiple of 512. The archive ends with 1024 zero bytes.
 *
 * @param files - Entries to pack, in archive order.
 * @returns The archive bytes.
 */
function writeTar(files: ArchiveFile[]): Uint8Array {
  const BLOCK = 512;
  const encoder = new TextEncoder();
  const parts: Uint8Array[] = [];
  let total = 0;
  const append = (part: Uint8Array): void => {
    parts.push(part);
    total += part.length;
  };

  for (const { path: entryPath, body } of files) {
    const payload = encoder.encode(body);
    append(tarHeader(entryPath, payload.length));
    append(payload);
    const remainder = payload.length % BLOCK;
    if (remainder !== 0) append(new Uint8Array(BLOCK - remainder));
  }
  // End-of-archive marker: two zero-filled blocks.
  append(new Uint8Array(2 * BLOCK));

  const archive = new Uint8Array(total);
  let cursor = 0;
  for (const part of parts) {
    archive.set(part, cursor);
    cursor += part.length;
  }
  return archive;
}
|
|
240
|
+
|
|
241
|
+
/**
 * Builds the 512-byte ustar header for a regular file entry.
 *
 * Mode is `0644`; uid, gid and mtime are written as zero. The checksum is the
 * sum of all header bytes, computed while the checksum field holds spaces.
 *
 * @param filePath - Entry path; split into name/prefix by `tarNameParts`.
 * @param size - Body size in bytes.
 * @throws If `filePath` cannot be split to fit the name/prefix fields.
 */
function tarHeader(filePath: string, size: number): Uint8Array {
  const block = new Uint8Array(512);
  const parts = tarNameParts(filePath);

  writeAscii(block, 0, 100, parts.name);
  // [offset, field width, value]: mode, uid, gid, size, mtime.
  const octalFields: Array<[number, number, number]> = [
    [100, 8, 0o644],
    [108, 8, 0],
    [116, 8, 0],
    [124, 12, size],
    [136, 12, 0],
  ];
  for (const [offset, width, value] of octalFields) writeOctal(block, offset, width, value);

  block.fill(0x20, 148, 156); // checksum field counts as spaces while summing
  block[156] = 0x30; // type flag '0'
  writeAscii(block, 257, 6, 'ustar');
  writeAscii(block, 263, 2, '00');
  writeAscii(block, 345, 155, parts.prefix);

  const checksum = block.reduce((sum, byte) => sum + byte, 0);
  writeOctal(block, 148, 8, checksum);
  return block;
}
|
|
260
|
+
|
|
261
|
+
/**
 * Splits an archive path into the ustar `name` (at most 100) and `prefix`
 * (at most 155) fields. Leading slashes are removed first.
 *
 * Paths of up to 100 characters go entirely into `name`. Longer paths are
 * split at a `/` such that both parts fit; every slash inside the valid window
 * is considered, and the earliest one (shortest prefix) is used. The slash
 * itself is not stored; `prefix + '/' + name` reproduces the path.
 *
 * NOTE(review): lengths are measured in string characters, while the header
 * fields are byte-sized; paths containing non-ASCII characters may need a
 * byte-length check. Confirm against the ids that can reach this function.
 *
 * @param filePath - Path of the archive entry.
 * @returns The `name` and `prefix` field values.
 * @throws Error when no split satisfies both length limits.
 */
function tarNameParts(filePath: string): { name: string; prefix: string } {
  const NAME_MAX = 100;
  const PREFIX_MAX = 155;
  const normalized = filePath.replace(/^\/+/, '');
  if (normalized.length <= NAME_MAX) return { name: normalized, prefix: '' };

  // Splitting at slash index i gives prefix length i and name length
  // (length - i - 1); both must be at least 1 and within their limits.
  const firstSplit = Math.max(1, normalized.length - NAME_MAX - 1);
  const lastSplit = Math.min(PREFIX_MAX, normalized.length - 2);
  for (let i = firstSplit; i <= lastSplit; i += 1) {
    if (normalized[i] !== '/') continue;
    return { name: normalized.slice(i + 1), prefix: normalized.slice(0, i) };
  }
  throw new Error(`Digest archive path is too long for ustar: ${filePath}`);
}
|
|
273
|
+
|
|
274
|
+
/**
 * Writes the UTF-8 encoding of `value` into `target` starting at `offset`,
 * keeping at most `length` bytes. Bytes past the written text are left as they were.
 */
function writeAscii(target: Uint8Array, offset: number, length: number, value: string): void {
  const encoded = new TextEncoder().encode(value);
  const clipped = encoded.subarray(0, length);
  target.set(clipped, offset);
}
|
|
278
|
+
|
|
279
|
+
/**
 * Writes `value` as zero-padded octal text of `length - 1` digits at `offset`,
 * leaving the last byte of the field untouched. When the number needs more
 * digits than fit, only the low-order digits are kept.
 */
function writeOctal(target: Uint8Array, offset: number, length: number, value: number): void {
  const width = length - 1;
  const digits = value.toString(8).padStart(width, '0');
  writeAscii(target, offset, length, digits.slice(-width));
}
|
|
283
|
+
|
|
284
|
+
/**
 * Gzip-compresses `data` using the runtime's global `CompressionStream`.
 *
 * @param data - Bytes to compress.
 * @returns The compressed bytes.
 * @throws Error when the runtime does not provide `CompressionStream`.
 */
async function gzip(data: Uint8Array): Promise<Uint8Array> {
  type GzipStreamCtor = new (format: 'gzip') => TransformStream<Uint8Array, Uint8Array>;
  const Ctor = (globalThis as { CompressionStream?: GzipStreamCtor }).CompressionStream;
  if (!Ctor) throw new Error('CompressionStream is unavailable in this runtime.');

  const source = new Blob([arrayBufferFor(data)]).stream();
  const compressed = source.pipeThrough(new Ctor('gzip'));
  const buffer = await new Response(compressed).arrayBuffer();
  return new Uint8Array(buffer);
}
|
|
290
|
+
|
|
291
|
+
/**
 * Copies the bytes of `data` into a freshly allocated `ArrayBuffer` of exactly
 * `data.byteLength` bytes, independent of the buffer backing the view.
 */
function arrayBufferFor(data: Uint8Array): ArrayBuffer {
  const buffer = new ArrayBuffer(data.byteLength);
  new Uint8Array(buffer).set(data);
  return buffer;
}
|
|
296
|
+
|
|
181
297
|
function resourceSegments(value: string | undefined): string[] {
|
|
182
298
|
return (value ?? '')
|
|
183
299
|
.split('/')
|
package/src/index.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
export { default } from './integration';
|
|
2
|
-
export type { HevAskOptions } from './types';
|
|
2
|
+
export type { HevAskOptions, ProviderName } from './types';
|
package/src/integration.ts
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
1
|
import type { AstroIntegration } from 'astro';
|
|
2
2
|
import { execFile } from 'node:child_process';
|
|
3
|
-
import { readFileSync } from 'node:fs';
|
|
4
|
-
import path from 'node:path';
|
|
5
3
|
import { promisify } from 'node:util';
|
|
6
4
|
import { fileURLToPath } from 'node:url';
|
|
7
|
-
import {
|
|
5
|
+
import { readDigestArtifact } from './digest/tree';
|
|
6
|
+
import { PROVIDERS, resolveProviderName } from './providers';
|
|
8
7
|
import type { HevAskOptions, ResolvedConfig } from './types';
|
|
9
8
|
|
|
10
9
|
const CONFIG_VIRTUAL_ID = 'virtual:hev-ask/config';
|
|
@@ -16,10 +15,13 @@ const execFileAsync = promisify(execFile);
|
|
|
16
15
|
* configuration plus the committed ask digest through virtual modules.
|
|
17
16
|
*/
|
|
18
17
|
export default function hevAsk(options: HevAskOptions = {}): AstroIntegration {
|
|
18
|
+
const provider = resolveProviderName(options.provider);
|
|
19
19
|
const config: ResolvedConfig = {
|
|
20
20
|
collections: options.collections ?? null,
|
|
21
|
-
|
|
22
|
-
|
|
21
|
+
provider,
|
|
22
|
+
providerBaseUrl: options.providerBaseUrl,
|
|
23
|
+
model: options.model ?? PROVIDERS[provider].defaultModel,
|
|
24
|
+
digestModel: options.digestModel ?? PROVIDERS[provider].defaultDigestModel,
|
|
23
25
|
endpoint: options.endpoint ?? '/api/ask',
|
|
24
26
|
basePath: options.basePath ?? '/docs/',
|
|
25
27
|
maxResults: options.maxResults ?? 6,
|
|
@@ -28,7 +30,7 @@ export default function hevAsk(options: HevAskOptions = {}): AstroIntegration {
|
|
|
28
30
|
chunkHeadingDepth: options.chunkHeadingDepth ?? 3,
|
|
29
31
|
candidatePerSearch: options.candidatePerSearch ?? 8,
|
|
30
32
|
perDocCap: options.perDocCap ?? 2,
|
|
31
|
-
digestPath: options.digestPath ?? '.hev-ask
|
|
33
|
+
digestPath: options.digestDir ?? options.digestPath ?? '.hev-ask',
|
|
32
34
|
digestContentGlobs: options.digestContentGlobs,
|
|
33
35
|
};
|
|
34
36
|
|
|
@@ -58,9 +60,10 @@ export default function hevAsk(options: HevAskOptions = {}): AstroIntegration {
|
|
|
58
60
|
},
|
|
59
61
|
'astro:build:start': async ({ logger }) => {
|
|
60
62
|
if (!config.collections?.length) return;
|
|
61
|
-
const
|
|
63
|
+
const envKey = PROVIDERS[config.provider].envKey;
|
|
64
|
+
const apiKey = process.env[envKey];
|
|
62
65
|
if (!apiKey) {
|
|
63
|
-
logger.warn(
|
|
66
|
+
logger.warn(`${envKey} is not set; using committed ${config.digestPath} if present.`);
|
|
64
67
|
return;
|
|
65
68
|
}
|
|
66
69
|
|
|
@@ -105,11 +108,7 @@ function virtualDigestPlugin(config: ResolvedConfig, siteRoot: string) {
|
|
|
105
108
|
}
|
|
106
109
|
|
|
107
110
|
function readDigest(siteRoot: string, digestPath: string) {
|
|
108
|
-
|
|
109
|
-
return normalizeDigest(JSON.parse(readFileSync(path.resolve(siteRoot, digestPath), 'utf8')));
|
|
110
|
-
} catch {
|
|
111
|
-
return EMPTY_DIGEST;
|
|
112
|
-
}
|
|
111
|
+
return readDigestArtifact(siteRoot, digestPath);
|
|
113
112
|
}
|
|
114
113
|
|
|
115
114
|
async function runDigestBuild(siteRoot: string, config: ResolvedConfig): Promise<string> {
|
|
@@ -118,7 +117,7 @@ async function runDigestBuild(siteRoot: string, config: ResolvedConfig): Promise
|
|
|
118
117
|
askBin,
|
|
119
118
|
'digest',
|
|
120
119
|
'build',
|
|
121
|
-
'--digest-
|
|
120
|
+
'--digest-dir',
|
|
122
121
|
config.digestPath,
|
|
123
122
|
'--base-path',
|
|
124
123
|
config.basePath,
|
|
@@ -126,7 +125,10 @@ async function runDigestBuild(siteRoot: string, config: ResolvedConfig): Promise
|
|
|
126
125
|
String(config.chunkHeadingDepth),
|
|
127
126
|
'--digest-model',
|
|
128
127
|
config.digestModel,
|
|
128
|
+
'--provider',
|
|
129
|
+
config.provider,
|
|
129
130
|
];
|
|
131
|
+
if (config.providerBaseUrl) args.push('--provider-url', config.providerBaseUrl);
|
|
130
132
|
for (const collection of config.collections ?? []) args.push('--collection', collection);
|
|
131
133
|
for (const glob of config.digestContentGlobs ?? []) args.push('--content-glob', glob);
|
|
132
134
|
|