@hevmind/ask 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +33 -13
- package/openapi.yaml +53 -7
- package/package.json +6 -6
- package/skills/build-digest/SKILL.md +7 -7
- package/src/digest/build.ts +54 -16
- package/src/digest/cli.ts +19 -7
- package/src/digest/frontmatter.ts +7 -0
- package/src/digest/schema.ts +3 -0
- package/src/digest/tree.ts +259 -0
- package/src/digest/verify.ts +2 -11
- package/src/endpoint.ts +121 -5
- package/src/index.ts +1 -1
- package/src/integration.ts +16 -14
- package/src/llm-openai.ts +330 -0
- package/src/observability.ts +3 -1
- package/src/providers.ts +81 -0
- package/src/types.ts +34 -6
package/README.md
CHANGED
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
# @hevmind/ask
|
|
2
2
|
|
|
3
|
-
hev ask is a heading-anchored search overlay for
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
hev ask is a heading-anchored search overlay for docs sites. The digest is built
|
|
4
|
+
from your markdown, not your renderer — **Astro** gets the turnkey integration
|
|
5
|
+
below, and **Docusaurus, VitePress, MkDocs, or any static site** get the same
|
|
6
|
+
overlay as a one-script drop-in (see [Frameworks](https://hevask.com/docs/frameworks)).
|
|
7
|
+
Typing runs instant keyword search; pressing `Enter` runs an optional Claude
|
|
8
|
+
search loop that chooses sub-queries and ranks section results.
|
|
6
9
|
|
|
7
10
|
## Install
|
|
8
11
|
|
|
@@ -36,7 +39,9 @@ export default defineConfig({
|
|
|
36
39
|
| Option | Default | Description |
|
|
37
40
|
| --- | --- | --- |
|
|
38
41
|
| `collections` | - | Content collections to index. |
|
|
39
|
-
| `
|
|
42
|
+
| `provider` | `anthropic` | Inference provider: `anthropic`, `openai`, or `openrouter`. Each reads its own key: `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, or `OPENROUTER_API_KEY`. |
|
|
43
|
+
| `providerBaseUrl` | per provider | Base URL override for the OpenAI-compatible providers (any Chat Completions endpoint). |
|
|
44
|
+
| `model` | per provider | Runtime search-loop model; `claude-haiku-4-5` on the default provider. |
|
|
40
45
|
| `endpoint` | `/api/ask` | Injected on-demand route. |
|
|
41
46
|
| `basePath` | `/docs/` | Turns a doc slug into its page URL. |
|
|
42
47
|
| `maxResults` | `6` | Max results returned. |
|
|
@@ -44,8 +49,9 @@ export default defineConfig({
|
|
|
44
49
|
| `chunkHeadingDepth` | `3` | Chunk at `##` through this heading depth. |
|
|
45
50
|
| `candidatePerSearch` | `8` | Chunks returned by each search tool call. |
|
|
46
51
|
| `perDocCap` | `2` | Max chunks per document in one prefilter call. |
|
|
47
|
-
| `digestModel` | `claude-opus-4-8`
|
|
48
|
-
| `
|
|
52
|
+
| `digestModel` | per provider | Offline digest build model; `claude-opus-4-8` on the default provider. |
|
|
53
|
+
| `digestDir` | `.hev-ask` | Committed digest tree directory. |
|
|
54
|
+
| `digestPath` | `.hev-ask` | Deprecated alias for `digestDir`. |
|
|
49
55
|
| `digestContentGlobs` | derived from `collections` | Build-time Markdown/MDX corpus globs. |
|
|
50
56
|
|
|
51
57
|
## Add the overlay
|
|
@@ -71,9 +77,9 @@ ask digest build
|
|
|
71
77
|
ask digest verify
|
|
72
78
|
```
|
|
73
79
|
|
|
74
|
-
The builder writes `.hev-ask
|
|
75
|
-
hash-gated, so unchanged content does not spend another Opus call.
|
|
76
|
-
builds the site and checks that every chunk anchor exists in `dist`.
|
|
80
|
+
The builder writes the `.hev-ask/` markdown tree, which should be committed.
|
|
81
|
+
Builds are hash-gated, so unchanged content does not spend another Opus call.
|
|
82
|
+
`verify` builds the site and checks that every chunk anchor exists in `dist`.
|
|
77
83
|
|
|
78
84
|
hev ask uses `github-slugger` to match Astro heading anchors exactly.
|
|
79
85
|
|
|
@@ -102,12 +108,26 @@ Git `path:/packages/ui` dependency.
|
|
|
102
108
|
Git dependencies are acceptable for local integration while the package is not
|
|
103
109
|
yet published, but they are not the long-term distribution path.
|
|
104
110
|
|
|
111
|
+
## Other frameworks
|
|
112
|
+
|
|
113
|
+
The Astro integration above is the turnkey path. On any other framework you build
|
|
114
|
+
the digest the same way (`ask digest build`), bundle the static assets
|
|
115
|
+
(`ask digest bundle`), and drop in the prebuilt overlay as a `<script>` tag —
|
|
116
|
+
keyword search runs fully static, no server. For agentic answers, deploy the
|
|
117
|
+
standalone endpoint and point the overlay at it. See
|
|
118
|
+
[Frameworks](https://hevask.com/docs/frameworks) for Docusaurus, VitePress,
|
|
119
|
+
MkDocs, and plain-HTML recipes.
|
|
120
|
+
|
|
105
121
|
## Server Requirements
|
|
106
122
|
|
|
107
|
-
-
|
|
108
|
-
|
|
109
|
-
- The
|
|
110
|
-
production
|
|
123
|
+
- Keyword search runs **fully static** — the drop-in overlay reads the committed
|
|
124
|
+
digest in the browser, no server required.
|
|
125
|
+
- The **agentic** path needs a runtime: on Astro, `/api/ask` is rendered on
|
|
126
|
+
demand (so the site needs a server adapter in production); on other frameworks,
|
|
127
|
+
it's the standalone hostable endpoint.
|
|
128
|
+
- Set the provider's API key (`ANTHROPIC_API_KEY` by default) in that server
|
|
129
|
+
environment for AI search and fresh digest generation. Without a runtime key,
|
|
130
|
+
the endpoint still serves keyword results.
|
|
111
131
|
|
|
112
132
|
## Theming
|
|
113
133
|
|
package/openapi.yaml
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
openapi: 3.1.0
|
|
2
2
|
info:
|
|
3
3
|
title: hev ask API
|
|
4
|
-
version: 3.
|
|
4
|
+
version: 3.1.0
|
|
5
5
|
summary: Search, answer, and digest read API exposed by the @hevmind/ask Astro integration.
|
|
6
6
|
description: |
|
|
7
7
|
`@hevmind/ask` mounts these routes on a consuming Astro site (default base `/api/ask`,
|
|
8
8
|
configurable via the integration's `endpoint` option). Two paths existed in v2:
|
|
9
9
|
keyword + agentic **search** (`POST /api/ask`) and **suggestions** (`GET /api/ask`).
|
|
10
10
|
v3 adds keyless **read** routes over the committed ask digest
|
|
11
|
-
(`/api/ask/glossary`, `/api/ask/sections`, `/api/ask/overview`)
|
|
12
|
-
|
|
13
|
-
directly.
|
|
11
|
+
(`/api/ask/glossary`, `/api/ask/sections`, `/api/ask/overview`) plus
|
|
12
|
+
`/api/ask/archive` for bulk tree hydration, so a coding agent — via the
|
|
13
|
+
`ask` CLI, the MCP server, or a generated client — can query the docs directly.
|
|
14
14
|
|
|
15
15
|
Degradation: with no `ANTHROPIC_API_KEY` configured on the server, `POST /api/ask`
|
|
16
16
|
falls back to keyword mode (HTTP 200 with a `warning`). The read routes never call a
|
|
@@ -18,7 +18,7 @@ info:
|
|
|
18
18
|
license:
|
|
19
19
|
name: MIT
|
|
20
20
|
servers:
|
|
21
|
-
- url: https://
|
|
21
|
+
- url: https://hevask.com
|
|
22
22
|
description: The hev ask docs site (dogfoods @hevmind/ask).
|
|
23
23
|
- url: "{origin}"
|
|
24
24
|
description: Any site running the integration.
|
|
@@ -30,6 +30,8 @@ tags:
|
|
|
30
30
|
description: Keyword and agentic search/answer.
|
|
31
31
|
- name: digest
|
|
32
32
|
description: Keyless reads over the committed ask digest.
|
|
33
|
+
- name: archive
|
|
34
|
+
description: Bulk hydrate transport for the committed `.hev-ask/` digest tree.
|
|
33
35
|
|
|
34
36
|
paths:
|
|
35
37
|
/api/ask:
|
|
@@ -38,7 +40,7 @@ paths:
|
|
|
38
40
|
operationId: getSuggestions
|
|
39
41
|
summary: Suggested questions and active model
|
|
40
42
|
description: |
|
|
41
|
-
Returns the model-authored example questions baked into the committed
|
|
43
|
+
Returns the model-authored example questions baked into the committed digest,
|
|
42
44
|
shown by the overlay on open. Keyless — no model call.
|
|
43
45
|
responses:
|
|
44
46
|
"200":
|
|
@@ -99,7 +101,7 @@ paths:
|
|
|
99
101
|
tags: [digest]
|
|
100
102
|
operationId: listGlossary
|
|
101
103
|
summary: List glossary terms
|
|
102
|
-
description: All glossary entries from the committed
|
|
104
|
+
description: All glossary entries from the committed digest. Keyless.
|
|
103
105
|
responses:
|
|
104
106
|
"200":
|
|
105
107
|
description: The glossary.
|
|
@@ -204,6 +206,50 @@ paths:
|
|
|
204
206
|
overview: { type: string }
|
|
205
207
|
context: { type: string }
|
|
206
208
|
|
|
209
|
+
/api/ask/archive:
|
|
210
|
+
head:
|
|
211
|
+
tags: [archive]
|
|
212
|
+
operationId: headDigestArchive
|
|
213
|
+
summary: Check digest archive freshness
|
|
214
|
+
description: |
|
|
215
|
+
Returns cache headers for the compressed digest tree without the archive body.
|
|
216
|
+
Clients compare `x-hev-ask-content-hash` against their local cache before
|
|
217
|
+
downloading the full archive.
|
|
218
|
+
responses:
|
|
219
|
+
"200":
|
|
220
|
+
description: Digest archive metadata.
|
|
221
|
+
headers:
|
|
222
|
+
x-hev-ask-content-hash:
|
|
223
|
+
schema: { type: string }
|
|
224
|
+
description: Content hash of the committed digest tree.
|
|
225
|
+
cache-control:
|
|
226
|
+
schema: { type: string }
|
|
227
|
+
content-disposition:
|
|
228
|
+
schema: { type: string }
|
|
229
|
+
get:
|
|
230
|
+
tags: [archive]
|
|
231
|
+
operationId: getDigestArchive
|
|
232
|
+
summary: Download the digest tree archive
|
|
233
|
+
description: |
|
|
234
|
+
Returns a gzip-compressed tar archive of the committed `.hev-ask/`
|
|
235
|
+
markdown tree. This is the bulk hydrate path used by `ask mcp --endpoint`.
|
|
236
|
+
responses:
|
|
237
|
+
"200":
|
|
238
|
+
description: Gzip-compressed tar archive of the digest tree.
|
|
239
|
+
headers:
|
|
240
|
+
x-hev-ask-content-hash:
|
|
241
|
+
schema: { type: string }
|
|
242
|
+
description: Content hash of the committed digest tree.
|
|
243
|
+
cache-control:
|
|
244
|
+
schema: { type: string }
|
|
245
|
+
content-disposition:
|
|
246
|
+
schema: { type: string }
|
|
247
|
+
content:
|
|
248
|
+
application/gzip:
|
|
249
|
+
schema:
|
|
250
|
+
type: string
|
|
251
|
+
format: binary
|
|
252
|
+
|
|
207
253
|
components:
|
|
208
254
|
parameters:
|
|
209
255
|
Term:
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hevmind/ask",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "hev ask: a heading-anchored, agentic search overlay for Astro docs sites.",
|
|
6
6
|
"keywords": [
|
|
@@ -28,11 +28,11 @@
|
|
|
28
28
|
"ask": "./bin/ask.mjs"
|
|
29
29
|
},
|
|
30
30
|
"optionalDependencies": {
|
|
31
|
-
"@hevmind/ask-darwin-arm64": "0.
|
|
32
|
-
"@hevmind/ask-
|
|
33
|
-
"@hevmind/ask-win32-x64": "0.
|
|
34
|
-
"@hevmind/ask-linux-arm64": "0.
|
|
35
|
-
"@hevmind/ask-
|
|
31
|
+
"@hevmind/ask-darwin-arm64": "0.2.0",
|
|
32
|
+
"@hevmind/ask-linux-x64": "0.2.0",
|
|
33
|
+
"@hevmind/ask-win32-x64": "0.2.0",
|
|
34
|
+
"@hevmind/ask-linux-arm64": "0.2.0",
|
|
35
|
+
"@hevmind/ask-darwin-x64": "0.2.0"
|
|
36
36
|
},
|
|
37
37
|
"exports": {
|
|
38
38
|
".": "./src/index.ts",
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: build-digest
|
|
3
3
|
description: >-
|
|
4
|
-
Build the @hevmind/ask ask digest (.hev-ask/
|
|
4
|
+
Build the @hevmind/ask ask digest (.hev-ask/ markdown tree) for an Astro docs site
|
|
5
5
|
using your Claude Code subscription instead of an ANTHROPIC_API_KEY. Use when
|
|
6
6
|
the user asks to build, rebuild, or refresh the hev ask digest, knowledge
|
|
7
7
|
graph, KG, or search index, or after docs content changes. Runs `ask digest
|
|
@@ -12,8 +12,8 @@ description: >-
|
|
|
12
12
|
# Build the hev ask digest
|
|
13
13
|
|
|
14
14
|
`@hevmind/ask` searches an Astro docs site. Its agentic loop, keyword ranking, and
|
|
15
|
-
suggested questions are powered by a committed ask digest at
|
|
16
|
-
`.hev-ask
|
|
15
|
+
suggested questions are powered by a committed ask digest tree at
|
|
16
|
+
`.hev-ask/`. Only the **distillation** needs a model — the node
|
|
17
17
|
structure, verbatim facts, overview map, and content hash are computed
|
|
18
18
|
deterministically by the CLI. This skill performs that distillation here, in
|
|
19
19
|
the user's subscription, so it costs **no API tokens on their own key**.
|
|
@@ -29,7 +29,7 @@ Run every command from the **site root** (the directory whose `astro.config.*`
|
|
|
29
29
|
registers `hevAsk()`). Prefer `pnpm exec ask digest …`; fall back to
|
|
30
30
|
`npx -p @hevmind/ask ask digest …` if pnpm isn't used. Pass the same content flags
|
|
31
31
|
the site's integration uses if they differ from the defaults (`--collection`,
|
|
32
|
-
`--base-path`, `--chunk-heading-depth`, `--content-glob`, `--digest-
|
|
32
|
+
`--base-path`, `--chunk-heading-depth`, `--content-glob`, `--digest-dir`);
|
|
33
33
|
they must match across `corpus` and `assemble`.
|
|
34
34
|
|
|
35
35
|
**Never read a shard input file into the orchestrating context** (they hold
|
|
@@ -129,7 +129,7 @@ distillation agents.
|
|
|
129
129
|
```
|
|
130
130
|
|
|
131
131
|
Merges every current shard distillation with the global synthesis, derives
|
|
132
|
-
the deterministic parts, and writes `.hev-ask
|
|
132
|
+
the deterministic parts, and writes the `.hev-ask/` markdown tree. Sections from
|
|
133
133
|
undistilled shards fall back to plain excerpts and are reported — the
|
|
134
134
|
digest stays usable mid-wave, but aim for 0 pending before committing.
|
|
135
135
|
|
|
@@ -148,10 +148,10 @@ distillation agents.
|
|
|
148
148
|
```sh
|
|
149
149
|
rm -f .hev-ask/shards/input-*.json
|
|
150
150
|
git check-ignore -q .hev-ask/shards || echo ".hev-ask/shards/" >> .gitignore
|
|
151
|
-
git add .gitignore .hev-ask
|
|
151
|
+
git add .gitignore .hev-ask
|
|
152
152
|
```
|
|
153
153
|
|
|
154
|
-
Only `.hev-ask
|
|
154
|
+
Only the `.hev-ask/` tree is committed; `.hev-ask/shards/` remains local.
|
|
155
155
|
|
|
156
156
|
## Notes
|
|
157
157
|
|
package/src/digest/build.ts
CHANGED
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
import { createHash } from 'node:crypto';
|
|
2
|
-
import { mkdir, readFile, readdir, writeFile } from 'node:fs/promises';
|
|
2
|
+
import { mkdir, readFile, readdir, rm, writeFile } from 'node:fs/promises';
|
|
3
3
|
import path from 'node:path';
|
|
4
|
-
import {
|
|
4
|
+
import { type AnthropicTool } from '../llm.ts';
|
|
5
|
+
import { PROVIDERS, clientFor, resolveProviderName, type ProviderName } from '../providers.ts';
|
|
5
6
|
import { chunkDocument, hashableChunkText, type Chunk, type SourceDocument } from '../search/chunk.ts';
|
|
6
7
|
import { classifyMode, distinctiveTokens, extractFacts } from './facts.ts';
|
|
7
8
|
import { parseFrontmatter } from './frontmatter.ts';
|
|
8
9
|
import { normalizeDigest, type Digest, type DigestNode } from './schema.ts';
|
|
10
|
+
import { digestTreeFiles, readDigestArtifact } from './tree.ts';
|
|
9
11
|
|
|
10
12
|
export interface DigestBuildOptions {
|
|
11
13
|
siteRoot: string;
|
|
@@ -15,6 +17,8 @@ export interface DigestBuildOptions {
|
|
|
15
17
|
digestContentGlobs?: string[];
|
|
16
18
|
chunkHeadingDepth: number;
|
|
17
19
|
digestModel: string;
|
|
20
|
+
provider?: ProviderName;
|
|
21
|
+
providerBaseUrl?: string;
|
|
18
22
|
apiKey?: string;
|
|
19
23
|
}
|
|
20
24
|
|
|
@@ -94,7 +98,7 @@ export interface EmittedDistillation {
|
|
|
94
98
|
export interface DigestInput {
|
|
95
99
|
contentHash: string;
|
|
96
100
|
digestPath: string;
|
|
97
|
-
/** True when the committed digest
|
|
101
|
+
/** True when the committed digest tree already matches this corpus — no rebuild needed. */
|
|
98
102
|
upToDate: boolean;
|
|
99
103
|
sections: Array<{ id: string; url: string; title: string; text: string }>;
|
|
100
104
|
}
|
|
@@ -133,21 +137,22 @@ export function assembleDigest(emitted: EmittedDistillation, corpus: CorpusBuild
|
|
|
133
137
|
export async function buildDigest(options: DigestBuildOptions): Promise<DigestBuildResult> {
|
|
134
138
|
const corpus = await buildCorpus(options);
|
|
135
139
|
const outPath = path.resolve(options.siteRoot, options.digestPath);
|
|
136
|
-
const existing =
|
|
140
|
+
const existing = readExistingDigest(options.siteRoot, options.digestPath);
|
|
137
141
|
// Skip only when the committed artifact is already a current-version digest with
|
|
138
142
|
// nodes built from this exact corpus. A v1 (node-less) artifact always rebuilds.
|
|
139
143
|
if (existing && existing.version === 2 && existing.contentHash === corpus.contentHash && existing.nodes.length > 0) {
|
|
140
144
|
return { status: 'skipped', path: outPath, contentHash: corpus.contentHash, chunks: corpus.chunks.length };
|
|
141
145
|
}
|
|
142
146
|
|
|
143
|
-
const
|
|
144
|
-
|
|
147
|
+
const provider = resolveProviderName(options.provider);
|
|
148
|
+
const apiKey = options.apiKey ?? process.env[PROVIDERS[provider].envKey];
|
|
149
|
+
if (!apiKey) throw new Error(`${PROVIDERS[provider].envKey} is required to build a fresh digest.`);
|
|
145
150
|
|
|
146
151
|
const corpusText = corpusSections(corpus)
|
|
147
152
|
.map((section) => `id: ${section.id}\nurl: ${section.url}\ntitle: ${section.title}\n\n${section.text}`)
|
|
148
153
|
.join('\n\n---\n\n');
|
|
149
154
|
|
|
150
|
-
const response = await
|
|
155
|
+
const response = await clientFor(provider, options.providerBaseUrl).call({
|
|
151
156
|
apiKey,
|
|
152
157
|
model: options.digestModel,
|
|
153
158
|
maxTokens: 8192,
|
|
@@ -203,8 +208,21 @@ export function parseEmittedDigest(input: unknown): EmittedDistillation {
|
|
|
203
208
|
}
|
|
204
209
|
|
|
205
210
|
async function writeGraph(outPath: string, digest: Digest): Promise<void> {
|
|
206
|
-
|
|
207
|
-
|
|
211
|
+
if (path.extname(outPath).toLowerCase() === '.json') {
|
|
212
|
+
await mkdir(path.dirname(outPath), { recursive: true });
|
|
213
|
+
await writeFile(outPath, JSON.stringify(digest, null, 2) + '\n', 'utf8');
|
|
214
|
+
return;
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
await mkdir(outPath, { recursive: true });
|
|
218
|
+
const desired = new Set<string>();
|
|
219
|
+
for (const file of digestTreeFiles(digest)) {
|
|
220
|
+
const target = path.join(outPath, file.path);
|
|
221
|
+
await mkdir(path.dirname(target), { recursive: true });
|
|
222
|
+
await writeFile(target, file.body, 'utf8');
|
|
223
|
+
desired.add(file.path);
|
|
224
|
+
}
|
|
225
|
+
await removeOrphanDigestMarkdown(outPath, desired);
|
|
208
226
|
}
|
|
209
227
|
|
|
210
228
|
/**
|
|
@@ -221,7 +239,7 @@ export async function writeCorpusInput(options: {
|
|
|
221
239
|
chunkHeadingDepth: number;
|
|
222
240
|
}): Promise<{ path: string; upToDate: boolean; sections: number }> {
|
|
223
241
|
const corpus = await buildCorpus(options);
|
|
224
|
-
const committed =
|
|
242
|
+
const committed = readExistingDigest(options.siteRoot, options.digestPath);
|
|
225
243
|
const upToDate = Boolean(
|
|
226
244
|
committed && committed.version === 2 && committed.contentHash === corpus.contentHash && committed.nodes.length > 0,
|
|
227
245
|
);
|
|
@@ -282,6 +300,7 @@ export function buildNodes(chunks: Chunk[], summaryById: Map<string, string>): D
|
|
|
282
300
|
group: chunk.group ?? null,
|
|
283
301
|
url: chunk.url,
|
|
284
302
|
summary,
|
|
303
|
+
hash: sectionHash(chunk),
|
|
285
304
|
facts,
|
|
286
305
|
sources: [{ chunkId: chunk.id, url: chunk.url, anchor: chunk.anchorId ?? null }],
|
|
287
306
|
mode: classifyMode(chunk.group),
|
|
@@ -348,12 +367,31 @@ export function sha256(text: string): string {
|
|
|
348
367
|
return createHash('sha256').update(text).digest('hex');
|
|
349
368
|
}
|
|
350
369
|
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
370
|
+
function sectionHash(chunk: Chunk): string {
|
|
371
|
+
return sha256(`${chunk.id}\n${chunk.text}`);
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
function readExistingDigest(siteRoot: string, digestPath: string): Digest | null {
|
|
375
|
+
const digest = readDigestArtifact(siteRoot, digestPath);
|
|
376
|
+
return digest.version === 2 && digest.nodes.length > 0 ? digest : null;
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
async function removeOrphanDigestMarkdown(root: string, desired: Set<string>, relDir = ''): Promise<void> {
|
|
380
|
+
const dir = path.join(root, relDir);
|
|
381
|
+
const entries = await readdir(dir, { withFileTypes: true }).catch(() => []);
|
|
382
|
+
await Promise.all(
|
|
383
|
+
entries.map(async (entry) => {
|
|
384
|
+
if (entry.isDirectory()) {
|
|
385
|
+
if (entry.name === 'shards') return;
|
|
386
|
+
await removeOrphanDigestMarkdown(root, desired, path.join(relDir, entry.name));
|
|
387
|
+
return;
|
|
388
|
+
}
|
|
389
|
+
const rel = path.join(relDir, entry.name).replace(/\\/g, '/');
|
|
390
|
+
if (path.extname(entry.name).toLowerCase() === '.md' && !desired.has(rel)) {
|
|
391
|
+
await rm(path.join(root, rel), { force: true });
|
|
392
|
+
}
|
|
393
|
+
}),
|
|
394
|
+
);
|
|
357
395
|
}
|
|
358
396
|
|
|
359
397
|
async function resolveContentFiles(
|
package/src/digest/cli.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
+
import { PROVIDERS, resolveProviderName } from '../providers.ts';
|
|
2
3
|
import { assembleFromDistillation, buildDigest, writeCorpusInput } from './build.ts';
|
|
3
4
|
import { verifyAnchors } from './verify.ts';
|
|
4
5
|
|
|
@@ -9,6 +10,8 @@ interface Flags {
|
|
|
9
10
|
digestContentGlobs: string[];
|
|
10
11
|
chunkHeadingDepth?: number;
|
|
11
12
|
digestModel?: string;
|
|
13
|
+
provider?: string;
|
|
14
|
+
providerBaseUrl?: string;
|
|
12
15
|
buildCommand?: string;
|
|
13
16
|
skipBuild?: boolean;
|
|
14
17
|
strict?: boolean;
|
|
@@ -21,14 +24,17 @@ const flags = parseFlags(args);
|
|
|
21
24
|
|
|
22
25
|
try {
|
|
23
26
|
if (command === 'build') {
|
|
27
|
+
const provider = resolveProviderName(flags.provider);
|
|
24
28
|
const result = await buildDigest({
|
|
25
29
|
siteRoot: process.cwd(),
|
|
26
30
|
collections: flags.collections.length ? flags.collections : ['docs'],
|
|
27
31
|
basePath: flags.basePath ?? '/docs/',
|
|
28
|
-
digestPath: flags.digestPath ?? '.hev-ask
|
|
32
|
+
digestPath: flags.digestPath ?? '.hev-ask',
|
|
29
33
|
digestContentGlobs: flags.digestContentGlobs.length ? flags.digestContentGlobs : undefined,
|
|
30
34
|
chunkHeadingDepth: flags.chunkHeadingDepth ?? 3,
|
|
31
|
-
digestModel: flags.digestModel ??
|
|
35
|
+
digestModel: flags.digestModel ?? PROVIDERS[provider].defaultDigestModel,
|
|
36
|
+
provider,
|
|
37
|
+
providerBaseUrl: flags.providerBaseUrl,
|
|
32
38
|
});
|
|
33
39
|
console.log(`[hev-ask] digest:${result.status} ${result.path} (${result.chunks} chunks)`);
|
|
34
40
|
} else if (command === 'corpus') {
|
|
@@ -36,7 +42,7 @@ try {
|
|
|
36
42
|
siteRoot: process.cwd(),
|
|
37
43
|
collections: flags.collections.length ? flags.collections : ['docs'],
|
|
38
44
|
basePath: flags.basePath ?? '/docs/',
|
|
39
|
-
digestPath: flags.digestPath ?? '.hev-ask
|
|
45
|
+
digestPath: flags.digestPath ?? '.hev-ask',
|
|
40
46
|
outPath: flags.out ?? '.hev-ask/digest-input.json',
|
|
41
47
|
digestContentGlobs: flags.digestContentGlobs.length ? flags.digestContentGlobs : undefined,
|
|
42
48
|
chunkHeadingDepth: flags.chunkHeadingDepth ?? 3,
|
|
@@ -48,7 +54,7 @@ try {
|
|
|
48
54
|
siteRoot: process.cwd(),
|
|
49
55
|
collections: flags.collections.length ? flags.collections : ['docs'],
|
|
50
56
|
basePath: flags.basePath ?? '/docs/',
|
|
51
|
-
digestPath: flags.digestPath ?? '.hev-ask
|
|
57
|
+
digestPath: flags.digestPath ?? '.hev-ask',
|
|
52
58
|
inputPath: flags.input ?? '.hev-ask/digest-distill.json',
|
|
53
59
|
digestContentGlobs: flags.digestContentGlobs.length ? flags.digestContentGlobs : undefined,
|
|
54
60
|
chunkHeadingDepth: flags.chunkHeadingDepth ?? 3,
|
|
@@ -59,7 +65,7 @@ try {
|
|
|
59
65
|
siteRoot: process.cwd(),
|
|
60
66
|
collections: flags.collections.length ? flags.collections : ['docs'],
|
|
61
67
|
basePath: flags.basePath ?? '/docs/',
|
|
62
|
-
digestPath: flags.digestPath ?? '.hev-ask
|
|
68
|
+
digestPath: flags.digestPath ?? '.hev-ask',
|
|
63
69
|
digestContentGlobs: flags.digestContentGlobs.length ? flags.digestContentGlobs : undefined,
|
|
64
70
|
chunkHeadingDepth: flags.chunkHeadingDepth ?? 3,
|
|
65
71
|
buildCommand: flags.buildCommand,
|
|
@@ -97,7 +103,7 @@ try {
|
|
|
97
103
|
}
|
|
98
104
|
} else {
|
|
99
105
|
console.error(
|
|
100
|
-
'Usage: ask digest build|corpus|assemble|verify [--collection docs] [--base-path /docs/] [--out path] [--input path] [--strict]',
|
|
106
|
+
'Usage: ask digest build|corpus|assemble|verify [--collection docs] [--base-path /docs/] [--digest-dir .hev-ask] [--provider anthropic|openai|openrouter] [--out path] [--input path] [--strict]',
|
|
101
107
|
);
|
|
102
108
|
process.exitCode = 1;
|
|
103
109
|
}
|
|
@@ -117,7 +123,7 @@ function parseFlags(args: string[]): Flags {
|
|
|
117
123
|
} else if (arg === '--base-path' && next) {
|
|
118
124
|
flags.basePath = next;
|
|
119
125
|
i += 1;
|
|
120
|
-
} else if (arg === '--digest-path' && next) {
|
|
126
|
+
} else if ((arg === '--digest-dir' || arg === '--digest-path') && next) {
|
|
121
127
|
flags.digestPath = next;
|
|
122
128
|
i += 1;
|
|
123
129
|
} else if (arg === '--content-glob' && next) {
|
|
@@ -129,6 +135,12 @@ function parseFlags(args: string[]): Flags {
|
|
|
129
135
|
} else if (arg === '--digest-model' && next) {
|
|
130
136
|
flags.digestModel = next;
|
|
131
137
|
i += 1;
|
|
138
|
+
} else if (arg === '--provider' && next) {
|
|
139
|
+
flags.provider = next;
|
|
140
|
+
i += 1;
|
|
141
|
+
} else if (arg === '--provider-url' && next) {
|
|
142
|
+
flags.providerBaseUrl = next;
|
|
143
|
+
i += 1;
|
|
132
144
|
} else if (arg === '--build-command' && next) {
|
|
133
145
|
flags.buildCommand = next;
|
|
134
146
|
i += 1;
|
|
@@ -37,5 +37,12 @@ function parseScalar(value: string): unknown {
|
|
|
37
37
|
if (value === 'false') return false;
|
|
38
38
|
const numberValue = Number(value);
|
|
39
39
|
if (Number.isFinite(numberValue) && /^-?\d+(\.\d+)?$/.test(value)) return numberValue;
|
|
40
|
+
if (value.startsWith('[') || value.startsWith('{') || value === 'null') {
|
|
41
|
+
try {
|
|
42
|
+
return JSON.parse(value);
|
|
43
|
+
} catch {
|
|
44
|
+
// Fall through to the raw scalar.
|
|
45
|
+
}
|
|
46
|
+
}
|
|
40
47
|
return value;
|
|
41
48
|
}
|
package/src/digest/schema.ts
CHANGED
|
@@ -36,6 +36,8 @@ export interface DigestNode {
|
|
|
36
36
|
url: string;
|
|
37
37
|
/** Model-distilled prose. May paraphrase; exact strings live in `facts`. */
|
|
38
38
|
summary: string;
|
|
39
|
+
/** Per-section source hash used by the incremental digest builder. */
|
|
40
|
+
hash?: string;
|
|
39
41
|
/** Deterministically extracted verbatim literals. */
|
|
40
42
|
facts: Fact[];
|
|
41
43
|
/** Provenance — for a section node, its own chunk. */
|
|
@@ -144,6 +146,7 @@ function normalizeNode(value: unknown): DigestNode | null {
|
|
|
144
146
|
group: typeof maybe.group === 'string' ? maybe.group : null,
|
|
145
147
|
url: maybe.url,
|
|
146
148
|
summary: typeof maybe.summary === 'string' ? maybe.summary : '',
|
|
149
|
+
hash: typeof maybe.hash === 'string' ? maybe.hash : undefined,
|
|
147
150
|
facts: Array.isArray(maybe.facts)
|
|
148
151
|
? maybe.facts.map((fact) => normalizeFact(fact)).filter((fact): fact is Fact => fact !== null)
|
|
149
152
|
: [],
|