markdown-lsp 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +134 -4
- package/dist/ai/cache.d.ts +3 -0
- package/dist/ai/cache.d.ts.map +1 -0
- package/dist/ai/cache.js +27 -0
- package/dist/ai/cache.js.map +1 -0
- package/dist/ai/config.d.ts +6 -0
- package/dist/ai/config.d.ts.map +1 -1
- package/dist/ai/config.js +16 -3
- package/dist/ai/config.js.map +1 -1
- package/dist/ai/embeddings.d.ts +9 -2
- package/dist/ai/embeddings.d.ts.map +1 -1
- package/dist/ai/embeddings.js +48 -10
- package/dist/ai/embeddings.js.map +1 -1
- package/dist/ai/gateway.d.ts +3 -0
- package/dist/ai/gateway.d.ts.map +1 -1
- package/dist/ai/gateway.js +17 -8
- package/dist/ai/gateway.js.map +1 -1
- package/dist/cli.js +795 -6
- package/dist/cli.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -6,9 +6,9 @@
|
|
|
6
6
|
[](https://github.com/Docsbook-io/markdown-lsp/blob/main/LICENSE)
|
|
7
7
|
[](https://www.npmjs.com/package/markdown-lsp)
|
|
8
8
|
|
|
9
|
-
CLI and library for querying Markdown documentation graphs. Point it at a folder of `.md` files and get instant full-text search, outline, link analysis,
|
|
9
|
+
CLI and library for querying Markdown documentation graphs. Point it at a folder of `.md` files and get instant full-text search, outline, link analysis, symbol lookup, interactive HTML graphs, and AI-powered semantic search — all as JSON.
|
|
10
10
|
|
|
11
|
-
**Status: v1.
|
|
11
|
+
**Status: v1.2.0. CLI is the default interface. LSP stdio mode available as a subcommand.**
|
|
12
12
|
|
|
13
13
|
---
|
|
14
14
|
|
|
@@ -18,11 +18,40 @@ CLI and library for querying Markdown documentation graphs. Point it at a folder
|
|
|
18
18
|
# List all pages
|
|
19
19
|
npx markdown-lsp workspace-outline ./docs
|
|
20
20
|
|
|
21
|
+
# Heading outline of a page
|
|
22
|
+
npx markdown-lsp outline ./docs introduction.md
|
|
23
|
+
|
|
21
24
|
# Full-text search (natural-language, ranked)
|
|
22
25
|
npx markdown-lsp search-text ./docs "getting started"
|
|
23
26
|
|
|
24
27
|
# Fuzzy heading search
|
|
25
28
|
npx markdown-lsp search-symbols ./docs "auth" --limit 10
|
|
29
|
+
|
|
30
|
+
# Find pages by glob
|
|
31
|
+
npx markdown-lsp search-paths ./docs "ai/*.md"
|
|
32
|
+
|
|
33
|
+
# Backlinks and outgoing links
|
|
34
|
+
npx markdown-lsp links-to ./docs quick-start.md
|
|
35
|
+
npx markdown-lsp links-from ./docs README.md
|
|
36
|
+
|
|
37
|
+
# Resolve a link text / read a section
|
|
38
|
+
npx markdown-lsp resolve-link ./docs README.md "Getting Started"
|
|
39
|
+
npx markdown-lsp get-section ./docs overview.md "quick-links"
|
|
40
|
+
|
|
41
|
+
# Interactive link graph (HTML, JSON, DOT, Mermaid)
|
|
42
|
+
npx markdown-lsp graph ./docs --format html --out graph.html
|
|
43
|
+
npx markdown-lsp graph ./docs --format json --pretty
|
|
44
|
+
npx markdown-lsp graph ./docs --format dot | dot -Tsvg > graph.svg
|
|
45
|
+
npx markdown-lsp graph ./docs --format mermaid
|
|
46
|
+
|
|
47
|
+
# Build embeddings + interactive semantic graph in one command
|
|
48
|
+
OPENROUTER_API_KEY=sk-or-... npx markdown-lsp graph ./docs --format html --semantic --out graph.html
|
|
49
|
+
|
|
50
|
+
# AI semantic search
|
|
51
|
+
OPENROUTER_API_KEY=sk-or-... npx markdown-lsp semantic-search ./docs "how to configure webhooks"
|
|
52
|
+
|
|
53
|
+
# LSP server (for editors)
|
|
54
|
+
npx markdown-lsp lsp --stdio
|
|
26
55
|
```
|
|
27
56
|
|
|
28
57
|
---
|
|
@@ -54,6 +83,8 @@ All subcommands accept a **`--pretty`** flag for indented JSON output (compact b
|
|
|
54
83
|
| `links-from` | `<docs-dir> <page>` | All links originating from `<page>` |
|
|
55
84
|
| `resolve-link` | `<docs-dir> <from-page> <link-text>` | Resolve a specific link text from a page |
|
|
56
85
|
| `get-section` | `<docs-dir> <page> <anchor>` | Get a section by anchor slug |
|
|
86
|
+
| `graph` | `<docs-dir> [--format json\|dot\|mermaid\|html] [--out file] [--semantic] [--sim-threshold n] [--sim-top-k n] [--model m]` | Export doc link graph; `--semantic` adds AI similarity edges |
|
|
87
|
+
| `semantic-search` | `<docs-dir> <query> [--limit n] [--model m]` | AI semantic search via embeddings |
|
|
57
88
|
| `lsp` / `serve` | `[--stdio]` | Start the LSP stdio server |
|
|
58
89
|
|
|
59
90
|
### search-text modes
|
|
@@ -101,6 +132,102 @@ markdown-lsp get-section ./docs overview.md quick-links --pretty
|
|
|
101
132
|
|
|
102
133
|
---
|
|
103
134
|
|
|
135
|
+
## Graph export
|
|
136
|
+
|
|
137
|
+
Export the full page link graph — nodes are pages, edges are markdown links.
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
# JSON (nodes + edges — machine-readable)
|
|
141
|
+
markdown-lsp graph ./docs --format json --pretty
|
|
142
|
+
|
|
143
|
+
# Graphviz DOT
|
|
144
|
+
markdown-lsp graph ./docs --format dot > graph.dot
|
|
145
|
+
|
|
146
|
+
# Mermaid flowchart (embed in markdown)
|
|
147
|
+
markdown-lsp graph ./docs --format mermaid
|
|
148
|
+
|
|
149
|
+
# Self-contained interactive HTML with D3 force-directed graph
|
|
150
|
+
# (drag, zoom, hover highlights neighbours, click to inspect side-panel)
|
|
151
|
+
markdown-lsp graph ./docs --format html --out graph.html
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
JSON output shape:
|
|
155
|
+
```json
|
|
156
|
+
{
|
|
157
|
+
"nodes": [{"id": "README.md", "title": "Docsbook", "charCount": 2634, "sectionsCount": 10,
|
|
158
|
+
"sections": [...], "outgoing": [...], "incoming": [...], "topSimilar": []}],
|
|
159
|
+
"edges": [{"source": "README.md", "target": "quick-start.md", "kind": "inline", "label": "Get started"}],
|
|
160
|
+
"semanticEdges": [],
|
|
161
|
+
"unresolvedCount": 3
|
|
162
|
+
}
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
---
|
|
166
|
+
|
|
167
|
+
## Turnkey semantic graph (v1.2)
|
|
168
|
+
|
|
169
|
+
Overlay AI-powered semantic similarity edges on top of the link graph — one command, no pipeline.
|
|
170
|
+
|
|
171
|
+
```bash
|
|
172
|
+
# Build embeddings + interactive semantic graph in one command
|
|
173
|
+
OPENROUTER_API_KEY=sk-or-... markdown-lsp graph ./docs --format html --semantic --out graph.html
|
|
174
|
+
|
|
175
|
+
# With explicit thresholds
|
|
176
|
+
OPENROUTER_API_KEY=sk-or-... markdown-lsp graph ./docs --format html --semantic \
|
|
177
|
+
--sim-threshold 0.75 --sim-top-k 5 --out graph.html
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
**What you get in the HTML:**
|
|
181
|
+
|
|
182
|
+
- Two types of edges — **solid lines** (explicit markdown links) and **dashed amber lines** (semantic similarity)
|
|
183
|
+
- **Checkboxes** in the toolbar to toggle each edge type independently
|
|
184
|
+
- **Click any node** to open a side-panel showing: title, path, sections, outgoing links, incoming links, and top semantically similar pages with scores
|
|
185
|
+
- Clicking a linked page in the side-panel focuses the graph on that node
|
|
186
|
+
- Background click closes the panel and clears selection
|
|
187
|
+
- Drag and zoom preserved from v1.1
|
|
188
|
+
|
|
189
|
+
**Semantic flags:**
|
|
190
|
+
|
|
191
|
+
| Flag | Default | Description |
|
|
192
|
+
|---|---|---|
|
|
193
|
+
| `--semantic` | off | Enable AI similarity edges |
|
|
194
|
+
| `--sim-threshold` | `0.75` | Minimum cosine similarity score to draw an edge |
|
|
195
|
+
| `--sim-top-k` | `5` | Max semantic neighbours per node |
|
|
196
|
+
| `--model` | `openai/text-embedding-3-small` | Embedding model override |
|
|
197
|
+
|
|
198
|
+
**Caching:** embeddings are cached in `.markdown-lsp-cache/embeddings/` — the second run is instant with 0 API calls.
|
|
199
|
+
|
|
200
|
+
**OpenRouter model naming:** when using `OPENROUTER_API_KEY`, the model name requires the `openai/` prefix (e.g. `openai/text-embedding-3-small`). When using `AI_GATEWAY_API_KEY` (Vercel AI Gateway), use the bare name (`text-embedding-3-small`). If the model is rejected, the CLI outputs a clear hint to try the other form.
|
|
201
|
+
|
|
202
|
+
---
|
|
203
|
+
|
|
204
|
+
## Semantic search
|
|
205
|
+
|
|
206
|
+
AI-powered semantic search using text embeddings — finds conceptually related pages even if they
|
|
207
|
+
don't contain the exact query words.
|
|
208
|
+
|
|
209
|
+
```bash
|
|
210
|
+
# Requires OPENROUTER_API_KEY (OpenRouter) or AI_GATEWAY_API_KEY (Vercel AI Gateway)
|
|
211
|
+
OPENROUTER_API_KEY=sk-or-... markdown-lsp semantic-search ./docs "how to configure webhooks" --limit 5
|
|
212
|
+
|
|
213
|
+
# Override embedding model
|
|
214
|
+
markdown-lsp semantic-search ./docs "authentication" --model openai/text-embedding-3-small --limit 3
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
- Default embedding model: `openai/text-embedding-3-small` (via OpenRouter — model prefix required)
|
|
218
|
+
- Results cached in `.markdown-lsp-cache/embeddings/` — second run is instant, no API call
|
|
219
|
+
- Returns `[{ pagePath, pageTitle, score, snippet }]` sorted by cosine similarity
|
|
220
|
+
|
|
221
|
+
**Environment variables:**
|
|
222
|
+
|
|
223
|
+
| Variable | Purpose |
|
|
224
|
+
|---|---|
|
|
225
|
+
| `OPENROUTER_API_KEY` | OpenRouter API key (takes priority if set) |
|
|
226
|
+
| `AI_GATEWAY_API_KEY` | Vercel AI Gateway key (fallback) |
|
|
227
|
+
| `EMBEDDING_MODEL` | Override default embedding model |
|
|
228
|
+
|
|
229
|
+
---
|
|
230
|
+
|
|
104
231
|
## LSP mode (editor integration)
|
|
105
232
|
|
|
106
233
|
`markdown-lsp` also works as a Language Server Protocol server for editors (VS Code, Zed, Neovim, etc.).
|
|
@@ -174,6 +301,7 @@ export AI_GATEWAY_API_KEY=... # Vercel AI Gateway
|
|
|
174
301
|
|
|
175
302
|
- **CLI** — `node:util parseArgs`, zero extra deps, reads `.md` files into an in-memory graph
|
|
176
303
|
- **Graph** — pure TypeScript, no DB needed; `buildGraph(docsRoot)` walks the directory tree
|
|
304
|
+
- **Semantic graph** — in-memory cosine similarity (N×N in ~5ms); embeddings via OpenRouter or Vercel AI Gateway; disk-cached per sha256(model+text)
|
|
177
305
|
- **LSP** — `vscode-languageserver/node` over stdio; requires Postgres (Drizzle ORM, `mdlsp_` prefix)
|
|
178
306
|
- **AI layer** (opt-in) — pgvector cosine search on canonical-term embeddings; `text-embedding-3-small` via Vercel AI Gateway
|
|
179
307
|
- **Bridge** — pure in-memory search (searchText, searchTextRanked, searchSymbols, searchPaths, listPages)
|
|
@@ -196,8 +324,10 @@ pnpm test
|
|
|
196
324
|
- **M1 — Structural layer** ✅
|
|
197
325
|
- **M2 — Semantic extract** (opt-in, code present, awaiting live AI Gateway credit)
|
|
198
326
|
- **M3 — CLI-first interface** ✅ (v1.0.0)
|
|
199
|
-
- M4 —
|
|
200
|
-
- M5 —
|
|
327
|
+
- **M4 — Graph export + HTML D3 visualisation** ✅ (v1.1.0)
|
|
328
|
+
- **M5 — Turnkey semantic graph (graph --semantic)** ✅ (v1.2.0)
|
|
329
|
+
- M6 — User overrides for the glossary (merge / split / rename / add_synonym)
|
|
330
|
+
- M7 — Docsbook integration
|
|
201
331
|
|
|
202
332
|
---
|
|
203
333
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cache.d.ts","sourceRoot":"","sources":["../../src/ai/cache.ts"],"names":[],"mappings":"AAUA,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,GAAG,IAAI,CAW5E;AAED,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,IAAI,CAK9E"}
|
package/dist/ai/cache.js
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import { readFileSync, writeFileSync, mkdirSync, existsSync } from "node:fs";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
const CACHE_DIR = ".markdown-lsp-cache/embeddings";
|
|
5
|
+
function cacheKey(text, model) {
|
|
6
|
+
return createHash("sha256").update(model + "\x00" + text).digest("hex");
|
|
7
|
+
}
|
|
8
|
+
export function cachedEmbedding(text, model) {
|
|
9
|
+
const key = cacheKey(text, model);
|
|
10
|
+
const filePath = join(CACHE_DIR, `${key}.json`);
|
|
11
|
+
if (existsSync(filePath)) {
|
|
12
|
+
try {
|
|
13
|
+
return JSON.parse(readFileSync(filePath, "utf8"));
|
|
14
|
+
}
|
|
15
|
+
catch {
|
|
16
|
+
return null;
|
|
17
|
+
}
|
|
18
|
+
}
|
|
19
|
+
return null;
|
|
20
|
+
}
|
|
21
|
+
export function saveEmbedding(text, model, vec) {
|
|
22
|
+
mkdirSync(CACHE_DIR, { recursive: true });
|
|
23
|
+
const key = cacheKey(text, model);
|
|
24
|
+
const filePath = join(CACHE_DIR, `${key}.json`);
|
|
25
|
+
writeFileSync(filePath, JSON.stringify(vec), "utf8");
|
|
26
|
+
}
|
|
27
|
+
//# sourceMappingURL=cache.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cache.js","sourceRoot":"","sources":["../../src/ai/cache.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,YAAY,EAAE,aAAa,EAAE,SAAS,EAAE,UAAU,EAAE,MAAM,SAAS,CAAA;AAC5E,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AAEhC,MAAM,SAAS,GAAG,gCAAgC,CAAA;AAElD,SAAS,QAAQ,CAAC,IAAY,EAAE,KAAa;IAC3C,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,KAAK,GAAG,MAAM,GAAG,IAAI,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;AACzE,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,IAAY,EAAE,KAAa;IACzD,MAAM,GAAG,GAAG,QAAQ,CAAC,IAAI,EAAE,KAAK,CAAC,CAAA;IACjC,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,GAAG,GAAG,OAAO,CAAC,CAAA;IAC/C,IAAI,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;QACzB,IAAI,CAAC;YACH,OAAO,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAa,CAAA;QAC/D,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAA;QACb,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,IAAY,EAAE,KAAa,EAAE,GAAa;IACtE,SAAS,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;IACzC,MAAM,GAAG,GAAG,QAAQ,CAAC,IAAI,EAAE,KAAK,CAAC,CAAA;IACjC,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,GAAG,GAAG,OAAO,CAAC,CAAA;IAC/C,aAAa,CAAC,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,EAAE,MAAM,CAAC,CAAA;AACtD,CAAC"}
|
package/dist/ai/config.d.ts
CHANGED
|
@@ -3,7 +3,13 @@ export interface AiConfig {
|
|
|
3
3
|
embeddingModel: string;
|
|
4
4
|
extractModel: string;
|
|
5
5
|
hasGatewayKey: boolean;
|
|
6
|
+
hasOpenRouterKey: boolean;
|
|
6
7
|
}
|
|
7
8
|
export declare function getAiConfig(): AiConfig;
|
|
8
9
|
export declare function assertAiEnabled(): void;
|
|
10
|
+
/**
|
|
11
|
+
* Lighter check for CLI commands (semantic-search) that don't need MARKDOWN_LSP_AI_ENABLED.
|
|
12
|
+
* Only checks that at least one API key is present. Exits with a clear message on failure.
|
|
13
|
+
*/
|
|
14
|
+
export declare function assertApiKey(): void;
|
|
9
15
|
//# sourceMappingURL=config.d.ts.map
|
package/dist/ai/config.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../../src/ai/config.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../../src/ai/config.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,QAAQ;IACvB,OAAO,EAAE,OAAO,CAAA;IAChB,cAAc,EAAE,MAAM,CAAA;IACtB,YAAY,EAAE,MAAM,CAAA;IACpB,aAAa,EAAE,OAAO,CAAA;IACtB,gBAAgB,EAAE,OAAO,CAAA;CAC1B;AAED,wBAAgB,WAAW,IAAI,QAAQ,CAStC;AAED,wBAAgB,eAAe,IAAI,IAAI,CAYtC;AAED;;;GAGG;AACH,wBAAgB,YAAY,IAAI,IAAI,CAQnC"}
|
package/dist/ai/config.js
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
|
+
import { DEFAULT_EMBEDDING_MODEL } from "./gateway.js";
|
|
1
2
|
export function getAiConfig() {
|
|
2
3
|
const enabled = process.env.MARKDOWN_LSP_AI_ENABLED === "1" || process.env.MARKDOWN_LSP_AI_ENABLED === "true";
|
|
3
4
|
return {
|
|
4
5
|
enabled,
|
|
5
|
-
embeddingModel: process.env.EMBEDDING_MODEL ??
|
|
6
|
+
embeddingModel: process.env.EMBEDDING_MODEL ?? DEFAULT_EMBEDDING_MODEL,
|
|
6
7
|
extractModel: process.env.EXTRACT_MODEL ?? "gpt-4o-mini",
|
|
7
8
|
hasGatewayKey: Boolean(process.env.AI_GATEWAY_API_KEY),
|
|
9
|
+
hasOpenRouterKey: Boolean(process.env.OPENROUTER_API_KEY),
|
|
8
10
|
};
|
|
9
11
|
}
|
|
10
12
|
export function assertAiEnabled() {
|
|
@@ -12,8 +14,19 @@ export function assertAiEnabled() {
|
|
|
12
14
|
if (!cfg.enabled) {
|
|
13
15
|
throw new Error("AI features are disabled. Set MARKDOWN_LSP_AI_ENABLED=1 to enable embeddings + semantic extraction.");
|
|
14
16
|
}
|
|
15
|
-
if (!cfg.hasGatewayKey) {
|
|
16
|
-
throw new Error("AI features require AI_GATEWAY_API_KEY (Vercel AI Gateway)
|
|
17
|
+
if (!cfg.hasGatewayKey && !cfg.hasOpenRouterKey) {
|
|
18
|
+
throw new Error("AI features require OPENROUTER_API_KEY (OpenRouter) or AI_GATEWAY_API_KEY (Vercel AI Gateway).");
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
/**
|
|
22
|
+
* Lighter check for CLI commands (semantic-search) that don't need MARKDOWN_LSP_AI_ENABLED.
|
|
23
|
+
* Only checks that at least one API key is present. Exits with a clear message on failure.
|
|
24
|
+
*/
|
|
25
|
+
export function assertApiKey() {
|
|
26
|
+
const cfg = getAiConfig();
|
|
27
|
+
if (!cfg.hasGatewayKey && !cfg.hasOpenRouterKey) {
|
|
28
|
+
process.stderr.write("Error: set OPENROUTER_API_KEY (OpenRouter) or AI_GATEWAY_API_KEY (Vercel AI Gateway) to use semantic-search.\n");
|
|
29
|
+
process.exit(1);
|
|
17
30
|
}
|
|
18
31
|
}
|
|
19
32
|
//# sourceMappingURL=config.js.map
|
package/dist/ai/config.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"config.js","sourceRoot":"","sources":["../../src/ai/config.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"config.js","sourceRoot":"","sources":["../../src/ai/config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,uBAAuB,EAAE,MAAM,cAAc,CAAA;AAUtD,MAAM,UAAU,WAAW;IACzB,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,uBAAuB,KAAK,GAAG,IAAI,OAAO,CAAC,GAAG,CAAC,uBAAuB,KAAK,MAAM,CAAA;IAC7G,OAAO;QACL,OAAO;QACP,cAAc,EAAE,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,uBAAuB;QACtE,YAAY,EAAE,OAAO,CAAC,GAAG,CAAC,aAAa,IAAI,aAAa;QACxD,aAAa,EAAE,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAAC;QACtD,gBAAgB,EAAE,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAAC;KAC1D,CAAA;AACH,CAAC;AAED,MAAM,UAAU,eAAe;IAC7B,MAAM,GAAG,GAAG,WAAW,EAAE,CAAA;IACzB,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,CAAC;QACjB,MAAM,IAAI,KAAK,CACb,qGAAqG,CACtG,CAAA;IACH,CAAC;IACD,IAAI,CAAC,GAAG,CAAC,aAAa,IAAI,CAAC,GAAG,CAAC,gBAAgB,EAAE,CAAC;QAChD,MAAM,IAAI,KAAK,CACb,gGAAgG,CACjG,CAAA;IACH,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,YAAY;IAC1B,MAAM,GAAG,GAAG,WAAW,EAAE,CAAA;IACzB,IAAI,CAAC,GAAG,CAAC,aAAa,IAAI,CAAC,GAAG,CAAC,gBAAgB,EAAE,CAAC;QAChD,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,gHAAgH,CACjH,CAAA;QACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;AACH,CAAC"}
|
package/dist/ai/embeddings.d.ts
CHANGED
|
@@ -2,6 +2,13 @@ export interface EmbeddingResult {
|
|
|
2
2
|
vectors: number[][];
|
|
3
3
|
tokensUsed: number;
|
|
4
4
|
}
|
|
5
|
-
|
|
6
|
-
|
|
5
|
+
/**
|
|
6
|
+
* Embed multiple texts with disk-cache support.
|
|
7
|
+
* Checks cache per-text before hitting the API; saves results after.
|
|
8
|
+
* @param texts - texts to embed
|
|
9
|
+
* @param modelOverride - override the embedding model (default: EMBEDDING_MODEL env or openai/text-embedding-3-small)
|
|
10
|
+
* @param useCache - whether to use disk cache (default: true)
|
|
11
|
+
*/
|
|
12
|
+
export declare function embedTexts(texts: string[], modelOverride?: string, useCache?: boolean): Promise<EmbeddingResult>;
|
|
13
|
+
export declare function embedOne(text: string, modelOverride?: string): Promise<number[]>;
|
|
7
14
|
//# sourceMappingURL=embeddings.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"embeddings.d.ts","sourceRoot":"","sources":["../../src/ai/embeddings.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"embeddings.d.ts","sourceRoot":"","sources":["../../src/ai/embeddings.ts"],"names":[],"mappings":"AAQA,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,MAAM,EAAE,EAAE,CAAA;IACnB,UAAU,EAAE,MAAM,CAAA;CACnB;AAED;;;;;;GAMG;AACH,wBAAsB,UAAU,CAC9B,KAAK,EAAE,MAAM,EAAE,EACf,aAAa,CAAC,EAAE,MAAM,EACtB,QAAQ,UAAO,GACd,OAAO,CAAC,eAAe,CAAC,CAgD1B;AAED,wBAAsB,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,aAAa,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAGtF"}
|
package/dist/ai/embeddings.js
CHANGED
|
@@ -1,27 +1,65 @@
|
|
|
1
1
|
import { embedMany } from "ai";
|
|
2
2
|
import { getGateway, EMBEDDING_MODEL } from "./gateway.js";
|
|
3
3
|
import { assertAiEnabled } from "./config.js";
|
|
4
|
+
import { cachedEmbedding, saveEmbedding } from "./cache.js";
|
|
4
5
|
const BATCH_SIZE = 96;
|
|
5
6
|
const MAX_INPUT_CHARS = 6000;
|
|
6
|
-
|
|
7
|
+
/**
|
|
8
|
+
* Embed multiple texts with disk-cache support.
|
|
9
|
+
* Checks cache per-text before hitting the API; saves results after.
|
|
10
|
+
* @param texts - texts to embed
|
|
11
|
+
* @param modelOverride - override the embedding model (default: EMBEDDING_MODEL env or openai/text-embedding-3-small)
|
|
12
|
+
* @param useCache - whether to use disk cache (default: true)
|
|
13
|
+
*/
|
|
14
|
+
export async function embedTexts(texts, modelOverride, useCache = true) {
|
|
7
15
|
if (texts.length === 0)
|
|
8
16
|
return { vectors: [], tokensUsed: 0 };
|
|
9
17
|
assertAiEnabled();
|
|
10
18
|
const gw = getGateway();
|
|
11
|
-
const
|
|
12
|
-
const
|
|
19
|
+
const modelName = modelOverride ?? EMBEDDING_MODEL;
|
|
20
|
+
const model = gw.embedding(modelName);
|
|
21
|
+
const out = new Array(texts.length).fill(null);
|
|
13
22
|
let tokensUsed = 0;
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
23
|
+
// Check cache first
|
|
24
|
+
const uncachedIndices = [];
|
|
25
|
+
const uncachedTexts = [];
|
|
26
|
+
if (useCache) {
|
|
27
|
+
for (let i = 0; i < texts.length; i++) {
|
|
28
|
+
const cached = cachedEmbedding(texts[i], modelName);
|
|
29
|
+
if (cached !== null) {
|
|
30
|
+
out[i] = cached;
|
|
31
|
+
}
|
|
32
|
+
else {
|
|
33
|
+
uncachedIndices.push(i);
|
|
34
|
+
uncachedTexts.push(texts[i]);
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
else {
|
|
39
|
+
for (let i = 0; i < texts.length; i++) {
|
|
40
|
+
uncachedIndices.push(i);
|
|
41
|
+
uncachedTexts.push(texts[i]);
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
// Embed uncached texts in batches
|
|
45
|
+
for (let b = 0; b < uncachedTexts.length; b += BATCH_SIZE) {
|
|
46
|
+
const batchTexts = uncachedTexts.slice(b, b + BATCH_SIZE).map((t) => t.slice(0, MAX_INPUT_CHARS));
|
|
47
|
+
const batchIndices = uncachedIndices.slice(b, b + BATCH_SIZE);
|
|
48
|
+
const res = await embedMany({ model, values: batchTexts });
|
|
49
|
+
for (let j = 0; j < res.embeddings.length; j++) {
|
|
50
|
+
const vec = res.embeddings[j];
|
|
51
|
+
const originalIndex = batchIndices[j];
|
|
52
|
+
out[originalIndex] = vec;
|
|
53
|
+
if (useCache) {
|
|
54
|
+
saveEmbedding(uncachedTexts[b + j], modelName, vec);
|
|
55
|
+
}
|
|
56
|
+
}
|
|
19
57
|
tokensUsed += res.usage?.tokens ?? 0;
|
|
20
58
|
}
|
|
21
59
|
return { vectors: out, tokensUsed };
|
|
22
60
|
}
|
|
23
|
-
export async function embedOne(text) {
|
|
24
|
-
const { vectors } = await embedTexts([text]);
|
|
61
|
+
export async function embedOne(text, modelOverride) {
|
|
62
|
+
const { vectors } = await embedTexts([text], modelOverride);
|
|
25
63
|
return vectors[0];
|
|
26
64
|
}
|
|
27
65
|
//# sourceMappingURL=embeddings.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"embeddings.js","sourceRoot":"","sources":["../../src/ai/embeddings.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,IAAI,CAAA;AAC9B,OAAO,EAAE,UAAU,EAAE,eAAe,EAAE,MAAM,cAAc,CAAA;AAC1D,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAA;
|
|
1
|
+
{"version":3,"file":"embeddings.js","sourceRoot":"","sources":["../../src/ai/embeddings.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,IAAI,CAAA;AAC9B,OAAO,EAAE,UAAU,EAAE,eAAe,EAAE,MAAM,cAAc,CAAA;AAC1D,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAA;AAC7C,OAAO,EAAE,eAAe,EAAE,aAAa,EAAE,MAAM,YAAY,CAAA;AAE3D,MAAM,UAAU,GAAG,EAAE,CAAA;AACrB,MAAM,eAAe,GAAG,IAAI,CAAA;AAO5B;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,KAAe,EACf,aAAsB,EACtB,QAAQ,GAAG,IAAI;IAEf,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,OAAO,EAAE,EAAE,EAAE,UAAU,EAAE,CAAC,EAAE,CAAA;IAC7D,eAAe,EAAE,CAAA;IACjB,MAAM,EAAE,GAAG,UAAU,EAAE,CAAA;IACvB,MAAM,SAAS,GAAG,aAAa,IAAI,eAAe,CAAA;IAClD,MAAM,KAAK,GAAG,EAAE,CAAC,SAAS,CAAC,SAAS,CAAC,CAAA;IAErC,MAAM,GAAG,GAAwB,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACnE,IAAI,UAAU,GAAG,CAAC,CAAA;IAElB,oBAAoB;IACpB,MAAM,eAAe,GAAa,EAAE,CAAA;IACpC,MAAM,aAAa,GAAa,EAAE,CAAA;IAElC,IAAI,QAAQ,EAAE,CAAC;QACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,MAAM,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC,CAAE,EAAE,SAAS,CAAC,CAAA;YACpD,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;gBACpB,GAAG,CAAC,CAAC,CAAC,GAAG,MAAM,CAAA;YACjB,CAAC;iBAAM,CAAC;gBACN,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;gBACvB,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAE,CAAC,CAAA;YAC/B,CAAC;QACH,CAAC;IACH,CAAC;SAAM,CAAC;QACN,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;YACvB,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAE,CAAC,CAAA;QAC/B,CAAC;IACH,CAAC;IAED,kCAAkC;IAClC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,aAAa,CAAC,MAAM,EAAE,CAAC,IAAI,UAAU,EAAE,CAAC;QAC1D,MAAM,UAAU,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,eAAe,CAAC,CAAC,CAAA;QACjG,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,CAAA;QAC7D,MAAM,GAAG,GAAG,MAAM,SAAS,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC,CAAA;QAC1D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC/C,MAAM,GAAG,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAa,CAAA;YACzC,MAAM,aAAa,GAAG,YAAY,CAAC,CAAC,CAAE,CAAA;YACtC,GAAG,CAAC,aAAa,CAAC,GAAG,GAAG,CAAA;YACxB,IAAI,QAAQ,EAAE,CAAC;gBACb,aAAa,CAAC,aAAa,CAAC,CAAC,GAAG,CAAC,CAAE,EAAE,SAAS,EAAE,GAAG,CAAC,CAAA;YACtD,CAAC;QACH,CAAC;QACD,UAAU,IAAI,GAAG,CAAC,KAAK,EAAE,MAAM,IAAI,CAAC,CAAA;IACtC,CAAC;IAED,OAAO,EAAE,OAAO,EAAE,GAAiB,EAAE,UAAU,EAAE,CAAA;AACnD,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,IAAY,EAAE,aAAsB;IACjE,MAAM,EAAE,OAAO,EAAE,GAAG,MAAM,UAAU,CAAC,CAAC,IAAI,CAAC,EAAE,aAAa,CAAC,CAAA;IAC3D,OAAO,OAAO,CAAC,CAAC,CAAE,CAAA;AACpB,CAAC"}
|
package/dist/ai/gateway.d.ts
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
|
+
export declare const DEFAULT_EMBEDDING_MODEL = "openai/text-embedding-3-small";
|
|
1
2
|
export declare const EMBEDDING_MODEL: string;
|
|
2
3
|
export declare const EMBEDDING_DIM = 1536;
|
|
3
4
|
export declare const EXTRACT_MODEL: string;
|
|
4
5
|
export declare function getGateway(): import("@ai-sdk/openai").OpenAIProvider;
|
|
6
|
+
/** Reset the cached gateway (useful for tests or when env changes mid-process) */
|
|
7
|
+
export declare function resetGateway(): void;
|
|
5
8
|
//# sourceMappingURL=gateway.d.ts.map
|
package/dist/ai/gateway.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"gateway.d.ts","sourceRoot":"","sources":["../../src/ai/gateway.ts"],"names":[],"mappings":"AAEA,eAAO,MAAM,eAAe,
|
|
1
|
+
{"version":3,"file":"gateway.d.ts","sourceRoot":"","sources":["../../src/ai/gateway.ts"],"names":[],"mappings":"AAEA,eAAO,MAAM,uBAAuB,kCAAkC,CAAA;AACtE,eAAO,MAAM,eAAe,QAAyD,CAAA;AACrF,eAAO,MAAM,aAAa,OAAO,CAAA;AACjC,eAAO,MAAM,aAAa,QAA6C,CAAA;AAIvE,wBAAgB,UAAU,4CAoBzB;AAED,kFAAkF;AAClF,wBAAgB,YAAY,IAAI,IAAI,CAEnC"}
|
package/dist/ai/gateway.js
CHANGED
|
@@ -1,18 +1,27 @@
|
|
|
1
1
|
import { createOpenAI } from "@ai-sdk/openai";
|
|
2
|
-
export const
|
|
2
|
+
export const DEFAULT_EMBEDDING_MODEL = "openai/text-embedding-3-small";
|
|
3
|
+
export const EMBEDDING_MODEL = process.env.EMBEDDING_MODEL ?? DEFAULT_EMBEDDING_MODEL;
|
|
3
4
|
export const EMBEDDING_DIM = 1536;
|
|
4
5
|
export const EXTRACT_MODEL = process.env.EXTRACT_MODEL ?? "gpt-4o-mini";
|
|
5
6
|
let _gw = null;
|
|
6
7
|
export function getGateway() {
|
|
7
8
|
if (_gw)
|
|
8
9
|
return _gw;
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
}
|
|
10
|
+
// OpenRouter takes priority if OPENROUTER_API_KEY is set
|
|
11
|
+
const openrouterKey = process.env.OPENROUTER_API_KEY;
|
|
12
|
+
const gatewayKey = process.env.AI_GATEWAY_API_KEY;
|
|
13
|
+
const apiKey = openrouterKey ?? gatewayKey;
|
|
14
|
+
if (!apiKey) {
|
|
15
|
+
throw new Error("Set OPENROUTER_API_KEY (OpenRouter) or AI_GATEWAY_API_KEY (Vercel AI Gateway) to use AI features.");
|
|
16
|
+
}
|
|
17
|
+
const baseURL = openrouterKey
|
|
18
|
+
? (process.env.AI_GATEWAY_BASE_URL ?? "https://openrouter.ai/api/v1")
|
|
19
|
+
: (process.env.AI_GATEWAY_BASE_URL ?? "https://ai-gateway.vercel.sh/v1");
|
|
20
|
+
_gw = createOpenAI({ apiKey, baseURL });
|
|
16
21
|
return _gw;
|
|
17
22
|
}
|
|
23
|
+
/** Reset the cached gateway (useful for tests or when env changes mid-process) */
|
|
24
|
+
export function resetGateway() {
|
|
25
|
+
_gw = null;
|
|
26
|
+
}
|
|
18
27
|
//# sourceMappingURL=gateway.js.map
|
package/dist/ai/gateway.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"gateway.js","sourceRoot":"","sources":["../../src/ai/gateway.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAA;AAE7C,MAAM,CAAC,MAAM,eAAe,GAAG,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,
|
|
1
|
+
{"version":3,"file":"gateway.js","sourceRoot":"","sources":["../../src/ai/gateway.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAA;AAE7C,MAAM,CAAC,MAAM,uBAAuB,GAAG,+BAA+B,CAAA;AACtE,MAAM,CAAC,MAAM,eAAe,GAAG,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,uBAAuB,CAAA;AACrF,MAAM,CAAC,MAAM,aAAa,GAAG,IAAI,CAAA;AACjC,MAAM,CAAC,MAAM,aAAa,GAAG,OAAO,CAAC,GAAG,CAAC,aAAa,IAAI,aAAa,CAAA;AAEvE,IAAI,GAAG,GAA2C,IAAI,CAAA;AAEtD,MAAM,UAAU,UAAU;IACxB,IAAI,GAAG;QAAE,OAAO,GAAG,CAAA;IAEnB,yDAAyD;IACzD,MAAM,aAAa,GAAG,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAAA;IACpD,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAAA;IACjD,MAAM,MAAM,GAAG,aAAa,IAAI,UAAU,CAAA;IAE1C,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,IAAI,KAAK,CACb,mGAAmG,CACpG,CAAA;IACH,CAAC;IAED,MAAM,OAAO,GAAG,aAAa;QAC3B,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,mBAAmB,IAAI,8BAA8B,CAAC;QACrE,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,mBAAmB,IAAI,iCAAiC,CAAC,CAAA;IAE1E,GAAG,GAAG,YAAY,CAAC,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC,CAAA;IACvC,OAAO,GAAG,CAAA;AACZ,CAAC;AAED,kFAAkF;AAClF,MAAM,UAAU,YAAY;IAC1B,GAAG,GAAG,IAAI,CAAA;AACZ,CAAC"}
|