@devp0nt/doc0 0.0.0 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +272 -0
- package/dist/cache.d.ts +13 -0
- package/dist/cache.js +50 -0
- package/dist/cli.d.ts +9 -0
- package/dist/cli.js +136 -0
- package/dist/docs.d.ts +42 -0
- package/dist/docs.js +100 -0
- package/dist/export.d.ts +29 -0
- package/dist/export.js +76 -0
- package/dist/generate.d.ts +7 -0
- package/dist/generate.js +12 -0
- package/dist/index.d.ts +46 -0
- package/dist/index.js +160 -0
- package/dist/load.d.ts +10 -0
- package/dist/load.js +17 -0
- package/dist/mcp/index.d.ts +13 -0
- package/dist/mcp/index.js +82 -0
- package/dist/mcp/tools.d.ts +39 -0
- package/dist/mcp/tools.js +72 -0
- package/dist/normalize.d.ts +21 -0
- package/dist/normalize.js +25 -0
- package/dist/parsers/code.d.ts +7 -0
- package/dist/parsers/code.js +73 -0
- package/dist/parsers/index.d.ts +11 -0
- package/dist/parsers/index.js +15 -0
- package/dist/parsers/markdown.d.ts +7 -0
- package/dist/parsers/markdown.js +36 -0
- package/dist/search/embedder.d.ts +12 -0
- package/dist/search/embedder.js +30 -0
- package/dist/search/index.d.ts +23 -0
- package/dist/search/index.js +80 -0
- package/dist/select.d.ts +19 -0
- package/dist/select.js +25 -0
- package/dist/types.d.ts +147 -0
- package/dist/types.js +1 -0
- package/dist/utils.d.ts +27 -0
- package/dist/utils.js +74 -0
- package/dist/web/index.d.ts +145 -0
- package/dist/web/index.js +43 -0
- package/dist/web/render.d.ts +12 -0
- package/dist/web/render.js +129 -0
- package/package.json +164 -3
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import { asStringArray, defaultIdFromFile, parseRelated } from "../utils.js";
|
|
2
|
+
import { buildDoc } from "../normalize.js";
|
|
3
|
+
import { parse } from "comment-parser";
|
|
4
|
+
//#region src/parsers/code.ts
|
|
5
|
+
/** JSDoc tag names treated as doc0 directives; a comment block carrying any of them is collected as a doc. */
const DOC0_TAGS = new Set(["id", "title", "description", "tags", "category", "related", "doc"]);
|
|
14
|
+
/**
 * Find the identifier declared on the first meaningful line after a comment block.
 *
 * Blank lines and `//` line comments are skipped. The first remaining line alone decides the result; later
 * lines are never consulted.
 *
 * @param lines The file content split into lines.
 * @param afterLine Zero-based index of the comment block's last line; scanning starts on the line after it.
 * @returns The declared (`const foo`, `function* bar`, `const enum Baz`, …) or assigned (`foo:`, `foo =`,
 * `foo(`) identifier, or `undefined` when that line names nothing recognisable or no such line exists.
 */
const nextSymbol = (lines, afterLine) => {
	for (let i = afterLine + 1; i < lines.length; i++) {
		const line = lines[i]?.trim() ?? "";
		// Line comments (e.g. lint directives) sit between a JSDoc block and its declaration; they name nothing.
		if (!line || line.startsWith("//")) continue;
		// `const enum` is tried before plain `const` so the capture is the enum's name rather than the keyword
		// `enum`; `function*` (with or without spaces around the star) covers generator declarations.
		const declared = line.match(/(?:export\s+)?(?:default\s+)?(?:async\s+)?(?:const\s+enum\s+|function\s*\*\s*|(?:const|let|var|function|class|interface|type|enum)\s+)([A-Za-z_$][\w$]*)/);
		// Fallback for members: `name:`, `name =`, `name(` at the start of the line.
		const assigned = line.match(/^([A-Za-z_$][\w$]*)\s*[:=(]/);
		return (declared ?? assigned)?.[1];
	}
};
|
|
24
|
+
/**
 * Extract doc0 docs from the JSDoc blocks of one source file. A block is kept only when at least one of its
 * tags is a doc0 directive (a name in `DOC0_TAGS`).
 *
 * @param input The file's path (`file`) and full text (`content`).
 * @returns One doc per qualifying block, in the order the comment parser reports them.
 */
const parse$1 = ({ file, content }) => {
	const blocks = parse(content, { spacing: "preserve" });
	const lines = content.split("\n");
	// A tag's text arrives split into `name` and `description`; rejoin them to recover the whole value.
	const textOf = (tag) => [tag.name, tag.description].filter(Boolean).join(" ").trim();
	const docs = [];
	for (const block of blocks) {
		if (!block.tags.some((tag) => DOC0_TAGS.has(tag.tag))) continue;
		// Single-valued directive: the first `@name` tag's text, or undefined when it is absent or empty.
		const value = (name) => {
			const tag = block.tags.find((candidate) => candidate.tag === name);
			if (!tag) return;
			const text = textOf(tag);
			return text.length > 0 ? text : void 0;
		};
		// List-valued directive: every `@name` occurrence contributes, merged into one comma list so the
		// downstream comma-splitting helpers see them all (repeating `@related` no longer drops the rest).
		const list = (name) => {
			const texts = block.tags.filter((candidate) => candidate.tag === name).map((tag) => textOf(tag)).filter((text) => text.length > 0);
			return texts.length > 0 ? texts.join(",") : void 0;
		};
		// NOTE(review): source line numbers are treated as zero-based here, hence the +1 — confirm against comment-parser.
		const startLine = (block.source[0]?.number ?? 0) + 1;
		const endLine = (block.source.at(-1)?.number ?? 0) + 1;
		const body = block.description.trim();
		docs.push(buildDoc({
			source: {
				file,
				type: "jsdoc",
				lineStart: startLine,
				lineEnd: endLine
			},
			// Explicit `@id`, else the symbol declared right after the block, else an id derived from the file path.
			id: value("id") ?? nextSymbol(lines, endLine - 1) ?? defaultIdFromFile(file),
			title: value("title"),
			description: value("description"),
			tags: asStringArray(list("tags")),
			category: asStringArray(list("category")),
			related: parseRelated(list("related")),
			content: body
		}));
	}
	return docs;
};
|
|
57
|
+
/**
 * The JSDoc parser for code files (`.ts`, `.tsx`, `.mts`, `.cts`, `.js`, `.jsx`, `.mjs`, `.cjs`). A comment
 * block becomes a doc if and only if it carries a doc0 directive.
 */
const codeParser = {
	name: "code",
	extensions: ["ts", "tsx", "mts", "cts", "js", "jsx", "mjs", "cjs"],
	parse: parse$1
};
|
|
72
|
+
//#endregion
|
|
73
|
+
export { codeParser };
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { Doc, Parser, ParserInput } from "../types.js";
|
|
2
|
+
import { codeParser } from "./code.js";
|
|
3
|
+
import { markdownParser } from "./markdown.js";
|
|
4
|
+
|
|
5
|
+
//#region src/parsers/index.d.ts
|
|
6
|
+
/** The parsers used when `Doc0.create` is given no `parsers`. */
|
|
7
|
+
declare const defaultParsers: Parser[];
|
|
8
|
+
/** Run the first parser that claims the file's extension. No match → no docs. */
|
|
9
|
+
declare const parseFile: (input: ParserInput, parsers: Parser[]) => Doc[] | Promise<Doc[]>;
|
|
10
|
+
//#endregion
|
|
11
|
+
export { codeParser, defaultParsers, markdownParser, parseFile };
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { codeParser } from "./code.js";
|
|
2
|
+
import { markdownParser } from "./markdown.js";
|
|
3
|
+
import { extname } from "node:path";
|
|
4
|
+
//#region src/parsers/index.ts
|
|
5
|
+
/** The parsers used when `Doc0.create` is given no `parsers`. */
|
|
6
|
+
const defaultParsers = [markdownParser, codeParser];
|
|
7
|
+
/** Run the first parser that claims the file's extension. No match → no docs. */
|
|
8
|
+
const parseFile = (input, parsers) => {
|
|
9
|
+
const ext = extname(input.file).slice(1).toLowerCase();
|
|
10
|
+
const parser = parsers.find((candidate) => candidate.extensions.includes(ext));
|
|
11
|
+
if (!parser) return [];
|
|
12
|
+
return parser.parse(input);
|
|
13
|
+
};
|
|
14
|
+
//#endregion
|
|
15
|
+
export { codeParser, defaultParsers, markdownParser, parseFile };
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { asString, asStringArray, parseRelated } from "../utils.js";
|
|
2
|
+
import { buildDoc } from "../normalize.js";
|
|
3
|
+
import matter from "gray-matter";
|
|
4
|
+
//#region src/parsers/markdown.ts
|
|
5
|
+
/**
 * Parse one Markdown file into at most one doc: frontmatter supplies the fields, the remaining body (trimmed)
 * becomes the content. Frontmatter `doc: false` opts the file out and yields no docs.
 */
const parse = ({ file, content }) => {
	const { data, content: body } = matter(content);
	if (data.doc === false) return [];
	// The doc spans the whole file, from line 1 to the number of "\n"-separated lines.
	const source = {
		file,
		type: "md",
		lineStart: 1,
		lineEnd: content.split("\n").length
	};
	const doc = buildDoc({
		source,
		id: asString(data.id),
		title: asString(data.title),
		description: asString(data.description),
		tags: asStringArray(data.tags),
		category: asStringArray(data.category),
		related: parseRelated(data.related),
		content: body.trim()
	});
	return [doc];
};
|
|
25
|
+
/** The Markdown parser (`.md`, `.mdx`, `.mdc`): frontmatter becomes the doc's fields, the body its content. */
const markdownParser = {
	name: "markdown",
	extensions: ["md", "mdx", "mdc"],
	parse
};
|
|
35
|
+
//#endregion
|
|
36
|
+
export { markdownParser };
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
//#region src/search/embedder.d.ts
|
|
2
|
+
/** Small, fast sentence-embedding model: 384-dim, ~23MB, runs locally with no API key. */
|
|
3
|
+
declare const EMBED_MODEL = "Xenova/all-MiniLM-L6-v2";
|
|
4
|
+
declare const EMBED_DIM = 384;
|
|
5
|
+
type Embed = (text: string) => Promise<number[]>;
|
|
6
|
+
/**
|
|
7
|
+
* The default embedder. The model is downloaded once into the shared Hugging Face cache (`~/.cache/huggingface`) and
|
|
8
|
+
* reused across projects.
|
|
9
|
+
*/
|
|
10
|
+
declare const createEmbedder: (model?: string) => Embed;
|
|
11
|
+
//#endregion
|
|
12
|
+
export { EMBED_DIM, EMBED_MODEL, Embed, createEmbedder };
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
//#region src/search/embedder.ts
|
|
2
|
+
/** Small, fast sentence-embedding model: 384-dim, ~23MB, runs locally with no API key. */
|
|
3
|
+
const EMBED_MODEL = "Xenova/all-MiniLM-L6-v2";
|
|
4
|
+
const EMBED_DIM = 384;
|
|
5
|
+
const extractors = /* @__PURE__ */ new Map();
|
|
6
|
+
const getExtractor = async (model) => {
|
|
7
|
+
let extractor = extractors.get(model);
|
|
8
|
+
if (!extractor) {
|
|
9
|
+
extractor = (await import("@huggingface/transformers").catch(() => {
|
|
10
|
+
throw new Error("doc0: semantic search needs the optional \"@huggingface/transformers\" package. Install it (e.g. `bun add -d @huggingface/transformers`), or use the default keyword search.");
|
|
11
|
+
})).pipeline("feature-extraction", model);
|
|
12
|
+
extractors.set(model, extractor);
|
|
13
|
+
}
|
|
14
|
+
return extractor;
|
|
15
|
+
};
|
|
16
|
+
/**
|
|
17
|
+
* The default embedder. The model is downloaded once into the shared Hugging Face cache (`~/.cache/huggingface`) and
|
|
18
|
+
* reused across projects.
|
|
19
|
+
*/
|
|
20
|
+
const createEmbedder = (model = EMBED_MODEL) => {
|
|
21
|
+
return async (text) => {
|
|
22
|
+
const output = await (await getExtractor(model))(text, {
|
|
23
|
+
pooling: "mean",
|
|
24
|
+
normalize: true
|
|
25
|
+
});
|
|
26
|
+
return Array.from(output.data, Number);
|
|
27
|
+
};
|
|
28
|
+
};
|
|
29
|
+
//#endregion
|
|
30
|
+
export { EMBED_DIM, EMBED_MODEL, createEmbedder };
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import { Cache, SearchEngine } from "../types.js";
|
|
2
|
+
import { EMBED_DIM, EMBED_MODEL, Embed, createEmbedder } from "./embedder.js";
|
|
3
|
+
|
|
4
|
+
//#region src/search/index.d.ts
|
|
5
|
+
/** Options accepted by `oramaSearch`. */
type OramaSearchOptions = {
	/**
	 * Turn on semantic (hybrid keyword + vector) search via local-model embeddings. Lazy-loads
	 * `@huggingface/transformers` on first use. Default `false` — keyword/BM25 only, zero extra install. Passing `embed`
	 * implies `true`.
	 */
	embeddings?: boolean;
	/** Custom embedder (e.g. an API provider, or a stub in tests). Implies `embeddings: true`. */
	embed?: Embed;
	/** Embedding model id (default `Xenova/all-MiniLM-L6-v2`). */
	model?: string;
	/** Where to cache computed vectors (default `node_modules/.cache/doc0`). */
	cache?: Cache;
};
|
|
16
|
+
/**
|
|
17
|
+
* An orama-backed search engine. Keyword/BM25 by default — this is the engine `doc0` uses out of the box. Set
|
|
18
|
+
* `embeddings: true` (or pass `embed`) to upgrade to hybrid keyword + semantic search with local-model vectors cached
|
|
19
|
+
* by content hash. Plug a custom config in via `Doc0.create({ search })`.
|
|
20
|
+
*/
|
|
21
|
+
declare const oramaSearch: (options?: OramaSearchOptions) => SearchEngine;
|
|
22
|
+
//#endregion
|
|
23
|
+
export { EMBED_DIM, EMBED_MODEL, type Embed, OramaSearchOptions, createEmbedder, oramaSearch };
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import { fileCache, hash } from "../cache.js";
|
|
2
|
+
import { EMBED_DIM, EMBED_MODEL, createEmbedder } from "./embedder.js";
|
|
3
|
+
import { create, insertMultiple, search } from "@orama/orama";
|
|
4
|
+
//#region src/search/index.ts
|
|
5
|
+
/** Orama schema for the keyword index; the embeddings variant extends it with a vector property. */
const BASE_SCHEMA = { docId: "string", title: "string", description: "string", content: "string", tags: "string[]" };
|
|
12
|
+
/**
 * An orama-backed search engine. Keyword/BM25 by default — this is the engine `doc0` uses out of the box. Set
 * `embeddings: true` (or pass `embed`) to upgrade to hybrid keyword + semantic search with local-model vectors cached
 * by content hash. Plug a custom config in via `Doc0.create({ search })`.
 *
 * `index` builds a fresh database on the side and publishes it only once it is complete. A search running
 * during a re-index, or after a failed one, keeps answering from the previous index instead of an empty one.
 *
 * @param options See `OramaSearchOptions`; all fields are optional.
 * @returns A search engine with `index(docs)` and `search(query, { limit })` (limit defaults to 10). `search`
 * returns `[]` until the first `index` call has completed.
 */
const oramaSearch = (options = {}) => {
	const useEmbeddings = options.embeddings === true || options.embed !== void 0;
	const model = options.model ?? EMBED_MODEL;
	const cache = options.cache ?? fileCache();
	const embed = useEmbeddings ? options.embed ?? createEmbedder(model) : void 0;
	// Vectors are cached under the model id plus a hash of the embedded text.
	// NOTE(review): a custom `embed` without `model` shares keys with the default model — confirm this is intended.
	const embedCached = async (content) => {
		const key = `embed:${model}:${hash(content)}`;
		const cached = await cache.get(key);
		if (cached) return cached;
		const vector = await embed(content);
		await cache.set(key, vector);
		return vector;
	};
	const makeDb = () => create({ schema: useEmbeddings ? {
		...BASE_SCHEMA,
		embedding: `vector[${EMBED_DIM}]`
	} : BASE_SCHEMA });
	// The published index: the database and the id → doc lookup are always replaced together.
	let current;
	return {
		index: async (docs) => {
			const nextDb = makeDb();
			const nextById = /* @__PURE__ */ new Map();
			const records = [];
			for (const doc of docs) {
				nextById.set(doc.id, doc);
				const record = {
					docId: doc.id,
					title: doc.title,
					description: doc.description,
					content: doc.content,
					tags: doc.tags
				};
				if (useEmbeddings) record.embedding = await embedCached(`${doc.title}\n${doc.description}\n${doc.content}`);
				records.push(record);
			}
			await insertMultiple(nextDb, records);
			// Publish only after everything succeeded.
			current = {
				db: nextDb,
				byId: nextById
			};
		},
		search: async (query, searchOptions) => {
			// Work on one snapshot so a concurrent re-index cannot swap the db and the lookup apart mid-search.
			const snapshot = current;
			if (!snapshot) return [];
			const limit = searchOptions?.limit ?? 10;
			const results = useEmbeddings && embed ? await search(snapshot.db, {
				mode: "hybrid",
				term: query,
				vector: {
					value: await embed(query),
					property: "embedding"
				},
				limit
			}) : await search(snapshot.db, {
				term: query,
				limit
			});
			const hits = [];
			for (const hit of results.hits) {
				const doc = snapshot.byId.get(hit.document.docId);
				if (doc) hits.push(doc);
			}
			return hits;
		}
	};
};
|
|
79
|
+
//#endregion
|
|
80
|
+
export { EMBED_DIM, EMBED_MODEL, createEmbedder, oramaSearch };
|
package/dist/select.d.ts
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { Doc, RelatedRef } from "./types.js";
|
|
2
|
+
|
|
3
|
+
//#region src/select.d.ts
|
|
4
|
+
/** A selectable field on a {@link Doc}. `source` carries the file path + line span. */
|
|
5
|
+
type DocField = 'id' | 'title' | 'description' | 'tags' | 'category' | 'related' | 'content' | 'source';
|
|
6
|
+
/**
|
|
7
|
+
* A projected doc for output (MCP/CLI/web): a subset of {@link Doc} fields, with `related` upgraded from stored links to
|
|
8
|
+
* enriched {@link RelatedRef}s (target title/description resolved). This is what the tool helpers return.
|
|
9
|
+
*/
|
|
10
|
+
type DocView = Omit<Partial<Doc>, 'related'> & {
|
|
11
|
+
related?: RelatedRef[];
|
|
12
|
+
};
|
|
13
|
+
declare const ALL_FIELDS: readonly DocField[];
|
|
14
|
+
/** Pick a subset of a doc's fields — keeps MCP/CLI responses lean and lets callers ask for `source`. */
|
|
15
|
+
declare const selectFields: (doc: Doc, fields: readonly DocField[]) => Partial<Doc>;
|
|
16
|
+
/** Parse a comma list / array of field names, keeping only valid {@link DocField}s (else undefined). */
|
|
17
|
+
declare const parseFields: (input?: string | readonly string[]) => DocField[] | undefined;
|
|
18
|
+
//#endregion
|
|
19
|
+
export { ALL_FIELDS, DocField, DocView, parseFields, selectFields };
|
package/dist/select.js
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
//#region src/select.ts
|
|
2
|
+
/** Every selectable doc field name, in output order. */
const ALL_FIELDS = ["id", "title", "description", "tags", "category", "related", "content", "source"];
|
|
12
|
+
/** Pick a subset of a doc's fields — keeps MCP/CLI responses lean and lets callers ask for `source`. */
|
|
13
|
+
const selectFields = (doc, fields) => {
|
|
14
|
+
const result = {};
|
|
15
|
+
for (const field of fields) result[field] = doc[field];
|
|
16
|
+
return result;
|
|
17
|
+
};
|
|
18
|
+
/** Parse a comma list / array of field names, keeping only valid {@link DocField}s (else undefined). */
|
|
19
|
+
const parseFields = (input) => {
|
|
20
|
+
if (!input) return;
|
|
21
|
+
const valid = (Array.isArray(input) ? input : String(input).split(",")).map((name) => name.trim()).filter((name) => ALL_FIELDS.includes(name));
|
|
22
|
+
return valid.length > 0 ? valid : void 0;
|
|
23
|
+
};
|
|
24
|
+
//#endregion
|
|
25
|
+
export { ALL_FIELDS, parseFields, selectFields };
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
import { StandardSchemaV1 } from "@standard-schema/spec";
|
|
2
|
+
|
|
3
|
+
//#region src/types.d.ts
|
|
4
|
+
/** Which parser produced the doc — by parser kind, not file extension. `md` covers .md/.mdx/.mdc. */
|
|
5
|
+
type DocSourceType = 'md' | 'jsdoc';
|
|
6
|
+
/** Where a parsed doc lives — file, the parser that produced it, and line span. */
|
|
7
|
+
type DocSource = {
|
|
8
|
+
file: string;
|
|
9
|
+
type: DocSourceType;
|
|
10
|
+
lineStart: number;
|
|
11
|
+
lineEnd: number;
|
|
12
|
+
};
|
|
13
|
+
/**
|
|
14
|
+
* A normalized link to another doc, as stored on a {@link Doc}. Authored loosely — a bare id, an `id!` shorthand for
|
|
15
|
+
* must-read, or an object — and always normalized to this object form at parse time (the trailing `!` becomes
|
|
16
|
+
* `required: true`). So `related` is never a raw string on a collected doc.
|
|
17
|
+
*
|
|
18
|
+
* @example
|
|
19
|
+
* '@related entity-payloads' // → { id: 'entity-payloads' }
|
|
20
|
+
* '@related entity-payloads!' // → { id: 'entity-payloads', required: true }
|
|
21
|
+
* { id: 'postgres', reason: 'shares the schema' } // kept as-is
|
|
22
|
+
*/
|
|
23
|
+
type DocRelated = {
|
|
24
|
+
id: string;
|
|
25
|
+
reason?: string;
|
|
26
|
+
required?: boolean;
|
|
27
|
+
};
|
|
28
|
+
/**
|
|
29
|
+
* A {@link DocRelated} link enriched for output: the link's own `reason`/`required` plus the target doc's `title` and
|
|
30
|
+
* `description`, resolved at output time (not stored on the link) so a reader can tell what's behind the link before
|
|
31
|
+
* fetching it. This is the shape of a doc's `related` field on output and what the web "Related" section renders.
|
|
32
|
+
*/
|
|
33
|
+
type RelatedRef = DocRelated & {
|
|
34
|
+
title: string;
|
|
35
|
+
description: string;
|
|
36
|
+
};
|
|
37
|
+
/**
 * One normalized doc — the unit doc0 collects, links, and serves. `TExtra` carries any custom fields a
 * `transform`/`schema` adds.
 */
type Doc<TExtra = unknown> = {
	source: DocSource;
	/** Explicit, else the symbol name / file basename / dir name. */
	id: string;
	/** Explicit, else the first heading, else a humanized `id`. */
	title: string;
	/** Explicit, else the first content line. */
	description: string;
	/** The most-used directive; a superset of `category`. */
	tags: string[];
	/** The grouping tag(s) — drive generated folder structure. */
	category: string[];
	related: DocRelated[];
	/** The doc body, with frontmatter / JSDoc directives stripped. */
	content: string;
} & TExtra;
|
|
51
|
+
/** A file to write — `generate`/`finalGenerate` return these. */
|
|
52
|
+
type Output = {
|
|
53
|
+
path: string;
|
|
54
|
+
content: string;
|
|
55
|
+
};
|
|
56
|
+
type OneOrMany<T> = T | T[];
|
|
57
|
+
/**
|
|
58
|
+
* Shared cache: `collect()` stores parse results here, extensions store derived data (e.g. embeddings). Keys are
|
|
59
|
+
* content-addressed by the caller.
|
|
60
|
+
*/
|
|
61
|
+
type Cache = {
|
|
62
|
+
get: <T>(key: string) => Promise<T | undefined>;
|
|
63
|
+
set: <T>(key: string, value: T) => Promise<void>;
|
|
64
|
+
};
|
|
65
|
+
/** What a parser receives for one matched file. */
|
|
66
|
+
type ParserInput = {
|
|
67
|
+
file: string;
|
|
68
|
+
content: string;
|
|
69
|
+
};
|
|
70
|
+
/**
|
|
71
|
+
* A doc-carrier parser: claims file extensions and turns one file into zero or more docs (a source can split into
|
|
72
|
+
* several via the parser or `transform`).
|
|
73
|
+
*/
|
|
74
|
+
type Parser = {
|
|
75
|
+
name: string;
|
|
76
|
+
extensions: string[];
|
|
77
|
+
parse: (input: ParserInput) => Doc[] | Promise<Doc[]>;
|
|
78
|
+
};
|
|
79
|
+
type SearchOptions = {
|
|
80
|
+
limit?: number;
|
|
81
|
+
};
|
|
82
|
+
/**
|
|
83
|
+
* A pluggable search engine. By default `Doc0` uses orama keyword/BM25 (`oramaSearch()`, loaded lazily); pass
|
|
84
|
+
* `oramaSearch({ embeddings: true })` for hybrid keyword + semantic search, or any engine implementing this shape.
|
|
85
|
+
*/
|
|
86
|
+
type SearchEngine = {
|
|
87
|
+
index: (docs: readonly Doc[]) => void | Promise<void>;
|
|
88
|
+
search: (query: string, options?: SearchOptions) => Doc[] | Promise<Doc[]>;
|
|
89
|
+
};
|
|
90
|
+
/** The queryable collection returned by `doc0.collect()`. */
|
|
91
|
+
type Docs<TExtra = unknown> = {
|
|
92
|
+
all: () => Doc<TExtra>[];
|
|
93
|
+
get: (id: string) => Doc<TExtra> | undefined;
|
|
94
|
+
search: (query: string, options?: SearchOptions) => Promise<Doc<TExtra>[]>;
|
|
95
|
+
/**
|
|
96
|
+
* Graph neighbours via `@related` as enriched links (id + reason/required + target title/description); must-read
|
|
97
|
+
* first.
|
|
98
|
+
*/
|
|
99
|
+
related: (id: string, options?: {
|
|
100
|
+
required?: boolean;
|
|
101
|
+
}) => RelatedRef[];
|
|
102
|
+
byTag: (tag: string) => Doc<TExtra>[];
|
|
103
|
+
byCategory: (category: string) => Doc<TExtra>[];
|
|
104
|
+
toJSON: () => string;
|
|
105
|
+
};
|
|
106
|
+
/**
 * Config for `Doc0.create`. `TExtra` is the shape of any custom fields your `transform`/`schema` adds to each `Doc`.
 */
type Doc0Options<TExtra = unknown> = {
	/** Include + `!`-exclude globs. For md/mdx/mdc this is the inclusion gate. */
	glob: string[];
	/** Defaults to the built-in markdown + code parsers. */
	parsers?: Parser[];
	/** Validate and type each doc after transform. */
	schema?: StandardSchemaV1<unknown, Doc<TExtra>>;
	/** One source doc → one or many output docs. */
	transform?: (doc: Doc) => Doc<TExtra> | Doc<TExtra>[];
	/** Map a category id to a human-readable title. */
	category?: Record<string, string>;
	/** Pluggable search engine; default is orama keyword/BM25. Use `oramaSearch({ embeddings: true })` for semantic. */
	search?: SearchEngine;
	/** Run for each doc after collect. */
	callback?: OneOrMany<(doc: Doc<TExtra>) => void | Promise<void>>;
	/** Per-doc output; written only when the content differs (idempotent). */
	generate?: (doc: Doc<TExtra>) => Output | undefined;
	/** Whole-collection output. */
	finalGenerate?: (docs: Docs<TExtra>) => OneOrMany<Output>;
	/** Run once over the whole collection after collect. */
	finalCallback?: (docs: Docs<TExtra>) => void | Promise<void>;
	/**
	 * `true` → `node_modules/.cache/doc0` (default), a path to a cache dir, a custom backend, or `false` to disable.
	 */
	cache?: boolean | string | Cache;
	/** Bypass scanning — feed docs directly. */
	data?: Doc<TExtra>[];
};
|
|
126
|
+
/**
 * The engine: parse config + actions. Created with `Doc0.create(options)`; the implementing class is named `Doc0`.
 */
type Doc0Instance<TExtra = unknown> = {
	readonly options: Doc0Options<TExtra>;
	readonly cache: Cache;
	/** Scan + parse + build the graph. Self-revalidating by mtime, so always fresh; cache-backed. */
	collect: () => Promise<Docs<TExtra>>;
	/** collect + run callbacks + write generate/finalGenerate outputs (diff-only). */
	sync: () => Promise<void>;
	/**
	 * Watch the globs and call `onChange` (debounced) on change — for push reactions like regenerating. Idempotent: at
	 * most one watcher per `Doc0`. Returns a stop function.
	 */
	watch: (onChange: () => void | Promise<void>) => () => void;
};
|
|
140
|
+
/** A `Doc0` instance, or a path to a module that exports one (`doc0` or default). */
|
|
141
|
+
type Doc0Source<TExtra = unknown> = Doc0Instance<TExtra> | string;
|
|
142
|
+
/** Options for `serveWeb` (`@devp0nt/doc0/web`). The served data stays fresh via `collect()`. */
|
|
143
|
+
type ServeWebOptions = {
|
|
144
|
+
port?: number;
|
|
145
|
+
};
|
|
146
|
+
//#endregion
|
|
147
|
+
export { Cache, Doc, Doc0Instance, Doc0Options, Doc0Source, DocRelated, DocSource, DocSourceType, Docs, OneOrMany, Output, Parser, ParserInput, RelatedRef, SearchEngine, SearchOptions, ServeWebOptions };
|
package/dist/types.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/dist/utils.d.ts
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { DocRelated } from "./types.js";
|
|
2
|
+
|
|
3
|
+
//#region src/utils.d.ts
|
|
4
|
+
/** Wrap a single value or an array into an array. */
|
|
5
|
+
declare const toArray: <T>(value: T | T[]) => T[];
|
|
6
|
+
/** Coerce an unknown frontmatter value to a trimmed string, or undefined. */
|
|
7
|
+
declare const asString: (value: unknown) => string | undefined;
|
|
8
|
+
/** Coerce an unknown value to a string array — accepts a string, a comma list, or an array. */
|
|
9
|
+
declare const asStringArray: (value: unknown) => string[];
|
|
10
|
+
/** Humanize an id/slug to Title Case: `entity-payloads` → `Entity Payloads`. */
|
|
11
|
+
declare const humanize: (id: string) => string;
|
|
12
|
+
/** Default id from a file path: the basename without extension, or the dir name for index/README. */
|
|
13
|
+
declare const defaultIdFromFile: (file: string) => string;
|
|
14
|
+
/** The first Markdown heading's text, if any. */
|
|
15
|
+
declare const firstHeading: (content: string) => string | undefined;
|
|
16
|
+
/** Drop a leading top-level heading — callers that render the `title` separately would otherwise duplicate it. */
|
|
17
|
+
declare const stripLeadingHeading: (content: string) => string;
|
|
18
|
+
/** The first non-empty, non-heading line, trimmed. */
|
|
19
|
+
declare const firstLine: (content: string) => string | undefined;
|
|
20
|
+
/**
|
|
21
|
+
* Parse a frontmatter/JSDoc `related` value into normalized {@link DocRelated} links. Always returns the object form:
|
|
22
|
+
* the `id!` string shorthand becomes `{ id, required: true }`, and object entries are cleaned (trimmed `id`, optional
|
|
23
|
+
* `reason`/`required`). Unknown/empty entries are dropped.
|
|
24
|
+
*/
|
|
25
|
+
declare const parseRelated: (value: unknown) => DocRelated[];
|
|
26
|
+
//#endregion
|
|
27
|
+
export { asString, asStringArray, defaultIdFromFile, firstHeading, firstLine, humanize, parseRelated, stripLeadingHeading, toArray };
|
package/dist/utils.js
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
//#region src/utils.ts
|
|
2
|
+
/** Normalize "one or many" to "many": an array is returned as-is, any other value is wrapped in a new array. */
const toArray = (value) => {
	if (Array.isArray(value)) return value;
	return [value];
};
|
|
4
|
+
/**
 * Coerce an unknown frontmatter value to a string. Strings are trimmed, and an empty result counts as absent;
 * numbers and booleans are stringified; every other type yields `undefined`.
 */
const asString = (value) => {
	switch (typeof value) {
		case "string": {
			const text = value.trim();
			return text.length > 0 ? text : void 0;
		}
		case "number":
		case "boolean":
			return String(value);
		default:
			return;
	}
};
|
|
12
|
+
/** Coerce an unknown value to a string array — accepts a string, a comma list, or an array. */
|
|
13
|
+
const asStringArray = (value) => {
|
|
14
|
+
if (Array.isArray(value)) return value.flatMap(asStringArray);
|
|
15
|
+
const str = asString(value);
|
|
16
|
+
if (!str) return [];
|
|
17
|
+
return str.split(",").map((part) => part.trim()).filter((part) => part.length > 0);
|
|
18
|
+
};
|
|
19
|
+
/**
 * Humanize an id/slug to Title Case: `entity-payloads` → `Entity Payloads`. Runs of `-`/`_` become a single
 * space, a lower-case letter or digit followed by an upper-case letter is split apart, and the first
 * character of every word is upper-cased.
 */
const humanize = (id) => {
	const spaced = id.replace(/[-_]+/g, " ");
	const separated = spaced.replace(/([a-z0-9])([A-Z])/g, "$1 $2");
	return separated.trim().replace(/\b\w/g, (char) => char.toUpperCase());
};
|
|
21
|
+
/**
 * Default id from a file path: the basename without its last extension, or the containing directory's name
 * for `index`/`README` files (matched case-insensitively).
 *
 * Both `/` and `\` are accepted as separators. Empty, `.` and `..` path segments are never used as the
 * directory name; when no usable directory exists the basename is returned instead, so the id is never
 * empty. Dotfiles such as `.eslintrc` keep their full name.
 *
 * @param file The file path.
 * @returns The derived id.
 */
const defaultIdFromFile = (file) => {
	const segments = file.split(/[\\/]/);
	const base = segments.at(-1) ?? file;
	// Strip the last extension, but not a leading dot: that is part of a dotfile's name.
	const name = base.replace(/(?!^)\.[^.]+$/, "");
	const lower = name.toLowerCase();
	if (lower !== "index" && lower !== "readme") return name;
	// Nearest real directory name; skips "", "." and ".." (e.g. "/index.md", "./README.md", "a//index.md").
	const dir = segments.slice(0, -1).filter((segment) => segment !== "" && segment !== "." && segment !== "..").at(-1);
	return dir ?? name;
};
|
|
29
|
+
/**
 * The first Markdown ATX heading's text (`#` to `######`), trimmed, or `undefined` when there is none.
 *
 * The hashes must be followed by a space or tab on the same line. An empty `#` line is therefore not a
 * heading, and the paragraph after it is never mistaken for the heading text.
 */
const firstHeading = (content) => {
	// `[ \t]+` rather than `\s+`: `\s` also matches newlines and would let the match spill onto later lines.
	return content.match(/^#{1,6}[ \t]+(.+)$/m)?.[1]?.trim();
};
|
|
33
|
+
/**
 * Drop a leading heading (`#` to `######`, after optional leading whitespace) together with the blank lines
 * that follow it — callers that render the `title` separately would otherwise duplicate it.
 *
 * A heading is the hashes followed by a space or tab, or the hashes alone on the line. A heading that makes
 * up the whole content (no trailing newline) is removed too. Text such as `#hashtag` is not a heading and is
 * left in place.
 *
 * @param content Markdown text.
 * @returns The content without its leading heading, or unchanged when it does not start with one.
 */
const stripLeadingHeading = (content) => content.replace(/^\s*#{1,6}(?:[ \t][^\n]*)?(?:\n+|$)/, "");
|
|
35
|
+
/** The first line that, once trimmed, is non-empty and does not start with `#`; `undefined` when none exists. */
const firstLine = (content) => content.split("\n").map((raw) => raw.trim()).find((line) => line.length > 0 && !line.startsWith("#"));
|
|
42
|
+
/**
|
|
43
|
+
* Parse a frontmatter/JSDoc `related` value into normalized {@link DocRelated} links. Always returns the object form:
|
|
44
|
+
* the `id!` string shorthand becomes `{ id, required: true }`, and object entries are cleaned (trimmed `id`, optional
|
|
45
|
+
* `reason`/`required`). Unknown/empty entries are dropped.
|
|
46
|
+
*/
|
|
47
|
+
const parseRelated = (value) => {
|
|
48
|
+
if (value === null || value === void 0) return [];
|
|
49
|
+
return (Array.isArray(value) ? value : asStringArray(value)).flatMap((item) => {
|
|
50
|
+
if (typeof item === "string") {
|
|
51
|
+
const trimmed = item.trim();
|
|
52
|
+
const required = trimmed.endsWith("!");
|
|
53
|
+
const id = (required ? trimmed.slice(0, -1) : trimmed).trim();
|
|
54
|
+
if (!id) return [];
|
|
55
|
+
return [required ? {
|
|
56
|
+
id,
|
|
57
|
+
required: true
|
|
58
|
+
} : { id }];
|
|
59
|
+
}
|
|
60
|
+
if (typeof item === "object" && item !== null && "id" in item) {
|
|
61
|
+
const source = item;
|
|
62
|
+
const id = asString(source.id);
|
|
63
|
+
if (!id) return [];
|
|
64
|
+
const link = { id };
|
|
65
|
+
const reason = asString(source.reason);
|
|
66
|
+
if (reason) link.reason = reason;
|
|
67
|
+
if (source.required === true) link.required = true;
|
|
68
|
+
return [link];
|
|
69
|
+
}
|
|
70
|
+
return [];
|
|
71
|
+
});
|
|
72
|
+
};
|
|
73
|
+
//#endregion
|
|
74
|
+
export { asString, asStringArray, defaultIdFromFile, firstHeading, firstLine, humanize, parseRelated, stripLeadingHeading, toArray };
|