@nzpr/kb 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,66 @@
1
+ import fs from "node:fs";
2
+ import path from "node:path";
3
+ import YAML from "yaml";
4
+
5
/**
 * Read a markdown file and split an optional leading YAML frontmatter
 * block ("---\n ... \n---\n") from the document body.
 *
 * @param {string} filePath - Path to the markdown file.
 * @returns {[object, string]} The [meta, body] pair from parseMarkdownDocument.
 * @throws {Error} When an opening "---" fence has no closing fence.
 */
export function loadMarkdownWithFrontmatter(filePath) {
  const raw = fs.readFileSync(filePath, "utf8");

  // No opening fence: the whole file is the body, with empty metadata.
  if (!raw.startsWith("---\n")) {
    return parseMarkdownDocument(filePath, {}, raw);
  }

  // Skip the opening "---\n" (4 chars), then find the closing fence.
  const afterFence = raw.slice(4);
  const closingFence = "\n---\n";
  const closingAt = afterFence.indexOf(closingFence);
  if (closingAt === -1) {
    throw new Error(`${filePath}: malformed frontmatter block`);
  }

  const yamlText = afterFence.slice(0, closingAt);
  const markdownBody = afterFence.slice(closingAt + closingFence.length);
  // YAML.parse returns null for an empty document; normalize to {}.
  const metaRaw = YAML.parse(yamlText) ?? {};
  return parseMarkdownDocument(filePath, metaRaw, markdownBody);
}
21
+
22
/**
 * Validate raw frontmatter metadata and normalize it into the document
 * meta record used by the ingest pipeline.
 *
 * @param {string} filePath - Source file path (used in errors, as the docId
 *   slug fallback, and resolved into the returned `path`).
 * @param {object} metaRaw - Parsed frontmatter object (may be empty).
 * @returns {{docId: string, title: string, path: string}}
 * @throws {Error} When no usable title is present.
 */
export function parseMeta(filePath, metaRaw) {
  // Use a nullish check rather than `"title" in metaRaw`: a YAML key with no
  // value parses to null, and String(null) would produce the bogus truthy
  // title "null" (likewise "undefined" for an explicit undefined value).
  const title = metaRaw.title == null ? "" : String(metaRaw.title).trim();
  if (!title) {
    throw new Error(`${filePath}: title is required when frontmatter is present`);
  }

  return {
    // An explicit frontmatter `id` wins; otherwise slugify the file name.
    docId: String(metaRaw.id ?? slugFromPath(filePath)),
    title,
    path: path.resolve(filePath)
  };
}
34
+
35
// Build the [meta, body] pair for a markdown document: derive a title from
// the body when frontmatter did not provide one, then validate via parseMeta.
function parseMarkdownDocument(filePath, metaRaw, rawBody) {
  const extracted = extractTitleAndBody(rawBody);
  const effectiveMeta = { ...metaRaw, title: metaRaw.title ?? extracted.title };
  return [parseMeta(filePath, effectiveMeta), extracted.body];
}
43
+
44
/**
 * Derive a (title, body) pair from raw markdown text.
 *
 * Precedence: a LEADING "# Heading" line wins (the heading text becomes the
 * title and is removed from the body); otherwise the first paragraph is used
 * as the title with the body left intact; an empty document gets a
 * placeholder title.
 *
 * @param {string|null|undefined} rawBody - Raw markdown content.
 * @returns {{title: string, body: string}}
 */
function extractTitleAndBody(rawBody) {
  const body = String(rawBody ?? "").trim();
  // Anchored without the `m` flag so only a document-LEADING H1 matches; the
  // previous multiline regex matched "# heading" anywhere in the document and
  // silently discarded everything before it. The trailing group is optional
  // so a heading-only document still yields a clean title and empty body.
  const match = /^#\s+(.+?)\s*(?:\n+([\s\S]*))?$/.exec(body);
  if (match) {
    return {
      title: match[1].trim(),
      body: (match[2] ?? "").trim()
    };
  }
  const firstParagraph = body.split(/\n\s*\n/, 1)[0]?.trim() ?? "";
  return {
    title: firstParagraph || "Untitled Document",
    body
  };
}
59
+
60
// Derive a URL-safe slug from a file's base name (extension stripped):
// lowercase, runs of non-alphanumerics collapsed to "-", edge dashes removed.
function slugFromPath(filePath) {
  const base = path.basename(filePath, path.extname(filePath));
  const dashed = base.toLowerCase().replace(/[^a-z0-9]+/g, "-");
  return dashed.replace(/^-+|-+$/g, "");
}
package/lib/index.js ADDED
@@ -0,0 +1,140 @@
1
+ import fs from "node:fs";
2
+ import path from "node:path";
3
+ import crypto from "node:crypto";
4
+ import { chunkMarkdown } from "./chunking.js";
5
+ import { connect, initDb, readEmbeddingMetadata, writeEmbeddingMetadata } from "./db.js";
6
+ import { embedText, vectorLiteral } from "./embeddings.js";
7
+ import { loadMarkdownWithFrontmatter } from "./frontmatter.js";
8
+
9
/**
 * Ingest every markdown document under `docsRoot` into the database,
 * embedding content and pruning rows for files that no longer exist.
 *
 * @param {object} options
 * @param {string} options.databaseUrl - Postgres connection string passed to connect().
 * @param {string} options.docsRoot - Root directory scanned recursively for *.md files.
 * @param {object} options.embeddingProfile - Embedding config; `mode` and `model` are
 *   echoed in the result and the whole profile is passed to embedText/metadata helpers.
 * @returns {Promise<{documents: number, vectors: number, embeddingMode: string,
 *   embeddingModel: string, forceReindex: boolean}>}
 * @throws {Error} Aggregated per-file errors (newline-joined) AFTER the transaction
 *   commits, or any database/transaction failure (which is rolled back).
 */
export async function ingestDocuments({ databaseUrl, docsRoot, embeddingProfile }) {
  const client = await connect(databaseUrl);
  try {
    await initDb(client);

    const files = walkMarkdown(docsRoot);
    const seenDocIds = new Set();
    const seenPaths = new Set();
    let indexed = 0;
    let vectorsWritten = 0;
    const errors = [];
    // If the stored embedding profile differs from the active one, every
    // document must be re-embedded regardless of unchanged content hashes.
    const currentMetadata = await readEmbeddingMetadata(client);
    const forceReindex = metadataDiffers(currentMetadata, embeddingProfile);

    await client.query("BEGIN");
    await writeEmbeddingMetadata(client, embeddingProfile);
    for (const filePath of files) {
      try {
        const [meta, body] = loadMarkdownWithFrontmatter(filePath);
        // Two files resolving to the same doc id: keep the first, record an error.
        if (seenDocIds.has(meta.docId)) {
          errors.push(`${filePath}: duplicate document id ${meta.docId}`);
          continue;
        }
        seenDocIds.add(meta.docId);
        seenPaths.add(meta.path);
        const contentHash = sha256(fs.readFileSync(filePath));
        const existing = await client.query(
          "SELECT content_hash FROM documents WHERE doc_id = $1",
          [meta.docId]
        );
        // Unchanged content with a compatible embedding profile: skip the re-embed
        // but still count the document as indexed.
        if (!forceReindex && existing.rowCount && existing.rows[0].content_hash === contentHash) {
          indexed += 1;
          continue;
        }

        const chunks = chunkMarkdown(meta, body);
        // NOTE(review): only the FIRST chunk's content is embedded/stored, and
        // vectorsWritten increments at most 1 per document even when chunkMarkdown
        // returns several chunks — confirm this single-vector-per-doc design.
        await replaceDocument(client, meta, contentHash, chunks[0]?.content ?? body, embeddingProfile);
        indexed += 1;
        vectorsWritten += chunks.length ? 1 : 0;
      } catch (error) {
        // Per-file failures are collected so one bad document does not abort
        // the whole ingest pass.
        errors.push(String(error.message ?? error));
      }
    }
    await removeDeletedDocuments(client, seenDocIds, seenPaths);
    await client.query("COMMIT");

    // Errors are raised only AFTER the commit: successfully ingested documents
    // are kept even when some files failed.
    if (errors.length) {
      throw new Error(errors.join("\n"));
    }

    return {
      documents: indexed,
      vectors: vectorsWritten,
      embeddingMode: embeddingProfile.mode,
      embeddingModel: embeddingProfile.model,
      forceReindex
    };
  } catch (error) {
    // Best-effort rollback; a failed ROLLBACK (e.g. after COMMIT) is ignored.
    await client.query("ROLLBACK").catch(() => {});
    throw error;
  } finally {
    await client.end();
  }
}
73
+
74
// Embed "<title>\n<content>" with the active profile and upsert the document
// row keyed by doc_id, refreshing path/title/content/hash/embedding and
// bumping updated_at on conflict.
async function replaceDocument(client, meta, contentHash, content, embeddingProfile) {
  const embedding = vectorLiteral(await embedText(`${meta.title}\n${content}`, embeddingProfile));
  await client.query(
    `
    INSERT INTO documents (
      doc_id, path, title, content, content_hash, embedding
    ) VALUES ($1,$2,$3,$4,$5,$6::vector)
    ON CONFLICT (doc_id) DO UPDATE SET
      path = EXCLUDED.path,
      title = EXCLUDED.title,
      content = EXCLUDED.content,
      content_hash = EXCLUDED.content_hash,
      embedding = EXCLUDED.embedding,
      updated_at = NOW()
    `,
    [
      meta.docId,
      meta.path,
      meta.title,
      content,
      contentHash,
      embedding
    ]
  );
}
99
+
100
// Delete rows for documents that disappeared from disk: any row whose doc_id
// OR path was not seen during the current ingest pass is considered stale.
async function removeDeletedDocuments(client, seenDocIds, seenPaths) {
  const { rows } = await client.query("SELECT doc_id, path FROM documents");
  const stale = rows.filter((row) => !seenDocIds.has(row.doc_id) || !seenPaths.has(row.path));
  for (const row of stale) {
    await client.query("DELETE FROM documents WHERE doc_id = $1", [row.doc_id]);
  }
}
108
+
109
// Collect all *.md files under `root` (recursively) as absolute paths,
// returned in sorted order.
function walkMarkdown(root) {
  const files = [];
  const visit = (current) => {
    const stats = fs.statSync(current);
    if (stats.isDirectory()) {
      for (const entry of fs.readdirSync(current).sort()) {
        visit(path.join(current, entry));
      }
    } else if (current.endsWith(".md")) {
      files.push(current);
    }
  };
  visit(path.resolve(root));
  return files.sort();
}
126
+
127
// Hex-encoded SHA-256 digest of a Buffer (or string) payload.
function sha256(buffer) {
  const hash = crypto.createHash("sha256");
  hash.update(buffer);
  return hash.digest("hex");
}
130
+
131
// Decide whether the embedding configuration stored in the database differs
// from the active profile (forcing a full re-embed). A record with none of
// the three fields set also counts as "differs".
function metadataDiffers(metadata, embeddingProfile) {
  const hasAnyStoredValue =
    Boolean(metadata.embedding_mode) ||
    Boolean(metadata.embedding_dim) ||
    Boolean(metadata.embedding_model);
  if (!hasAnyStoredValue) {
    return true;
  }
  const sameMode = metadata.embedding_mode === embeddingProfile.mode;
  // embedding_dim is stored as text, so compare against the stringified dimension.
  const sameDim = metadata.embedding_dim === String(embeddingProfile.dimensions);
  const sameModel = (metadata.embedding_model ?? "") === (embeddingProfile.model ?? "");
  return !(sameMode && sameDim && sameModel);
}
@@ -0,0 +1,188 @@
1
+ import fs from "node:fs";
2
+ import path from "node:path";
3
+
4
// Maps GitHub issue-form section headings ("### Title", ...) to the
// corresponding proposal field names used throughout this module.
const SECTION_TO_FIELD = new Map([
  ["Title", "title"],
  ["Relative Path", "relativePath"],
  ["Text", "text"],
  ["Proposed Guidance", "proposedGuidance"]
]);
10
+
11
/**
 * Parse a GitHub issue-form markdown body into a KB proposal record.
 *
 * @param {string} body - Issue markdown using "### Section" headings.
 * @returns {{title: string, docId: string, relativePath: string|null, text: string}}
 * @throws {Error} When the Title or Text section is missing/empty.
 */
export function parseIssueFormBody(body) {
  const sections = splitIssueSections(body);
  const values = {};
  for (const [heading, field] of SECTION_TO_FIELD.entries()) {
    values[field] = sections.get(heading) ?? "";
  }

  const title = requiredValue(values.title, "Title");
  return {
    title,
    docId: slugify(title),
    relativePath: normalizeRelativePath(values.relativePath.trim() || null),
    // Older issue templates used a "Proposed Guidance" section instead of "Text".
    text: requiredValue(values.text || values.proposedGuidance, "Text")
  };
}
27
+
28
// Resolve the repo-relative markdown path for a proposal: an explicit
// relativePath wins, otherwise entries/<slug>.md derived from the doc id.
export function deriveDocRelativePath(proposal) {
  return proposal.relativePath
    ? proposal.relativePath
    : path.posix.join("entries", `${slugify(proposal.docId)}.md`);
}
34
+
35
// Render the final markdown document for a proposal: H1 title, blank line,
// trimmed text, trailing newline. `lastReviewed` is accepted (and defaulted)
// for interface compatibility but currently unused.
export function renderDocumentMarkdown(proposal, lastReviewed = currentDateString()) {
  void lastReviewed;
  const trimmedText = proposal.text.trim();
  return `# ${proposal.title}\n\n${trimmedText}\n`;
}
39
+
40
// Render the canonical issue-form body ("### Section" markdown) for a
// proposal, mirroring the format that parseIssueFormBody consumes.
export function renderProposalIssueBody({ proposal, relativePath }) {
  const issuePath = relativePath ?? deriveDocRelativePath(proposal);
  const lines = [
    "### Title",
    proposal.title,
    "",
    "### Relative Path",
    issuePath,
    "",
    "### Text",
    proposal.text.trim()
  ];
  return lines.join("\n");
}
53
+
54
/**
 * Materialize an approved KB issue as a markdown document on disk and return
 * the metadata (branch, commit message, PR title/body) needed to open a pull
 * request for it.
 *
 * @param {object} options
 * @param {number|string} options.issueNumber - Source issue number.
 * @param {string} options.issueTitle - Source issue title (echoed in the PR body).
 * @param {string} options.issueBody - Issue-form markdown parsed into a proposal.
 * @param {string} options.docsRoot - Root directory the document is written under.
 * @returns {{proposal: object, relativePath: string, absolutePath: string,
 *   branch: string, commitMessage: string, prTitle: string, prBody: string}}
 */
export function writeProposalDocument({ issueNumber, issueTitle, issueBody, docsRoot }) {
  const proposal = parseIssueFormBody(issueBody);
  const relativePath = deriveDocRelativePath(proposal);
  const absolutePath = path.join(path.resolve(docsRoot), relativePath);

  // Ensure the target directory exists, then write the rendered document.
  fs.mkdirSync(path.dirname(absolutePath), { recursive: true });
  fs.writeFileSync(absolutePath, renderDocumentMarkdown(proposal), "utf8");

  const branchSlug = slugify(`${proposal.docId}-${issueNumber}`);
  const prBody = [
    `Materializes approved KB issue #${issueNumber}.`,
    "",
    `Source issue title: ${issueTitle}`,
    "",
    `Document path: \`${relativePath}\``
  ].join("\n");

  return {
    proposal,
    relativePath,
    absolutePath,
    branch: `kb-issue-${branchSlug}`,
    commitMessage: `kb: materialize issue #${issueNumber} as ${proposal.docId}`,
    prTitle: `kb: add ${proposal.title}`,
    prBody
  };
}
78
+
79
/**
 * Open a "kb-entry" proposal issue on GitHub via the REST issues API.
 *
 * @param {object} options
 * @param {string} options.title - Proposal title (required; trimmed).
 * @param {string} options.text - Proposal body text (required; trimmed).
 * @param {string|null} [options.relativePath] - Optional target path; normalized
 *   to a forward-slash .md path, or null to let the consumer derive one.
 * @param {string} options.repo - Target repository as "OWNER/REPO".
 * @param {string} options.token - GitHub token sent as a Bearer credential.
 * @param {string} [options.apiBaseUrl] - API base override (e.g. GitHub Enterprise).
 * @returns {Promise<object>} The created issue as returned by the GitHub API.
 * @throws {Error} On missing repo/token, invalid title/text/path, or any
 *   non-2xx API response (status and response body included in the message).
 */
export async function createGitHubIssueFromText({
  title,
  text,
  relativePath = null,
  repo,
  token,
  apiBaseUrl = "https://api.github.com"
}) {
  if (!repo) {
    throw new Error("GitHub repository is required, expected OWNER/REPO");
  }
  if (!token) {
    throw new Error("GITHUB_TOKEN is required to create a proposal issue");
  }

  // Validate and normalize the inputs into a proposal record.
  const proposal = {
    docId: slugify(requiredValue(title, "Title")),
    title: requiredValue(title, "Title"),
    relativePath: normalizeRelativePath(relativePath),
    text: requiredValue(text, "Text")
  };
  // Strip a trailing slash from the base URL so the path joins cleanly.
  const response = await fetch(`${apiBaseUrl.replace(/\/$/, "")}/repos/${repo}/issues`, {
    method: "POST",
    headers: {
      accept: "application/vnd.github+json",
      authorization: `Bearer ${token}`,
      "content-type": "application/json",
      "user-agent": "@nzpr/kb"
    },
    body: JSON.stringify({
      title: `kb: ${proposal.title}`,
      body: renderProposalIssueBody({
        proposal,
        relativePath: proposal.relativePath
      }),
      labels: ["kb-entry"]
    })
  });

  if (!response.ok) {
    // Include the response body in the error when readable; ignore read failures.
    const errorText = await response.text().catch(() => "");
    throw new Error(
      `failed to create GitHub issue: ${response.status} ${response.statusText}${errorText ? ` - ${errorText}` : ""}`
    );
  }

  return response.json();
}
127
+
128
// Split an issue-form markdown body into a Map of heading -> trimmed section
// text. Lines before the first "### " heading are discarded, and a repeated
// heading overwrites the earlier section.
function splitIssueSections(body) {
  const normalized = String(body ?? "").replace(/\r\n/g, "\n");
  const sections = new Map();
  let heading = null;
  let buffer = [];

  for (const line of normalized.split("\n")) {
    const headingMatch = /^###\s+(.*)$/.exec(line);
    if (headingMatch) {
      // Commit the previous section (if any) before starting a new one.
      if (heading) {
        sections.set(heading, buffer.join("\n").trim());
      }
      heading = headingMatch[1].trim();
      buffer = [];
    } else if (heading) {
      buffer.push(line);
    }
  }
  if (heading) {
    sections.set(heading, buffer.join("\n").trim());
  }

  return sections;
}
158
+
159
// Coerce to a trimmed string and fail loudly when the result is empty.
function requiredValue(value, label) {
  const trimmed = String(value ?? "").trim();
  if (trimmed) {
    return trimmed;
  }
  throw new Error(`missing required issue section: ${label}`);
}
166
+
167
// Normalize a user-supplied relative path: backslashes become forward
// slashes, leading slashes are stripped, anything containing ".." is
// rejected (coarse traversal guard), and a ".md" extension is enforced.
// Returns null for empty input.
function normalizeRelativePath(value) {
  if (!value) {
    return null;
  }
  const cleaned = value.replace(/\\/g, "/").replace(/^\/+/, "");
  if (cleaned.includes("..")) {
    throw new Error(`invalid relative path: ${value}`);
  }
  if (cleaned.endsWith(".md")) {
    return cleaned;
  }
  return `${cleaned}.md`;
}
177
+
178
// Lowercase slug: non-alphanumeric runs become single dashes, edge dashes
// are trimmed. (The final collapse pass is defensive; runs are already
// collapsed by the first replace.)
function slugify(value) {
  const lowered = String(value ?? "").toLowerCase();
  return lowered
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+|-+$/g, "")
    .replace(/-{2,}/g, "-");
}
185
+
186
// Today's date (UTC) formatted as YYYY-MM-DD.
function currentDateString() {
  const iso = new Date().toISOString();
  return iso.slice(0, 10);
}
@@ -0,0 +1,149 @@
1
+ import fs from "node:fs";
2
+ import path from "node:path";
3
+ import crypto from "node:crypto";
4
+ import { fileURLToPath } from "node:url";
5
+
6
// Package root = parent of this file's directory (lib/ -> package root).
const PACKAGE_ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..");
// SQL migrations live in <package root>/migrations.
const MIGRATIONS_DIR = path.join(PACKAGE_ROOT, "migrations");
// Migration filenames look like "001_create_tables.sql": numeric version,
// then an underscore-separated name (case-insensitive).
const MIGRATION_FILE_RE = /^(\d+)_([a-z0-9_-]+)\.sql$/i;
// Historical checksums still accepted for already-applied migrations whose
// files were later reformatted; keyed by migration version.
const LEGACY_MIGRATION_CHECKSUMS = new Map([
  [1, new Set(["594bf296f6c2d897333e424a5c025868beaf662bed279f90fa5f99c75a6c6956"])]
]);
12
+
13
// Read, sort, and parse every migration file in the migrations directory.
// Any file not matching the NNN_name.sql convention makes parseMigrationFile throw.
export function listMigrationFiles() {
  const sortedEntries = fs.readdirSync(MIGRATIONS_DIR).sort();
  return sortedEntries.map((entry) => parseMigrationFile(entry));
}
17
+
18
/**
 * Apply all pending SQL migrations in version order.
 *
 * Already-applied migrations are first verified against their recorded
 * checksums (with legacy checksums tolerated via checksumMatches) before
 * anything new runs. Each pending migration executes inside its own
 * transaction, so a failure leaves earlier migrations applied.
 *
 * @param {object} client - Connected Postgres client.
 * @returns {Promise<{appliedCount: number, latestVersion: number,
 *   currentVersion: number, pendingCount: number}>}
 * @throws {Error} On a checksum mismatch, or wrapping the underlying error
 *   when a migration fails (after rolling its transaction back).
 */
export async function migrateDatabase(client) {
  await ensureMigrationTable(client);
  const applied = await getAppliedMigrations(client);
  const migrations = listMigrationFiles();
  const pending = [];

  for (const migration of migrations) {
    const previous = applied.get(migration.version);
    if (previous) {
      // An applied migration whose file changed on disk is a hard error.
      if (!checksumMatches(migration.version, previous.checksum, migration.checksum)) {
        throw new Error(
          `migration checksum mismatch for ${migration.file}: database=${previous.checksum} current=${migration.checksum}`
        );
      }
      continue;
    }
    pending.push(migration);
  }

  for (const migration of pending) {
    // One transaction per migration: run the SQL, then record it.
    await client.query("BEGIN");
    try {
      await client.query(migration.sql);
      await client.query(
        `
        INSERT INTO schema_migrations (version, name, checksum)
        VALUES ($1, $2, $3)
        `,
        [migration.version, migration.name, migration.checksum]
      );
      await client.query("COMMIT");
    } catch (error) {
      // Best-effort rollback, then surface which migration failed.
      await client.query("ROLLBACK").catch(() => {});
      throw new Error(`failed to apply migration ${migration.file}: ${error.message ?? error}`);
    }
  }

  // After a successful run the database is at the newest known version,
  // hence currentVersion === latestVersion and pendingCount is 0.
  const latestVersion = migrations.length ? migrations[migrations.length - 1].version : 0;
  return {
    appliedCount: pending.length,
    latestVersion,
    currentVersion: latestVersion,
    pendingCount: 0
  };
}
63
+
64
/**
 * Report schema version state without applying anything.
 *
 * @param {object} client - Connected Postgres client.
 * @returns {Promise<{latestVersion: number, currentVersion: number, pendingCount: number}>}
 * @throws {Error} When an applied migration's recorded checksum no longer
 *   matches its file on disk.
 */
export async function migrationStatus(client) {
  const applied = await getAppliedMigrationsIfTableExists(client);
  const migrations = listMigrationFiles();
  const latestVersion = migrations.length ? migrations.at(-1).version : 0;

  // Walk migrations in order; currentVersion is the last contiguously
  // applied migration whose checksum still matches its file.
  let currentVersion = 0;
  for (const migration of migrations) {
    const record = applied.get(migration.version);
    if (!record) {
      break;
    }
    if (!checksumMatches(migration.version, record.checksum, migration.checksum)) {
      throw new Error(
        `migration checksum mismatch for ${migration.file}: database=${record.checksum} current=${migration.checksum}`
      );
    }
    currentVersion = migration.version;
  }

  const pendingCount = migrations.filter((migration) => !applied.has(migration.version)).length;
  return { latestVersion, currentVersion, pendingCount };
}
89
+
90
// Create the migration bookkeeping table if it does not exist yet.
// Each row records one applied migration with its checksum and timestamp.
async function ensureMigrationTable(client) {
  await client.query(`
    CREATE TABLE IF NOT EXISTS schema_migrations (
      version INTEGER PRIMARY KEY,
      name TEXT NOT NULL,
      checksum TEXT NOT NULL,
      applied_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
    )
  `);
}
100
+
101
// Load all recorded migrations keyed by numeric version. The version column
// may come back as a string depending on the driver's integer parsing, so
// coerce it explicitly.
async function getAppliedMigrations(client) {
  const result = await client.query(`
    SELECT version, name, checksum
    FROM schema_migrations
    ORDER BY version
  `);
  const byVersion = new Map();
  for (const row of result.rows) {
    byVersion.set(Number(row.version), row);
  }
  return byVersion;
}
109
+
110
// Like getAppliedMigrations, but tolerate a database where the
// schema_migrations table has not been created yet (empty Map).
async function getAppliedMigrationsIfTableExists(client) {
  const probe = await client.query(`
    SELECT to_regclass('public.schema_migrations') AS table_name
  `);
  const tableName = probe.rows[0]?.table_name;
  return tableName ? getAppliedMigrations(client) : new Map();
}
119
+
120
// Parse one migration filename into its metadata record, loading the SQL and
// computing its checksum. Throws on filenames that do not match the
// NNN_name.sql convention.
function parseMigrationFile(file) {
  const match = MIGRATION_FILE_RE.exec(file);
  if (!match) {
    throw new Error(`invalid migration filename: ${file}`);
  }
  const [, versionText, name] = match;
  const fullPath = path.join(MIGRATIONS_DIR, file);
  // Trim so trailing-newline edits do not change the checksum.
  const sql = fs.readFileSync(fullPath, "utf8").trim();
  return {
    file,
    version: Number(versionText),
    name,
    path: fullPath,
    sql,
    checksum: sha256(sql)
  };
}
138
+
139
// Hex SHA-256 digest of a string (or Buffer) value.
function sha256(value) {
  const digest = crypto.createHash("sha256");
  digest.update(value);
  return digest.digest("hex");
}
142
+
143
// A stored checksum is valid when it equals the current file checksum or
// appears in the legacy allow-list for that migration version.
function checksumMatches(version, databaseChecksum, currentChecksum) {
  if (databaseChecksum === currentChecksum) {
    return true;
  }
  const legacy = LEGACY_MIGRATION_CHECKSUMS.get(version);
  return legacy ? legacy.has(databaseChecksum) : false;
}