@nzpr/kb 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,438 @@
1
+ import fs from "node:fs";
2
+ import path from "node:path";
3
+ import { execFileSync } from "node:child_process";
4
+ import { connect, initDb } from "./db.js";
5
+ import { maskConnection } from "./cli-common.js";
6
+
7
// Package name baked into the generated workflow files so the CI jobs
// install the same CLI that produced the scaffold.
const PACKAGE_NAME = "@nzpr/kb";

// GitHub labels managed by configureKnowledgeRepo(); frozen so the
// module-level list cannot be mutated at runtime.
const LABELS = Object.freeze([
  {
    name: "kb-entry",
    color: "0E8A16",
    description: "Knowledge base proposal or generated change"
  },
  {
    name: "kb-approved",
    color: "1D76DB",
    description: "Approved knowledge proposal ready to materialize"
  }
]);
20
+
21
/**
 * Scaffold the issue templates, workflows, and docs directory for a
 * knowledge repo under `targetDir`. Existing files are never overwritten.
 *
 * @param {{targetDir?: string}} [options]
 * @returns {{root: string, created: string[], skipped: string[], configuration: object}}
 */
export function initializeKnowledgeRepo({ targetDir = process.cwd() } = {}) {
  const root = path.resolve(targetDir);
  const templates = [
    [".github/ISSUE_TEMPLATE/config.yml", renderIssueConfig()],
    [".github/ISSUE_TEMPLATE/knowledge-document.md", renderIssueTemplate()],
    [".github/workflows/kb-issue-to-pr.yml", renderIssueToPrWorkflow()],
    [".github/workflows/kb-publish.yml", renderPublishWorkflow()],
    ["kb/docs/.gitkeep", ""]
  ];

  const created = [];
  const skipped = [];

  for (const [relativePath, content] of templates) {
    const destination = path.join(root, relativePath);
    if (fs.existsSync(destination)) {
      // Respect local edits: a pre-existing file is reported, not replaced.
      skipped.push(relativePath);
    } else {
      fs.mkdirSync(path.dirname(destination), { recursive: true });
      fs.writeFileSync(destination, content, "utf8");
      created.push(relativePath);
    }
  }

  return {
    root,
    created,
    skipped,
    configuration: buildConfigurationGuide()
  };
}
52
+
53
/**
 * Scaffold local files, then optionally verify/init the database and
 * configure the GitHub repo (labels, secrets, variables).
 *
 * Both external steps are injectable (`runGitHubCommand`,
 * `verifyDatabaseReady`) so callers and tests can stub them. Failures are
 * reported on the returned object (`ok: false`) rather than thrown.
 *
 * @returns {Promise<object>} scaffold result plus `database` and `github`
 *   status sections; `ok` is false if any attempted step failed.
 */
export async function bootstrapKnowledgeRepo({
  targetDir = process.cwd(),
  repo = null,
  githubToken = null,
  databaseUrl = null,
  embeddingMode = null,
  embeddingApiUrl = null,
  embeddingModel = null,
  embeddingApiKey = null,
  dbConnectTimeoutMs = null,
  repoAutomationToken = null,
  runGitHubCommand = defaultRunGitHubCommand,
  verifyDatabaseReady = defaultVerifyDatabaseReady
} = {}) {
  const result = {
    ...initializeKnowledgeRepo({ targetDir }),
    ok: true,
    database: {
      status: "pending",
      message:
        "rerun with --database-url URL or KB_DATABASE_URL to verify the target database and initialize the schema"
    },
    github: {
      status: "pending",
      message:
        "rerun with --repo OWNER/REPO and GITHUB_TOKEN to configure labels, repo settings, and GitHub secrets or variables"
    }
  };

  if (databaseUrl) {
    try {
      const database = await verifyDatabaseReady({ databaseUrl });
      result.database = { status: "verified", ...database };
    } catch (error) {
      result.ok = false;
      result.database = {
        status: "failed",
        error: String(error?.message ?? error)
      };
    }
  }

  // Without a target repo there is nothing more to configure.
  if (!repo) {
    return result;
  }

  if (!githubToken) {
    result.ok = false;
    result.github = {
      status: "failed",
      repo,
      error: "GITHUB_TOKEN is required when configuring a knowledge repo"
    };
    return result;
  }

  if (result.database.status === "failed") {
    // Don't push a known-bad connection string into repo secrets.
    result.github = {
      status: "skipped",
      repo,
      message: "database preflight failed, so repo secrets and variables were not changed"
    };
    return result;
  }

  // Only the values the caller actually supplied are pushed to GitHub.
  const secrets = new Map();
  const variables = new Map();
  if (databaseUrl) {
    secrets.set("KB_DATABASE_URL", databaseUrl);
  }
  if (embeddingApiKey) {
    secrets.set("KB_EMBEDDING_API_KEY", embeddingApiKey);
  }
  if (repoAutomationToken) {
    secrets.set("KB_REPO_AUTOMATION_TOKEN", repoAutomationToken);
  }
  if (embeddingMode) {
    variables.set("KB_EMBEDDING_MODE", embeddingMode);
  }
  if (embeddingApiUrl) {
    variables.set("KB_EMBEDDING_API_URL", embeddingApiUrl);
  }
  if (embeddingModel) {
    variables.set("KB_EMBEDDING_MODEL", embeddingModel);
  }
  if (dbConnectTimeoutMs) {
    variables.set("KB_DB_CONNECT_TIMEOUT_MS", String(dbConnectTimeoutMs));
  }

  try {
    const github = await configureKnowledgeRepo({
      repo,
      githubToken,
      secrets,
      variables,
      runGitHubCommand
    });
    result.github = { status: "configured", ...github };
  } catch (error) {
    result.ok = false;
    result.github = {
      status: "failed",
      repo,
      error: String(error?.message ?? error)
    };
  }

  return result;
}
167
+
168
/**
 * Human-readable checklist of the secrets and variables that the
 * generated workflows read, returned to the CLI for display after init.
 */
function buildConfigurationGuide() {
  const requiredSecrets = [
    {
      name: "KB_DATABASE_URL",
      purpose: "PostgreSQL connection string for the KB database with write access for publish."
    }
  ];

  const optionalSecrets = [
    {
      name: "KB_EMBEDDING_API_KEY",
      purpose: "API key for the embeddings endpoint, if your server requires authentication."
    },
    {
      name: "KB_REPO_AUTOMATION_TOKEN",
      purpose: "Optional token for issue-to-PR automation if default GitHub token behavior is insufficient."
    }
  ];

  const optionalVariables = [
    {
      name: "KB_EMBEDDING_MODE",
      value: "bge-m3-openai",
      purpose: "Enable high-quality remote embeddings."
    },
    {
      name: "KB_EMBEDDING_API_URL",
      value: "https://your-embeddings-host/v1/embeddings",
      purpose: "OpenAI-compatible embeddings endpoint."
    },
    {
      name: "KB_EMBEDDING_MODEL",
      value: "BAAI/bge-m3",
      purpose: "Embedding model name expected by the endpoint."
    },
    {
      name: "KB_DB_CONNECT_TIMEOUT_MS",
      value: "20000",
      purpose: "Optional database connect timeout override for CI."
    }
  ];

  return { requiredSecrets, optionalSecrets, optionalVariables };
}
210
+
211
/**
 * Apply repo settings via the injected `gh`-style command runner:
 * enable issues, upsert the managed labels, and set secrets/variables.
 *
 * @returns {Promise<{repo: string, labels: string[], secrets: string[], variables: string[]}>}
 *   summary of what was configured (names only, never values).
 */
async function configureKnowledgeRepo({
  repo,
  githubToken,
  secrets,
  variables,
  runGitHubCommand
}) {
  // Issues must be enabled before labels can be managed.
  await runGitHubCommand(["repo", "edit", repo, "--enable-issues"], { githubToken });

  // `--force` makes label creation an upsert (updates color/description).
  for (const { name, color, description } of LABELS) {
    const args = [
      "label",
      "create",
      name,
      "--repo",
      repo,
      "--color",
      color,
      "--description",
      description,
      "--force"
    ];
    await runGitHubCommand(args, { githubToken });
  }

  for (const [name, value] of secrets) {
    await runGitHubCommand(["secret", "set", name, "--repo", repo, "--body", value], {
      githubToken
    });
  }

  for (const [name, value] of variables) {
    await runGitHubCommand(["variable", "set", name, "--repo", repo, "--body", value], {
      githubToken
    });
  }

  return {
    repo,
    labels: LABELS.map(({ name }) => name),
    secrets: [...secrets.keys()],
    variables: [...variables.keys()]
  };
}
257
+
258
/**
 * Default database preflight: connect, run schema init/migrations, and
 * report the resulting version. The connection string is masked in the
 * returned summary so it is safe to print.
 */
async function defaultVerifyDatabaseReady({ databaseUrl }) {
  const client = await connect(databaseUrl);
  try {
    const { currentVersion, appliedCount } = await initDb(client);
    return {
      database: maskConnection(databaseUrl),
      currentVersion,
      appliedCount
    };
  } finally {
    // Always release the connection, even when initDb throws.
    await client.end();
  }
}
271
+
272
/**
 * Default command runner: invoke the `gh` CLI synchronously with the
 * token exposed under both env names the CLI recognizes.
 *
 * @param {string[]} args - gh subcommand and arguments.
 * @returns {string} captured stdout (utf8).
 * @throws if `gh` is missing or exits non-zero.
 */
function defaultRunGitHubCommand(args, { githubToken }) {
  const env = {
    ...process.env,
    GH_TOKEN: githubToken,
    GITHUB_TOKEN: githubToken
  };
  return execFileSync("gh", args, { encoding: "utf8", env });
}
282
+
283
// Issue-template config: disable blank issues so contributors must use
// the KB document template.
function renderIssueConfig() {
  return "blank_issues_enabled: false\n";
}
286
+
287
// Markdown issue template for proposing a KB entry. The YAML front
// matter auto-applies the `kb-entry` label; the sections below are
// filled in by the proposer and later materialized into kb/docs/.
function renderIssueTemplate() {
  return `---
name: Knowledge Base Document
about: Propose a new knowledge base document or a substantial update to an existing one
title: "kb: "
labels: kb-entry
---

Use this template to propose a new knowledge base entry.

Keep it minimal. A knowledge entry should be just a title and the text that should be retrieved by search.

### Title

Example Platform Rule

### Relative Path

entries/example-platform-rule.md

### Text

Write the exact knowledge text that should become the document body.

### Review Flow

1. Open the issue with this template.
2. Review and edit the issue until the title and text are ready.
3. Add the \`kb-approved\` label to generate a PR that writes the Markdown file into \`kb/docs/\`.
`;
}
320
+
321
// Renders .github/workflows/kb-issue-to-pr.yml: when an issue gains the
// `kb-approved` label, the job materializes it into a Markdown doc via
// `kb-admin issue-to-doc`, opens a PR with peter-evans/create-pull-request,
// and comments the PR number back on the issue.
// NOTE: the `${{ ... }}` tokens are GitHub Actions expressions emitted as
// plain strings; only the two PACKAGE_NAME lines use JS interpolation.
function renderIssueToPrWorkflow() {
  return [
    "name: kb-issue-to-pr",
    "",
    "on:",
    "  issues:",
    "    types:",
    "      - labeled",
    "",
    "permissions:",
    "  contents: write",
    "  pull-requests: write",
    "  issues: write",
    "",
    "jobs:",
    "  materialize:",
    "    if: github.event.label.name == 'kb-approved'",
    "    runs-on: ubuntu-latest",
    // Serialize runs per issue so re-labeling can't race a prior run.
    "    concurrency:",
    "      group: kb-issue-${{ github.event.issue.number }}",
    "      cancel-in-progress: false",
    "    steps:",
    "      - name: Checkout",
    "        uses: actions/checkout@v4",
    "",
    "      - name: Setup Node",
    "        uses: actions/setup-node@v4",
    "        with:",
    "          node-version: 24",
    "",
    `      - name: Install ${PACKAGE_NAME}`,
    `        run: npm install -g ${PACKAGE_NAME}`,
    "",
    "      - name: Materialize approved issue",
    "        id: materialize",
    '        run: kb-admin issue-to-doc --issue-event "$GITHUB_EVENT_PATH" --docs-root ./kb/docs',
    "",
    "      - name: Create pull request",
    "        id: cpr",
    "        uses: peter-evans/create-pull-request@v8",
    "        with:",
    // Falls back to the workflow's default token when no automation
    // token secret is configured.
    "          token: ${{ secrets.KB_REPO_AUTOMATION_TOKEN || github.token }}",
    "          branch: ${{ steps.materialize.outputs.branch }}",
    "          commit-message: ${{ steps.materialize.outputs.commit_message }}",
    "          title: ${{ steps.materialize.outputs.pr_title }}",
    "          body: ${{ steps.materialize.outputs.pr_body }}",
    "          labels: kb-entry",
    "          add-paths: ${{ steps.materialize.outputs.doc_path }}",
    "",
    "      - name: Comment on issue",
    "        if: steps.cpr.outputs.pull-request-number",
    "        uses: actions/github-script@v8",
    "        with:",
    "          script: |",
    "            await github.rest.issues.createComment({",
    "              owner: context.repo.owner,",
    "              repo: context.repo.repo,",
    "              issue_number: context.payload.issue.number,",
    "              body: `Created PR #${{ steps.cpr.outputs.pull-request-number }} for this approved KB entry.`",
    "            });",
    ""
  ].join("\n");
}
384
+
385
// Renders .github/workflows/kb-publish.yml: publishes kb/docs/** to the
// KB database on pushes to main (or manual dispatch) using the globally
// installed `kb` CLI. Secrets take precedence over repo variables for
// every embedding setting; only KB_DATABASE_URL is hard-required.
function renderPublishWorkflow() {
  return [
    "name: kb-publish",
    "",
    "on:",
    "  push:",
    "    branches:",
    "      - main",
    "    paths:",
    '      - "kb/docs/**"',
    '      - ".github/workflows/**"',
    "  workflow_dispatch:",
    "",
    "jobs:",
    "  publish:",
    "    runs-on: ubuntu-latest",
    "    permissions:",
    "      contents: write",
    // Single publish at a time; queued runs wait rather than cancel.
    "    concurrency:",
    "      group: kb-publish",
    "      cancel-in-progress: false",
    "    env:",
    "      KB_DATABASE_URL: ${{ secrets.KB_DATABASE_URL }}",
    "      KB_GITHUB_REPO: ${{ github.repository }}",
    "      GITHUB_TOKEN: ${{ github.token }}",
    "      KB_EMBEDDING_MODE: ${{ secrets.KB_EMBEDDING_MODE || vars.KB_EMBEDDING_MODE }}",
    "      KB_EMBEDDING_API_URL: ${{ secrets.KB_EMBEDDING_API_URL || vars.KB_EMBEDDING_API_URL }}",
    "      KB_EMBEDDING_MODEL: ${{ secrets.KB_EMBEDDING_MODEL || vars.KB_EMBEDDING_MODEL }}",
    "      KB_EMBEDDING_API_KEY: ${{ secrets.KB_EMBEDDING_API_KEY }}",
    "      KB_DB_CONNECT_TIMEOUT_MS: ${{ secrets.KB_DB_CONNECT_TIMEOUT_MS || vars.KB_DB_CONNECT_TIMEOUT_MS || '20000' }}",
    "    steps:",
    "      - name: Checkout",
    "        uses: actions/checkout@v4",
    "",
    // Fail fast with a clear message instead of a confusing DB error.
    "      - name: Ensure publish secret is configured",
    "        run: |",
    '          if [ -z "${KB_DATABASE_URL:-}" ]; then',
    '            echo "KB_DATABASE_URL secret is required for publish" >&2',
    "            exit 1",
    "          fi",
    "",
    "      - name: Setup Node",
    "        uses: actions/setup-node@v4",
    "        with:",
    "          node-version: 24",
    "",
    `      - name: Install ${PACKAGE_NAME}`,
    `        run: npm install -g ${PACKAGE_NAME}`,
    "",
    "      - name: Publish knowledge",
    "        run: kb publish --docs-root ./kb/docs",
    ""
  ].join("\n");
}
package/lib/search.js ADDED
@@ -0,0 +1,206 @@
1
+ import { connect, ensureCompatibility, schemaStatus } from "./db.js";
2
+ import { embedText, tokenOverlap, vectorLiteral } from "./embeddings.js";
3
+
4
/**
 * Hybrid search: merge lexical (full-text) and semantic (vector)
 * candidates per document, re-score with combineScores, and return the
 * top `limit` results sorted by final score.
 *
 * @param {object} options
 * @param {string} options.databaseUrl - PostgreSQL connection string.
 * @param {object} options.embeddingProfile - embedding mode/model profile.
 * @param {string} options.query - user query text.
 * @param {number} [options.limit=5] - max results to return.
 * @returns {Promise<object[]>} ranked result rows.
 */
export async function searchIndex({
  databaseUrl,
  embeddingProfile,
  query,
  limit = 5
}) {
  const client = await connect(databaseUrl);
  try {
    await ensureCompatibility(client, embeddingProfile);
    // Pull a generous candidate pool from both retrievers before re-ranking.
    const candidateLimit = Math.max(limit * 10, 30);
    const lexicalRows = await lexicalCandidates(client, {
      query,
      limit: candidateLimit
    });
    const semanticRows = await semanticCandidates(client, {
      embeddingProfile,
      query,
      limit: candidateLimit
    });

    // Merge candidate lists keyed by doc_id, keeping the best score
    // from each retriever for every document.
    const merged = new Map();
    for (const row of lexicalRows) {
      merged.set(row.doc_id, {
        ...row,
        lexical_score: Number(row.lexical_score),
        semantic_score: 0
      });
    }
    for (const row of semanticRows) {
      const existing = merged.get(row.doc_id) ?? {
        ...row,
        lexical_score: 0,
        semantic_score: 0
      };
      existing.semantic_score = Math.max(Number(row.semantic_score), existing.semantic_score);
      // BUG FIX: this previously keyed by `row.chunk_id`, which the
      // candidate queries never select, so every semantic-only candidate
      // collapsed onto the single key `undefined`. Key by doc_id to
      // match the lexical pass.
      merged.set(row.doc_id, existing);
    }

    const results = [...merged.values()].map((row) => {
      // Lexical floor: even without a tsquery match, a raw token overlap
      // with title+content contributes.
      const lexical = Math.max(
        Number(row.lexical_score ?? 0),
        tokenOverlap(query, `${row.title} ${row.content}`)
      );
      const semantic = Number(row.semantic_score ?? 0);
      const finalScore = combineScores({
        lexical,
        semantic,
        query,
        title: row.title
      });
      return {
        // Documents are not chunked yet, so chunkId mirrors docId.
        chunkId: row.doc_id,
        docId: row.doc_id,
        title: row.title,
        heading: row.title,
        content: row.content,
        path: row.path,
        lexicalScore: lexical,
        semanticScore: semantic,
        finalScore,
        lastReviewed: String(row.updated_at)
      };
    });

    return results.sort((a, b) => b.finalScore - a.finalScore).slice(0, limit);
  } finally {
    await client.end();
  }
}
72
+
73
/**
 * Search and format a plain-text answer: one bullet per result with a
 * snippet and source/review line. Returns both the answer text and the
 * raw results.
 */
export async function askIndex(options) {
  const results = await searchIndex(options);
  if (results.length === 0) {
    return { answer: "No matching standards found.", results: [] };
  }
  const bullets = results.flatMap((result) => [
    `- ${result.title}`,
    `  ${snippet(result.content)}`,
    `  Source: ${result.path} | reviewed ${result.lastReviewed}`
  ]);
  const answer = [`Best guidance for: ${options.query}`, "", ...bullets].join("\n");
  return { answer, results };
}
86
+
87
/**
 * List all indexed documents (id, title, path) ordered by doc_id.
 * Opens and closes its own connection.
 */
export async function listDocuments({ databaseUrl }) {
  const client = await connect(databaseUrl);
  try {
    const { rows } = await client.query(
      `
        SELECT doc_id, title, path
        FROM documents
        ORDER BY doc_id
      `
    );
    return rows;
  } finally {
    await client.end();
  }
}
102
+
103
/**
 * Catalog view of the knowledge base. Topics and projects are not
 * modeled yet, so they are returned as empty lists alongside the
 * document inventory (timestamps stringified for serialization).
 */
export async function knowledgeCatalog({ databaseUrl }) {
  const client = await connect(databaseUrl);
  try {
    const docs = await client.query(`
      SELECT doc_id, title, path, updated_at
      FROM documents
      ORDER BY doc_id
    `);
    const documents = docs.rows.map(({ doc_id, title, path, updated_at }) => ({
      doc_id,
      title,
      path,
      updated_at: String(updated_at)
    }));
    return { topics: [], projects: [], documents };
  } finally {
    await client.end();
  }
}
125
+
126
/**
 * Health check: verifies connectivity, schema migration status, and
 * embedding-profile compatibility, and counts indexed documents.
 * Never throws — any failure is reported as `{ ok: false, error }`.
 */
export async function doctor({ databaseUrl, embeddingProfile }) {
  let client = null;
  try {
    client = await connect(databaseUrl);
    const schema = await schemaStatus(client);
    await ensureCompatibility(client, embeddingProfile);
    const docs = await client.query("SELECT COUNT(*)::int AS count FROM documents");
    return {
      ok: true,
      documents: docs.rows[0].count,
      // Documents and embeddings are stored 1:1, so the counts match.
      vectors: docs.rows[0].count,
      embeddingMode: embeddingProfile.mode,
      embeddingModel: embeddingProfile.model,
      embeddingDimensions: embeddingProfile.dimensions,
      schemaCurrent: schema.currentVersion,
      schemaLatest: schema.latestVersion,
      schemaPending: schema.pendingCount
    };
  } catch (error) {
    return {
      ok: false,
      documents: 0,
      vectors: 0,
      // Mirror the success-path shape so consumers can rely on the keys.
      embeddingMode: null,
      embeddingModel: null,
      embeddingDimensions: null,
      schemaCurrent: null,
      schemaLatest: null,
      schemaPending: null,
      // FIX: use optional chaining like the rest of the package —
      // `error.message` threw a TypeError when a nullish value was thrown,
      // which defeated this function's never-throw contract.
      error: String(error?.message ?? error)
    };
  } finally {
    if (client) {
      await client.end();
    }
  }
}
161
+
162
/**
 * Collapse whitespace in `content` and truncate to at most `limit`
 * characters, appending "..." when text was cut.
 *
 * @param {string} content - raw document text.
 * @param {number} [limit=220] - maximum returned length.
 * @returns {string} single-line snippet.
 */
export function snippet(content, limit = 220) {
  const flattened = content.replace(/\s+/g, " ").trim();
  if (flattened.length > limit) {
    // Reserve three characters for the ellipsis marker.
    return `${flattened.slice(0, limit - 3).trimEnd()}...`;
  }
  return flattened;
}
169
+
170
/**
 * Full-text candidate retrieval: rank documents by ts_rank_cd against a
 * websearch-style tsquery built from the raw user query.
 *
 * @param {object} client - connected pg client.
 * @returns {Promise<object[]>} rows with a float `lexical_score`.
 */
async function lexicalCandidates(client, { query, limit }) {
  const sql = `
      SELECT
        doc_id, title, content, path, updated_at,
        ts_rank_cd(search_tsv, websearch_to_tsquery('english', $1))::float AS lexical_score
      FROM documents
      WHERE search_tsv @@ websearch_to_tsquery('english', $1)
      ORDER BY lexical_score DESC
      LIMIT $2
    `;
  const { rows } = await client.query(sql, [query, limit]);
  return rows;
}
185
+
186
/**
 * Vector candidate retrieval: embed the query, then rank documents by
 * cosine distance (`<=>`), reporting similarity as 1 - distance.
 *
 * @param {object} client - connected pg client.
 * @returns {Promise<object[]>} rows with a float `semantic_score`.
 */
async function semanticCandidates(client, { embeddingProfile, query, limit }) {
  const embedded = await embedText(query, embeddingProfile);
  const queryEmbedding = vectorLiteral(embedded);
  const sql = `
      SELECT
        doc_id, title, content, path, updated_at,
        (1 - (embedding <=> $1::vector))::float AS semantic_score
      FROM documents
      ORDER BY embedding <=> $1::vector
      LIMIT $2
    `;
  const { rows } = await client.query(sql, [queryEmbedding, limit]);
  return rows;
}
201
+
202
/**
 * Weighted blend of retrieval signals: lexical rank dominates (0.68),
 * semantic similarity contributes 0.22 (clamped at zero), and a small
 * title-overlap bonus (0.08) rewards query terms in the title.
 */
function combineScores({ lexical, semantic, query, title }) {
  const lexicalPart = lexical * 0.68;
  const semanticPart = Math.max(semantic, 0) * 0.22;
  const titleBonus = tokenOverlap(query, title) * 0.08;
  return lexicalPart + semanticPart + titleBonus;
}