@nzpr/kb 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +5 -0
- package/LICENSE +21 -0
- package/README.md +185 -0
- package/bin/kb-admin.js +5 -0
- package/bin/kb.js +5 -0
- package/docker-compose.pgvector.yml +19 -0
- package/lib/admin-cli.js +203 -0
- package/lib/chunking.js +16 -0
- package/lib/cli-common.js +73 -0
- package/lib/cli.js +391 -0
- package/lib/config.js +109 -0
- package/lib/db.js +81 -0
- package/lib/embeddings.js +94 -0
- package/lib/frontmatter.js +66 -0
- package/lib/index.js +140 -0
- package/lib/kb-proposals.js +188 -0
- package/lib/migrations.js +149 -0
- package/lib/repo-init.js +438 -0
- package/lib/search.js +206 -0
- package/migrations/0001_initial.sql +77 -0
- package/migrations/0002_relax_embedding_dimension.sql +9 -0
- package/migrations/0003_simplify_documents_table.sql +64 -0
- package/package.json +58 -0
package/lib/repo-init.js
ADDED
|
@@ -0,0 +1,438 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { execFileSync } from "node:child_process";
|
|
4
|
+
import { connect, initDb } from "./db.js";
|
|
5
|
+
import { maskConnection } from "./cli-common.js";
|
|
6
|
+
|
|
7
|
+
/** npm package name interpolated into the install steps of the generated workflows. */
const PACKAGE_NAME = "@nzpr/kb";

/**
 * GitHub labels created on the target repository by `configureKnowledgeRepo`.
 * Each entry carries the label name, its hex color (without `#`) and a description.
 * The array itself is frozen; the label objects inside it are not.
 */
const LABELS = Object.freeze(
  [
    ["kb-entry", "0E8A16", "Knowledge base proposal or generated change"],
    ["kb-approved", "1D76DB", "Approved knowledge proposal ready to materialize"]
  ].map(([name, color, description]) => ({ name, color, description }))
);
|
|
20
|
+
|
|
21
|
+
/**
 * Scaffold the knowledge-repo files (issue templates, workflows, docs folder)
 * under `targetDir`. Files that already exist are left untouched.
 *
 * @param {object} [options]
 * @param {string} [options.targetDir=process.cwd()] Directory to scaffold into.
 * @returns {{root: string, created: string[], skipped: string[], configuration: object}}
 *   The resolved root, the relative paths written, the relative paths skipped
 *   because they already existed, and the configuration guide.
 */
export function initializeKnowledgeRepo({ targetDir = process.cwd() } = {}) {
  const root = path.resolve(targetDir);

  // Ordered list of scaffold files; the order determines the order of the
  // `created` / `skipped` arrays in the result.
  const templates = [
    { relativePath: ".github/ISSUE_TEMPLATE/config.yml", content: renderIssueConfig() },
    { relativePath: ".github/ISSUE_TEMPLATE/knowledge-document.md", content: renderIssueTemplate() },
    { relativePath: ".github/workflows/kb-issue-to-pr.yml", content: renderIssueToPrWorkflow() },
    { relativePath: ".github/workflows/kb-publish.yml", content: renderPublishWorkflow() },
    { relativePath: "kb/docs/.gitkeep", content: "" }
  ];

  const created = [];
  const skipped = [];

  templates.forEach(({ relativePath, content }) => {
    const destination = path.join(root, relativePath);
    if (!fs.existsSync(destination)) {
      fs.mkdirSync(path.dirname(destination), { recursive: true });
      fs.writeFileSync(destination, content, "utf8");
      created.push(relativePath);
    } else {
      // Never overwrite a file the user may have customized.
      skipped.push(relativePath);
    }
  });

  const configuration = buildConfigurationGuide();
  return { root, created, skipped, configuration };
}
|
|
52
|
+
|
|
53
|
+
/**
 * Scaffold a knowledge repo and, when the inputs are supplied, verify the
 * database and configure the GitHub repository.
 *
 * Steps:
 *  1. Always scaffold files via `initializeKnowledgeRepo`.
 *  2. If `databaseUrl` is given, call `verifyDatabaseReady`; a failure marks the
 *     result as not ok but does not throw.
 *  3. If `repo` is given, require `githubToken`, skip GitHub changes when the
 *     database step failed, and otherwise push labels, secrets and variables via
 *     `configureKnowledgeRepo`.
 *
 * @param {object} [options]
 * @param {string} [options.targetDir=process.cwd()] Directory to scaffold into.
 * @param {?string} [options.repo] GitHub repository (OWNER/REPO) to configure.
 * @param {?string} [options.githubToken] Token used for the GitHub commands.
 * @param {?string} [options.databaseUrl] Database URL to verify; stored as the KB_DATABASE_URL secret.
 * @param {?string} [options.embeddingMode] Stored as the KB_EMBEDDING_MODE variable.
 * @param {?string} [options.embeddingApiUrl] Stored as the KB_EMBEDDING_API_URL variable.
 * @param {?string} [options.embeddingModel] Stored as the KB_EMBEDDING_MODEL variable.
 * @param {?string} [options.embeddingApiKey] Stored as the KB_EMBEDDING_API_KEY secret.
 * @param {?(number|string)} [options.dbConnectTimeoutMs] Stored (stringified) as the KB_DB_CONNECT_TIMEOUT_MS variable.
 * @param {?string} [options.repoAutomationToken] Stored as the KB_REPO_AUTOMATION_TOKEN secret.
 * @param {Function} [options.runGitHubCommand] Injectable GitHub command runner.
 * @param {Function} [options.verifyDatabaseReady] Injectable database verifier.
 * @returns {Promise<object>} Scaffold result extended with `ok`, `database` and `github` status objects.
 */
export async function bootstrapKnowledgeRepo({
  targetDir = process.cwd(),
  repo = null,
  githubToken = null,
  databaseUrl = null,
  embeddingMode = null,
  embeddingApiUrl = null,
  embeddingModel = null,
  embeddingApiKey = null,
  dbConnectTimeoutMs = null,
  repoAutomationToken = null,
  runGitHubCommand = defaultRunGitHubCommand,
  verifyDatabaseReady = defaultVerifyDatabaseReady
} = {}) {
  const result = {
    ...initializeKnowledgeRepo({ targetDir }),
    ok: true,
    database: {
      status: "pending",
      message:
        "rerun with --database-url URL or KB_DATABASE_URL to verify the target database and initialize the schema"
    },
    github: {
      status: "pending",
      message:
        "rerun with --repo OWNER/REPO and GITHUB_TOKEN to configure labels, repo settings, and GitHub secrets or variables"
    }
  };

  if (databaseUrl) {
    try {
      const verification = await verifyDatabaseReady({ databaseUrl });
      result.database = { status: "verified", ...verification };
    } catch (error) {
      result.ok = false;
      result.database = { status: "failed", error: String(error?.message ?? error) };
    }
  }

  // Nothing more to do unless a GitHub repository was requested.
  if (!repo) {
    return result;
  }

  if (!githubToken) {
    result.ok = false;
    result.github = {
      status: "failed",
      repo,
      error: "GITHUB_TOKEN is required when configuring a knowledge repo"
    };
    return result;
  }

  // Do not push a database secret that just failed verification.
  if (result.database.status === "failed") {
    result.github = {
      status: "skipped",
      repo,
      message: "database preflight failed, so repo secrets and variables were not changed"
    };
    return result;
  }

  const secrets = new Map();
  const variables = new Map();
  // Only truthy values are recorded; insertion order is preserved by the Map.
  const setWhenProvided = (target, name, value) => {
    if (value) {
      target.set(name, value);
    }
  };

  setWhenProvided(secrets, "KB_DATABASE_URL", databaseUrl);
  setWhenProvided(secrets, "KB_EMBEDDING_API_KEY", embeddingApiKey);
  setWhenProvided(secrets, "KB_REPO_AUTOMATION_TOKEN", repoAutomationToken);
  setWhenProvided(variables, "KB_EMBEDDING_MODE", embeddingMode);
  setWhenProvided(variables, "KB_EMBEDDING_API_URL", embeddingApiUrl);
  setWhenProvided(variables, "KB_EMBEDDING_MODEL", embeddingModel);
  if (dbConnectTimeoutMs) {
    variables.set("KB_DB_CONNECT_TIMEOUT_MS", String(dbConnectTimeoutMs));
  }

  try {
    const configured = await configureKnowledgeRepo({
      repo,
      githubToken,
      secrets,
      variables,
      runGitHubCommand
    });
    result.github = { status: "configured", ...configured };
  } catch (error) {
    result.ok = false;
    result.github = { status: "failed", repo, error: String(error?.message ?? error) };
  }

  return result;
}
|
|
167
|
+
|
|
168
|
+
/**
 * Describe the GitHub secrets and variables the generated workflows read.
 *
 * @returns {{requiredSecrets: object[], optionalSecrets: object[], optionalVariables: object[]}}
 *   Static guide; secrets carry `name` and `purpose`, variables additionally
 *   carry an example `value`.
 */
function buildConfigurationGuide() {
  const secret = (name, purpose) => ({ name, purpose });
  const variable = (name, value, purpose) => ({ name, value, purpose });

  const requiredSecrets = [
    secret(
      "KB_DATABASE_URL",
      "PostgreSQL connection string for the KB database with write access for publish."
    )
  ];

  const optionalSecrets = [
    secret(
      "KB_EMBEDDING_API_KEY",
      "API key for the embeddings endpoint, if your server requires authentication."
    ),
    secret(
      "KB_REPO_AUTOMATION_TOKEN",
      "Optional token for issue-to-PR automation if default GitHub token behavior is insufficient."
    )
  ];

  const optionalVariables = [
    variable("KB_EMBEDDING_MODE", "bge-m3-openai", "Enable high-quality remote embeddings."),
    variable(
      "KB_EMBEDDING_API_URL",
      "https://your-embeddings-host/v1/embeddings",
      "OpenAI-compatible embeddings endpoint."
    ),
    variable("KB_EMBEDDING_MODEL", "BAAI/bge-m3", "Embedding model name expected by the endpoint."),
    variable(
      "KB_DB_CONNECT_TIMEOUT_MS",
      "20000",
      "Optional database connect timeout override for CI."
    )
  ];

  return { requiredSecrets, optionalSecrets, optionalVariables };
}
|
|
210
|
+
|
|
211
|
+
/**
 * Apply the knowledge-base configuration to a GitHub repository by issuing
 * commands through `runGitHubCommand`, strictly one after another:
 * enable issues, create/update every label in `LABELS`, then set each secret
 * and each variable.
 *
 * NOTE(review): secret values are passed as `--body` arguments, so with the
 * default runner they appear in the child process argument list.
 *
 * @param {object} options
 * @param {string} options.repo Target repository (OWNER/REPO).
 * @param {string} options.githubToken Token forwarded to every command.
 * @param {Map<string, string>} options.secrets Secret name -> value.
 * @param {Map<string, string>} options.variables Variable name -> value.
 * @param {Function} options.runGitHubCommand Called as `(args, { githubToken })`.
 * @returns {Promise<{repo: string, labels: string[], secrets: string[], variables: string[]}>}
 *   Names (never values) of everything that was configured.
 */
async function configureKnowledgeRepo({
  repo,
  githubToken,
  secrets,
  variables,
  runGitHubCommand
}) {
  // Every command gets a fresh options object carrying only the token.
  const gh = (args) => runGitHubCommand(args, { githubToken });

  await gh(["repo", "edit", repo, "--enable-issues"]);

  for (const { name, color, description } of LABELS) {
    // --force makes label creation update an already existing label.
    await gh([
      "label",
      "create",
      name,
      "--repo",
      repo,
      "--color",
      color,
      "--description",
      description,
      "--force"
    ]);
  }

  for (const [secretName, secretValue] of secrets.entries()) {
    await gh(["secret", "set", secretName, "--repo", repo, "--body", secretValue]);
  }

  for (const [variableName, variableValue] of variables.entries()) {
    await gh(["variable", "set", variableName, "--repo", repo, "--body", variableValue]);
  }

  const labels = LABELS.map(({ name }) => name);
  return {
    repo,
    labels,
    secrets: Array.from(secrets.keys()),
    variables: Array.from(variables.keys())
  };
}
|
|
257
|
+
|
|
258
|
+
/**
 * Default database preflight: connect, run `initDb`, and report the outcome.
 * The client is always closed, whether or not initialization succeeds.
 *
 * @param {object} options
 * @param {string} options.databaseUrl Connection string to verify.
 * @returns {Promise<{database: string, currentVersion: *, appliedCount: *}>}
 *   Masked connection string plus the `currentVersion` / `appliedCount`
 *   values reported by `initDb`.
 */
async function defaultVerifyDatabaseReady({ databaseUrl }) {
  const client = await connect(databaseUrl);
  try {
    const { currentVersion, appliedCount } = await initDb(client);
    // Never echo credentials back to the caller; report the masked URL only.
    const database = maskConnection(databaseUrl);
    return { database, currentVersion, appliedCount };
  } finally {
    await client.end();
  }
}
|
|
271
|
+
|
|
272
|
+
/**
 * Default GitHub command runner: synchronously executes the `gh` CLI with the
 * given arguments and returns its stdout as a UTF-8 string.
 *
 * @param {string[]} args Arguments passed to `gh`.
 * @param {object} options
 * @param {string} options.githubToken Token exposed to the child process as
 *   both GH_TOKEN and GITHUB_TOKEN, on top of the current environment.
 * @returns {string} Output of the command.
 */
function defaultRunGitHubCommand(args, { githubToken }) {
  const env = {
    ...process.env,
    GH_TOKEN: githubToken,
    GITHUB_TOKEN: githubToken
  };
  return execFileSync("gh", args, { encoding: "utf8", env });
}
|
|
282
|
+
|
|
283
|
+
/**
 * Render `.github/ISSUE_TEMPLATE/config.yml`.
 *
 * @returns {string} A single YAML line disabling blank issues, newline-terminated.
 */
function renderIssueConfig() {
  const settings = ["blank_issues_enabled: false"];
  return `${settings.join("\n")}\n`;
}
|
|
286
|
+
|
|
287
|
+
/**
 * Render the Markdown issue template used to propose a knowledge document.
 * The template consists of YAML front matter (name, about, title prefix and
 * the `kb-entry` label) followed by the Title / Relative Path / Text /
 * Review Flow sections.
 *
 * @returns {string} Template text, newline-terminated.
 */
function renderIssueTemplate() {
  const frontMatter = [
    "---",
    "name: Knowledge Base Document",
    "about: Propose a new knowledge base document or a substantial update to an existing one",
    'title: "kb: "',
    "labels: kb-entry",
    "---"
  ];

  const introduction = [
    "Use this template to propose a new knowledge base entry.",
    "",
    "Keep it minimal. A knowledge entry should be just a title and the text that should be retrieved by search."
  ];

  // Each section renders as a "### heading", a blank line, then its body lines.
  const sections = [
    { heading: "Title", body: ["Example Platform Rule"] },
    { heading: "Relative Path", body: ["entries/example-platform-rule.md"] },
    { heading: "Text", body: ["Write the exact knowledge text that should become the document body."] },
    {
      heading: "Review Flow",
      body: [
        "1. Open the issue with this template.",
        "2. Review and edit the issue until the title and text are ready.",
        "3. Add the `kb-approved` label to generate a PR that writes the Markdown file into `kb/docs/`."
      ]
    }
  ];

  const renderedSections = sections.flatMap(({ heading, body }) => [
    "",
    `### ${heading}`,
    "",
    ...body
  ]);

  return [...frontMatter, "", ...introduction, ...renderedSections, ""].join("\n");
}
|
|
320
|
+
|
|
321
|
+
/**
 * Render the `kb-issue-to-pr` GitHub Actions workflow.
 *
 * The workflow runs when an issue is labeled; its single job only proceeds for
 * the `kb-approved` label. It installs this package, runs
 * `kb-admin issue-to-doc` against the issue event, opens a pull request from
 * the step outputs and comments on the issue when a PR number is available.
 *
 * The leading spaces inside each string literal are significant: they express
 * the YAML nesting (2 spaces per level, step keys aligned under `- name`).
 *
 * @returns {string} Workflow YAML, newline-terminated.
 */
function renderIssueToPrWorkflow() {
  return [
    "name: kb-issue-to-pr",
    "",
    "on:",
    "  issues:",
    "    types:",
    "      - labeled",
    "",
    "permissions:",
    "  contents: write",
    "  pull-requests: write",
    "  issues: write",
    "",
    "jobs:",
    "  materialize:",
    "    if: github.event.label.name == 'kb-approved'",
    "    runs-on: ubuntu-latest",
    "    concurrency:",
    // One run at a time per issue; queued runs are not cancelled.
    "      group: kb-issue-${{ github.event.issue.number }}",
    "      cancel-in-progress: false",
    "    steps:",
    "      - name: Checkout",
    "        uses: actions/checkout@v4",
    "",
    "      - name: Setup Node",
    "        uses: actions/setup-node@v4",
    "        with:",
    "          node-version: 24",
    "",
    `      - name: Install ${PACKAGE_NAME}`,
    `        run: npm install -g ${PACKAGE_NAME}`,
    "",
    "      - name: Materialize approved issue",
    "        id: materialize",
    '        run: kb-admin issue-to-doc --issue-event "$GITHUB_EVENT_PATH" --docs-root ./kb/docs',
    "",
    "      - name: Create pull request",
    "        id: cpr",
    "        uses: peter-evans/create-pull-request@v8",
    "        with:",
    "          token: ${{ secrets.KB_REPO_AUTOMATION_TOKEN || github.token }}",
    "          branch: ${{ steps.materialize.outputs.branch }}",
    "          commit-message: ${{ steps.materialize.outputs.commit_message }}",
    "          title: ${{ steps.materialize.outputs.pr_title }}",
    "          body: ${{ steps.materialize.outputs.pr_body }}",
    "          labels: kb-entry",
    "          add-paths: ${{ steps.materialize.outputs.doc_path }}",
    "",
    "      - name: Comment on issue",
    "        if: steps.cpr.outputs.pull-request-number",
    "        uses: actions/github-script@v8",
    "        with:",
    "          script: |",
    "            await github.rest.issues.createComment({",
    "              owner: context.repo.owner,",
    "              repo: context.repo.repo,",
    "              issue_number: context.payload.issue.number,",
    "              body: `Created PR #${{ steps.cpr.outputs.pull-request-number }} for this approved KB entry.`",
    "            });",
    ""
  ].join("\n");
}
|
|
384
|
+
|
|
385
|
+
/**
 * Render the `kb-publish` GitHub Actions workflow.
 *
 * The workflow runs on pushes to `main` that touch `kb/docs/**` or
 * `.github/workflows/**`, and on manual dispatch. Its job fails fast when the
 * KB_DATABASE_URL secret is missing, installs this package and runs
 * `kb publish --docs-root ./kb/docs`.
 *
 * The leading spaces inside each string literal are significant: they express
 * the YAML nesting (2 spaces per level, step keys aligned under `- name`).
 *
 * @returns {string} Workflow YAML, newline-terminated.
 */
function renderPublishWorkflow() {
  return [
    "name: kb-publish",
    "",
    "on:",
    "  push:",
    "    branches:",
    "      - main",
    "    paths:",
    '      - "kb/docs/**"',
    '      - ".github/workflows/**"',
    "  workflow_dispatch:",
    "",
    "jobs:",
    "  publish:",
    "    runs-on: ubuntu-latest",
    "    permissions:",
    "      contents: write",
    "    concurrency:",
    // Publishes are serialized; a queued publish is never cancelled.
    "      group: kb-publish",
    "      cancel-in-progress: false",
    "    env:",
    "      KB_DATABASE_URL: ${{ secrets.KB_DATABASE_URL }}",
    "      KB_GITHUB_REPO: ${{ github.repository }}",
    "      GITHUB_TOKEN: ${{ github.token }}",
    // Embedding settings may be stored either as secrets or as variables.
    "      KB_EMBEDDING_MODE: ${{ secrets.KB_EMBEDDING_MODE || vars.KB_EMBEDDING_MODE }}",
    "      KB_EMBEDDING_API_URL: ${{ secrets.KB_EMBEDDING_API_URL || vars.KB_EMBEDDING_API_URL }}",
    "      KB_EMBEDDING_MODEL: ${{ secrets.KB_EMBEDDING_MODEL || vars.KB_EMBEDDING_MODEL }}",
    "      KB_EMBEDDING_API_KEY: ${{ secrets.KB_EMBEDDING_API_KEY }}",
    "      KB_DB_CONNECT_TIMEOUT_MS: ${{ secrets.KB_DB_CONNECT_TIMEOUT_MS || vars.KB_DB_CONNECT_TIMEOUT_MS || '20000' }}",
    "    steps:",
    "      - name: Checkout",
    "        uses: actions/checkout@v4",
    "",
    "      - name: Ensure publish secret is configured",
    "        run: |",
    '          if [ -z "${KB_DATABASE_URL:-}" ]; then',
    '            echo "KB_DATABASE_URL secret is required for publish" >&2',
    "            exit 1",
    "          fi",
    "",
    "      - name: Setup Node",
    "        uses: actions/setup-node@v4",
    "        with:",
    "          node-version: 24",
    "",
    `      - name: Install ${PACKAGE_NAME}`,
    `        run: npm install -g ${PACKAGE_NAME}`,
    "",
    "      - name: Publish knowledge",
    "        run: kb publish --docs-root ./kb/docs",
    ""
  ].join("\n");
}
|
package/lib/search.js
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
import { connect, ensureCompatibility, schemaStatus } from "./db.js";
|
|
2
|
+
import { embedText, tokenOverlap, vectorLiteral } from "./embeddings.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Hybrid search over the `documents` table: lexical (full-text) candidates and
 * semantic (embedding) candidates are fetched, merged per document, scored
 * with `combineScores`, and the best `limit` results are returned.
 *
 * @param {object} options
 * @param {string} options.databaseUrl Database connection string.
 * @param {object} options.embeddingProfile Embedding settings passed to the
 *   compatibility check and to the semantic query.
 * @param {string} options.query User query text.
 * @param {number} [options.limit=5] Maximum number of results.
 * @returns {Promise<object[]>} Results sorted by descending `finalScore`. Each
 *   has `chunkId`, `docId`, `title`, `heading`, `content`, `path`,
 *   `lexicalScore`, `semanticScore`, `finalScore`, `lastReviewed`.
 */
export async function searchIndex({
  databaseUrl,
  embeddingProfile,
  query,
  limit = 5
}) {
  const client = await connect(databaseUrl);
  try {
    await ensureCompatibility(client, embeddingProfile);

    // Over-fetch candidates from both sources so the re-ranking has room to work.
    const candidateLimit = Math.max(limit * 10, 30);
    const lexicalRows = await lexicalCandidates(client, {
      query,
      limit: candidateLimit
    });
    const semanticRows = await semanticCandidates(client, {
      embeddingProfile,
      query,
      limit: candidateLimit
    });

    // One merged entry per document, keyed by doc_id.
    const merged = new Map();
    for (const row of lexicalRows) {
      merged.set(row.doc_id, {
        ...row,
        lexical_score: Number(row.lexical_score),
        semantic_score: 0
      });
    }
    for (const row of semanticRows) {
      const existing = merged.get(row.doc_id) ?? {
        ...row,
        lexical_score: 0,
        semantic_score: 0
      };
      existing.semantic_score = Math.max(Number(row.semantic_score), existing.semantic_score);
      // Key by doc_id (the candidate queries return no chunk_id column), so a
      // semantic-only hit gets its own entry and a hit found by both searches
      // is not duplicated.
      merged.set(row.doc_id, existing);
    }

    const results = [...merged.values()].map((row) => {
      // The SQL rank can be zero for partial matches; fall back to token overlap.
      const lexical = Math.max(
        Number(row.lexical_score ?? 0),
        tokenOverlap(query, `${row.title} ${row.content}`)
      );
      const semantic = Number(row.semantic_score ?? 0);
      const finalScore = combineScores({
        lexical,
        semantic,
        query,
        title: row.title
      });
      return {
        // Documents are not chunked here, so chunkId mirrors docId and
        // heading mirrors title.
        chunkId: row.doc_id,
        docId: row.doc_id,
        title: row.title,
        heading: row.title,
        content: row.content,
        path: row.path,
        lexicalScore: lexical,
        semanticScore: semantic,
        finalScore,
        lastReviewed: String(row.updated_at)
      };
    });

    return results.sort((a, b) => b.finalScore - a.finalScore).slice(0, limit);
  } finally {
    await client.end();
  }
}
|
|
72
|
+
|
|
73
|
+
/**
 * Run `searchIndex` and format the hits as a plain-text answer.
 *
 * @param {object} options Options forwarded unchanged to `searchIndex`;
 *   `options.query` is echoed in the answer header.
 * @returns {Promise<{answer: string, results: object[]}>} The formatted answer
 *   and the raw results (an empty array when nothing matched).
 */
export async function askIndex(options) {
  const results = await searchIndex(options);
  const hasMatches = Boolean(results.length);
  if (!hasMatches) {
    return { answer: "No matching standards found.", results: [] };
  }

  // Three lines per hit: title bullet, content snippet, source reference.
  const entries = results.flatMap((hit) => [
    `- ${hit.title}`,
    ` ${snippet(hit.content)}`,
    ` Source: ${hit.path} | reviewed ${hit.lastReviewed}`
  ]);

  const answer = [`Best guidance for: ${options.query}`, "", ...entries].join("\n");
  return { answer, results };
}
|
|
86
|
+
|
|
87
|
+
/**
 * List every document's id, title and path, ordered by `doc_id`.
 * The connection is always closed before returning.
 *
 * @param {object} options
 * @param {string} options.databaseUrl Database connection string.
 * @returns {Promise<object[]>} Rows with `doc_id`, `title`, `path`.
 */
export async function listDocuments({ databaseUrl }) {
  const listSql = `
      SELECT doc_id, title, path
      FROM documents
      ORDER BY doc_id
    `;
  const client = await connect(databaseUrl);
  try {
    const { rows } = await client.query(listSql);
    return rows;
  } finally {
    await client.end();
  }
}
|
|
102
|
+
|
|
103
|
+
/**
 * Build a catalog of all documents, ordered by `doc_id`.
 * `topics` and `projects` are always returned as empty arrays.
 *
 * @param {object} options
 * @param {string} options.databaseUrl Database connection string.
 * @returns {Promise<{topics: Array, projects: Array, documents: object[]}>}
 *   Documents carry `doc_id`, `title`, `path` and `updated_at` (stringified).
 */
export async function knowledgeCatalog({ databaseUrl }) {
  const client = await connect(databaseUrl);
  try {
    const { rows } = await client.query(`
      SELECT doc_id, title, path, updated_at
      FROM documents
      ORDER BY doc_id
    `);

    // Copy only the known columns and normalize the timestamp to a string.
    const documents = rows.map(({ doc_id, title, path, updated_at }) => ({
      doc_id,
      title,
      path,
      updated_at: String(updated_at)
    }));

    return { topics: [], projects: [], documents };
  } finally {
    await client.end();
  }
}
|
|
125
|
+
|
|
126
|
+
/**
 * Health check: connects to the database, reads the schema status, checks
 * embedding compatibility and counts documents. Never throws for check
 * failures; they are reported as `{ ok: false, error }` instead.
 *
 * @param {object} options
 * @param {string} options.databaseUrl Database connection string.
 * @param {object} options.embeddingProfile Profile whose `mode`, `model` and
 *   `dimensions` are echoed in a successful report.
 * @returns {Promise<object>} Report with `ok`, `documents`, `vectors`,
 *   `embeddingMode`, schema version fields and, on failure, `error`.
 */
export async function doctor({ databaseUrl, embeddingProfile }) {
  let client = null;
  try {
    client = await connect(databaseUrl);
    const schema = await schemaStatus(client);
    await ensureCompatibility(client, embeddingProfile);
    const docs = await client.query("SELECT COUNT(*)::int AS count FROM documents");
    return {
      ok: true,
      documents: docs.rows[0].count,
      // Reported from the same count as `documents`.
      vectors: docs.rows[0].count,
      embeddingMode: embeddingProfile.mode,
      embeddingModel: embeddingProfile.model,
      embeddingDimensions: embeddingProfile.dimensions,
      schemaCurrent: schema.currentVersion,
      schemaLatest: schema.latestVersion,
      schemaPending: schema.pendingCount
    };
  } catch (error) {
    return {
      ok: false,
      documents: 0,
      vectors: 0,
      embeddingMode: null,
      schemaCurrent: null,
      schemaLatest: null,
      schemaPending: null,
      // Optional chaining keeps the report intact even if a non-Error value
      // such as null or undefined was thrown.
      error: String(error?.message ?? error)
    };
  } finally {
    // The client is only set once the connection succeeded.
    if (client) {
      await client.end();
    }
  }
}
|
|
161
|
+
|
|
162
|
+
/**
 * Collapse whitespace in `content` and shorten it to at most `limit`
 * characters, ending with "..." when text was cut.
 *
 * @param {?string} content Text to shorten; null/undefined is treated as empty.
 * @param {number} [limit=220] Maximum length of the returned string.
 * @returns {string} The normalized text, or a truncated version of it.
 */
export function snippet(content, limit = 220) {
  const normalized = String(content ?? "").replace(/\s+/g, " ").trim();
  if (normalized.length <= limit) {
    return normalized;
  }
  // Below three characters there is no room for the ellipsis; a negative slice
  // end would count from the end of the string and overshoot the limit, so
  // hard-truncate instead.
  if (limit < 3) {
    return normalized.slice(0, Math.max(limit, 0));
  }
  return `${normalized.slice(0, limit - 3).trimEnd()}...`;
}
|
|
169
|
+
|
|
170
|
+
/**
 * Fetch full-text search candidates: documents whose `search_tsv` matches the
 * query (parsed with `websearch_to_tsquery('english', ...)`), ranked by
 * `ts_rank_cd` in descending order.
 *
 * @param {object} client Database client exposing `query(sql, params)`.
 * @param {object} options
 * @param {string} options.query Query text, bound as `$1`.
 * @param {number} options.limit Maximum number of rows, bound as `$2`.
 * @returns {Promise<object[]>} Rows with the document columns plus `lexical_score`.
 */
async function lexicalCandidates(client, { query, limit }) {
  const lexicalSql = `
      SELECT
        doc_id, title, content, path, updated_at,
        ts_rank_cd(search_tsv, websearch_to_tsquery('english', $1))::float AS lexical_score
      FROM documents
      WHERE search_tsv @@ websearch_to_tsquery('english', $1)
      ORDER BY lexical_score DESC
      LIMIT $2
    `;
  const bindings = [query, limit];
  const { rows } = await client.query(lexicalSql, bindings);
  return rows;
}
|
|
185
|
+
|
|
186
|
+
/**
 * Fetch embedding-similarity candidates: the query is embedded with
 * `embedText`, serialized with `vectorLiteral`, and documents are ordered by
 * the `<=>` distance to it. `semantic_score` is `1 - distance`.
 *
 * @param {object} client Database client exposing `query(sql, params)`.
 * @param {object} options
 * @param {object} options.embeddingProfile Profile passed to `embedText`.
 * @param {string} options.query Query text to embed.
 * @param {number} options.limit Maximum number of rows, bound as `$2`.
 * @returns {Promise<object[]>} Rows with the document columns plus `semantic_score`.
 */
async function semanticCandidates(client, { embeddingProfile, query, limit }) {
  const embedding = await embedText(query, embeddingProfile);
  const queryVector = vectorLiteral(embedding);
  const semanticSql = `
      SELECT
        doc_id, title, content, path, updated_at,
        (1 - (embedding <=> $1::vector))::float AS semantic_score
      FROM documents
      ORDER BY embedding <=> $1::vector
      LIMIT $2
    `;
  const { rows } = await client.query(semanticSql, [queryVector, limit]);
  return rows;
}
|
|
201
|
+
|
|
202
|
+
/**
 * Blend the ranking signals into a single score:
 * 0.68 x lexical + 0.22 x semantic (negative values count as 0)
 * + 0.08 x token overlap between the query and the title.
 *
 * @param {object} signals
 * @param {number} signals.lexical Lexical relevance score.
 * @param {number} signals.semantic Semantic similarity score.
 * @param {string} signals.query Query text.
 * @param {string} signals.title Document title.
 * @returns {number} Combined score.
 */
function combineScores({ lexical, semantic, query, title }) {
  const nonNegativeSemantic = Math.max(semantic, 0);
  const contentScore = lexical * 0.68 + nonNegativeSemantic * 0.22;
  const titleBonus = tokenOverlap(query, title) * 0.08;
  return contentScore + titleBonus;
}
|