@nzpr/kb 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +5 -0
- package/LICENSE +21 -0
- package/README.md +185 -0
- package/bin/kb-admin.js +5 -0
- package/bin/kb.js +5 -0
- package/docker-compose.pgvector.yml +19 -0
- package/lib/admin-cli.js +203 -0
- package/lib/chunking.js +16 -0
- package/lib/cli-common.js +73 -0
- package/lib/cli.js +391 -0
- package/lib/config.js +109 -0
- package/lib/db.js +81 -0
- package/lib/embeddings.js +94 -0
- package/lib/frontmatter.js +66 -0
- package/lib/index.js +140 -0
- package/lib/kb-proposals.js +188 -0
- package/lib/migrations.js +149 -0
- package/lib/repo-init.js +438 -0
- package/lib/search.js +206 -0
- package/migrations/0001_initial.sql +77 -0
- package/migrations/0002_relax_embedding_dimension.sql +9 -0
- package/migrations/0003_simplify_documents_table.sql +64 -0
- package/package.json +58 -0
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
-- Enable the extension that supplies the VECTOR column type and the ivfflat
-- index method, both of which are used further down in this migration.
CREATE EXTENSION IF NOT EXISTS vector;
|
|
2
|
+
|
|
3
|
+
-- Migration ledger: one row per migration version, with its name, a checksum
-- and the time the row was inserted.
-- NOTE(review): presumably written by the migration runner — confirm in lib/migrations.js.
CREATE TABLE IF NOT EXISTS schema_migrations (
    version    INTEGER     PRIMARY KEY,
    name       TEXT        NOT NULL,
    checksum   TEXT        NOT NULL,
    applied_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
|
|
9
|
+
|
|
10
|
+
-- Key/value settings table. This migration seeds it with the embedding mode
-- and embedding dimension in its final statement.
CREATE TABLE IF NOT EXISTS kb_metadata (
    key   TEXT PRIMARY KEY,
    value TEXT NOT NULL
);
|
|
14
|
+
|
|
15
|
+
-- One row per knowledge-base document, keyed by doc_id. The chunks table
-- references doc_id with ON DELETE CASCADE.
CREATE TABLE IF NOT EXISTS documents (
    doc_id        TEXT        PRIMARY KEY,
    path          TEXT        NOT NULL,
    title         TEXT        NOT NULL,
    scope         TEXT        NOT NULL,
    topic         TEXT        NOT NULL,
    status        TEXT        NOT NULL,
    owner         TEXT        NOT NULL,
    audience      TEXT        NOT NULL,
    -- Array columns default to the empty array rather than NULL.
    projects      TEXT[]      NOT NULL DEFAULT '{}',
    tags          TEXT[]      NOT NULL DEFAULT '{}',
    last_reviewed DATE        NOT NULL,
    content_hash  TEXT        NOT NULL,
    updated_at    TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
|
|
30
|
+
|
|
31
|
+
-- One row per chunk of a document. Deleting a document removes its chunks
-- through the ON DELETE CASCADE foreign key.
CREATE TABLE IF NOT EXISTS chunks (
    chunk_id      TEXT        PRIMARY KEY,
    doc_id        TEXT        NOT NULL REFERENCES documents(doc_id) ON DELETE CASCADE,
    title         TEXT        NOT NULL,
    heading       TEXT        NOT NULL,
    content       TEXT        NOT NULL,
    projects      TEXT[]      NOT NULL DEFAULT '{}',
    tags          TEXT[]      NOT NULL DEFAULT '{}',
    scope         TEXT        NOT NULL,
    status        TEXT        NOT NULL,
    topic         TEXT        NOT NULL,
    path          TEXT        NOT NULL,
    last_reviewed DATE        NOT NULL,
    -- Fixed at 256 dimensions; the same value is recorded as 'embedding_dim'
    -- in kb_metadata at the end of this migration.
    embedding     VECTOR(256) NOT NULL,
    -- Filled in by the trg_chunks_search_tsv trigger defined below.
    search_tsv    tsvector    NOT NULL DEFAULT ''::tsvector
);
|
|
47
|
+
|
|
48
|
+
-- Trigger function: recomputes NEW.search_tsv from the row's text fields and
-- returns the modified row.
--   weight A: title, heading
--   weight B: content
--   weight C: projects, tags (array elements joined with single spaces)
-- Every input goes through coalesce() so that a NULL field contributes an
-- empty vector; tsvector concatenation with NULL would otherwise make the
-- whole result NULL. The array inputs previously lacked this guard.
CREATE OR REPLACE FUNCTION update_chunks_search_tsv()
RETURNS trigger
LANGUAGE plpgsql
AS $$
BEGIN
    NEW.search_tsv :=
        setweight(to_tsvector('english', coalesce(NEW.title, '')), 'A') ||
        setweight(to_tsvector('english', coalesce(NEW.heading, '')), 'A') ||
        setweight(to_tsvector('english', coalesce(NEW.content, '')), 'B') ||
        setweight(to_tsvector('english', coalesce(array_to_string(NEW.projects, ' '), '')), 'C') ||
        setweight(to_tsvector('english', coalesce(array_to_string(NEW.tags, ' '), '')), 'C');
    RETURN NEW;
END;
$$;
|
|
62
|
+
|
|
63
|
+
-- Recompute chunks.search_tsv on every insert, and on updates that assign any
-- of the columns the vector is built from.
-- CREATE TRIGGER has no IF NOT EXISTS form, so any existing trigger is dropped
-- first; this keeps the statement re-runnable like the IF NOT EXISTS /
-- OR REPLACE statements around it.
DROP TRIGGER IF EXISTS trg_chunks_search_tsv ON chunks;
CREATE TRIGGER trg_chunks_search_tsv
    BEFORE INSERT OR UPDATE OF title, heading, content, projects, tags
    ON chunks
    FOR EACH ROW
    EXECUTE FUNCTION update_chunks_search_tsv();
|
|
68
|
+
|
|
69
|
+
-- Secondary indexes on chunks: full-text vector, parent document lookup,
-- and the (topic, status) pair.
CREATE INDEX IF NOT EXISTS idx_chunks_search_tsv
    ON chunks USING GIN (search_tsv);

CREATE INDEX IF NOT EXISTS idx_chunks_doc_id
    ON chunks (doc_id);

CREATE INDEX IF NOT EXISTS idx_chunks_topic_status
    ON chunks (topic, status);

-- Approximate nearest-neighbour index over embeddings using cosine distance.
-- NOTE(review): on a fresh database this runs right after chunks is created,
-- i.e. on an empty table; pgvector's documentation advises building IVFFlat
-- indexes once the table holds data — consider a REINDEX after the first
-- ingest. Confirm against the pgvector version in use.
CREATE INDEX IF NOT EXISTS idx_chunks_embedding_ivfflat
    ON chunks USING ivfflat (embedding vector_cosine_ops)
    WITH (lists = 100);
|
|
74
|
+
|
|
75
|
+
-- Record the embedding settings. If either key already exists its value is
-- overwritten with the one given here.
INSERT INTO kb_metadata (key, value)
VALUES
    ('embedding_mode', 'local-hash'),
    ('embedding_dim', '256')
ON CONFLICT (key) DO UPDATE
    SET value = EXCLUDED.value;
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
-- Give documents its own content, embedding and full-text vector columns.
-- embedding is added nullable here; it is backfilled and then made NOT NULL
-- later in this migration.
ALTER TABLE documents
    ADD COLUMN IF NOT EXISTS content    TEXT     NOT NULL DEFAULT '',
    ADD COLUMN IF NOT EXISTS embedding  VECTOR,
    ADD COLUMN IF NOT EXISTS search_tsv tsvector NOT NULL DEFAULT ''::tsvector;
|
|
5
|
+
|
|
6
|
+
-- Trigger function: rebuilds NEW.search_tsv from the document title
-- (weight A) and content (weight B), then returns the modified row.
CREATE OR REPLACE FUNCTION update_documents_search_tsv()
RETURNS trigger
LANGUAGE plpgsql
AS $$
DECLARE
    title_vec tsvector := setweight(to_tsvector('english', coalesce(NEW.title, '')), 'A');
    body_vec  tsvector := setweight(to_tsvector('english', coalesce(NEW.content, '')), 'B');
BEGIN
    NEW.search_tsv := title_vec || body_vec;
    RETURN NEW;
END;
$$;
|
|
17
|
+
|
|
18
|
+
-- (Re)create the trigger that refreshes documents.search_tsv on insert and
-- whenever an UPDATE assigns title or content. Dropping first makes the
-- statement pair safe to run when the trigger already exists.
DROP TRIGGER IF EXISTS trg_documents_search_tsv ON documents;

CREATE TRIGGER trg_documents_search_tsv
    BEFORE INSERT OR UPDATE OF title, content ON documents
    FOR EACH ROW EXECUTE FUNCTION update_documents_search_tsv();
|
|
24
|
+
|
|
25
|
+
-- Backfill every document from its chunks. There is deliberately no WHERE
-- clause: all rows are rewritten.
--   content   <- the document's chunk contents joined by a blank line, in
--                chunk_id order; falls back to the existing content, then ''.
--   embedding <- the embedding of the first chunk in chunk_id order; falls
--                back to the existing embedding (possibly NULL).
-- Because content is assigned, trg_documents_search_tsv fires for each row
-- and search_tsv is rebuilt as part of this statement.
-- NOTE(review): chunk_id is TEXT, so "first" and the join order follow string
-- comparison — confirm that matches the intended chunk sequence.
UPDATE documents AS d
SET
    content = COALESCE(
        (
            SELECT string_agg(c.content, E'\n\n' ORDER BY c.chunk_id)
            FROM chunks AS c
            WHERE c.doc_id = d.doc_id
        ),
        d.content,
        ''
    ),
    embedding = COALESCE(
        (
            SELECT c.embedding
            FROM chunks AS c
            WHERE c.doc_id = d.doc_id
            ORDER BY c.chunk_id
            LIMIT 1
        ),
        d.embedding
    );
|
|
46
|
+
|
|
47
|
+
-- Documents that still have no embedding after the backfill are removed so
-- that the column can be made mandatory.
DELETE FROM documents
WHERE embedding IS NULL;

ALTER TABLE documents ALTER COLUMN embedding SET NOT NULL;
|
|
51
|
+
|
|
52
|
+
-- chunks has been folded into documents by the backfill above. Dropping the
-- table removes its trigger but not the trigger function the initial
-- migration created for it, so that function is dropped as well to avoid
-- leaving an orphan behind.
DROP TABLE IF EXISTS chunks;
DROP FUNCTION IF EXISTS update_chunks_search_tsv();
|
|
53
|
+
|
|
54
|
+
-- Build the full-text index on documents from scratch, discarding any index
-- of the same name that already exists.
DROP INDEX IF EXISTS idx_documents_search_tsv;

CREATE INDEX idx_documents_search_tsv
    ON documents USING GIN (search_tsv);
|
|
56
|
+
|
|
57
|
+
-- Remove the metadata columns that the simplified documents table no longer
-- carries. All drops are issued in a single ALTER TABLE statement.
ALTER TABLE documents
    DROP COLUMN IF EXISTS scope,
    DROP COLUMN IF EXISTS topic,
    DROP COLUMN IF EXISTS status,
    DROP COLUMN IF EXISTS owner,
    DROP COLUMN IF EXISTS audience,
    DROP COLUMN IF EXISTS projects,
    DROP COLUMN IF EXISTS tags,
    DROP COLUMN IF EXISTS last_reviewed;
|
package/package.json
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@nzpr/kb",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Postgres/pgvector-backed knowledge base CLI for LLM agents and engineering teams.",
|
|
5
|
+
"repository": {
|
|
6
|
+
"type": "git",
|
|
7
|
+
"url": "git+https://github.com/nzpr/kb.git"
|
|
8
|
+
},
|
|
9
|
+
"homepage": "https://github.com/nzpr/kb#readme",
|
|
10
|
+
"bugs": {
|
|
11
|
+
"url": "https://github.com/nzpr/kb/issues"
|
|
12
|
+
},
|
|
13
|
+
"type": "module",
|
|
14
|
+
"bin": {
|
|
15
|
+
"kb": "./bin/kb.js",
|
|
16
|
+
"kb-admin": "./bin/kb-admin.js"
|
|
17
|
+
},
|
|
18
|
+
"files": [
|
|
19
|
+
"bin/",
|
|
20
|
+
"lib/",
|
|
21
|
+
"migrations/",
|
|
22
|
+
"README.md",
|
|
23
|
+
"docker-compose.pgvector.yml",
|
|
24
|
+
".env.example"
|
|
25
|
+
],
|
|
26
|
+
"scripts": {
|
|
27
|
+
"test": "node --test",
|
|
28
|
+
"validate": "node --test",
|
|
29
|
+
"prepublishOnly": "npm test",
|
|
30
|
+
"start": "node ./bin/kb.js",
|
|
31
|
+
"publish-knowledge": "node ./bin/kb.js publish",
|
|
32
|
+
"admin": "node ./bin/kb-admin.js",
|
|
33
|
+
"migrate": "node ./bin/kb-admin.js migrate",
|
|
34
|
+
"init-db": "node ./bin/kb-admin.js init-db",
|
|
35
|
+
"ingest": "node ./bin/kb-admin.js ingest",
|
|
36
|
+
"schema-status": "node ./bin/kb-admin.js status",
|
|
37
|
+
"doctor": "node ./bin/kb.js doctor"
|
|
38
|
+
},
|
|
39
|
+
"keywords": [
|
|
40
|
+
"knowledge-base",
|
|
41
|
+
"cli",
|
|
42
|
+
"postgres",
|
|
43
|
+
"pgvector",
|
|
44
|
+
"llm",
|
|
45
|
+
"rag"
|
|
46
|
+
],
|
|
47
|
+
"license": "MIT",
|
|
48
|
+
"publishConfig": {
|
|
49
|
+
"access": "public"
|
|
50
|
+
},
|
|
51
|
+
"engines": {
|
|
52
|
+
"node": ">=20"
|
|
53
|
+
},
|
|
54
|
+
"dependencies": {
|
|
55
|
+
"pg": "^8.16.3",
|
|
56
|
+
"yaml": "^2.8.1"
|
|
57
|
+
}
|
|
58
|
+
}
|