@hasna/knowledge 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,231 @@
1
+ import { Database } from 'bun:sqlite';
2
+ import { ensureParentDir } from './workspace';
3
+
4
/**
 * Schema version targeted by this module.
 *
 * The initial migration in this file records the same value (1) in the
 * `schema_versions` table.
 */
export const CURRENT_SCHEMA_VERSION = 1;
5
+
6
/**
 * Summary of a knowledge database as returned by `getKnowledgeDbStats`.
 *
 * `schema_version` comes from the `schema_versions` table; every other field
 * is a `COUNT(*)` over the table named in its comment.
 */
export interface KnowledgeDbStats {
  /** Highest version recorded in `schema_versions` (0 when none is recorded). */
  schema_version: number;

  /** Row count of `sources`. */
  sources: number;

  /** Row count of `source_revisions`. */
  source_revisions: number;

  /** Row count of `chunks`. */
  chunks: number;

  /** Row count of `wiki_pages`. */
  wiki_pages: number;

  /** Row count of `citations`. */
  citations: number;

  /** Row count of `knowledge_indexes`. */
  indexes: number;

  /** Row count of `runs`. */
  runs: number;

  /** Row count of `run_events`. */
  run_events: number;
}
17
+
18
/**
 * Initial schema (version 1) for the knowledge database.
 *
 * The script is idempotent: every object is created with `IF NOT EXISTS` and
 * the version row is written with `INSERT OR IGNORE`, so it can be executed
 * against an already-migrated database without error.
 *
 * Layout:
 * - `schema_versions` records applied migrations.
 * - `sources` / `source_revisions` / `chunks` / `chunk_embeddings` hold ingested
 *   content; deletes cascade from a source down to its embeddings.
 * - `wiki_pages` / `wiki_backlinks` / `citations` hold generated pages and their
 *   provenance.
 * - `knowledge_indexes`, `storage_objects` track artifacts by URI.
 * - `runs` / `run_events` / `provider_usage` / `redaction_findings` hold run
 *   telemetry.
 * - `chunks_fts` is a contentless FTS5 table (`content=''`) using the porter
 *   stemmer over the unicode61 tokenizer.
 *
 * The trailing `CREATE INDEX` statements cover foreign-key child columns that
 * are not already the leftmost column of a PRIMARY KEY / UNIQUE constraint.
 * Without them, each parent DELETE (CASCADE / SET NULL) has to scan the whole
 * child table to find referencing rows.
 *
 * `PRAGMA journal_mode = WAL` must stay outside any transaction, which is why
 * the script is not wrapped in BEGIN/COMMIT.
 */
const MIGRATION_1 = `
  PRAGMA journal_mode = WAL;
  PRAGMA foreign_keys = ON;

  CREATE TABLE IF NOT EXISTS schema_versions (
    version INTEGER PRIMARY KEY,
    applied_at TEXT NOT NULL
  );

  CREATE TABLE IF NOT EXISTS sources (
    id TEXT PRIMARY KEY,
    uri TEXT NOT NULL UNIQUE,
    kind TEXT NOT NULL,
    title TEXT,
    metadata_json TEXT NOT NULL DEFAULT '{}',
    acl_json TEXT NOT NULL DEFAULT '{}',
    created_at TEXT NOT NULL,
    updated_at TEXT NOT NULL
  );

  CREATE TABLE IF NOT EXISTS source_revisions (
    id TEXT PRIMARY KEY,
    source_id TEXT NOT NULL REFERENCES sources(id) ON DELETE CASCADE,
    revision TEXT NOT NULL,
    hash TEXT,
    extracted_text_uri TEXT,
    metadata_json TEXT NOT NULL DEFAULT '{}',
    created_at TEXT NOT NULL,
    UNIQUE(source_id, revision)
  );

  CREATE TABLE IF NOT EXISTS chunks (
    id TEXT PRIMARY KEY,
    source_revision_id TEXT REFERENCES source_revisions(id) ON DELETE CASCADE,
    wiki_page_id TEXT,
    kind TEXT NOT NULL,
    ordinal INTEGER NOT NULL,
    text TEXT NOT NULL,
    token_count INTEGER,
    start_offset INTEGER,
    end_offset INTEGER,
    metadata_json TEXT NOT NULL DEFAULT '{}',
    created_at TEXT NOT NULL
  );

  CREATE TABLE IF NOT EXISTS chunk_embeddings (
    id TEXT PRIMARY KEY,
    chunk_id TEXT NOT NULL REFERENCES chunks(id) ON DELETE CASCADE,
    provider TEXT NOT NULL,
    model TEXT NOT NULL,
    dimensions INTEGER NOT NULL,
    vector_json TEXT NOT NULL,
    created_at TEXT NOT NULL,
    UNIQUE(chunk_id, provider, model)
  );

  CREATE TABLE IF NOT EXISTS wiki_pages (
    id TEXT PRIMARY KEY,
    path TEXT NOT NULL UNIQUE,
    title TEXT NOT NULL,
    artifact_uri TEXT,
    content_hash TEXT,
    status TEXT NOT NULL DEFAULT 'active',
    metadata_json TEXT NOT NULL DEFAULT '{}',
    created_at TEXT NOT NULL,
    updated_at TEXT NOT NULL
  );

  CREATE TABLE IF NOT EXISTS wiki_backlinks (
    from_page_id TEXT NOT NULL REFERENCES wiki_pages(id) ON DELETE CASCADE,
    to_page_id TEXT NOT NULL REFERENCES wiki_pages(id) ON DELETE CASCADE,
    label TEXT,
    created_at TEXT NOT NULL,
    PRIMARY KEY(from_page_id, to_page_id)
  );

  CREATE TABLE IF NOT EXISTS citations (
    id TEXT PRIMARY KEY,
    wiki_page_id TEXT REFERENCES wiki_pages(id) ON DELETE CASCADE,
    chunk_id TEXT REFERENCES chunks(id) ON DELETE SET NULL,
    source_uri TEXT NOT NULL,
    quote TEXT,
    start_offset INTEGER,
    end_offset INTEGER,
    metadata_json TEXT NOT NULL DEFAULT '{}',
    created_at TEXT NOT NULL
  );

  CREATE TABLE IF NOT EXISTS knowledge_indexes (
    id TEXT PRIMARY KEY,
    kind TEXT NOT NULL,
    name TEXT NOT NULL,
    artifact_uri TEXT,
    shard_key TEXT,
    metadata_json TEXT NOT NULL DEFAULT '{}',
    created_at TEXT NOT NULL,
    updated_at TEXT NOT NULL,
    UNIQUE(kind, name, shard_key)
  );

  CREATE TABLE IF NOT EXISTS runs (
    id TEXT PRIMARY KEY,
    type TEXT NOT NULL,
    prompt TEXT,
    status TEXT NOT NULL,
    provider TEXT,
    model TEXT,
    cost_tokens INTEGER NOT NULL DEFAULT 0,
    cost_usd REAL NOT NULL DEFAULT 0,
    metadata_json TEXT NOT NULL DEFAULT '{}',
    created_at TEXT NOT NULL,
    updated_at TEXT NOT NULL
  );

  CREATE TABLE IF NOT EXISTS run_events (
    id TEXT PRIMARY KEY,
    run_id TEXT NOT NULL REFERENCES runs(id) ON DELETE CASCADE,
    level TEXT NOT NULL,
    event TEXT NOT NULL,
    metadata_json TEXT NOT NULL DEFAULT '{}',
    created_at TEXT NOT NULL
  );

  CREATE TABLE IF NOT EXISTS provider_usage (
    id TEXT PRIMARY KEY,
    run_id TEXT REFERENCES runs(id) ON DELETE SET NULL,
    provider TEXT NOT NULL,
    model TEXT NOT NULL,
    input_tokens INTEGER NOT NULL DEFAULT 0,
    output_tokens INTEGER NOT NULL DEFAULT 0,
    cost_usd REAL NOT NULL DEFAULT 0,
    metadata_json TEXT NOT NULL DEFAULT '{}',
    created_at TEXT NOT NULL
  );

  CREATE TABLE IF NOT EXISTS redaction_findings (
    id TEXT PRIMARY KEY,
    source_uri TEXT,
    run_id TEXT REFERENCES runs(id) ON DELETE SET NULL,
    severity TEXT NOT NULL,
    finding_type TEXT NOT NULL,
    metadata_json TEXT NOT NULL DEFAULT '{}',
    created_at TEXT NOT NULL
  );

  CREATE TABLE IF NOT EXISTS storage_objects (
    id TEXT PRIMARY KEY,
    artifact_uri TEXT NOT NULL UNIQUE,
    kind TEXT NOT NULL,
    content_type TEXT,
    hash TEXT,
    size_bytes INTEGER,
    metadata_json TEXT NOT NULL DEFAULT '{}',
    created_at TEXT NOT NULL,
    updated_at TEXT NOT NULL
  );

  CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
    text,
    title,
    source_uri,
    content='',
    tokenize='porter unicode61'
  );

  CREATE INDEX IF NOT EXISTS idx_chunks_source_revision_id ON chunks(source_revision_id);
  CREATE INDEX IF NOT EXISTS idx_wiki_backlinks_to_page_id ON wiki_backlinks(to_page_id);
  CREATE INDEX IF NOT EXISTS idx_citations_wiki_page_id ON citations(wiki_page_id);
  CREATE INDEX IF NOT EXISTS idx_citations_chunk_id ON citations(chunk_id);
  CREATE INDEX IF NOT EXISTS idx_run_events_run_id ON run_events(run_id);
  CREATE INDEX IF NOT EXISTS idx_provider_usage_run_id ON provider_usage(run_id);
  CREATE INDEX IF NOT EXISTS idx_redaction_findings_run_id ON redaction_findings(run_id);

  INSERT OR IGNORE INTO schema_versions(version, applied_at)
  VALUES (1, datetime('now'));
`;
186
+
187
/**
 * Opens the SQLite database at `path` and returns the live handle.
 *
 * `ensureParentDir(path)` is invoked before the file is opened (presumably to
 * create the containing directory; see `./workspace`). Foreign-key enforcement
 * is switched on for the returned connection, since SQLite applies that
 * setting per connection.
 *
 * @param path Filesystem location of the database file.
 * @returns An open `Database`; the caller is responsible for closing it.
 */
export function openKnowledgeDb(path: string): Database {
  ensureParentDir(path);

  const connection = new Database(path);
  connection.exec('PRAGMA foreign_keys = ON;');

  return connection;
}
193
+
194
/**
 * Applies the schema script to the database at `path` and reports the
 * resulting schema version.
 *
 * The connection is opened for the duration of the call and always closed,
 * including when the migration throws.
 *
 * @param path Filesystem location of the database file.
 * @returns The same `path` plus the version read back from `schema_versions`.
 */
export function migrateKnowledgeDb(path: string): { path: string; schema_version: number } {
  const connection = openKnowledgeDb(path);
  try {
    connection.exec(MIGRATION_1);
    const schema_version = getSchemaVersion(connection);
    return { path, schema_version };
  } finally {
    connection.close();
  }
}
203
+
204
/**
 * Returns the highest schema version recorded in `schema_versions`.
 *
 * Returns 0 when no version has been recorded: either the table exists but is
 * empty (`MAX()` then yields NULL), or the table does not exist at all because
 * the database has never been migrated. The existence probe avoids surfacing
 * a raw "no such table" error for the latter case.
 *
 * @param db Open database connection.
 * @returns The maximum recorded version, or 0 if there is none.
 */
export function getSchemaVersion(db: Database): number {
  const tableProbe = db
    .query<{ present: number }, []>(
      "SELECT 1 AS present FROM sqlite_master WHERE type = 'table' AND name = 'schema_versions'",
    )
    .get();
  // `== null` deliberately matches both null and undefined "no row" results.
  if (tableProbe == null) {
    return 0;
  }

  // MAX() over zero rows produces a single row whose value is NULL.
  const row = db
    .query<{ version: number | null }, []>('SELECT MAX(version) AS version FROM schema_versions')
    .get();
  return row?.version ?? 0;
}
208
+
209
/**
 * Counts the rows of a single table.
 *
 * `table` is spliced into the SQL text verbatim, so it must be a trusted
 * identifier; within this file it is only ever called with literal names.
 *
 * @param db Open database connection.
 * @param table Name of the table to count.
 * @returns The row count, or 0 when the query yields no row or no value.
 */
function count(db: Database, table: string): number {
  const sql = `SELECT COUNT(*) AS n FROM ${table}`;
  const result = db.query<{ n: number }, []>(sql).get();
  if (result === null || result === undefined) {
    return 0;
  }
  return result.n ?? 0;
}
213
+
214
/**
 * Collects the schema version and per-table row counts for the database at
 * `path`.
 *
 * The connection is opened for the duration of the call and always closed,
 * including when one of the queries throws.
 *
 * @param path Filesystem location of the database file.
 * @returns A `KnowledgeDbStats` snapshot.
 */
export function getKnowledgeDbStats(path: string): KnowledgeDbStats {
  const connection = openKnowledgeDb(path);
  try {
    const schemaVersion = getSchemaVersion(connection);
    const rowsIn = (table: string): number => count(connection, table);

    const stats: KnowledgeDbStats = {
      schema_version: schemaVersion,
      sources: rowsIn('sources'),
      source_revisions: rowsIn('source_revisions'),
      chunks: rowsIn('chunks'),
      wiki_pages: rowsIn('wiki_pages'),
      citations: rowsIn('citations'),
      // The stats field is `indexes`, but the backing table is `knowledge_indexes`.
      indexes: rowsIn('knowledge_indexes'),
      runs: rowsIn('runs'),
      run_events: rowsIn('run_events'),
    };
    return stats;
  } finally {
    connection.close();
  }
}