akm-cli 0.0.0 → 0.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/db.js ADDED
@@ -0,0 +1,371 @@
1
+ import { Database } from "bun:sqlite";
2
+ import fs from "node:fs";
3
+ import { createRequire } from "node:module";
4
+ import path from "node:path";
5
+ import { cosineSimilarity } from "./embedder";
6
+ import { getDbPath } from "./paths";
7
+ import { warn } from "./warn";
8
+ // ── Constants ───────────────────────────────────────────────────────────────
9
+ /** Schema version of the index database. ensureSchema compares it with the "version" row in index_meta and drops/recreates the index tables when the stored value differs. */
+ export const DB_VERSION = 6;
10
+ /** Default embedding dimension; openDatabase passes it to ensureSchema when options.embeddingDim is not provided. */
+ export const EMBEDDING_DIM = 384;
11
+ // ── Database lifecycle ──────────────────────────────────────────────────────
12
/**
 * Open the index database, creating its parent directory and schema on demand.
 *
 * @param {string} [dbPath] - Database file location; getDbPath() is used when nullish.
 * @param {{ embeddingDim?: number }} [options] - Optional overrides; embeddingDim
 *   falls back to EMBEDDING_DIM.
 * @returns {Database} The opened database handle.
 */
export function openDatabase(dbPath, options) {
    const target = dbPath ?? getDbPath();
    const parentDir = path.dirname(target);
    if (!fs.existsSync(parentDir)) {
        fs.mkdirSync(parentDir, { recursive: true });
    }
    const handle = new Database(target);
    for (const pragma of ["PRAGMA journal_mode = WAL", "PRAGMA foreign_keys = ON"]) {
        handle.exec(pragma);
    }
    // The extension must be loaded before the schema step, which checks its availability.
    loadVecExtension(handle);
    const dimension = options?.embeddingDim ?? EMBEDDING_DIM;
    ensureSchema(handle, dimension);
    // Emit the JS-fallback warning once at init when applicable.
    warnIfVecMissing(handle, { once: true });
    return handle;
}
28
/**
 * Close a database handle.
 *
 * @param {{ close(): void }} db - Handle to close.
 * @returns {void}
 */
export function closeDatabase(db) {
    db.close();
}
31
+ // ── sqlite-vec extension ────────────────────────────────────────────────────
32
/** Per-database record of whether the sqlite-vec extension loaded successfully. */
const vecStatus = new WeakMap();
/**
 * Try to load the optional sqlite-vec extension into `db` and record the outcome
 * in `vecStatus`. Any failure (package missing, load error) is recorded as `false`.
 *
 * @param {object} db - Database handle to load the extension into.
 */
function loadVecExtension(db) {
    let loaded = false;
    try {
        // The package is resolved through a CommonJS-style require created for this module.
        const requireFromHere = createRequire(import.meta.url);
        requireFromHere("sqlite-vec").load(db);
        loaded = true;
    }
    catch {
        loaded = false;
    }
    vecStatus.set(db, loaded);
}
44
/**
 * Report whether sqlite-vec was loaded for this database handle.
 *
 * @param {object} db - Database handle.
 * @returns {boolean} The recorded status, or `false` when nothing was recorded.
 */
export function isVecAvailable(db) {
    const recorded = vecStatus.get(db);
    return recorded ?? false;
}
47
/** Documentation link included in the fallback warning. */
const VEC_DOCS_URL = "https://github.com/itlackey/agentikit/blob/main/docs/configuration.md#sqlite-vec-extension";
/** Minimum number of stored embeddings at which the warning is emitted. */
const VEC_FALLBACK_THRESHOLD = 10_000;
/** Set after a `once` warning has been emitted, to suppress later `once` calls. */
let vecInitWarned = false;
/**
 * Emit a warning when sqlite-vec is unavailable and the embeddings table holds at
 * least VEC_FALLBACK_THRESHOLD rows.
 *
 * @param {object} db - Database handle.
 * @param {{ once?: boolean }} [options] - With `once: true`, the warning is
 *   suppressed after it has been emitted once through a `once` call.
 */
export function warnIfVecMissing(db, { once } = { once: false }) {
    const suppressed = once && vecInitWarned;
    if (isVecAvailable(db) || suppressed) {
        return;
    }
    try {
        const result = db.prepare("SELECT COUNT(*) AS cnt FROM embeddings").get();
        const total = result?.cnt ?? 0;
        if (total < VEC_FALLBACK_THRESHOLD) {
            return;
        }
        warn("Semantic search is using JS fallback for %d entries. Install sqlite-vec for faster performance.\n See: %s", total, VEC_DOCS_URL);
        if (once) {
            vecInitWarned = true;
        }
    }
    catch {
        /* embeddings table may not exist yet during init */
    }
}
72
+ // ── Schema ──────────────────────────────────────────────────────────────────
73
/**
 * Create every table the index needs, resetting the index when the stored schema
 * version differs from DB_VERSION.
 *
 * Steps, in order:
 *  1. create `index_meta`;
 *  2. if a stored version exists and differs, drop all index tables and clear the meta rows;
 *  3. create `entries` (+ indexes), `embeddings` (BLOB storage) and the FTS5 table;
 *  4. when sqlite-vec is loaded, (re)create `entries_vec` for the requested dimension;
 *  5. record the schema version if none is stored.
 *
 * @param {object} db - Open database handle.
 * @param {number} embeddingDim - Embedding vector length for the sqlite-vec table.
 * @throws {TypeError} If sqlite-vec is available and `embeddingDim` is not a positive
 *   integer (the value is interpolated into DDL, so it must be validated first).
 */
function ensureSchema(db, embeddingDim) {
    // Create meta table first so we can check version
    db.exec(`
        CREATE TABLE IF NOT EXISTS index_meta (
            key TEXT PRIMARY KEY,
            value TEXT NOT NULL
        );
    `);
    // Check stored version — if it differs from DB_VERSION, drop and recreate all tables
    const storedVersion = getMeta(db, "version");
    if (storedVersion && storedVersion !== String(DB_VERSION)) {
        // Dependent tables are dropped before `entries` because foreign keys are enforced.
        db.exec("DROP TABLE IF EXISTS embeddings");
        db.exec("DROP TABLE IF EXISTS entries_vec");
        db.exec("DROP TABLE IF EXISTS entries_fts");
        db.exec("DROP INDEX IF EXISTS idx_entries_dir");
        db.exec("DROP INDEX IF EXISTS idx_entries_type");
        db.exec("DROP TABLE IF EXISTS entries");
        db.exec("DELETE FROM index_meta");
    }
    db.exec(`
        CREATE TABLE IF NOT EXISTS entries (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            entry_key TEXT NOT NULL UNIQUE,
            dir_path TEXT NOT NULL,
            file_path TEXT NOT NULL,
            stash_dir TEXT NOT NULL,
            entry_json TEXT NOT NULL,
            search_text TEXT NOT NULL,
            entry_type TEXT NOT NULL
        );

        CREATE INDEX IF NOT EXISTS idx_entries_dir ON entries(dir_path);
        CREATE INDEX IF NOT EXISTS idx_entries_type ON entries(entry_type);
    `);
    // BLOB-based embedding storage (always available, no sqlite-vec needed)
    db.exec(`
        CREATE TABLE IF NOT EXISTS embeddings (
            id INTEGER PRIMARY KEY,
            embedding BLOB NOT NULL,
            FOREIGN KEY (id) REFERENCES entries(id)
        );
    `);
    // FTS5 table — standalone with explicit entry_id for joining
    const ftsExists = db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='entries_fts'").get();
    if (!ftsExists) {
        db.exec(`
            CREATE VIRTUAL TABLE entries_fts USING fts5(
                entry_id UNINDEXED,
                search_text,
                tokenize='porter unicode61'
            );
        `);
    }
    // sqlite-vec table
    if (isVecAvailable(db)) {
        // The dimension ends up inside a DDL string, so only a positive integer is accepted.
        const dim = Number(embeddingDim);
        if (!Number.isInteger(dim) || dim <= 0) {
            throw new TypeError(`Invalid embedding dimension: ${String(embeddingDim)}`);
        }
        // Check if stored embedding dimension differs from configured one
        const storedDim = getMeta(db, "embeddingDim");
        if (storedDim && storedDim !== String(dim)) {
            try {
                db.exec("DROP TABLE IF EXISTS entries_vec");
            }
            catch {
                /* best-effort: the table is recreated below only if it is gone */
            }
        }
        const vecExists = db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='entries_vec'").get();
        if (!vecExists) {
            db.exec(`
                CREATE VIRTUAL TABLE entries_vec USING vec0(
                    id INTEGER PRIMARY KEY,
                    embedding FLOAT[${dim}]
                );
            `);
        }
        setMeta(db, "embeddingDim", String(dim));
    }
    // Set version if not present
    const version = getMeta(db, "version");
    if (!version) {
        setMeta(db, "version", String(DB_VERSION));
    }
}
155
+ // ── Meta helpers ────────────────────────────────────────────────────────────
156
/**
 * Read one value from the `index_meta` key/value table.
 *
 * @param {object} db - Database handle.
 * @param {string} key - Meta key to look up.
 * @returns {string | undefined} The stored value, or `undefined` when no row matches.
 */
export function getMeta(db, key) {
    const lookup = db.prepare("SELECT value FROM index_meta WHERE key = ?");
    const hit = lookup.get(key);
    return hit?.value;
}
160
/**
 * Insert or overwrite one row of the `index_meta` key/value table.
 *
 * @param {object} db - Database handle.
 * @param {string} key - Meta key.
 * @param {string} value - Value to store.
 */
export function setMeta(db, key, value) {
    const writer = db.prepare("INSERT OR REPLACE INTO index_meta (key, value) VALUES (?, ?)");
    writer.run(key, value);
}
163
+ // ── Entry operations ────────────────────────────────────────────────────────
164
/**
 * Insert an entry, or update the existing row that has the same `entry_key`.
 *
 * @param {object} db - Database handle.
 * @param {string} entryKey - Unique key of the entry.
 * @param {string} dirPath - Stored in `dir_path`.
 * @param {string} filePath - Stored in `file_path`.
 * @param {string} stashDir - Stored in `stash_dir`.
 * @param {object} entry - Serialized to JSON; its `type` is stored in `entry_type`.
 * @param {string} searchText - Stored in `search_text`.
 * @returns {number} The row id of the inserted or updated entry.
 */
export function upsertEntry(db, entryKey, dirPath, filePath, stashDir, entry, searchText) {
    const upsert = db.prepare(`
        INSERT INTO entries (entry_key, dir_path, file_path, stash_dir, entry_json, search_text, entry_type)
        VALUES (?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(entry_key) DO UPDATE SET
            dir_path = excluded.dir_path,
            file_path = excluded.file_path,
            stash_dir = excluded.stash_dir,
            entry_json = excluded.entry_json,
            search_text = excluded.search_text,
            entry_type = excluded.entry_type
    `);
    const values = [entryKey, dirPath, filePath, stashDir, JSON.stringify(entry), searchText, entry.type];
    upsert.run(...values);
    // Look the id up explicitly: last_insert_rowid() is unreliable for ON CONFLICT DO UPDATE.
    const { id } = db.prepare("SELECT id FROM entries WHERE entry_key = ?").get(entryKey);
    return id;
}
181
/**
 * Remove every entry stored for `dirPath`, together with its embedding rows.
 *
 * Embedding rows are deleted per entry id on a best-effort basis (failures are
 * ignored); the `entries` rows are then deleted in one statement.
 *
 * @param {object} db - Database handle.
 * @param {string} dirPath - Value of `dir_path` whose entries are removed.
 */
export function deleteEntriesByDir(db, dirPath) {
    const matches = db.prepare("SELECT id FROM entries WHERE dir_path = ?").all(dirPath);
    // Runs a single-id delete and swallows any error, mirroring the best-effort cleanup.
    const quietDelete = (sql, id) => {
        try {
            db.prepare(sql).run(id);
        }
        catch {
            /* ignore */
        }
    };
    matches.forEach(({ id }) => {
        quietDelete("DELETE FROM embeddings WHERE id = ?", id);
        if (isVecAvailable(db)) {
            quietDelete("DELETE FROM entries_vec WHERE id = ?", id);
        }
    });
    db.prepare("DELETE FROM entries WHERE dir_path = ?").run(dirPath);
}
201
/**
 * Rebuild the full-text index from scratch: empty `entries_fts`, then copy the id
 * (as text) and `search_text` of every row in `entries` into it.
 *
 * @param {object} db - Database handle.
 */
export function rebuildFts(db) {
    const steps = [
        "DELETE FROM entries_fts",
        "INSERT INTO entries_fts (entry_id, search_text) SELECT CAST(id AS TEXT), search_text FROM entries",
    ];
    for (const sql of steps) {
        db.exec(sql);
    }
}
205
+ // ── Vector operations ───────────────────────────────────────────────────────
206
/**
 * Store the embedding for an entry.
 *
 * The vector is always written to the BLOB `embeddings` table; when sqlite-vec is
 * available it is also written to `entries_vec` (existing row deleted first, with
 * delete errors ignored).
 *
 * @param {object} db - Database handle.
 * @param {number} entryId - Id of the entry the embedding belongs to.
 * @param {ArrayLike<number>} embedding - Embedding values.
 */
export function upsertEmbedding(db, entryId, embedding) {
    const blob = float32Buffer(embedding);
    db.prepare("INSERT OR REPLACE INTO embeddings (id, embedding) VALUES (?, ?)").run(entryId, blob);
    if (!isVecAvailable(db)) {
        return;
    }
    // Fast-path table: emulate an upsert with delete-then-insert.
    try {
        db.prepare("DELETE FROM entries_vec WHERE id = ?").run(entryId);
    }
    catch {
        /* ignore */
    }
    db.prepare("INSERT INTO entries_vec (id, embedding) VALUES (?, ?)").run(entryId, blob);
}
221
/**
 * Find the `k` nearest stored embeddings to `queryEmbedding`.
 *
 * Uses the sqlite-vec table when the extension is available (returning an empty
 * list if that query fails); otherwise delegates to the JS-based BLOB search.
 *
 * @param {object} db - Database handle.
 * @param {ArrayLike<number>} queryEmbedding - Query vector.
 * @param {number} k - Maximum number of results.
 * @returns {Array<{ id: number, distance: number }>} Matches with their distances.
 */
export function searchVec(db, queryEmbedding, k) {
    if (!isVecAvailable(db)) {
        // Fallback: JS-based cosine similarity over BLOB table
        return searchBlobVec(db, queryEmbedding, k);
    }
    const queryBlob = float32Buffer(queryEmbedding);
    try {
        const knn = db.prepare("SELECT id, distance FROM entries_vec WHERE embedding MATCH ? AND k = ?");
        return knn.all(queryBlob, k);
    }
    catch {
        return [];
    }
}
237
/**
 * Pack numeric values into a Buffer of 32-bit floats.
 *
 * @param {ArrayLike<number>} vec - Values to encode.
 * @returns {Buffer} Buffer over the bytes of a freshly created Float32Array.
 */
function float32Buffer(vec) {
    const { buffer } = new Float32Array(vec);
    return Buffer.from(buffer);
}
241
/**
 * Decode a byte view holding 32-bit floats into a plain array of numbers.
 *
 * A Float32Array can only be created over an ArrayBuffer at an offset that is a
 * multiple of 4, so a view with an unaligned `byteOffset` (possible for views into
 * a shared or pooled ArrayBuffer) is copied into fresh, aligned storage first.
 * Trailing bytes that do not form a whole float are ignored.
 *
 * @param {Uint8Array} buf - Byte view (Buffer or Uint8Array) containing the floats.
 * @returns {number[]} The decoded values.
 */
function bufferToFloat32(buf) {
    const width = Float32Array.BYTES_PER_ELEMENT;
    const count = Math.floor(buf.byteLength / width);
    if (buf.byteOffset % width !== 0) {
        // Unaligned view: slice() yields a new ArrayBuffer that starts at offset 0.
        const aligned = buf.buffer.slice(buf.byteOffset, buf.byteOffset + count * width);
        return Array.from(new Float32Array(aligned));
    }
    return Array.from(new Float32Array(buf.buffer, buf.byteOffset, count));
}
245
/**
 * JS fallback for nearest-neighbour search: score every row of `embeddings`
 * against the query with cosine similarity and return the best `k`.
 *
 * Similarity is converted to a distance via sqrt(2 * (1 - similarity)), which is
 * the L2 distance for normalized vectors, so results look like the sqlite-vec ones.
 * Any error results in an empty list.
 *
 * @param {object} db - Database handle.
 * @param {ArrayLike<number>} queryEmbedding - Query vector.
 * @param {number} k - Maximum number of results.
 * @returns {Array<{ id: number, distance: number }>} Best matches, closest first.
 */
function searchBlobVec(db, queryEmbedding, k) {
    try {
        const stored = db.prepare("SELECT id, embedding FROM embeddings").all();
        if (stored.length === 0) {
            return [];
        }
        const ranked = stored.map((row) => ({
            id: row.id,
            similarity: cosineSimilarity(queryEmbedding, bufferToFloat32(row.embedding)),
        }));
        ranked.sort((left, right) => right.similarity - left.similarity);
        const best = ranked.slice(0, k);
        return best.map((item) => {
            const gap = Math.max(0, 1 - item.similarity);
            return { id: item.id, distance: Math.sqrt(2 * gap) };
        });
    }
    catch {
        return [];
    }
}
268
+ // ── FTS5 search ─────────────────────────────────────────────────────────────
269
/**
 * Full-text search over `entries_fts`, ordered by ascending bm25 score.
 *
 * @param {object} db - Database handle.
 * @param {string} query - Raw user query; sanitized before being passed to MATCH.
 * @param {number} limit - Maximum number of rows.
 * @param {string} [entryType] - When set and not "any", restricts results to that `entry_type`.
 * @returns {Array<{ id: number, filePath: string, entry: object, searchText: string, bm25Score: number }>}
 *   Matching entries; empty when the sanitized query is empty or the query fails.
 */
export function searchFts(db, query, limit, entryType) {
    const matchExpr = sanitizeFtsQuery(query);
    if (!matchExpr) {
        return [];
    }
    const filterByType = Boolean(entryType && entryType !== "any");
    const sql = filterByType
        ? `
        SELECT e.id, e.file_path AS filePath, e.entry_json, e.search_text AS searchText,
               bm25(entries_fts) AS bm25Score
        FROM entries_fts f
        JOIN entries e ON e.id = CAST(f.entry_id AS INTEGER)
        WHERE entries_fts MATCH ?
          AND e.entry_type = ?
        ORDER BY bm25Score
        LIMIT ?
    `
        : `
        SELECT e.id, e.file_path AS filePath, e.entry_json, e.search_text AS searchText,
               bm25(entries_fts) AS bm25Score
        FROM entries_fts f
        JOIN entries e ON e.id = CAST(f.entry_id AS INTEGER)
        WHERE entries_fts MATCH ?
        ORDER BY bm25Score
        LIMIT ?
    `;
    const params = filterByType ? [matchExpr, entryType, limit] : [matchExpr, limit];
    try {
        // JSON parsing stays inside the try so a corrupt row also yields an empty result.
        return db.prepare(sql).all(...params).map((hit) => ({
            id: hit.id,
            filePath: hit.filePath,
            entry: JSON.parse(hit.entry_json),
            searchText: hit.searchText,
            bm25Score: hit.bm25Score,
        }));
    }
    catch {
        return [];
    }
}
314
/**
 * Reduce a raw user query to whitespace-separated bare tokens for an FTS5 MATCH.
 *
 * Every character that is not a Unicode letter, combining mark, number or
 * whitespace is replaced by a space, so punctuation and FTS5 syntax characters
 * never reach the query parser. Letters and digits of any script are preserved
 * (the previous ASCII-only filter erased non-English queries entirely even though
 * the index uses the `unicode61` tokenizer).
 *
 * @param {string} query - Raw query text; nullish input is treated as empty.
 * @returns {string} Space-joined tokens, or "" when nothing searchable remains.
 */
function sanitizeFtsQuery(query) {
    const tokens = String(query ?? "")
        .replace(/[^\p{L}\p{M}\p{N}\s]/gu, " ")
        .split(/\s+/)
        .filter((t) => t.length >= 1);
    if (tokens.length === 0) {
        return "";
    }
    // Tokens stay unquoted, as before, so the table's tokenizer normalizes word forms.
    return tokens.join(" ");
}
324
+ // ── All entries ─────────────────────────────────────────────────────────────
325
/**
 * Load every stored entry, optionally restricted to one entry type.
 *
 * @param {object} db - Database handle.
 * @param {string} [entryType] - When set and not "any", only entries of this type are returned.
 * @returns {Array<{ id: number, entryKey: string, dirPath: string, filePath: string, stashDir: string, entry: object, searchText: string }>}
 *   Entries with `entry_json` parsed into `entry`.
 */
export function getAllEntries(db, entryType) {
    const typed = Boolean(entryType && entryType !== "any");
    const sql = typed
        ? "SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text FROM entries WHERE entry_type = ?"
        : "SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text FROM entries";
    const params = typed ? [entryType] : [];
    const records = db.prepare(sql).all(...params);
    return records.map((record) => {
        const { id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text } = record;
        return {
            id,
            entryKey: entry_key,
            dirPath: dir_path,
            filePath: file_path,
            stashDir: stash_dir,
            entry: JSON.parse(entry_json),
            searchText: search_text,
        };
    });
}
348
/**
 * Count the rows of the `entries` table.
 *
 * @param {object} db - Database handle.
 * @returns {number} Number of stored entries.
 */
export function getEntryCount(db) {
    const { cnt } = db.prepare("SELECT COUNT(*) AS cnt FROM entries").get();
    return cnt;
}
352
/**
 * Fetch the file path and parsed entry for one entry id.
 *
 * @param {object} db - Database handle.
 * @param {number} id - Entry row id.
 * @returns {{ filePath: string, entry: object } | undefined} The entry, or
 *   `undefined` when no row has that id.
 */
export function getEntryById(db, id) {
    const found = db.prepare("SELECT file_path, entry_json FROM entries WHERE id = ?").get(id);
    return found
        ? { filePath: found.file_path, entry: JSON.parse(found.entry_json) }
        : undefined;
}
358
/**
 * Load every entry whose `dir_path` equals `dirPath`.
 *
 * @param {object} db - Database handle.
 * @param {string} dirPath - Directory path to match exactly.
 * @returns {Array<{ id: number, entryKey: string, dirPath: string, filePath: string, stashDir: string, entry: object, searchText: string }>}
 *   Entries with `entry_json` parsed into `entry`.
 */
export function getEntriesByDir(db, dirPath) {
    const select = db.prepare("SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text FROM entries WHERE dir_path = ?");
    const found = [];
    for (const record of select.all(dirPath)) {
        found.push({
            id: record.id,
            entryKey: record.entry_key,
            dirPath: record.dir_path,
            filePath: record.file_path,
            stashDir: record.stash_dir,
            entry: JSON.parse(record.entry_json),
            searchText: record.search_text,
        });
    }
    return found;
}
@@ -0,0 +1,150 @@
1
+ import { fetchWithTimeout } from "./common";
2
/** Cached feature-extraction pipeline, set once loading has succeeded. */
let localEmbedder;
/** In-flight load shared by concurrent callers; cleared when the load settles. */
let localEmbedderLoading;
/**
 * Import @xenova/transformers and build the feature-extraction pipeline.
 *
 * @returns {Promise<Function>} The pipeline function.
 * @throws {Error} If the package cannot be imported (original error attached as
 *   `cause`) or the pipeline factory yields nothing.
 */
async function loadLocalEmbedder() {
    let mod;
    try {
        mod = await import("@xenova/transformers");
    }
    catch (err) {
        throw new Error("Semantic search requires @xenova/transformers. Install it with: npm install @xenova/transformers", { cause: err });
    }
    const model = await mod.pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2");
    if (!model) {
        throw new Error("Failed to initialize local embedder.");
    }
    return model;
}
/**
 * Return the local embedding pipeline, loading it on first use.
 *
 * Concurrent callers share a single in-flight load instead of each starting their
 * own; a failed load is not cached, so a later call retries.
 *
 * @returns {Promise<Function>} The cached pipeline function.
 * @throws {Error} Propagates failures from loading the pipeline.
 */
async function getLocalEmbedder() {
    if (localEmbedder) {
        return localEmbedder;
    }
    if (!localEmbedderLoading) {
        localEmbedderLoading = loadLocalEmbedder().finally(() => {
            localEmbedderLoading = undefined;
        });
    }
    localEmbedder = await localEmbedderLoading;
    return localEmbedder;
}
21
/**
 * Embed one text with the local pipeline, using mean pooling and normalization.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]>} The embedding copied out of the pipeline result's `data`.
 */
async function embedLocal(text) {
    const extract = await getLocalEmbedder();
    const { data } = await extract(text, { pooling: "mean", normalize: true });
    return Array.from(data);
}
26
+ // ── OpenAI-compatible remote embedder ───────────────────────────────────────
27
/**
 * Request a single embedding from an OpenAI-compatible endpoint.
 *
 * @param {string} text - Text to embed.
 * @param {{ endpoint: string, model: string, apiKey?: string, dimension?: number }} config
 *   Endpoint settings; `apiKey` adds a Bearer Authorization header and `dimension`
 *   is sent as `dimensions` when truthy.
 * @returns {Promise<number[]>} The embedding found at `data[0].embedding`.
 * @throws {Error} If the response status is not OK or the payload has no
 *   array at `data[0].embedding`.
 */
async function embedRemote(text, config) {
    const headers = { "Content-Type": "application/json" };
    if (config.apiKey) {
        headers.Authorization = `Bearer ${config.apiKey}`;
    }
    const body = {
        input: text,
        model: config.model,
    };
    if (config.dimension) {
        body.dimensions = config.dimension;
    }
    const response = await fetchWithTimeout(config.endpoint, {
        method: "POST",
        headers,
        body: JSON.stringify(body),
    });
    if (!response.ok) {
        // Named respBody (as in embedRemoteBatch) so it no longer shadows the request body.
        const respBody = await response.text().catch(() => "");
        throw new Error(`Embedding request failed (${response.status}): ${respBody}`);
    }
    const json = await response.json();
    const embedding = json?.data?.[0]?.embedding;
    // A truthy non-array (e.g. an error string) is not a usable vector.
    if (!Array.isArray(embedding)) {
        throw new Error("Unexpected embedding response format: missing data[0].embedding");
    }
    return embedding;
}
54
+ // ── Public API ──────────────────────────────────────────────────────────────
55
/**
 * Produce an embedding for a single text.
 *
 * @param {string} text - Text to embed.
 * @param {object} [embeddingConfig] - When provided, the configured
 *   OpenAI-compatible endpoint is used; otherwise the local
 *   @xenova/transformers pipeline is used.
 * @returns {Promise<number[]>} The embedding vector.
 */
export async function embed(text, embeddingConfig) {
    return embeddingConfig
        ? embedRemote(text, embeddingConfig)
        : embedLocal(text);
}
66
+ // ── Batch embedding ─────────────────────────────────────────────────────────
67
/**
 * Produce embeddings for several texts.
 *
 * With an `embeddingConfig` the work is delegated to the remote batch path;
 * otherwise each text is embedded locally, one after another.
 *
 * @param {string[]} texts - Texts to embed.
 * @param {object} [embeddingConfig] - Remote endpoint configuration, if any.
 * @returns {Promise<number[][]>} One embedding per input text, in input order
 *   (empty for empty input).
 */
export async function embedBatch(texts, embeddingConfig) {
    if (texts.length === 0) {
        return [];
    }
    if (embeddingConfig) {
        return embedRemoteBatch(texts, embeddingConfig);
    }
    // Local transformer: strictly sequential, each call awaited before the next starts.
    const vectors = [];
    for (const item of texts) {
        const vector = await embedLocal(item);
        vectors.push(vector);
    }
    return vectors;
}
85
/**
 * Embed many texts through an OpenAI-compatible endpoint, 100 inputs per request.
 *
 * Each response item is matched to its input via the item's `index` field when
 * every item carries a valid, distinct one; otherwise the response order is
 * trusted as before.
 *
 * @param {string[]} texts - Texts to embed.
 * @param {{ endpoint: string, model: string, apiKey?: string, dimension?: number }} config
 *   Endpoint settings; `apiKey` adds a Bearer Authorization header and `dimension`
 *   is sent as `dimensions` when truthy.
 * @returns {Promise<number[][]>} One embedding per input text, in input order.
 * @throws {Error} If a request fails, a response has the wrong number of items,
 *   or an item has no embedding array.
 */
async function embedRemoteBatch(texts, config) {
    const BATCH_SIZE = 100;
    const results = [];
    const headers = { "Content-Type": "application/json" };
    if (config.apiKey) {
        headers.Authorization = `Bearer ${config.apiKey}`;
    }
    for (let i = 0; i < texts.length; i += BATCH_SIZE) {
        const batch = texts.slice(i, i + BATCH_SIZE);
        const body = {
            input: batch,
            model: config.model,
        };
        if (config.dimension) {
            body.dimensions = config.dimension;
        }
        const response = await fetchWithTimeout(config.endpoint, {
            method: "POST",
            headers,
            body: JSON.stringify(body),
        });
        if (!response.ok) {
            const respBody = await response.text().catch(() => "");
            throw new Error(`Embedding batch request failed (${response.status}): ${respBody}`);
        }
        const json = await response.json();
        const items = json?.data;
        if (!Array.isArray(items) || items.length !== batch.length) {
            throw new Error(`Unexpected embedding batch response: expected ${batch.length} embeddings, got ${items?.length ?? 0}`);
        }
        // Reorder by `index` only when the indices form a complete 0..n-1 set.
        const indices = new Set(items.map((d) => d?.index));
        const indexed = indices.size === items.length
            && items.every((d) => Number.isInteger(d?.index) && d.index >= 0 && d.index < items.length);
        const ordered = indexed ? [...items].sort((a, b) => a.index - b.index) : items;
        for (const item of ordered) {
            if (!Array.isArray(item?.embedding)) {
                throw new Error("Unexpected embedding batch response: item is missing its embedding");
            }
            results.push(item.embedding);
        }
    }
    return results;
}
118
+ // ── Similarity ──────────────────────────────────────────────────────────────
119
/**
 * Cosine similarity of two numeric vectors.
 *
 * Only the first min(a.length, b.length) components are compared.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} dot(a, b) / (|a| * |b|), or 0 when there is nothing to compare
 *   or either compared prefix has zero magnitude.
 */
export function cosineSimilarity(a, b) {
    const size = Math.min(a.length, b.length);
    if (size === 0) {
        return 0;
    }
    let dot = 0;
    let sumSqA = 0;
    let sumSqB = 0;
    let idx = 0;
    while (idx < size) {
        const x = a[idx];
        const y = b[idx];
        dot += x * y;
        sumSqA += x * x;
        sumSqB += y * y;
        idx += 1;
    }
    const norm = Math.sqrt(sumSqA) * Math.sqrt(sumSqB);
    if (norm === 0) {
        return 0;
    }
    return dot / norm;
}
132
+ // ── Availability check ──────────────────────────────────────────────────────
133
/**
 * Check whether embeddings can currently be produced.
 *
 * With a config, a probe request embedding the text "test" is sent to the remote
 * endpoint; without one, the local embedder is initialized.
 *
 * @param {object} [embeddingConfig] - Remote endpoint configuration, if any.
 * @returns {Promise<boolean>} `true` when the probe succeeds, `false` when it throws.
 */
export async function isEmbeddingAvailable(embeddingConfig) {
    const probe = embeddingConfig
        ? () => embedRemote("test", embeddingConfig)
        : () => getLocalEmbedder();
    try {
        await probe();
        return true;
    }
    catch {
        return false;
    }
}
package/dist/errors.js ADDED
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Typed error classes for structured exit code classification.
3
+ *
4
+ * - ConfigError -> exit 78 (configuration / environment problems)
5
+ * - UsageError -> exit 2 (bad CLI arguments or invalid input)
6
+ * - NotFoundError -> exit 1 (requested resource missing)
7
+ */
8
/**
 * Raised when configuration or environment is invalid or missing.
 */
export class ConfigError extends Error {
    /**
     * @param {string} msg - Human-readable description of the problem.
     * @param {{ cause?: unknown }} [options] - Optional standard Error options,
     *   forwarded so the underlying failure can be attached as `cause`.
     */
    constructor(msg, options) {
        super(msg, options);
        this.name = "ConfigError";
    }
}
15
/**
 * Raised when the user supplies invalid arguments or input.
 */
export class UsageError extends Error {
    /**
     * @param {string} msg - Human-readable description of the problem.
     * @param {{ cause?: unknown }} [options] - Optional standard Error options,
     *   forwarded so the underlying failure can be attached as `cause`.
     */
    constructor(msg, options) {
        super(msg, options);
        this.name = "UsageError";
    }
}
22
/**
 * Raised when a requested resource (asset, entry, file) is not found.
 */
export class NotFoundError extends Error {
    /**
     * @param {string} msg - Human-readable description of what was missing.
     * @param {{ cause?: unknown }} [options] - Optional standard Error options,
     *   forwarded so the underlying failure can be attached as `cause`.
     */
    constructor(msg, options) {
        super(msg, options);
        this.name = "NotFoundError";
    }
}