coding-friend-cli 1.16.0 → 1.17.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -0
- package/dist/{chunk-D4EWPGBL.js → chunk-C5LYVVEI.js} +1 -1
- package/dist/{chunk-X5WEODUD.js → chunk-CYQU33FY.js} +1 -0
- package/dist/{chunk-QNLL3ZDF.js → chunk-G6CEEMAR.js} +3 -3
- package/dist/{chunk-4DB4XTSL.js → chunk-KTX4MGMR.js} +15 -1
- package/dist/{chunk-KJUGTLPQ.js → chunk-YO6JKGR3.js} +38 -2
- package/dist/{config-AIZJJ5D2.js → config-LZFXXOI4.js} +276 -14
- package/dist/{dev-WJ5QQ35B.js → dev-R3IYWZ3M.js} +2 -2
- package/dist/{disable-JDVOQNZG.js → disable-R6K5YJN4.js} +2 -2
- package/dist/{enable-JBJ4Q2S7.js → enable-HF4PYVJN.js} +2 -2
- package/dist/{host-NA7LZ4HX.js → host-SYZH3FVC.js} +4 -4
- package/dist/index.js +78 -18
- package/dist/{init-FZ3GG53E.js → init-MF7ISADJ.js} +102 -6
- package/dist/{install-I3GOS56Q.js → install-Q4PWEU43.js} +4 -4
- package/dist/{mcp-DLS3J6QJ.js → mcp-TBEDYELW.js} +4 -4
- package/dist/memory-RGLM35HC.js +647 -0
- package/dist/postinstall.js +1 -1
- package/dist/{session-E3CZJJZQ.js → session-H4XW2WXH.js} +1 -1
- package/dist/{statusline-6HQCDWBD.js → statusline-6Y2EBAFQ.js} +1 -1
- package/dist/{uninstall-JN5YIKKM.js → uninstall-3PSUDGI4.js} +3 -3
- package/dist/{update-OWS4IJTG.js → update-WL6SFGGO.js} +4 -4
- package/lib/cf-memory/CHANGELOG.md +25 -0
- package/lib/cf-memory/README.md +284 -0
- package/lib/cf-memory/package-lock.json +2790 -0
- package/lib/cf-memory/package.json +31 -0
- package/lib/cf-memory/scripts/migrate-frontmatter.ts +134 -0
- package/lib/cf-memory/src/__tests__/daemon-e2e.test.ts +223 -0
- package/lib/cf-memory/src/__tests__/daemon.test.ts +407 -0
- package/lib/cf-memory/src/__tests__/dedup.test.ts +103 -0
- package/lib/cf-memory/src/__tests__/embeddings.test.ts +292 -0
- package/lib/cf-memory/src/__tests__/lazy-install.test.ts +210 -0
- package/lib/cf-memory/src/__tests__/markdown-backend.test.ts +410 -0
- package/lib/cf-memory/src/__tests__/migration.test.ts +255 -0
- package/lib/cf-memory/src/__tests__/migrations.test.ts +288 -0
- package/lib/cf-memory/src/__tests__/minisearch-backend.test.ts +262 -0
- package/lib/cf-memory/src/__tests__/ollama.test.ts +48 -0
- package/lib/cf-memory/src/__tests__/schema.test.ts +128 -0
- package/lib/cf-memory/src/__tests__/search.test.ts +115 -0
- package/lib/cf-memory/src/__tests__/temporal-decay.test.ts +54 -0
- package/lib/cf-memory/src/__tests__/tier.test.ts +293 -0
- package/lib/cf-memory/src/__tests__/tools.test.ts +83 -0
- package/lib/cf-memory/src/backends/markdown.ts +318 -0
- package/lib/cf-memory/src/backends/minisearch.ts +203 -0
- package/lib/cf-memory/src/backends/sqlite/embeddings.ts +286 -0
- package/lib/cf-memory/src/backends/sqlite/index.ts +549 -0
- package/lib/cf-memory/src/backends/sqlite/migrations.ts +188 -0
- package/lib/cf-memory/src/backends/sqlite/schema.ts +120 -0
- package/lib/cf-memory/src/backends/sqlite/search.ts +296 -0
- package/lib/cf-memory/src/bin/cf-memory.ts +2 -0
- package/lib/cf-memory/src/daemon/entry.ts +99 -0
- package/lib/cf-memory/src/daemon/process.ts +271 -0
- package/lib/cf-memory/src/daemon/server.ts +166 -0
- package/lib/cf-memory/src/daemon/watcher.ts +90 -0
- package/lib/cf-memory/src/index.ts +53 -0
- package/lib/cf-memory/src/lib/backend.ts +23 -0
- package/lib/cf-memory/src/lib/daemon-client.ts +163 -0
- package/lib/cf-memory/src/lib/dedup.ts +80 -0
- package/lib/cf-memory/src/lib/lazy-install.ts +274 -0
- package/lib/cf-memory/src/lib/ollama.ts +76 -0
- package/lib/cf-memory/src/lib/temporal-decay.ts +19 -0
- package/lib/cf-memory/src/lib/tier.ts +107 -0
- package/lib/cf-memory/src/lib/types.ts +109 -0
- package/lib/cf-memory/src/resources/index.ts +62 -0
- package/lib/cf-memory/src/server.ts +20 -0
- package/lib/cf-memory/src/tools/delete.ts +38 -0
- package/lib/cf-memory/src/tools/list.ts +38 -0
- package/lib/cf-memory/src/tools/retrieve.ts +52 -0
- package/lib/cf-memory/src/tools/search.ts +47 -0
- package/lib/cf-memory/src/tools/store.ts +70 -0
- package/lib/cf-memory/src/tools/update.ts +62 -0
- package/lib/cf-memory/tsconfig.json +15 -0
- package/lib/cf-memory/vitest.config.ts +7 -0
- package/package.json +1 -1
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SQLite schema migrations.
|
|
3
|
+
*
|
|
4
|
+
* Each migration is a function that takes a Database instance and applies
|
|
5
|
+
* the necessary schema changes. Migrations are idempotent.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import {
|
|
9
|
+
SCHEMA_V1,
|
|
10
|
+
SCHEMA_VERSION,
|
|
11
|
+
SCHEMA_V2_METADATA,
|
|
12
|
+
PRAGMA_SETTINGS,
|
|
13
|
+
getVecTableSQL,
|
|
14
|
+
} from "./schema.js";
|
|
15
|
+
|
|
16
|
+
/**
 * Prepared-statement surface the migration helpers rely on: fetch a single
 * row (`get`) or execute a write (`run`) with positional parameters.
 */
type StatementLike = {
  get(...params: unknown[]): unknown;
  run(...params: unknown[]): unknown;
};

/**
 * Structural subset of a better-sqlite3 database handle.
 *
 * Declared structurally so this module has no compile-time dependency on the
 * driver and so tests can pass in a hand-written fake.
 */
export type DatabaseLike = {
  // SQL runner from better-sqlite3 — unrelated to child_process.exec.
  exec(sql: string): void;
  pragma(pragma: string): unknown;
  prepare(sql: string): StatementLike;
};
|
|
25
|
+
|
|
26
|
+
/**
 * Read the schema version recorded in the `schema_version` table.
 *
 * @param db - Database handle to query.
 * @returns The stored version, or 0 when the query throws (e.g. the table has
 *   not been created yet) or no row / no version value is present.
 */
export function getSchemaVersion(db: DatabaseLike): number {
  let stored: { version: number } | undefined;
  try {
    const lookup = db.prepare("SELECT version FROM schema_version LIMIT 1");
    stored = lookup.get() as { version: number } | undefined;
  } catch {
    // Any failure here is treated as "schema not initialised".
    return 0;
  }
  return stored?.version ?? 0;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Execute every statement in `PRAGMA_SETTINGS` against the database, in order.
 *
 * @param db - Database handle to configure.
 */
export function applyPragmas(db: DatabaseLike): void {
  PRAGMA_SETTINGS.forEach((statement) => {
    db.exec(statement);
  });
}
|
|
50
|
+
|
|
51
|
+
/**
 * Version 0 -> 1: create the initial schema by executing each `SCHEMA_V1`
 * statement in array order.
 *
 * @param db - Database handle to initialise.
 */
function migrateV0ToV1(db: DatabaseLike): void {
  for (const [, statement] of SCHEMA_V1.entries()) {
    db.exec(statement);
  }
}
|
|
59
|
+
|
|
60
|
+
/**
 * Version 1 -> 2: create the `metadata` table, seed the default embedding
 * settings, and bump the recorded schema version to 2.
 *
 * @param db - Database handle to migrate.
 */
function migrateV1ToV2(db: DatabaseLike): void {
  // db.exec is the SQL runner from better-sqlite3, not a shell call.
  db.exec(SCHEMA_V2_METADATA);

  // Seed values; the model name must stay in sync with DEFAULT_TRANSFORMERS_MODEL.
  const defaults: Array<[string, string]> = [
    ["embedding_model", "Xenova/all-MiniLM-L6-v2"],
    ["embedding_dims", "384"],
  ];
  for (const [key, value] of defaults) {
    db.prepare(
      "INSERT OR REPLACE INTO metadata (key, value) VALUES (?, ?)",
    ).run(key, value);
  }

  // Record the new schema version.
  const bumpVersion = db.prepare("UPDATE schema_version SET version = ?");
  bumpVersion.run(2);
}
|
|
78
|
+
|
|
79
|
+
/**
 * Look up a single value in the `metadata` table.
 *
 * @param db - Database handle to query.
 * @param key - Metadata key to read.
 * @returns The stored value, or `null` when no row (or no value) exists.
 */
export function getMetadata(db: DatabaseLike, key: string): string | null {
  const lookup = db.prepare("SELECT value FROM metadata WHERE key = ?");
  const hit = lookup.get(key) as { value: string } | undefined;
  if (hit == null) {
    return null;
  }
  return hit.value ?? null;
}
|
|
88
|
+
|
|
89
|
+
/**
 * Upsert one key/value pair into the `metadata` table.
 *
 * @param db - Database handle to write to.
 * @param key - Metadata key.
 * @param value - Value stored under `key`, replacing any existing row.
 */
export function setMetadata(
  db: DatabaseLike,
  key: string,
  value: string,
): void {
  const upsert = db.prepare(
    "INSERT OR REPLACE INTO metadata (key, value) VALUES (?, ?)",
  );
  upsert.run(key, value);
}
|
|
102
|
+
|
|
103
|
+
/**
 * Compare the embedding model/dimensions recorded in `metadata` with the
 * ones currently in use.
 *
 * @param db - Database handle holding the `metadata` table.
 * @param currentModel - Model identifier in use now.
 * @param currentDims - Embedding width in use now.
 * @returns The stored and current values plus `mismatched`, which is `true`
 *   only when both stored values exist and at least one differs. A database
 *   with missing or partial embedding metadata reports `mismatched: false`.
 */
export function checkEmbeddingMismatch(
  db: DatabaseLike,
  currentModel: string,
  currentDims: number,
): {
  mismatched: boolean;
  storedModel: string | null;
  storedDims: number | null;
  currentModel: string;
  currentDims: number;
} {
  const storedModel = getMetadata(db, "embedding_model");
  const rawDims = getMetadata(db, "embedding_dims");
  const storedDims = rawDims === null ? null : parseInt(rawDims, 10);

  // Nothing (or only half) recorded yet: there is nothing to conflict with.
  const incomplete = storedModel === null || storedDims === null;
  const mismatched =
    !incomplete &&
    (storedModel !== currentModel || storedDims !== currentDims);

  return { mismatched, storedModel, storedDims, currentModel, currentDims };
}
|
|
144
|
+
|
|
145
|
+
/**
 * Attempt to create the sqlite-vec virtual table.
 *
 * @param db - Database handle.
 * @param dims - Optional embedding width forwarded to `getVecTableSQL`.
 * @returns `true` when the statement executed, `false` when building or
 *   running it threw (typically because the sqlite-vec extension is not
 *   loaded, in which case vector search stays disabled).
 */
export function createVecTable(db: DatabaseLike, dims?: number): boolean {
  let created = true;
  try {
    // db.exec is the SQL runner from better-sqlite3, not a shell call.
    db.exec(getVecTableSQL(dims));
  } catch {
    created = false;
  }
  return created;
}
|
|
159
|
+
|
|
160
|
+
/**
 * Bring the database up to `SCHEMA_VERSION`.
 *
 * PRAGMAs are applied on every call. Migration steps run only when the
 * recorded version is below the step's target, so calling this again on an
 * up-to-date database changes nothing.
 *
 * @param db - Database handle to migrate.
 * @returns The resulting schema version and whether any step was applied.
 */
export function migrate(db: DatabaseLike): {
  version: number;
  migrated: boolean;
} {
  applyPragmas(db);

  const startVersion = getSchemaVersion(db);
  if (startVersion >= SCHEMA_VERSION) {
    return { version: startVersion, migrated: false };
  }

  // Ordered [targetVersion, step] pairs; a step runs when the database
  // started below its target version.
  const steps: Array<[number, (target: DatabaseLike) => void]> = [
    [1, migrateV0ToV1],
    [2, migrateV1ToV2],
  ];
  for (const [targetVersion, applyStep] of steps) {
    if (startVersion < targetVersion) {
      applyStep(db);
    }
  }

  return { version: SCHEMA_VERSION, migrated: true };
}
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SQLite schema definitions for Tier 1 memory backend.
|
|
3
|
+
*
|
|
4
|
+
* Tables:
|
|
5
|
+
* - memories: main table storing memory metadata + content
|
|
6
|
+
* - memories_fts: FTS5 virtual table for full-text search
|
|
7
|
+
* - vec_memories: sqlite-vec virtual table for vector similarity
|
|
8
|
+
* - embedding_cache: content hash → embedding mapping
|
|
9
|
+
* - schema_version: single-row version tracker
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
/** Schema version this build targets; increment it whenever a migration is added. */
export const SCHEMA_VERSION = 2;

/** Embedding width of the all-MiniLM-L6-v2 model (default for the vector table). */
export const EMBEDDING_DIMS = 384;
|
|
17
|
+
|
|
18
|
+
/**
 * Ordered SQL statements that build the version-1 schema.
 *
 * Every CREATE uses IF NOT EXISTS; the final INSERT is unconditional, so the
 * list as a whole is meant to run once on an uninitialised database.
 */
export const SCHEMA_V1: string[] = [
  // Version tracker
  `CREATE TABLE IF NOT EXISTS schema_version (
    version INTEGER NOT NULL
  )`,

  // Primary store: one row per memory (metadata + body)
  `CREATE TABLE IF NOT EXISTS memories (
    id TEXT PRIMARY KEY,
    slug TEXT NOT NULL,
    category TEXT NOT NULL,
    title TEXT NOT NULL,
    description TEXT NOT NULL,
    type TEXT NOT NULL,
    tags TEXT NOT NULL DEFAULT '[]',
    importance INTEGER NOT NULL DEFAULT 3,
    created TEXT NOT NULL,
    updated TEXT NOT NULL,
    source TEXT NOT NULL DEFAULT 'conversation',
    content TEXT NOT NULL DEFAULT '',
    content_hash TEXT NOT NULL DEFAULT ''
  )`,

  // Secondary indexes for the common filters / orderings
  `CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(type)`,
  `CREATE INDEX IF NOT EXISTS idx_memories_category ON memories(category)`,
  `CREATE INDEX IF NOT EXISTS idx_memories_updated ON memories(updated DESC)`,
  `CREATE INDEX IF NOT EXISTS idx_memories_content_hash ON memories(content_hash)`,

  // Full-text index (FTS5, BM25-ranked) backed by the memories table.
  // NOTE(review): memories has a TEXT primary key, so the rowid referenced
  // here is the implicit one; SQLite may renumber implicit rowids on VACUUM,
  // which would desynchronise an external-content index — confirm VACUUM is
  // never run against this database.
  `CREATE VIRTUAL TABLE IF NOT EXISTS memories_fts USING fts5(
    title,
    description,
    tags,
    content,
    content=memories,
    content_rowid=rowid,
    tokenize='porter unicode61'
  )`,

  // Triggers mirroring INSERT / DELETE / UPDATE on memories into the FTS index
  `CREATE TRIGGER IF NOT EXISTS memories_ai AFTER INSERT ON memories BEGIN
    INSERT INTO memories_fts(rowid, title, description, tags, content)
    VALUES (new.rowid, new.title, new.description, new.tags, new.content);
  END`,

  `CREATE TRIGGER IF NOT EXISTS memories_ad AFTER DELETE ON memories BEGIN
    INSERT INTO memories_fts(memories_fts, rowid, title, description, tags, content)
    VALUES ('delete', old.rowid, old.title, old.description, old.tags, old.content);
  END`,

  `CREATE TRIGGER IF NOT EXISTS memories_au AFTER UPDATE ON memories BEGIN
    INSERT INTO memories_fts(memories_fts, rowid, title, description, tags, content)
    VALUES ('delete', old.rowid, old.title, old.description, old.tags, old.content);
    INSERT INTO memories_fts(rowid, title, description, tags, content)
    VALUES (new.rowid, new.title, new.description, new.tags, new.content);
  END`,

  // Embedding cache keyed by content hash, so unchanged text is not re-embedded
  `CREATE TABLE IF NOT EXISTS embedding_cache (
    content_hash TEXT PRIMARY KEY,
    embedding BLOB NOT NULL,
    model TEXT NOT NULL,
    created TEXT NOT NULL
  )`,

  // Record that the database is now at version 1
  `INSERT INTO schema_version (version) VALUES (1)`,
];
|
|
90
|
+
|
|
91
|
+
/**
 * DDL for the key/value `metadata` table introduced by schema version 2.
 */
export const SCHEMA_V2_METADATA = `CREATE TABLE IF NOT EXISTS metadata (
  key TEXT PRIMARY KEY,
  value TEXT NOT NULL
)`;
|
|
98
|
+
|
|
99
|
+
/**
 * Connection-level PRAGMA statements applied before migrations run.
 */
export const PRAGMA_SETTINGS: string[] = [
  "PRAGMA journal_mode = WAL",
  "PRAGMA synchronous = NORMAL",
  "PRAGMA cache_size = -64000", // roughly 64 MB page cache
  "PRAGMA foreign_keys = ON",
  "PRAGMA temp_store = MEMORY",
  "PRAGMA mmap_size = 268435456", // 256 MB memory-mapped I/O
];
|
|
110
|
+
|
|
111
|
+
/**
 * Build the DDL for the `vec_memories` sqlite-vec virtual table.
 *
 * Kept out of `SCHEMA_V1` because the statement only works once the
 * sqlite-vec extension has been loaded.
 *
 * @param dims - Embedding width; defaults to `EMBEDDING_DIMS`.
 * @returns The CREATE VIRTUAL TABLE statement.
 */
export function getVecTableSQL(dims: number = EMBEDDING_DIMS): string {
  const ddl = `CREATE VIRTUAL TABLE IF NOT EXISTS vec_memories USING vec0(
    memory_id TEXT PRIMARY KEY,
    embedding float[${dims}]
  )`;
  return ddl;
}
|
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hybrid search: FTS5 BM25 + sqlite-vec cosine similarity + RRF fusion.
|
|
3
|
+
*
|
|
4
|
+
* Query routing:
|
|
5
|
+
* - Quoted strings or code patterns → keyword only (FTS5)
|
|
6
|
+
* - Questions (starts with who/what/why/how/when/where) → semantic only (vector)
|
|
7
|
+
* - Default → hybrid (FTS5 + vector, fused with RRF)
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import type { DatabaseLike } from "./migrations.js";
|
|
11
|
+
import {
|
|
12
|
+
EmbeddingPipeline,
|
|
13
|
+
EmbeddingCache,
|
|
14
|
+
contentHash,
|
|
15
|
+
prepareEmbeddingText,
|
|
16
|
+
} from "./embeddings.js";
|
|
17
|
+
import { applyTemporalDecay } from "../../lib/temporal-decay.js";
|
|
18
|
+
|
|
19
|
+
/**
 * Smoothing constant k in the RRF term 1 / (k + rank). Larger values shrink
 * the score gap between adjacent ranks, so top positions dominate less.
 */
const RRF_K = 60;

/** Retrieval strategy: FTS5 only, vector only, or both fused with RRF. */
export type SearchMode = "keyword" | "semantic" | "hybrid";

/** One scored hit: the memory id, its score, and the fields it matched on. */
interface RankedResult {
  id: string;
  score: number;
  matchedOn: string[];
}
|
|
29
|
+
|
|
30
|
+
/**
 * Pick a search mode from the shape of the query text.
 *
 * - Wrapped in quotes, or containing code-like syntax (dotted identifiers,
 *   `->`, `::`, `/segment/` paths): "keyword".
 * - Starting with a question word (who, what, is, can, ...): "semantic".
 * - Anything else: "hybrid".
 *
 * @param query - Raw user query; surrounding whitespace is ignored.
 */
export function detectSearchMode(query: string): SearchMode {
  const text = query.trim();

  const isQuoted = /^["'].*["']$/.test(text);
  const looksLikeCode = /\w\.\w|->|::|\/\w+\//.test(text);
  if (isQuoted || looksLikeCode) {
    return "keyword";
  }

  const isQuestion =
    /^(who|what|why|how|when|where|is|are|can|does|do|should|which)\b/i.test(
      text,
    );
  return isQuestion ? "semantic" : "hybrid";
}
|
|
53
|
+
|
|
54
|
+
/**
 * FTS5 keyword search ranked by BM25.
 *
 * The query is stripped of quote, `*` and parenthesis characters, split on
 * whitespace, and each token is wrapped in double quotes so FTS5 treats it as
 * a literal term rather than an operator (AND, OR, NOT, NEAR, column filters).
 *
 * @param db - Database handle; its prepared statements must also expose `all`.
 * @param query - Raw user query.
 * @param limit - Maximum number of rows to request.
 * @param typeFilter - When set, restricts hits to memories of this `type`.
 * @returns Hits in BM25 order. `score` is the negated BM25 rank passed through
 *   `applyTemporalDecay`; `matchedOn` lists which of title / description /
 *   tags contain a query token (case-insensitive substring), or `["content"]`
 *   when none do. Returns `[]` for an empty query or when the statement
 *   throws (e.g. an FTS5 syntax error).
 */
export function ftsSearch(
  db: DatabaseLike,
  query: string,
  limit: number,
  typeFilter?: string,
): RankedResult[] {
  const cleaned = query.replace(/['"*()]/g, " ").trim();
  if (!cleaned) return [];

  const tokens = cleaned.split(/\s+/).filter(Boolean);
  const matchExpr = tokens.map((t) => `"${t}"`).join(" ");
  // Bare tokens for field attribution. The quoted MATCH expression must not
  // be used here: its added double quotes never occur in the stored text.
  const needles = tokens.map((t) => t.toLowerCase());

  // BM25 column weights: title(10), description(4), tags(6), content(1)
  let sql = `
    SELECT
      m.id,
      m.updated,
      bm25(memories_fts, 10.0, 4.0, 6.0, 1.0) AS rank,
      memories_fts.title AS fts_title,
      memories_fts.description AS fts_desc,
      memories_fts.tags AS fts_tags,
      memories_fts.content AS fts_content
    FROM memories_fts
    JOIN memories m ON m.rowid = memories_fts.rowid
    WHERE memories_fts MATCH ?
  `;

  const params: unknown[] = [matchExpr];

  if (typeFilter) {
    sql += ` AND m.type = ?`;
    params.push(typeFilter);
  }

  sql += ` ORDER BY rank LIMIT ?`;
  params.push(limit);

  const fieldMatches = (value: unknown): boolean => {
    const haystack = String(value ?? "").toLowerCase();
    return needles.some((needle) => haystack.includes(needle));
  };

  try {
    // DatabaseLike does not declare `all`; the concrete statement has it.
    const stmt = db.prepare(sql) as unknown as {
      all(...p: unknown[]): Array<Record<string, unknown>>;
    };
    const rows = stmt.all(...params);

    return rows.map((row) => {
      const matchedOn: string[] = [];
      if (fieldMatches(row.fts_title)) matchedOn.push("title");
      if (fieldMatches(row.fts_desc)) matchedOn.push("description");
      if (fieldMatches(row.fts_tags)) matchedOn.push("tags");
      // FTS5 returned the row, so if no other field explains the hit
      // (e.g. a stemmed match) attribute it to the body.
      if (matchedOn.length === 0) matchedOn.push("content");

      // bm25() is lower-is-better; negate so a higher score is better.
      const rawScore = -(row.rank as number);

      return {
        id: String(row.id),
        score: applyTemporalDecay(rawScore, String(row.updated)),
        matchedOn,
      };
    });
  } catch {
    // FTS5 query syntax error (or any other statement failure): no results
    return [];
  }
}
|
|
155
|
+
|
|
156
|
+
/**
 * Vector similarity search over `vec_memories`.
 *
 * The query is embedded with `pipeline` (outside the error guard, so an
 * embedding failure propagates to the caller) and handed to sqlite-vec as a
 * raw byte buffer.
 *
 * @param db - Database handle; its prepared statements must also expose `all`.
 * @param query - Text to embed and search for.
 * @param limit - Value bound to the `k = ?` constraint.
 * @param pipeline - Embedding provider.
 * @param typeFilter - When set, adds `m.type = ?` to the WHERE clause.
 * @returns Hits ordered by distance with `score = 1 - distance` passed
 *   through `applyTemporalDecay`, or `[]` when the statement throws.
 */
export async function vecSearch(
  db: DatabaseLike,
  query: string,
  limit: number,
  pipeline: EmbeddingPipeline,
  typeFilter?: string,
): Promise<RankedResult[]> {
  const vector = await pipeline.embed(query);
  const queryBytes = Buffer.from(
    vector.buffer,
    vector.byteOffset,
    vector.byteLength,
  );

  try {
    const typeClause = typeFilter ? " AND m.type = ?" : "";
    const sql = `
      SELECT
        v.memory_id AS id,
        v.distance,
        m.updated
      FROM vec_memories v
      JOIN memories m ON m.id = v.memory_id
      WHERE v.embedding MATCH ? AND k = ?${typeClause}
      ORDER BY v.distance
    `;
    const params: unknown[] = typeFilter
      ? [queryBytes, limit, typeFilter]
      : [queryBytes, limit];

    // DatabaseLike does not declare `all`; the concrete statement has it.
    const stmt = db.prepare(sql) as unknown as {
      all(...p: unknown[]): Array<Record<string, unknown>>;
    };

    const hits: RankedResult[] = [];
    for (const row of stmt.all(...params)) {
      const similarity = 1 - (row.distance as number);
      hits.push({
        id: String(row.id),
        score: applyTemporalDecay(similarity, String(row.updated)),
        matchedOn: ["semantic"],
      });
    }
    return hits;
  } catch {
    // sqlite-vec not available or query failed
    return [];
  }
}
|
|
223
|
+
|
|
224
|
+
/**
 * Reciprocal Rank Fusion: merge any number of ranked lists into one.
 *
 * Each appearance of a document contributes 1 / (RRF_K + position) to its
 * fused score, where position is its 1-based index in that list. Input
 * scores are ignored; only list order matters. `matchedOn` labels are
 * unioned across lists.
 *
 * @param lists - Ranked lists, best hit first.
 * @returns Fused hits sorted by descending RRF score.
 */
export function rrfFuse(...lists: RankedResult[][]): RankedResult[] {
  const fused = new Map<string, { score: number; matchedOn: Set<string> }>();

  for (const ranking of lists) {
    let position = 0;
    for (const hit of ranking) {
      position += 1;
      let entry = fused.get(hit.id);
      if (entry === undefined) {
        entry = { score: 0, matchedOn: new Set<string>() };
        fused.set(hit.id, entry);
      }
      entry.score += 1 / (RRF_K + position);
      for (const field of hit.matchedOn) {
        entry.matchedOn.add(field);
      }
    }
  }

  const merged: RankedResult[] = [];
  for (const [id, entry] of fused) {
    merged.push({ id, score: entry.score, matchedOn: [...entry.matchedOn] });
  }
  return merged.sort((left, right) => right.score - left.score);
}
|
|
256
|
+
|
|
257
|
+
/** Arguments accepted by `hybridSearch`. */
export interface HybridSearchOptions {
  /** Database handle to search. */
  db: DatabaseLike;
  /** Raw user query. */
  query: string;
  /** Maximum number of hits to return. */
  limit: number;
  /** Embedding provider; `null` forces keyword-only search. */
  pipeline: EmbeddingPipeline | null;
  /** Whether vector search is usable; `false` forces keyword-only search. */
  vecEnabled: boolean;
  /** When set, restricts hits to memories of this `type`. */
  typeFilter?: string;
  /** When set, skips query-shape detection and uses this mode. */
  modeOverride?: SearchMode;
}
|
|
266
|
+
|
|
267
|
+
/**
 * Search memories using keyword, semantic, or fused retrieval.
 *
 * The mode comes from `modeOverride` or, failing that, `detectSearchMode`.
 * Keyword mode — or any mode when vectors are disabled or no pipeline is
 * supplied — runs FTS5 only. Semantic mode runs the vector search only.
 * Hybrid mode over-fetches from both (max(3 * limit, 20) candidates each),
 * fuses them with RRF and trims to `limit`.
 *
 * @param opts - See `HybridSearchOptions`.
 * @returns At most `limit` hits in hybrid mode; in the single-source modes,
 *   whatever the underlying search returns for `limit`.
 */
export async function hybridSearch(
  opts: HybridSearchOptions,
): Promise<RankedResult[]> {
  const { db, query, limit, typeFilter, pipeline } = opts;
  const mode = opts.modeOverride ?? detectSearchMode(query);

  if (mode === "keyword" || !opts.vecEnabled || !pipeline) {
    return ftsSearch(db, query, limit, typeFilter);
  }
  if (mode === "semantic") {
    return vecSearch(db, query, limit, pipeline, typeFilter);
  }

  // Hybrid: pull a wider candidate pool from each source before fusing.
  const candidateCount = Math.max(limit * 3, 20);
  const keywordHits = ftsSearch(db, query, candidateCount, typeFilter);
  const semanticHits = await vecSearch(
    db,
    query,
    candidateCount,
    pipeline,
    typeFilter,
  );

  return rrfFuse(keywordHits, semanticHits).slice(0, limit);
}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Daemon entry point — spawned as a detached child process.
|
|
3
|
+
*
|
|
4
|
+
* Usage: node daemon/entry.js <docsDir> [idleTimeoutMs] [--tier=full|lite]
|
|
5
|
+
*
|
|
6
|
+
* When --tier=full, uses SqliteBackend (requires lazy-installed deps).
|
|
7
|
+
* Otherwise defaults to MiniSearchBackend.
|
|
8
|
+
*/
|
|
9
|
+
import path from "node:path";
|
|
10
|
+
import { MiniSearchBackend } from "../backends/minisearch.js";
|
|
11
|
+
import { areSqliteDepsAvailable } from "../lib/lazy-install.js";
|
|
12
|
+
import type { MemoryBackend } from "../lib/backend.js";
|
|
13
|
+
import { startDaemonServer } from "./process.js";
|
|
14
|
+
import { setupWatcher } from "./watcher.js";
|
|
15
|
+
|
|
16
|
+
// First positional argument: directory holding the memory documents.
const docsDir = process.argv[2];
if (!docsDir) {
  process.stderr.write(
    "Usage: node daemon/entry.js <docsDir> [idleTimeoutMs] [--tier=full|lite]\n",
  );
  process.exit(1);
}

const resolvedDir = path.resolve(docsDir);

// Optional second positional argument: idle timeout in milliseconds. A value
// starting with "--" is a flag, not a timeout. A value that does not parse as
// a number is treated as "not provided" so that NaN is never forwarded to the
// daemon server.
const idleTimeoutArg = process.argv[3];
const parsedIdleTimeoutMs =
  idleTimeoutArg && !idleTimeoutArg.startsWith("--")
    ? Number.parseInt(idleTimeoutArg, 10)
    : undefined;
const idleTimeoutMs =
  parsedIdleTimeoutMs !== undefined && !Number.isNaN(parsedIdleTimeoutMs)
    ? parsedIdleTimeoutMs
    : undefined;

// Parse --tier flag from any position
const tierArg = process.argv.find((a) => a.startsWith("--tier="));
const requestedTier = tierArg?.split("=")[1];

// Parse embedding flags from any position
const embeddingProviderArg = process.argv.find((a) =>
  a.startsWith("--embedding-provider="),
);
const embeddingModelArg = process.argv.find((a) =>
  a.startsWith("--embedding-model="),
);
const embeddingOllamaUrlArg = process.argv.find((a) =>
  a.startsWith("--embedding-ollama-url="),
);
|
|
44
|
+
|
|
45
|
+
/**
 * Return everything after the first "=" of a `--flag=value` argument.
 * Only the first "=" is treated as the separator, so values that themselves
 * contain "=" (e.g. URLs with query strings) are returned intact.
 */
function flagValue(arg: string): string {
  const separatorIndex = arg.indexOf("=");
  return arg.slice(separatorIndex + 1);
}
|
|
49
|
+
|
|
50
|
+
// Collect only the embedding options that were actually passed on the command
// line; keys for absent flags are left off the object entirely.
const embeddingConfig: {
  provider?: "transformers" | "ollama";
  model?: string;
  ollamaUrl?: string;
} = {};
if (embeddingProviderArg) {
  embeddingConfig.provider = flagValue(embeddingProviderArg) as
    | "transformers"
    | "ollama";
}
if (embeddingModelArg) {
  embeddingConfig.model = flagValue(embeddingModelArg);
}
if (embeddingOllamaUrlArg) {
  embeddingConfig.ollamaUrl = flagValue(embeddingOllamaUrlArg);
}
const hasEmbeddingConfig = Object.keys(embeddingConfig).length > 0;
|
|
60
|
+
|
|
61
|
+
/**
 * Choose and construct the memory backend for this daemon.
 *
 * SqliteBackend is attempted when `--tier=full` was requested, or when no
 * tier was forced to "lite" and the SQLite dependencies are available. If
 * loading or constructing it throws, the failure is logged to stderr and the
 * MiniSearch backend is used instead.
 */
async function createBackend(): Promise<MemoryBackend> {
  const wantsSqlite =
    requestedTier === "full" ||
    (requestedTier !== "lite" && areSqliteDepsAvailable());

  if (!wantsSqlite) {
    return new MiniSearchBackend(resolvedDir);
  }

  try {
    // Loaded lazily so the lite tier never touches the SQLite modules.
    const sqliteModule = await import("../backends/sqlite/index.js");
    const options = hasEmbeddingConfig
      ? { embedding: embeddingConfig }
      : undefined;
    return new sqliteModule.SqliteBackend(resolvedDir, options);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    process.stderr.write(
      `[cf-memory] SqliteBackend failed, falling back to MiniSearch: ${reason}\n`,
    );
    return new MiniSearchBackend(resolvedDir);
  }
}
|
|
81
|
+
|
|
82
|
+
const backend = await createBackend();

// A file watcher is only useful when the backend can rebuild itself;
// otherwise use a stand-in whose close() does nothing.
const noopWatcher = { close() {} };
const watcher = backend.rebuild
  ? setupWatcher(
      resolvedDir,
      backend as Required<Pick<MemoryBackend, "rebuild">>,
    )
  : noopWatcher;

const daemon = startDaemonServer(backend, {
  idleTimeoutMs,
  onShutdown: () => watcher.close(),
});

// Signal handling lives here and nowhere else, to avoid duplicate handlers.
for (const signal of ["SIGTERM", "SIGINT"] as const) {
  process.on(signal, daemon.close);
}
|