n2-soul 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +121 -0
- package/README.ko.md +197 -0
- package/README.md +197 -0
- package/index.js +30 -0
- package/lib/agent-registry.js +60 -0
- package/lib/config.default.js +68 -0
- package/lib/config.example.js +28 -0
- package/lib/config.js +28 -0
- package/lib/context.js +34 -0
- package/lib/intercom-log.js +187 -0
- package/lib/kv-cache/agent-adapter.js +192 -0
- package/lib/kv-cache/backup.js +357 -0
- package/lib/kv-cache/compressor.js +130 -0
- package/lib/kv-cache/embedding.js +205 -0
- package/lib/kv-cache/index.js +446 -0
- package/lib/kv-cache/schema.js +108 -0
- package/lib/kv-cache/snapshot.js +213 -0
- package/lib/kv-cache/sqlite-store.js +402 -0
- package/lib/kv-cache/tier-manager.js +239 -0
- package/lib/kv-cache/token-saver.js +153 -0
- package/lib/paths.js +20 -0
- package/lib/soul-engine.js +189 -0
- package/lib/utils.js +97 -0
- package/package.json +31 -0
- package/sequences/boot.js +81 -0
- package/sequences/end.js +132 -0
- package/sequences/work.js +257 -0
- package/tools/brain.js +45 -0
- package/tools/kv-cache.js +246 -0
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
// Soul KV-Cache — Backup/Restore engine. Produces sqlite-store compatible DB backups.
|
|
2
|
+
const path = require('path');
|
|
3
|
+
const fs = require('fs');
|
|
4
|
+
|
|
5
|
+
/**
 * KV-Cache Backup Manager.
 * Backs up snapshots + embeddings into a sqlite-store compatible DB.
 * The backup DB uses the same schema, so it can be opened directly by SqliteStore.
 *
 * Backup structure:
 *   {backupDir}/{project}/
 *     backup-YYYY-MM-DD.sqlite — directly loadable DB
 *     manifest.json            — backup history
 */
class BackupManager {
  /**
   * @param {string} dataDir - Soul data directory (config.DATA_DIR)
   * @param {object} config - backup config section
   * @param {string} [config.dir] - backup root; defaults to {dataDir}/kv-cache/backups
   * @param {number} [config.keepCount] - number of backups retained per project (default 7)
   * @param {boolean} [config.incremental] - when not `false`, unchanged data is skipped
   */
  constructor(dataDir, config = {}) {
    this.dataDir = dataDir;
    this.backupDir = config.dir || path.join(dataDir, 'kv-cache', 'backups');
    this.keepCount = config.keepCount || 7;
    this.incremental = config.incremental !== false;
    this._SQL = null;
  }

  /**
   * Lazy-init sql.js (shared with sqlite-store).
   * @returns {Promise<object>}
   */
  async _initSql() {
    if (this._SQL) return this._SQL;
    const { initSqlJs } = require('./sqlite-store');
    this._SQL = await initSqlJs();
    return this._SQL;
  }

  /**
   * Backup: dump all project data into a sqlite-store compatible DB.
   * SQLite backend → the project's .sqlite file is copied as-is (fastest).
   * JSON backend   → snapshot files are read and INSERTed into a fresh DB.
   *
   * @param {string} project
   * @param {object} options
   * @param {boolean} options.full - force a backup even if nothing changed
   * @returns {Promise<object>}
   */
  async backup(project, options = {}) {
    fs.mkdirSync(path.join(this.backupDir, project), { recursive: true });

    const manifest = this._loadManifest(project);

    // 1) SQLite backend: copy the .sqlite file directly
    const sqlitePath = path.join(this.dataDir, 'kv-cache', 'sqlite', `${project}.sqlite`);
    if (fs.existsSync(sqlitePath)) {
      return this._backupByCopy(project, sqlitePath, manifest, options);
    }

    // 2) JSON backend: convert snapshots into a DB
    return this._backupFromJson(project, manifest, options);
  }

  /**
   * SQLite backend backup: plain file copy (same schema, directly loadable).
   *
   * @param {string} project
   * @param {string} sqlitePath - live sqlite-store file to copy
   * @param {object} manifest - manifest loaded for this project
   * @param {object} options - see {@link BackupManager#backup}
   * @returns {object} result descriptor (`type` is 'copy' or 'skip')
   */
  _backupByCopy(project, sqlitePath, manifest, options) {
    // Incremental check: skip when the source has not been modified since the last backup.
    if (this.incremental && !options.full && manifest.lastBackup) {
      const lastTime = new Date(manifest.lastBackup).getTime();
      if (fs.statSync(sqlitePath).mtimeMs <= lastTime) {
        return { backupId: null, snapshots: 0, sizeBytes: 0, type: 'skip', message: 'No changes' };
      }
    }

    const backupId = this._makeBackupId();
    const destPath = path.join(this.backupDir, project, `backup-${backupId}.sqlite`);
    fs.copyFileSync(sqlitePath, destPath);
    const { size } = fs.statSync(destPath);

    this._recordBackup(project, manifest, {
      id: backupId, type: 'copy', timestamp: new Date().toISOString(), sizeBytes: size,
    });

    return {
      backupId, type: 'copy', sizeBytes: size,
      sizeFormatted: this._formatBytes(size),
      path: destPath,
    };
  }

  /**
   * JSON backend backup: snapshots folder → sqlite-store compatible DB.
   *
   * @param {string} project
   * @param {object} manifest - manifest loaded for this project
   * @param {object} options - see {@link BackupManager#backup}
   * @returns {Promise<object>} result descriptor (`type` is 'full', 'skip' or 'empty')
   */
  async _backupFromJson(project, manifest, options) {
    const SQL = await this._initSql();
    const snapDir = path.join(this.dataDir, 'kv-cache', 'snapshots', project);

    if (!fs.existsSync(snapDir)) {
      return { backupId: null, snapshots: 0, sizeBytes: 0, type: 'empty', error: 'No snapshots' };
    }

    // Collect JSON files (recursive — JSON backend uses {project}/{date}/*.json)
    const snapFiles = [];
    const scanDir = (dir) => {
      for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
        if (entry.isDirectory()) {
          scanDir(path.join(dir, entry.name));
        } else if (entry.name.endsWith('.json')) {
          snapFiles.push(path.join(dir, entry.name));
        }
      }
    };
    scanDir(snapDir);

    if (snapFiles.length === 0) {
      return { backupId: null, snapshots: 0, sizeBytes: 0, type: 'empty', error: 'No snapshots' };
    }

    // Incremental check: skip when no snapshot file is newer than the last backup.
    if (this.incremental && !options.full && manifest.lastBackup) {
      const lastTime = new Date(manifest.lastBackup).getTime();
      const hasChanges = snapFiles.some(f => {
        // A file that cannot be stat'ed is treated as changed so the backup still runs.
        try { return fs.statSync(f).mtimeMs > lastTime; } catch (e) { return true; }
      });
      if (!hasChanges) {
        return { backupId: null, snapshots: 0, sizeBytes: 0, type: 'skip', message: 'No changes' };
      }
    }

    const backupId = this._makeBackupId();
    const destPath = path.join(this.backupDir, project, `backup-${backupId}.sqlite`);
    let snapCount = 0;
    let embCount = 0;
    let buffer;

    // Create DB with the sqlite-store compatible schema
    const db = new SQL.Database();
    try {
      db.run(`
        CREATE TABLE IF NOT EXISTS snapshots (
          id TEXT PRIMARY KEY,
          agent_name TEXT NOT NULL,
          agent_type TEXT DEFAULT 'external',
          model TEXT,
          started_at TEXT,
          ended_at TEXT,
          turn_count INTEGER DEFAULT 0,
          token_estimate INTEGER DEFAULT 0,
          keys TEXT DEFAULT '[]',
          context TEXT DEFAULT '{}',
          parent_session_id TEXT,
          project_name TEXT NOT NULL,
          created_at TEXT DEFAULT (datetime('now'))
        )
      `);
      db.run(`CREATE INDEX IF NOT EXISTS idx_snapshots_project ON snapshots(project_name, ended_at DESC)`);

      // Embeddings table (bonus — also backed up)
      db.run(`
        CREATE TABLE IF NOT EXISTS embeddings (
          snapshot_id TEXT PRIMARY KEY,
          vector BLOB NOT NULL
        )
      `);

      // Insert snapshots
      const stmt = db.prepare(`
        INSERT OR REPLACE INTO snapshots
        (id, agent_name, agent_type, model, started_at, ended_at,
        turn_count, token_estimate, keys, context, parent_session_id, project_name)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
      `);
      try {
        for (const filePath of snapFiles) {
          try {
            const s = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
            stmt.run([
              s.id, s.agentName || 'unknown', s.agentType || 'external', s.model || null,
              s.startedAt || null, s.endedAt || null,
              s.turnCount || 0, s.tokenEstimate || 0,
              JSON.stringify(s.keys || []), JSON.stringify(s.context || {}),
              s.parentSessionId || null, s.projectName || project,
            ]);
            snapCount++;
          } catch (e) { /* best-effort: skip unreadable or corrupt snapshot files */ }
        }
      } finally {
        stmt.free();
      }

      // Insert embeddings (file name without .json is used as the snapshot id)
      const embDir = path.join(this.dataDir, 'kv-cache', 'embeddings', project);
      if (fs.existsSync(embDir)) {
        const embStmt = db.prepare('INSERT OR REPLACE INTO embeddings (snapshot_id, vector) VALUES (?, ?)');
        try {
          for (const file of fs.readdirSync(embDir).filter(f => f.endsWith('.json'))) {
            try {
              const vec = fs.readFileSync(path.join(embDir, file), 'utf-8');
              embStmt.run([path.basename(file, '.json'), vec]);
              embCount++;
            } catch (e) { /* best-effort: skip unreadable embedding files */ }
          }
        } finally {
          embStmt.free();
        }
      }

      // Write DB to disk
      buffer = Buffer.from(db.export());
      fs.writeFileSync(destPath, buffer);
    } finally {
      // Always release the in-memory database, even when a step above throws.
      db.close();
    }

    this._recordBackup(project, manifest, {
      id: backupId, type: 'full', timestamp: new Date().toISOString(),
      snapshots: snapCount, embeddings: embCount, sizeBytes: buffer.length,
    });

    return {
      backupId, snapshots: snapCount, embeddings: embCount,
      sizeBytes: buffer.length, sizeFormatted: this._formatBytes(buffer.length),
      type: 'full', path: destPath,
    };
  }

  /**
   * Restore from backup. Either copies the backup DB to the sqlite-store location,
   * or (JSON target) reads the DB and writes the rows back out as JSON files.
   *
   * @param {string} project
   * @param {string} backupId - backup to restore; defaults to the most recent manifest entry
   * @param {object} options
   * @param {string} options.target - 'sqlite' | 'json' (default: 'json')
   * @returns {Promise<object>}
   */
  async restore(project, backupId = null, options = {}) {
    const manifest = this._loadManifest(project);
    if (!backupId) {
      if (manifest.backups.length === 0) return { error: 'No backups found', restored: 0 };
      backupId = manifest.backups[manifest.backups.length - 1].id;
    }

    const dbPath = path.join(this.backupDir, project, `backup-${backupId}.sqlite`);
    if (!fs.existsSync(dbPath)) return { error: `Backup not found: ${backupId}`, restored: 0 };

    const target = options.target || 'json';

    if (target === 'sqlite') {
      // Direct copy to sqlite-store location
      const destDir = path.join(this.dataDir, 'kv-cache', 'sqlite');
      fs.mkdirSync(destDir, { recursive: true });
      fs.copyFileSync(dbPath, path.join(destDir, `${project}.sqlite`));
      return { restored: 'full', backupId, target: 'sqlite' };
    }

    // JSON restore: extract from DB back to snapshot files
    const SQL = await this._initSql();
    const db = new SQL.Database(fs.readFileSync(dbPath));
    let restoredSnaps = 0;
    let restoredEmbs = 0;

    try {
      const snapDir = path.join(this.dataDir, 'kv-cache', 'snapshots', project);
      fs.mkdirSync(snapDir, { recursive: true });

      const snapRows = db.exec('SELECT * FROM snapshots');
      if (snapRows.length > 0) {
        const cols = snapRows[0].columns;
        for (const row of snapRows[0].values) {
          const obj = {};
          cols.forEach((c, i) => { obj[c] = row[i]; });
          const session = {
            id: obj.id, agentName: obj.agent_name, agentType: obj.agent_type,
            model: obj.model, startedAt: obj.started_at, endedAt: obj.ended_at,
            turnCount: obj.turn_count, tokenEstimate: obj.token_estimate,
            keys: JSON.parse(obj.keys || '[]'), context: JSON.parse(obj.context || '{}'),
            parentSessionId: obj.parent_session_id, projectName: obj.project_name,
          };
          fs.writeFileSync(path.join(snapDir, `${session.id}.json`), JSON.stringify(session, null, 2));
          restoredSnaps++;
        }
      }

      // Embeddings
      const embDir = path.join(this.dataDir, 'kv-cache', 'embeddings', project);
      fs.mkdirSync(embDir, { recursive: true });
      const embRows = db.exec('SELECT snapshot_id, vector FROM embeddings');
      if (embRows.length > 0) {
        for (const row of embRows[0].values) {
          fs.writeFileSync(path.join(embDir, `${row[0]}.json`), row[1]);
          restoredEmbs++;
        }
      }
    } finally {
      // Release the database even if a row fails to parse or a write fails.
      db.close();
    }

    return { restored: restoredSnaps, embeddings: restoredEmbs, backupId, target: 'json' };
  }

  /**
   * List backup history.
   * @param {string} project
   * @returns {object[]} manifest entries, each with an added `sizeFormatted` string
   */
  list(project) {
    return this._loadManifest(project).backups.map(b => ({
      ...b, sizeFormatted: this._formatBytes(b.sizeBytes),
    }));
  }

  /**
   * Backup status summary.
   * @param {string} project
   * @returns {object}
   */
  status(project) {
    const manifest = this._loadManifest(project);
    const totalSize = manifest.backups.reduce((s, b) => s + (b.sizeBytes || 0), 0);
    return {
      project, totalBackups: manifest.backups.length,
      lastBackup: manifest.lastBackup, totalBackupSize: this._formatBytes(totalSize),
      keepCount: this.keepCount,
    };
  }

  // ── Helpers ──

  /**
   * Record a finished backup in the manifest, persist it and prune old backups.
   * Backup ids are per-day, so a second backup on the same day overwrites the same
   * file; its manifest entry replaces the earlier one instead of being duplicated.
   *
   * @param {string} project
   * @param {object} manifest - manifest object to update (mutated)
   * @param {object} entry - new manifest entry (`id` and `timestamp` required)
   */
  _recordBackup(project, manifest, entry) {
    manifest.backups = manifest.backups.filter(b => b.id !== entry.id);
    manifest.backups.push(entry);
    manifest.lastBackup = entry.timestamp;
    this._saveManifest(project, manifest);
    this._cleanup(project);
  }

  /**
   * Drop the oldest manifest entries beyond `keepCount` and delete their files.
   *
   * @param {string} project
   * @returns {{ deleted: number }} number of backup files actually removed
   */
  _cleanup(project) {
    const manifest = this._loadManifest(project);
    const dir = path.join(this.backupDir, project);
    let deleted = 0;
    let pruned = 0;
    while (manifest.backups.length > this.keepCount) {
      const old = manifest.backups.shift();
      pruned++;
      // Manifests may hold several entries with one id; never delete a file that a
      // retained entry still refers to.
      if (manifest.backups.some(b => b.id === old.id)) continue;
      try {
        fs.unlinkSync(path.join(dir, `backup-${old.id}.sqlite`));
        deleted++;
      } catch (e) { /* best-effort: the file may already be gone */ }
    }
    // Persist whenever entries were dropped, even if their files were already missing.
    if (pruned > 0) this._saveManifest(project, manifest);
    return { deleted };
  }

  /**
   * @returns {string} today's date (UTC) as YYYY-MM-DD, used as the backup id
   */
  _makeBackupId() {
    return new Date().toISOString().slice(0, 10); // YYYY-MM-DD
  }

  /**
   * Load the project's manifest, falling back to an empty one when it is missing,
   * unreadable, or not shaped as expected.
   *
   * @param {string} project
   * @returns {{ backups: object[], lastBackup: (string|null) }}
   */
  _loadManifest(project) {
    const p = path.join(this.backupDir, project, 'manifest.json');
    try {
      if (fs.existsSync(p)) {
        const parsed = JSON.parse(fs.readFileSync(p, 'utf-8'));
        return {
          ...parsed,
          backups: Array.isArray(parsed.backups) ? parsed.backups : [],
          lastBackup: parsed.lastBackup || null,
        };
      }
    } catch (e) { /* unreadable or corrupt manifest: start from an empty one */ }
    return { backups: [], lastBackup: null };
  }

  /**
   * Write the manifest as pretty-printed JSON, creating the project folder if needed.
   *
   * @param {string} project
   * @param {object} manifest
   */
  _saveManifest(project, manifest) {
    const dir = path.join(this.backupDir, project);
    fs.mkdirSync(dir, { recursive: true });
    fs.writeFileSync(path.join(dir, 'manifest.json'), JSON.stringify(manifest, null, 2));
  }

  /**
   * Human-readable byte size with one decimal (1024-based units).
   *
   * @param {number} bytes
   * @returns {string} e.g. '1.5 KB'; '0 B' for zero, missing or negative input
   */
  _formatBytes(bytes) {
    if (!(bytes > 0)) return '0 B';
    const units = ['B', 'KB', 'MB', 'GB', 'TB'];
    let value = bytes;
    let unit = 0;
    // Repeated division avoids floating-point log error and caps at the largest unit.
    while (value >= 1024 && unit < units.length - 1) {
      value /= 1024;
      unit++;
    }
    return `${value.toFixed(1)} ${units[unit]}`;
  }
}
|
|
356
|
+
|
|
357
|
+
// Public API of this module: the BackupManager class.
module.exports = { BackupManager };
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
// Soul KV-Cache — Context compressor. Reduces documents to key-value pairs.
|
|
2
|
+
const { extractKeywords } = require('./agent-adapter');
|
|
3
|
+
|
|
4
|
+
/**
 * Compresses full text into a structured KV representation.
 * No LLM call — uses pattern-based extractive summarization: sentences are ranked
 * by keyword relevance and the best ones that fit the budget are kept.
 *
 * @param {string} text - Full document text
 * @param {number} targetTokens - Target compressed size in estimated tokens
 * @returns {{ keys: string[], compressed: string, ratio: number }}
 *   `ratio` is compressed length / original length, rounded to two decimals
 *   (1 for empty input).
 */
function compress(text, targetTokens = 1000) {
  if (!text || text.length === 0) {
    return { keys: [], compressed: '', ratio: 1 };
  }

  const keys = extractKeywords(text, 15);
  const ranked = scoreSentences(splitSentences(text), keys);

  // Select top sentences until we hit the token budget
  const maxChars = targetTokens * 3; // conservative char-to-token ratio
  let charCount = 0;
  const selected = [];

  for (const item of ranked) {
    // Skip (rather than stop at) a sentence that does not fit: one oversized
    // high-ranking sentence must not prevent shorter ones from being selected.
    if (charCount + item.sentence.length > maxChars) continue;
    selected.push(item);
    charCount += item.sentence.length;
  }

  // Re-order by original position for readability
  selected.sort((a, b) => a.index - b.index);
  const compressed = selected.map(s => s.sentence).join(' ');

  return {
    keys,
    compressed,
    ratio: Math.round((compressed.length / text.length) * 100) / 100,
  };
}
|
|
42
|
+
|
|
43
|
+
/**
 * Renders a snapshot's keys and context as human-readable text.
 * Sections (Keywords, Summary, Decisions, TODO) appear only when the snapshot
 * has data for them and are separated by blank lines.
 *
 * @param {object} snapshot - KV-Cache session object
 * @returns {string} Human-readable context ('' when snapshot is missing)
 */
function decompress(snapshot) {
  if (!snapshot) return '';

  const ctx = snapshot.context;
  const asBullets = (items) => items.map(item => ` - ${item}`).join('\n');

  const sections = [
    snapshot.keys?.length > 0 ? `Keywords: ${snapshot.keys.join(', ')}` : null,
    ctx?.summary ? `Summary: ${ctx.summary}` : null,
    ctx?.decisions?.length > 0 ? `Decisions:\n${asBullets(ctx.decisions)}` : null,
    ctx?.todo?.length > 0 ? `TODO:\n${asBullets(ctx.todo)}` : null,
  ];

  return sections.filter(section => section !== null).join('\n\n');
}
|
|
69
|
+
|
|
70
|
+
/**
 * Breaks text into sentence-like fragments. Handles Korean and English.
 * Lines are the primary unit; only lines of 150+ characters are split further
 * at sentence boundaries. Fragments of 5 characters or fewer are discarded.
 *
 * @param {string} text
 * @returns {string[]}
 */
function splitSentences(text) {
  const isLongEnough = (fragment) => fragment.length > 5;

  // Line breaks come first (most reliable for Korean docs)
  const lines = text.split(/\n+/).map(line => line.trim()).filter(isLongEnough);

  return lines.flatMap((line) => {
    if (line.length < 150) return [line];

    // Long line: cut after English terminators (.!?) followed by whitespace,
    // or right after one of the listed Korean sentence endings.
    return line
      .split(/(?<=[.!?])\s+|(?<=(?:습니다|입니다|했다|됐다|한다|됩니다|합니다|있다|없다|이다|된다|한다|해야|할것|하자|함\.|음\.|됨\.))\s*/)
      .map(piece => piece.trim())
      .filter(isLongEnough);
  });
}
|
|
97
|
+
|
|
98
|
+
/**
 * Scores sentences by keyword relevance.
 * Each non-overlapping, case-insensitive keyword occurrence adds 1 to the score.
 * Empty or non-string keywords are ignored.
 *
 * @param {string[]} sentences
 * @param {string[]} keywords
 * @returns {{ sentence: string, score: number, index: number }[]}
 *   One entry per sentence, sorted by score descending; `index` is the
 *   sentence's position in the input.
 */
function scoreSentences(sentences, keywords) {
  // Sentences are compared in lower case, so keywords must be lower-cased too.
  // An empty keyword would match at every position, so it is dropped.
  const needles = keywords
    .filter(kw => typeof kw === 'string' && kw.length > 0)
    .map(kw => kw.toLowerCase());
  const lastIndex = sentences.length - 1;

  const scored = sentences.map((sentence, index) => {
    const lower = sentence.toLowerCase();
    let score = 0;

    for (const needle of needles) {
      // Literal (non-regex) count of non-overlapping occurrences.
      score += lower.split(needle).length - 1;
    }

    // Bonus for first/last sentences (usually most informative)
    if (index === 0) score += 2;
    if (index === lastIndex) score += 1;

    // Bonus for shorter sentences (more likely to be conclusions)
    if (sentence.length < 100) score += 0.5;

    return { sentence, score, index };
  });

  // Sort by score descending
  return scored.sort((a, b) => b.score - a.score);
}
|
|
129
|
+
|
|
130
|
+
// Public API of this module; the two helpers are exported alongside compress/decompress.
module.exports = { compress, decompress, splitSentences, scoreSentences };
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
// Soul KV-Cache — Embedding engine. Generates and searches vector embeddings via Ollama.
|
|
2
|
+
const http = require('http');
|
|
3
|
+
|
|
4
|
+
/**
 * Embedding engine for semantic search via Ollama local API.
 * Converts text to vector embeddings and computes cosine similarity.
 *
 * Supported models: nomic-embed-text (recommended), qwen3, llama3, etc.
 * Any Ollama model that supports /api/embeddings endpoint works.
 */
class EmbeddingEngine {
  /**
   * @param {object} config - Embedding config from config.KV_CACHE.embedding
   * @param {string} config.model - Ollama model name
   * @param {string} config.endpoint - Ollama endpoint (default: http://127.0.0.1:11434)
   */
  constructor(config = {}) {
    this.model = config.model || 'nomic-embed-text';
    this.endpoint = config.endpoint || 'http://127.0.0.1:11434';
    this.dimensions = null; // Set after first embedding call
    this._available = null; // Cached availability check
  }

  /**
   * Check if Ollama is available and the model supports embeddings.
   * The answer is cached on the instance after the first probe.
   *
   * @returns {Promise<boolean>}
   */
  async isAvailable() {
    if (this._available !== null) return this._available;
    try {
      const vec = await this.embed('test');
      this._available = vec.length > 0;
      this.dimensions = vec.length;
      return this._available;
    } catch (e) {
      this._available = false;
      return false;
    }
  }

  /**
   * Generate embedding vector for text.
   * Request failures are not thrown: if neither endpoint yields a vector the
   * result is an empty array.
   *
   * @param {string} text - Input text
   * @returns {Promise<number[]>} Embedding vector ([] for empty/non-string input or on failure)
   */
  async embed(text) {
    if (typeof text !== 'string' || text.trim().length === 0) return [];

    // Truncate to ~2000 chars for embedding efficiency
    const input = text.length > 2000 ? text.slice(0, 2000) : text;

    // Try /api/embeddings first (older API), then /api/embed (newer)
    for (const apiPath of ['/api/embeddings', '/api/embed']) {
      try {
        const body = apiPath === '/api/embeddings'
          ? { model: this.model, prompt: input }
          : { model: this.model, input: input };

        const result = await this._post(apiPath, body);

        if (result.embedding && Array.isArray(result.embedding)) {
          this.dimensions = result.embedding.length;
          return result.embedding;
        }
        if (result.embeddings && Array.isArray(result.embeddings) && result.embeddings[0]) {
          this.dimensions = result.embeddings[0].length;
          return result.embeddings[0];
        }
      } catch (e) {
        // Deliberate best-effort: fall through to the next endpoint.
        continue;
      }
    }

    return [];
  }

  /**
   * Generate embeddings for multiple texts, one request at a time, in input order.
   *
   * @param {string[]} texts
   * @returns {Promise<number[][]>}
   */
  async embedBatch(texts) {
    const results = [];
    for (const text of texts) {
      results.push(await this.embed(text));
    }
    return results;
  }

  /**
   * Compute cosine similarity between two vectors.
   *
   * @param {number[]} a
   * @param {number[]} b
   * @returns {number} Similarity in [-1, 1]; 0 when either vector is missing,
   *   empty, all-zero, or the lengths differ
   */
  cosineSimilarity(a, b) {
    if (!a || !b || a.length === 0 || b.length === 0 || a.length !== b.length) return 0;

    let dotProduct = 0;
    let normA = 0;
    let normB = 0;

    for (let i = 0; i < a.length; i++) {
      dotProduct += a[i] * b[i];
      normA += a[i] * a[i];
      normB += b[i] * b[i];
    }

    const denominator = Math.sqrt(normA) * Math.sqrt(normB);
    return denominator === 0 ? 0 : dotProduct / denominator;
  }

  /**
   * Search for the most similar vectors to a query vector.
   *
   * @param {number[]} queryVec - Query embedding vector
   * @param {{ id: string, vector: number[] }[]} candidates - Candidate vectors
   * @param {number} topK - Number of results to return
   * @param {number} threshold - Minimum similarity for a candidate to be kept
   * @returns {{ id: string, score: number }[]} Best matches, highest score first
   */
  rankBySimilarity(queryVec, candidates, topK = 10, threshold = 0.3) {
    if (!queryVec || queryVec.length === 0) return [];
    if (!Array.isArray(candidates) || candidates.length === 0) return [];

    const scored = candidates
      .map(c => ({
        id: c.id,
        score: this.cosineSimilarity(queryVec, c.vector),
      }))
      .filter(c => c.score >= threshold)
      .sort((a, b) => b.score - a.score);

    return scored.slice(0, topK);
  }

  /**
   * Create a searchable text from a snapshot for embedding.
   * Joins keys, summary, decisions and todo (whichever are present) with '. '.
   *
   * @param {object} snapshot - KV-Cache session snapshot
   * @returns {string} Combined text for embedding ('' when snapshot is missing)
   */
  snapshotToText(snapshot) {
    if (!snapshot) return '';

    const parts = [];
    if (snapshot.keys?.length > 0) parts.push(snapshot.keys.join(' '));
    if (snapshot.context?.summary) parts.push(snapshot.context.summary);
    if (snapshot.context?.decisions?.length > 0) {
      parts.push(snapshot.context.decisions.join(' '));
    }
    if (snapshot.context?.todo?.length > 0) {
      parts.push(snapshot.context.todo.join(' '));
    }
    return parts.join('. ');
  }

  /**
   * HTTP POST helper for Ollama API.
   * NOTE(review): requests always go through the `http` module and only the
   * endpoint's hostname and port are used, so https endpoints and endpoints with
   * a base path are not supported here.
   *
   * @param {string} path - API path
   * @param {object} body - Request body (sent as JSON)
   * @returns {Promise<object>} Response JSON; rejects on status >= 400, invalid
   *   JSON, network error, or timeout (30 s)
   */
  _post(path, body) {
    return new Promise((resolve, reject) => {
      const url = new URL(this.endpoint);
      const options = {
        hostname: url.hostname,
        port: url.port || 11434,
        path,
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        timeout: 30000,
      };

      const req = http.request(options, res => {
        let data = '';
        res.on('data', chunk => { data += chunk; });
        res.on('end', () => {
          if (res.statusCode >= 400) {
            reject(new Error(`Ollama ${res.statusCode}: ${data.slice(0, 200)}`));
            return;
          }
          try {
            resolve(JSON.parse(data));
          } catch (e) {
            reject(new Error(`Invalid JSON from Ollama: ${data.slice(0, 100)}`));
          }
        });
      });

      req.on('error', reject);
      req.on('timeout', () => {
        // The timeout event alone does not abort the request; destroy it explicitly.
        req.destroy();
        reject(new Error('Ollama request timed out'));
      });

      req.write(JSON.stringify(body));
      req.end();
    });
  }
}
|
|
204
|
+
|
|
205
|
+
// Public API of this module: the EmbeddingEngine class.
module.exports = { EmbeddingEngine };
|