n2-soul 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,446 @@
1
+ // Soul KV-Cache — Orchestrator. Coordinates snapshot, compressor, and adapter.
2
+ const path = require('path');
3
+ const fs = require('fs');
4
+ const { logError } = require('../utils');
5
+ const { SnapshotEngine } = require('./snapshot');
6
+ const { compress, decompress } = require('./compressor');
7
+ const { fromMcpSession, toResumePrompt, extractKeywords } = require('./agent-adapter');
8
+ const { createSession, migrateSession } = require('./schema');
9
+ const { extractAtLevel, autoLevel } = require('./token-saver');
10
+ const { TierManager } = require('./tier-manager');
11
+
12
+ /**
13
+ * Creates the appropriate storage engine based on config.
14
+ * For SQLite backend, triggers async init in background.
15
+ *
16
+ * @param {string} dataDir
17
+ * @param {object} config
18
+ * @returns {SnapshotEngine|import('./sqlite-store').SqliteStore}
19
+ */
20
+ function createStorageEngine(dataDir, config) {
21
+ const backend = config.backend || 'json';
22
+ const snapshotDir = config.snapshotDir || path.join(dataDir, 'kv-cache', 'snapshots');
23
+ let engine;
24
+
25
+ if (backend === 'sqlite') {
26
+ try {
27
+ const { SqliteStore, initSqlJs } = require('./sqlite-store');
28
+ const sqliteDir = config.sqliteDir || path.join(dataDir, 'kv-cache', 'sqlite');
29
+ engine = new SqliteStore(sqliteDir);
30
+ // Trigger async init in background
31
+ initSqlJs().then(() => {
32
+ engine._ready = true;
33
+ }).catch(e => {
34
+ console.error(`[kv-cache] SQLite init failed: ${e.message}`);
35
+ });
36
+ } catch (e) {
37
+ logError('kv-cache:sqlite', `SQLite unavailable (${e.message}), falling back to JSON`);
38
+ engine = new SnapshotEngine(snapshotDir);
39
+ }
40
+ } else {
41
+ engine = new SnapshotEngine(snapshotDir);
42
+ }
43
+
44
+ // Wrap with TierManager if tier config is present
45
+ const tierConfig = config.tier;
46
+ if (tierConfig) {
47
+ return new TierManager(engine, tierConfig);
48
+ }
49
+
50
+ return engine;
51
+ }
52
+
53
+ /**
54
+ * Main KV-Cache orchestrator.
55
+ * Coordinates snapshot persistence, context compression, and session management.
56
+ */
57
+ class SoulKVCache {
58
+ /**
59
+ * @param {string} dataDir - Soul data directory (config.DATA_DIR)
60
+ * @param {object} config - KV_CACHE config section
61
+ */
62
+ constructor(dataDir, config = {}) {
63
+ this.snapshot = createStorageEngine(dataDir, config);
64
+ this.dataDir = dataDir;
65
+ this.config = {
66
+ backend: config.backend || 'json',
67
+ compressionTarget: config.compressionTarget || 1000,
68
+ maxSnapshotsPerProject: config.maxSnapshotsPerProject || 50,
69
+ maxSnapshotAgeDays: config.maxSnapshotAgeDays || 30,
70
+ tokenBudget: config.tokenBudget || {
71
+ bootContext: 2000,
72
+ searchResult: 500,
73
+ progressiveLoad: true,
74
+ },
75
+ };
76
+
77
+ // Embedding engine (optional, requires Ollama)
78
+ this.embedding = null;
79
+ this._embeddingReady = false;
80
+ const embConfig = config.embedding;
81
+ if (embConfig?.enabled) {
82
+ const { EmbeddingEngine } = require('./embedding');
83
+ this.embedding = new EmbeddingEngine(embConfig);
84
+ // Check availability in background (non-blocking)
85
+ this.embedding.isAvailable().then(ok => {
86
+ this._embeddingReady = ok;
87
+ if (ok) {
88
+ logError('kv-cache:embedding', `Embedding ready: ${embConfig.model} (${this.embedding.dimensions}d)`);
89
+ } else {
90
+ logError('kv-cache:embedding', 'Embedding unavailable, falling back to keyword search');
91
+ }
92
+ }).catch(() => {
93
+ this._embeddingReady = false;
94
+ });
95
+ }
96
+
97
+ // Backup manager (optional)
98
+ this._backup = null;
99
+ this._backupTimer = null;
100
+ const backupConfig = config.backup;
101
+ if (backupConfig?.enabled) {
102
+ const { BackupManager } = require('./backup');
103
+ this._backup = new BackupManager(dataDir, backupConfig);
104
+
105
+ // Auto-backup scheduler
106
+ const schedule = backupConfig.schedule || 'daily';
107
+ if (schedule !== 'manual') {
108
+ const intervalMs = schedule === 'weekly' ? 7 * 24 * 60 * 60 * 1000 : 24 * 60 * 60 * 1000;
109
+ // First backup after 5 minutes, then on interval
110
+ this._backupTimer = setTimeout(() => {
111
+ this._runAutoBackup();
112
+ this._backupTimer = setInterval(() => this._runAutoBackup(), intervalMs);
113
+ }, 5 * 60 * 1000);
114
+ logError('kv-cache:backup', `Auto-backup scheduled: ${schedule}`);
115
+ }
116
+ }
117
+ }
118
+
119
+ /**
120
+ * Save a session snapshot with automatic compression.
121
+ *
122
+ * @param {string} agent - Agent name
123
+ * @param {string} project - Project name
124
+ * @param {object} sessionData - Raw session data (from n2_work_end or browser)
125
+ * @returns {string} Snapshot ID
126
+ */
127
+ save(agent, project, sessionData) {
128
+ // Convert MCP session data to normalized schema
129
+ const normalized = fromMcpSession({
130
+ agent,
131
+ project,
132
+ ...sessionData,
133
+ });
134
+
135
+ // Compress summary if it's too long
136
+ if (normalized.context.summary) {
137
+ const result = compress(
138
+ normalized.context.summary,
139
+ this.config.compressionTarget
140
+ );
141
+ normalized.keys = [...new Set([...normalized.keys, ...result.keys])];
142
+ normalized.context.summary = result.compressed || normalized.context.summary;
143
+ }
144
+
145
+ const id = this.snapshot.save(normalized);
146
+
147
+ // Generate embedding in background (non-blocking, fire-and-forget)
148
+ if (this._embeddingReady && this.embedding) {
149
+ const text = this.embedding.snapshotToText(normalized);
150
+ this.embedding.embed(text).then(vec => {
151
+ if (vec.length > 0) {
152
+ this._storeEmbedding(project, id, vec);
153
+ }
154
+ }).catch((e) => { logError('kv-cache:embed', e); });
155
+ }
156
+
157
+ return id;
158
+ }
159
+
160
+ /**
161
+ * Load the most recent snapshot for a project.
162
+ * Supports progressive loading levels (L1/L2/L3) and token budget.
163
+ *
164
+ * @param {string} project - Project name
165
+ * @param {object} options
166
+ * @param {string} options.level - Progressive level: 'L1', 'L2', 'L3', or 'auto'
167
+ * @param {number} options.budget - Token budget for context (used with 'auto' level)
168
+ * @returns {object|null} Session snapshot or null
169
+ */
170
+ load(project, options = {}) {
171
+ const snap = this.snapshot.loadLatest(project);
172
+ if (!snap) return null;
173
+
174
+ const level = options.level || 'auto';
175
+ const budget = options.budget || this.config.tokenBudget.bootContext;
176
+
177
+ if (level === 'auto') {
178
+ const result = autoLevel(snap, budget);
179
+ snap._resumePrompt = result.prompt;
180
+ snap._level = result.level;
181
+ snap._promptTokens = result.tokens;
182
+ } else {
183
+ const result = extractAtLevel(snap, level);
184
+ snap._resumePrompt = result.prompt;
185
+ snap._level = result.level;
186
+ snap._promptTokens = result.tokens;
187
+ }
188
+
189
+ return snap;
190
+ }
191
+
192
+ /**
193
+ * Search across snapshots by keyword or semantic similarity.
194
+ * When Ollama embedding is available: uses cosine similarity (semantic).
195
+ * Otherwise: falls back to keyword-based LIKE search.
196
+ *
197
+ * @param {string} query
198
+ * @param {string} project
199
+ * @param {number} limit
200
+ * @returns {object[]|Promise<object[]>}
201
+ */
202
+ search(query, project, limit = 10) {
203
+ // Try semantic search if embedding is available
204
+ if (this._embeddingReady && this.embedding) {
205
+ return this._semanticSearch(query, project, limit);
206
+ }
207
+ // Fallback to keyword search
208
+ return this.snapshot.search(query, project, limit);
209
+ }
210
+
211
+ /**
212
+ * Semantic search using Ollama embeddings.
213
+ * @param {string} query
214
+ * @param {string} project
215
+ * @param {number} limit
216
+ * @returns {Promise<object[]>}
217
+ */
218
+ async _semanticSearch(query, project, limit) {
219
+ try {
220
+ const queryVec = await this.embedding.embed(query);
221
+ if (queryVec.length === 0) {
222
+ // Embedding failed, fallback to keyword search
223
+ return this.snapshot.search(query, project, limit);
224
+ }
225
+
226
+ // Get all snapshots and compute similarity
227
+ const allSnaps = this.snapshot.list(project, 9999);
228
+ const candidates = [];
229
+
230
+ for (const snap of allSnaps) {
231
+ const stored = this._loadEmbedding(project, snap.id);
232
+ if (stored) {
233
+ candidates.push({ id: snap.id, vector: stored, snap });
234
+ }
235
+ }
236
+
237
+ if (candidates.length === 0) {
238
+ // No embeddings stored, fallback
239
+ return this.snapshot.search(query, project, limit);
240
+ }
241
+
242
+ const ranked = this.embedding.rankBySimilarity(queryVec, candidates, limit, 0.2);
243
+ return ranked.map(r => {
244
+ const snap = candidates.find(c => c.id === r.id)?.snap;
245
+ return { ...snap, _score: r.score, _searchMode: 'semantic' };
246
+ });
247
+ } catch (e) {
248
+ logError('kv-cache:semantic-search', e);
249
+ return this.snapshot.search(query, project, limit);
250
+ }
251
+ }
252
+
253
+ /**
254
+ * List snapshots for a project.
255
+ *
256
+ * @param {string} project
257
+ * @param {number} limit
258
+ * @returns {object[]}
259
+ */
260
+ listSnapshots(project, limit = 10) {
261
+ return this.snapshot.list(project, limit);
262
+ }
263
+
264
+ /**
265
+ * Garbage collect old snapshots.
266
+ *
267
+ * @param {string} project
268
+ * @param {number} maxAgeDays - Override config value
269
+ * @returns {{ deleted: number }}
270
+ */
271
+ gc(project, maxAgeDays) {
272
+ const age = maxAgeDays ?? this.config.maxSnapshotAgeDays;
273
+ return this.snapshot.gc(project, age, this.config.maxSnapshotsPerProject);
274
+ }
275
+
276
+ /**
277
+ * Estimate token count for a text string.
278
+ * Model-agnostic: uses chars/4 for ASCII, chars/2 for CJK.
279
+ *
280
+ * @param {string} text
281
+ * @returns {number}
282
+ */
283
+ estimateTokens(text) {
284
+ if (!text) return 0;
285
+ const cjkCount = (text.match(/[\u3000-\u9fff\uac00-\ud7af]/g) || []).length;
286
+ const asciiCount = text.length - cjkCount;
287
+ return Math.ceil(asciiCount / 4 + cjkCount / 2);
288
+ }
289
+
290
+ /**
291
+ * Migrate JSON snapshots to SQLite for a project.
292
+ * Only works when current backend is 'sqlite'.
293
+ *
294
+ * @param {string} project
295
+ * @returns {{ migrated: number, errors: number }|{ error: string }}
296
+ */
297
+ migrate(project) {
298
+ if (this.config.backend !== 'sqlite' || !this.snapshot.migrateFromJson) {
299
+ return { error: 'Migration only available when backend is sqlite' };
300
+ }
301
+ const jsonDir = path.join(this.dataDir, 'kv-cache', 'snapshots');
302
+ return this.snapshot.migrateFromJson(jsonDir, project);
303
+ }
304
+
305
+ /**
306
+ * Returns current backend info for diagnostics.
307
+ * @returns {{ backend: string, snapshotCount: number, embedding: string }}
308
+ */
309
+ backendInfo(project) {
310
+ const count = this.listSnapshots(project, 9999).length;
311
+ return {
312
+ backend: this.config.backend,
313
+ snapshotCount: count,
314
+ embedding: this._embeddingReady ? `active (${this.embedding?.model})` : 'off',
315
+ };
316
+ }
317
+
318
+ /**
319
+ * Store an embedding vector to disk.
320
+ * Vectors stored as JSON at {dataDir}/kv-cache/embeddings/{project}/{id}.json
321
+ *
322
+ * @param {string} project
323
+ * @param {string} snapshotId
324
+ * @param {number[]} vector
325
+ */
326
+ _storeEmbedding(project, snapshotId, vector) {
327
+ const dir = path.join(this.dataDir, 'kv-cache', 'embeddings', project);
328
+ if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
329
+ const filePath = path.join(dir, `${snapshotId}.json`);
330
+ fs.writeFileSync(filePath, JSON.stringify(vector));
331
+ }
332
+
333
+ /**
334
+ * Load an embedding vector from disk.
335
+ *
336
+ * @param {string} project
337
+ * @param {string} snapshotId
338
+ * @returns {number[]|null}
339
+ */
340
+ _loadEmbedding(project, snapshotId) {
341
+ const filePath = path.join(this.dataDir, 'kv-cache', 'embeddings', project, `${snapshotId}.json`);
342
+ if (!fs.existsSync(filePath)) return null;
343
+ try {
344
+ return JSON.parse(fs.readFileSync(filePath, 'utf-8'));
345
+ } catch (e) {
346
+ logError('kv-cache:load-embedding', e);
347
+ return null;
348
+ }
349
+ }
350
+
351
+ /**
352
+ * Backup project data into a sqlite-store 호환 DB.
353
+ * @param {string} project
354
+ * @param {object} options
355
+ * @returns {Promise<object>}
356
+ */
357
+ async backup(project, options = {}) {
358
+ if (!this._backup) {
359
+ // Lazy init even if not enabled in config
360
+ const { BackupManager } = require('./backup');
361
+ this._backup = new BackupManager(this.dataDir, {});
362
+ }
363
+ return this._backup.backup(project, options);
364
+ }
365
+
366
+ /**
367
+ * Restore from backup.
368
+ * @param {string} project
369
+ * @param {string} backupId
370
+ * @param {object} options
371
+ * @returns {Promise<object>}
372
+ */
373
+ async restore(project, backupId = null, options = {}) {
374
+ if (!this._backup) {
375
+ const { BackupManager } = require('./backup');
376
+ this._backup = new BackupManager(this.dataDir, {});
377
+ }
378
+ return this._backup.restore(project, backupId, options);
379
+ }
380
+
381
+ /**
382
+ * List backup history for a project.
383
+ * @param {string} project
384
+ * @returns {object[]}
385
+ */
386
+ listBackups(project) {
387
+ if (!this._backup) {
388
+ const { BackupManager } = require('./backup');
389
+ this._backup = new BackupManager(this.dataDir, {});
390
+ }
391
+ return this._backup.list(project);
392
+ }
393
+
394
+ /**
395
+ * Backup status for a project.
396
+ * @param {string} project
397
+ * @returns {object}
398
+ */
399
+ backupStatus(project) {
400
+ if (!this._backup) {
401
+ const { BackupManager } = require('./backup');
402
+ this._backup = new BackupManager(this.dataDir, {});
403
+ }
404
+ return this._backup.status(project);
405
+ }
406
+
407
+ /**
408
+ * Auto-backup all known projects.
409
+ * Scans snapshot directory for project folders and backs up each.
410
+ */
411
+ async _runAutoBackup() {
412
+ if (!this._backup) return;
413
+ const snapBaseDir = path.join(this.dataDir, 'kv-cache', 'snapshots');
414
+
415
+ try {
416
+ if (!fs.existsSync(snapBaseDir)) return;
417
+ const projects = fs.readdirSync(snapBaseDir, { withFileTypes: true })
418
+ .filter(d => d.isDirectory() && !d.name.startsWith('_'))
419
+ .map(d => d.name);
420
+
421
+ for (const project of projects) {
422
+ try {
423
+ const result = await this._backup.backup(project, {});
424
+ if (result.type !== 'skip' && result.type !== 'empty') {
425
+ console.error(`[kv-cache] Auto-backup: ${project} → ${result.sizeFormatted} (${result.type})`);
426
+ }
427
+ } catch (e) { logError('kv-cache:auto-backup', `${project}: ${e.message}`); }
428
+ }
429
+ } catch (err) {
430
+ console.error(`[kv-cache] Auto-backup error: ${err.message}`);
431
+ }
432
+ }
433
+
434
+ /**
435
+ * Stop auto-backup scheduler and cleanup.
436
+ */
437
+ stopAutoBackup() {
438
+ if (this._backupTimer) {
439
+ clearTimeout(this._backupTimer);
440
+ clearInterval(this._backupTimer);
441
+ this._backupTimer = null;
442
+ }
443
+ }
444
+ }
445
+
446
+ module.exports = { SoulKVCache };
@@ -0,0 +1,108 @@
1
+ // Soul KV-Cache — Universal agent session schema. Model-agnostic, distribution-ready.
2
+ const crypto = require('crypto');
3
+
4
+ /** Schema version — increment on breaking changes to session structure. */
5
+ const SCHEMA_VERSION = 1;
6
+
7
/**
 * Validates and creates a normalized agent session object.
 * Works for any agent type: MCP, browser executor, or external.
 *
 * Missing or falsy fields are replaced with defaults: a fresh UUID for `id`,
 * the current time for `startedAt`, `'unknown'` / `'default'` for the agent and
 * project names, `0` for counters and `null` for optional references.
 *
 * @param {object|null} [input] - Raw session data; `null`/`undefined` yields an all-defaults session
 * @returns {object} Validated session object conforming to schema
 */
function createSession(input = {}) {
  // Default parameters only cover `undefined`; treat an explicit `null` the same way.
  const src = input ?? {};
  return {
    schemaVersion: SCHEMA_VERSION,
    id: src.id || crypto.randomUUID(),
    agentName: src.agentName || 'unknown',
    agentType: validateAgentType(src.agentType),
    model: src.model || null,

    startedAt: src.startedAt || new Date().toISOString(),
    endedAt: src.endedAt || null,
    turnCount: src.turnCount || 0,
    tokenEstimate: src.tokenEstimate || 0,

    keys: Array.isArray(src.keys) ? src.keys : [],
    context: normalizeContext(src.context),

    parentSessionId: src.parentSessionId || null,
    projectName: src.projectName || 'default',
  };
}
34
+
35
/**
 * Coerces an agent type to one of the supported values.
 * Anything other than 'mcp', 'browser' or 'external' maps to 'external'.
 *
 * @param {string} type
 * @returns {'mcp'|'browser'|'external'}
 */
function validateAgentType(type) {
  switch (type) {
    case 'mcp':
    case 'browser':
    case 'external':
      return type;
    default:
      return 'external';
  }
}
44
+
45
/**
 * Produces a context object that always has the four expected fields.
 * `summary` falls back to an empty string; `decisions`, `filesChanged` and
 * `todo` fall back to empty arrays unless the input value is already an array
 * (in which case the same array instance is reused).
 *
 * @param {object} ctx - Raw context; may be null/undefined
 * @returns {{ summary: string, decisions: Array, filesChanged: Array, todo: Array }}
 */
function normalizeContext(ctx) {
  const { summary, decisions, filesChanged, todo } = ctx || {};
  const listOrEmpty = (value) => (Array.isArray(value) ? value : []);

  return {
    summary: summary || '',
    decisions: listOrEmpty(decisions),
    filesChanged: listOrEmpty(filesChanged),
    todo: listOrEmpty(todo),
  };
}
59
+
60
/**
 * Merges two sessions (e.g., continuing from a parent session).
 * The newer session's context takes priority; parent keys are preserved.
 *
 * The result is a fresh session built from `child` whose `parentSessionId`
 * points at `parent`, whose keys are the de-duplicated union of both sessions
 * (parent keys first), and whose TODO list is the parent's truthy TODO entries
 * followed by the child's, also de-duplicated.
 *
 * @param {object} parent - Previous session
 * @param {object} child - Current session
 * @returns {object} Merged session
 */
function mergeSession(parent, child) {
  const merged = createSession(child);
  merged.parentSessionId = parent.id;

  // Merge keys (deduplicate). A parent without a keys array (e.g. a legacy
  // snapshot) contributes nothing rather than throwing on spread.
  const parentKeys = Array.isArray(parent.keys) ? parent.keys : [];
  merged.keys = [...new Set([...parentKeys, ...merged.keys])];

  // Carry forward unresolved TODOs from parent; ignore a non-array value so a
  // stray string is not spread character by character.
  const parentTodo = Array.isArray(parent.context?.todo)
    ? parent.context.todo.filter(Boolean)
    : [];
  merged.context.todo = [...new Set([...parentTodo, ...merged.context.todo])];

  return merged;
}
84
+
85
/**
 * Brings a stored snapshot up to the current schema version.
 * The snapshot is upgraded in place and the same object is returned; a falsy
 * snapshot yields a brand-new default session instead.
 *
 * @param {object} snapshot - Raw snapshot (possibly old version)
 * @returns {object} Migrated snapshot at current schema version
 */
function migrateSession(snapshot) {
  if (!snapshot) {
    return createSession();
  }

  const storedVersion = snapshot.schemaVersion || 0;
  const isPreV1 = storedVersion < 1;

  // v0 → v1: stamp the version and make sure a context object exists
  if (isPreV1) {
    const lacksContext = !snapshot.context;
    snapshot.schemaVersion = 1;
    if (lacksContext) {
      snapshot.context = normalizeContext({});
    }
  }

  // Later schema upgrades are appended below, each guarded by its own
  // `storedVersion < N` check.

  return snapshot;
}
107
+
108
+ module.exports = { SCHEMA_VERSION, createSession, validateAgentType, normalizeContext, mergeSession, migrateSession };