@psiclawops/hypermem 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/background-indexer.d.ts +132 -0
- package/dist/background-indexer.d.ts.map +1 -0
- package/dist/background-indexer.js +1044 -0
- package/dist/cache.d.ts +110 -0
- package/dist/cache.d.ts.map +1 -0
- package/dist/cache.js +495 -0
- package/dist/compaction-fence.d.ts +89 -0
- package/dist/compaction-fence.d.ts.map +1 -0
- package/dist/compaction-fence.js +153 -0
- package/dist/compositor.d.ts +226 -0
- package/dist/compositor.d.ts.map +1 -0
- package/dist/compositor.js +2558 -0
- package/dist/content-type-classifier.d.ts +41 -0
- package/dist/content-type-classifier.d.ts.map +1 -0
- package/dist/content-type-classifier.js +181 -0
- package/dist/cross-agent.d.ts +62 -0
- package/dist/cross-agent.d.ts.map +1 -0
- package/dist/cross-agent.js +259 -0
- package/dist/db.d.ts +131 -0
- package/dist/db.d.ts.map +1 -0
- package/dist/db.js +402 -0
- package/dist/desired-state-store.d.ts +100 -0
- package/dist/desired-state-store.d.ts.map +1 -0
- package/dist/desired-state-store.js +222 -0
- package/dist/doc-chunk-store.d.ts +140 -0
- package/dist/doc-chunk-store.d.ts.map +1 -0
- package/dist/doc-chunk-store.js +391 -0
- package/dist/doc-chunker.d.ts +99 -0
- package/dist/doc-chunker.d.ts.map +1 -0
- package/dist/doc-chunker.js +324 -0
- package/dist/dreaming-promoter.d.ts +86 -0
- package/dist/dreaming-promoter.d.ts.map +1 -0
- package/dist/dreaming-promoter.js +381 -0
- package/dist/episode-store.d.ts +49 -0
- package/dist/episode-store.d.ts.map +1 -0
- package/dist/episode-store.js +135 -0
- package/dist/fact-store.d.ts +75 -0
- package/dist/fact-store.d.ts.map +1 -0
- package/dist/fact-store.js +236 -0
- package/dist/fleet-store.d.ts +144 -0
- package/dist/fleet-store.d.ts.map +1 -0
- package/dist/fleet-store.js +276 -0
- package/dist/fos-mod.d.ts +178 -0
- package/dist/fos-mod.d.ts.map +1 -0
- package/dist/fos-mod.js +416 -0
- package/dist/hybrid-retrieval.d.ts +64 -0
- package/dist/hybrid-retrieval.d.ts.map +1 -0
- package/dist/hybrid-retrieval.js +344 -0
- package/dist/image-eviction.d.ts +49 -0
- package/dist/image-eviction.d.ts.map +1 -0
- package/dist/image-eviction.js +251 -0
- package/dist/index.d.ts +650 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +1072 -0
- package/dist/keystone-scorer.d.ts +51 -0
- package/dist/keystone-scorer.d.ts.map +1 -0
- package/dist/keystone-scorer.js +52 -0
- package/dist/knowledge-graph.d.ts +110 -0
- package/dist/knowledge-graph.d.ts.map +1 -0
- package/dist/knowledge-graph.js +305 -0
- package/dist/knowledge-lint.d.ts +29 -0
- package/dist/knowledge-lint.d.ts.map +1 -0
- package/dist/knowledge-lint.js +116 -0
- package/dist/knowledge-store.d.ts +72 -0
- package/dist/knowledge-store.d.ts.map +1 -0
- package/dist/knowledge-store.js +247 -0
- package/dist/library-schema.d.ts +22 -0
- package/dist/library-schema.d.ts.map +1 -0
- package/dist/library-schema.js +1038 -0
- package/dist/message-store.d.ts +89 -0
- package/dist/message-store.d.ts.map +1 -0
- package/dist/message-store.js +323 -0
- package/dist/metrics-dashboard.d.ts +114 -0
- package/dist/metrics-dashboard.d.ts.map +1 -0
- package/dist/metrics-dashboard.js +260 -0
- package/dist/obsidian-exporter.d.ts +57 -0
- package/dist/obsidian-exporter.d.ts.map +1 -0
- package/dist/obsidian-exporter.js +274 -0
- package/dist/obsidian-watcher.d.ts +147 -0
- package/dist/obsidian-watcher.d.ts.map +1 -0
- package/dist/obsidian-watcher.js +403 -0
- package/dist/open-domain.d.ts +46 -0
- package/dist/open-domain.d.ts.map +1 -0
- package/dist/open-domain.js +125 -0
- package/dist/preference-store.d.ts +54 -0
- package/dist/preference-store.d.ts.map +1 -0
- package/dist/preference-store.js +109 -0
- package/dist/preservation-gate.d.ts +82 -0
- package/dist/preservation-gate.d.ts.map +1 -0
- package/dist/preservation-gate.js +150 -0
- package/dist/proactive-pass.d.ts +63 -0
- package/dist/proactive-pass.d.ts.map +1 -0
- package/dist/proactive-pass.js +239 -0
- package/dist/profiles.d.ts +44 -0
- package/dist/profiles.d.ts.map +1 -0
- package/dist/profiles.js +227 -0
- package/dist/provider-translator.d.ts +50 -0
- package/dist/provider-translator.d.ts.map +1 -0
- package/dist/provider-translator.js +403 -0
- package/dist/rate-limiter.d.ts +76 -0
- package/dist/rate-limiter.d.ts.map +1 -0
- package/dist/rate-limiter.js +179 -0
- package/dist/repair-tool-pairs.d.ts +38 -0
- package/dist/repair-tool-pairs.d.ts.map +1 -0
- package/dist/repair-tool-pairs.js +138 -0
- package/dist/retrieval-policy.d.ts +51 -0
- package/dist/retrieval-policy.d.ts.map +1 -0
- package/dist/retrieval-policy.js +77 -0
- package/dist/schema.d.ts +15 -0
- package/dist/schema.d.ts.map +1 -0
- package/dist/schema.js +229 -0
- package/dist/secret-scanner.d.ts +51 -0
- package/dist/secret-scanner.d.ts.map +1 -0
- package/dist/secret-scanner.js +248 -0
- package/dist/seed.d.ts +108 -0
- package/dist/seed.d.ts.map +1 -0
- package/dist/seed.js +177 -0
- package/dist/session-flusher.d.ts +53 -0
- package/dist/session-flusher.d.ts.map +1 -0
- package/dist/session-flusher.js +69 -0
- package/dist/session-topic-map.d.ts +41 -0
- package/dist/session-topic-map.d.ts.map +1 -0
- package/dist/session-topic-map.js +77 -0
- package/dist/spawn-context.d.ts +54 -0
- package/dist/spawn-context.d.ts.map +1 -0
- package/dist/spawn-context.js +159 -0
- package/dist/system-store.d.ts +73 -0
- package/dist/system-store.d.ts.map +1 -0
- package/dist/system-store.js +182 -0
- package/dist/temporal-store.d.ts +80 -0
- package/dist/temporal-store.d.ts.map +1 -0
- package/dist/temporal-store.js +149 -0
- package/dist/topic-detector.d.ts +35 -0
- package/dist/topic-detector.d.ts.map +1 -0
- package/dist/topic-detector.js +249 -0
- package/dist/topic-store.d.ts +45 -0
- package/dist/topic-store.d.ts.map +1 -0
- package/dist/topic-store.js +136 -0
- package/dist/topic-synthesizer.d.ts +51 -0
- package/dist/topic-synthesizer.d.ts.map +1 -0
- package/dist/topic-synthesizer.js +315 -0
- package/dist/trigger-registry.d.ts +63 -0
- package/dist/trigger-registry.d.ts.map +1 -0
- package/dist/trigger-registry.js +163 -0
- package/dist/types.d.ts +533 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +9 -0
- package/dist/vector-store.d.ts +170 -0
- package/dist/vector-store.d.ts.map +1 -0
- package/dist/vector-store.js +677 -0
- package/dist/version.d.ts +34 -0
- package/dist/version.d.ts.map +1 -0
- package/dist/version.js +34 -0
- package/dist/wiki-page-emitter.d.ts +65 -0
- package/dist/wiki-page-emitter.d.ts.map +1 -0
- package/dist/wiki-page-emitter.js +258 -0
- package/dist/work-store.d.ts +112 -0
- package/dist/work-store.d.ts.map +1 -0
- package/dist/work-store.js +273 -0
- package/package.json +1 -1
|
@@ -0,0 +1,391 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* hypermem Document Chunk Store
|
|
3
|
+
*
|
|
4
|
+
* Manages doc_chunks in library.db:
|
|
5
|
+
* - Atomic re-indexing by source hash (no stale/fresh coexistence)
|
|
6
|
+
* - FTS5 keyword search fallback
|
|
7
|
+
* - Collection-scoped queries with agent/tier filtering
|
|
8
|
+
* - Source tracking (what's indexed, when, what hash)
|
|
9
|
+
*/
|
|
10
|
+
// ─── Store ──────────────────────────────────────────────────────
|
|
11
|
+
export class DocChunkStore {
|
|
12
|
+
db;
|
|
13
|
+
constructor(db) {
|
|
14
|
+
this.db = db;
|
|
15
|
+
}
|
|
16
|
+
/**
|
|
17
|
+
* Index a set of chunks for a source file.
|
|
18
|
+
*
|
|
19
|
+
* Atomic re-indexing:
|
|
20
|
+
* 1. Check if source_hash has changed
|
|
21
|
+
* 2. If unchanged: skip (idempotent)
|
|
22
|
+
* 3. If changed: delete all chunks with old hash, insert new chunks — in one transaction
|
|
23
|
+
*
|
|
24
|
+
* This ensures no window where stale and fresh chunks coexist.
|
|
25
|
+
*/
|
|
26
|
+
indexChunks(chunks) {
|
|
27
|
+
if (chunks.length === 0) {
|
|
28
|
+
return { inserted: 0, deleted: 0, reindexed: false, skipped: true };
|
|
29
|
+
}
|
|
30
|
+
const first = chunks[0];
|
|
31
|
+
const { sourcePath, collection, sourceHash, scope, agentId } = first;
|
|
32
|
+
const now = new Date().toISOString();
|
|
33
|
+
// Check current indexed state
|
|
34
|
+
const existing = this.db
|
|
35
|
+
.prepare('SELECT source_hash, chunk_count FROM doc_sources WHERE source_path = ? AND collection = ?')
|
|
36
|
+
.get(sourcePath, collection);
|
|
37
|
+
if (existing && existing.source_hash === sourceHash) {
|
|
38
|
+
// Hash unchanged — no-op
|
|
39
|
+
return { inserted: 0, deleted: 0, reindexed: false, skipped: true };
|
|
40
|
+
}
|
|
41
|
+
// Hash changed (or first index) — atomic swap
|
|
42
|
+
let deleted = 0;
|
|
43
|
+
let inserted = 0;
|
|
44
|
+
// Use a transaction for atomicity
|
|
45
|
+
const run = this.db.prepare('SELECT 1').get; // warm
|
|
46
|
+
try {
|
|
47
|
+
// Begin transaction via exec
|
|
48
|
+
this.db.exec('BEGIN');
|
|
49
|
+
// Delete stale chunks for this source
|
|
50
|
+
if (existing) {
|
|
51
|
+
const result = this.db
|
|
52
|
+
.prepare('DELETE FROM doc_chunks WHERE source_path = ? AND collection = ?')
|
|
53
|
+
.run(sourcePath, collection);
|
|
54
|
+
deleted = result.changes;
|
|
55
|
+
}
|
|
56
|
+
// Insert new chunks
|
|
57
|
+
const insertChunk = this.db.prepare(`
|
|
58
|
+
INSERT OR REPLACE INTO doc_chunks
|
|
59
|
+
(id, collection, section_path, depth, content, token_estimate,
|
|
60
|
+
source_hash, source_path, scope, tier, agent_id, parent_path,
|
|
61
|
+
created_at, updated_at)
|
|
62
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
63
|
+
`);
|
|
64
|
+
for (const chunk of chunks) {
|
|
65
|
+
insertChunk.run(chunk.id, chunk.collection, chunk.sectionPath, chunk.depth, chunk.content, chunk.tokenEstimate, chunk.sourceHash, chunk.sourcePath, chunk.scope, chunk.tier ?? null, chunk.agentId ?? null, chunk.parentPath ?? null, now, now);
|
|
66
|
+
inserted++;
|
|
67
|
+
}
|
|
68
|
+
// Update source tracking
|
|
69
|
+
this.db.prepare(`
|
|
70
|
+
INSERT OR REPLACE INTO doc_sources
|
|
71
|
+
(source_path, collection, scope, agent_id, source_hash, chunk_count, indexed_at)
|
|
72
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
73
|
+
`).run(sourcePath, collection, scope, agentId ?? null, sourceHash, inserted, now);
|
|
74
|
+
this.db.exec('COMMIT');
|
|
75
|
+
}
|
|
76
|
+
catch (err) {
|
|
77
|
+
this.db.exec('ROLLBACK');
|
|
78
|
+
throw err;
|
|
79
|
+
}
|
|
80
|
+
return { inserted, deleted, reindexed: !!existing, skipped: false };
|
|
81
|
+
}
|
|
82
|
+
/**
|
|
83
|
+
* Query chunks by collection with optional filters.
|
|
84
|
+
* Falls back to FTS5 keyword search when keyword is provided.
|
|
85
|
+
*/
|
|
86
|
+
queryChunks(query) {
|
|
87
|
+
const { collection, scope, agentId, tier, limit = 20, keyword } = query;
|
|
88
|
+
if (keyword) {
|
|
89
|
+
return this.keywordSearch(keyword, query);
|
|
90
|
+
}
|
|
91
|
+
// Build WHERE clause
|
|
92
|
+
const conditions = ['collection = ?'];
|
|
93
|
+
const params = [collection];
|
|
94
|
+
if (scope) {
|
|
95
|
+
conditions.push('scope = ?');
|
|
96
|
+
params.push(scope);
|
|
97
|
+
}
|
|
98
|
+
if (agentId) {
|
|
99
|
+
conditions.push('(agent_id = ? OR agent_id IS NULL)');
|
|
100
|
+
params.push(agentId);
|
|
101
|
+
}
|
|
102
|
+
if (tier) {
|
|
103
|
+
conditions.push('(tier = ? OR tier IS NULL OR tier = \'all\')');
|
|
104
|
+
params.push(tier);
|
|
105
|
+
}
|
|
106
|
+
params.push(limit);
|
|
107
|
+
const rows = this.db
|
|
108
|
+
.prepare(`
|
|
109
|
+
SELECT id, collection, section_path, depth, content, token_estimate,
|
|
110
|
+
source_hash, source_path, scope, tier, agent_id, parent_path,
|
|
111
|
+
created_at, updated_at
|
|
112
|
+
FROM doc_chunks
|
|
113
|
+
WHERE ${conditions.join(' AND ')}
|
|
114
|
+
ORDER BY depth ASC, section_path ASC
|
|
115
|
+
LIMIT ?
|
|
116
|
+
`)
|
|
117
|
+
.all(...params);
|
|
118
|
+
return rows.map(this.mapRow);
|
|
119
|
+
}
|
|
120
|
+
/**
|
|
121
|
+
* FTS5 keyword search across chunks.
|
|
122
|
+
*/
|
|
123
|
+
keywordSearch(keyword, query) {
|
|
124
|
+
const { collection, agentId, tier, limit = 20 } = query;
|
|
125
|
+
const hasFilters = !!(agentId || tier);
|
|
126
|
+
const innerLimit = hasFilters ? limit * 4 : limit;
|
|
127
|
+
// Two-phase: FTS in subquery, metadata filter on small result set.
|
|
128
|
+
let sql = `
|
|
129
|
+
SELECT c.id, c.collection, c.section_path, c.depth, c.content, c.token_estimate,
|
|
130
|
+
c.source_hash, c.source_path, c.scope, c.tier, c.agent_id, c.parent_path,
|
|
131
|
+
c.created_at, c.updated_at
|
|
132
|
+
FROM (
|
|
133
|
+
SELECT rowid, rank FROM doc_chunks_fts WHERE doc_chunks_fts MATCH ? ORDER BY rank LIMIT ?
|
|
134
|
+
) sub
|
|
135
|
+
JOIN doc_chunks c ON c.rowid = sub.rowid
|
|
136
|
+
WHERE c.collection = ?
|
|
137
|
+
`;
|
|
138
|
+
const params = [keyword, innerLimit, collection];
|
|
139
|
+
if (agentId) {
|
|
140
|
+
sql += ' AND (c.agent_id = ? OR c.agent_id IS NULL)';
|
|
141
|
+
params.push(agentId);
|
|
142
|
+
}
|
|
143
|
+
if (tier) {
|
|
144
|
+
sql += " AND (c.tier = ? OR c.tier IS NULL OR c.tier = 'all')";
|
|
145
|
+
params.push(tier);
|
|
146
|
+
}
|
|
147
|
+
sql += ' ORDER BY sub.rank LIMIT ?';
|
|
148
|
+
params.push(limit * 3); // over-fetch to allow dedup
|
|
149
|
+
const rows = this.db.prepare(sql).all(...params);
|
|
150
|
+
// Deduplicate by source_hash to avoid returning identical content
|
|
151
|
+
// from multiple agent-specific copies of shared-fleet docs.
|
|
152
|
+
const seenHashes = new Set();
|
|
153
|
+
const deduped = rows.filter(r => {
|
|
154
|
+
const hash = r['source_hash'];
|
|
155
|
+
if (!hash)
|
|
156
|
+
return true;
|
|
157
|
+
if (seenHashes.has(hash))
|
|
158
|
+
return false;
|
|
159
|
+
seenHashes.add(hash);
|
|
160
|
+
return true;
|
|
161
|
+
});
|
|
162
|
+
return deduped.slice(0, limit).map(this.mapRow);
|
|
163
|
+
}
|
|
164
|
+
/**
|
|
165
|
+
* Get a single chunk by ID.
|
|
166
|
+
*/
|
|
167
|
+
getChunk(id) {
|
|
168
|
+
const row = this.db
|
|
169
|
+
.prepare(`
|
|
170
|
+
SELECT id, collection, section_path, depth, content, token_estimate,
|
|
171
|
+
source_hash, source_path, scope, tier, agent_id, parent_path,
|
|
172
|
+
created_at, updated_at
|
|
173
|
+
FROM doc_chunks WHERE id = ?
|
|
174
|
+
`)
|
|
175
|
+
.get(id);
|
|
176
|
+
return row ? this.mapRow(row) : null;
|
|
177
|
+
}
|
|
178
|
+
/**
|
|
179
|
+
* Check if a source file needs re-indexing.
|
|
180
|
+
* Returns true if the file has changed or has never been indexed.
|
|
181
|
+
*/
|
|
182
|
+
needsReindex(sourcePath, collection, currentHash) {
|
|
183
|
+
const row = this.db
|
|
184
|
+
.prepare('SELECT source_hash FROM doc_sources WHERE source_path = ? AND collection = ?')
|
|
185
|
+
.get(sourcePath, collection);
|
|
186
|
+
return !row || row.source_hash !== currentHash;
|
|
187
|
+
}
|
|
188
|
+
/**
|
|
189
|
+
* List all indexed sources, optionally filtered by agent or collection.
|
|
190
|
+
*/
|
|
191
|
+
listSources(opts) {
|
|
192
|
+
const conditions = [];
|
|
193
|
+
const params = [];
|
|
194
|
+
if (opts?.agentId) {
|
|
195
|
+
conditions.push('agent_id = ?');
|
|
196
|
+
params.push(opts.agentId);
|
|
197
|
+
}
|
|
198
|
+
if (opts?.collection) {
|
|
199
|
+
conditions.push('collection = ?');
|
|
200
|
+
params.push(opts.collection);
|
|
201
|
+
}
|
|
202
|
+
const where = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';
|
|
203
|
+
const rows = this.db
|
|
204
|
+
.prepare(`
|
|
205
|
+
SELECT source_path, collection, scope, agent_id, source_hash, chunk_count, indexed_at
|
|
206
|
+
FROM doc_sources ${where}
|
|
207
|
+
ORDER BY indexed_at DESC
|
|
208
|
+
`)
|
|
209
|
+
.all(...params);
|
|
210
|
+
return rows.map(r => ({
|
|
211
|
+
sourcePath: r['source_path'],
|
|
212
|
+
collection: r['collection'],
|
|
213
|
+
scope: r['scope'],
|
|
214
|
+
agentId: r['agent_id'],
|
|
215
|
+
sourceHash: r['source_hash'],
|
|
216
|
+
chunkCount: r['chunk_count'],
|
|
217
|
+
indexedAt: r['indexed_at'],
|
|
218
|
+
}));
|
|
219
|
+
}
|
|
220
|
+
/**
|
|
221
|
+
* Delete all chunks for a specific source file.
|
|
222
|
+
*/
|
|
223
|
+
deleteSource(sourcePath, collection) {
|
|
224
|
+
this.db.exec('BEGIN');
|
|
225
|
+
try {
|
|
226
|
+
const result = this.db
|
|
227
|
+
.prepare('DELETE FROM doc_chunks WHERE source_path = ? AND collection = ?')
|
|
228
|
+
.run(sourcePath, collection);
|
|
229
|
+
this.db.prepare('DELETE FROM doc_sources WHERE source_path = ? AND collection = ?')
|
|
230
|
+
.run(sourcePath, collection);
|
|
231
|
+
this.db.exec('COMMIT');
|
|
232
|
+
return result.changes;
|
|
233
|
+
}
|
|
234
|
+
catch (err) {
|
|
235
|
+
this.db.exec('ROLLBACK');
|
|
236
|
+
throw err;
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
/**
|
|
240
|
+
* Index simple string chunks with an optional session key (for ephemeral spawn context).
|
|
241
|
+
*
|
|
242
|
+
* Unlike indexChunks() which works with DocChunk objects and hash-based dedup,
|
|
243
|
+
* this method is designed for ad-hoc session-scoped content: it always inserts fresh
|
|
244
|
+
* rows tagged with the sessionKey, without hash-based skip logic.
|
|
245
|
+
*
|
|
246
|
+
* Chunks stored with a sessionKey are ephemeral — use clearSessionChunks() to remove them.
|
|
247
|
+
*/
|
|
248
|
+
indexDocChunks(agentId, source, chunks, options) {
|
|
249
|
+
if (chunks.length === 0)
|
|
250
|
+
return;
|
|
251
|
+
const now = new Date().toISOString();
|
|
252
|
+
const sessionKey = options?.sessionKey ?? null;
|
|
253
|
+
// Use a stable collection name derived from source path
|
|
254
|
+
const collection = `spawn/${agentId}`;
|
|
255
|
+
try {
|
|
256
|
+
this.db.exec('BEGIN');
|
|
257
|
+
const insert = this.db.prepare(`
|
|
258
|
+
INSERT INTO doc_chunks
|
|
259
|
+
(id, collection, section_path, depth, content, token_estimate,
|
|
260
|
+
source_hash, source_path, scope, tier, agent_id, parent_path,
|
|
261
|
+
session_key, created_at, updated_at)
|
|
262
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
263
|
+
`);
|
|
264
|
+
chunks.forEach((chunkContent, idx) => {
|
|
265
|
+
const id = `spawn:${agentId}:${sessionKey ?? 'none'}:${source}:${idx}:${Date.now()}`;
|
|
266
|
+
const tokenEstimate = Math.ceil(chunkContent.length / 4);
|
|
267
|
+
insert.run(id, collection, `${source}#chunk-${idx}`, 2, chunkContent, tokenEstimate, `spawn-${Date.now()}-${idx}`, // non-deduped hash
|
|
268
|
+
source, 'per-agent', null, agentId, null, sessionKey, now, now);
|
|
269
|
+
});
|
|
270
|
+
this.db.exec('COMMIT');
|
|
271
|
+
}
|
|
272
|
+
catch (err) {
|
|
273
|
+
try {
|
|
274
|
+
this.db.exec('ROLLBACK');
|
|
275
|
+
}
|
|
276
|
+
catch { /* ignore */ }
|
|
277
|
+
console.warn('[hypermem:doc-chunk-store] indexDocChunks failed:', err.message);
|
|
278
|
+
}
|
|
279
|
+
}
|
|
280
|
+
/**
|
|
281
|
+
* Query doc chunks by agentId+query string, with optional session key scoping.
|
|
282
|
+
* When sessionKey is provided, only chunks tagged with that session key are returned.
|
|
283
|
+
*/
|
|
284
|
+
queryDocChunks(agentId, query, options) {
|
|
285
|
+
const limit = options?.limit ?? 10;
|
|
286
|
+
const sessionKey = options?.sessionKey;
|
|
287
|
+
const collection = `spawn/${agentId}`;
|
|
288
|
+
try {
|
|
289
|
+
if (query.trim() && query.trim().length >= 3) {
|
|
290
|
+
// FTS5 keyword search
|
|
291
|
+
let sql = `
|
|
292
|
+
SELECT c.id, c.collection, c.section_path, c.depth, c.content, c.token_estimate,
|
|
293
|
+
c.source_hash, c.source_path, c.scope, c.tier, c.agent_id, c.parent_path,
|
|
294
|
+
c.created_at, c.updated_at
|
|
295
|
+
FROM (
|
|
296
|
+
SELECT rowid, rank FROM doc_chunks_fts WHERE doc_chunks_fts MATCH ? ORDER BY rank LIMIT ?
|
|
297
|
+
) sub
|
|
298
|
+
JOIN doc_chunks c ON c.rowid = sub.rowid
|
|
299
|
+
WHERE c.collection = ?
|
|
300
|
+
`;
|
|
301
|
+
const params = [query, limit * 3, collection];
|
|
302
|
+
if (sessionKey !== undefined) {
|
|
303
|
+
sql += ' AND c.session_key = ?';
|
|
304
|
+
params.push(sessionKey);
|
|
305
|
+
}
|
|
306
|
+
sql += ' ORDER BY sub.rank LIMIT ?';
|
|
307
|
+
params.push(limit);
|
|
308
|
+
const rows = this.db.prepare(sql).all(...params);
|
|
309
|
+
return rows.map(this.mapRow);
|
|
310
|
+
}
|
|
311
|
+
else {
|
|
312
|
+
// Fallback: return most recent chunks for this session
|
|
313
|
+
let sql = `
|
|
314
|
+
SELECT id, collection, section_path, depth, content, token_estimate,
|
|
315
|
+
source_hash, source_path, scope, tier, agent_id, parent_path,
|
|
316
|
+
created_at, updated_at
|
|
317
|
+
FROM doc_chunks
|
|
318
|
+
WHERE collection = ?
|
|
319
|
+
`;
|
|
320
|
+
const params = [collection];
|
|
321
|
+
if (sessionKey !== undefined) {
|
|
322
|
+
sql += ' AND session_key = ?';
|
|
323
|
+
params.push(sessionKey);
|
|
324
|
+
}
|
|
325
|
+
sql += ' ORDER BY created_at DESC LIMIT ?';
|
|
326
|
+
params.push(limit);
|
|
327
|
+
const rows = this.db.prepare(sql).all(...params);
|
|
328
|
+
return rows.map(this.mapRow);
|
|
329
|
+
}
|
|
330
|
+
}
|
|
331
|
+
catch (err) {
|
|
332
|
+
console.warn('[hypermem:doc-chunk-store] queryDocChunks failed:', err.message);
|
|
333
|
+
return [];
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
/**
|
|
337
|
+
* Delete all doc chunks associated with a specific session key.
|
|
338
|
+
* Call this when a spawn session is complete to release ephemeral storage.
|
|
339
|
+
*/
|
|
340
|
+
clearSessionChunks(sessionKey) {
|
|
341
|
+
try {
|
|
342
|
+
const result = this.db
|
|
343
|
+
.prepare('DELETE FROM doc_chunks WHERE session_key = ?')
|
|
344
|
+
.run(sessionKey);
|
|
345
|
+
return result.changes;
|
|
346
|
+
}
|
|
347
|
+
catch (err) {
|
|
348
|
+
console.warn('[hypermem:doc-chunk-store] clearSessionChunks failed:', err.message);
|
|
349
|
+
return 0;
|
|
350
|
+
}
|
|
351
|
+
}
|
|
352
|
+
/**
|
|
353
|
+
* Get chunk stats: count per collection.
|
|
354
|
+
*/
|
|
355
|
+
getStats() {
|
|
356
|
+
const rows = this.db.prepare(`
|
|
357
|
+
SELECT collection,
|
|
358
|
+
COUNT(*) as count,
|
|
359
|
+
COUNT(DISTINCT source_path) as sources,
|
|
360
|
+
SUM(token_estimate) as total_tokens
|
|
361
|
+
FROM doc_chunks
|
|
362
|
+
GROUP BY collection
|
|
363
|
+
ORDER BY collection
|
|
364
|
+
`).all();
|
|
365
|
+
return rows.map(r => ({
|
|
366
|
+
collection: r['collection'],
|
|
367
|
+
count: r['count'],
|
|
368
|
+
sources: r['sources'],
|
|
369
|
+
totalTokens: r['total_tokens'] ?? 0,
|
|
370
|
+
}));
|
|
371
|
+
}
|
|
372
|
+
mapRow(r) {
|
|
373
|
+
return {
|
|
374
|
+
id: r['id'],
|
|
375
|
+
collection: r['collection'],
|
|
376
|
+
sectionPath: r['section_path'],
|
|
377
|
+
depth: r['depth'],
|
|
378
|
+
content: r['content'],
|
|
379
|
+
tokenEstimate: r['token_estimate'],
|
|
380
|
+
sourceHash: r['source_hash'],
|
|
381
|
+
sourcePath: r['source_path'],
|
|
382
|
+
scope: r['scope'],
|
|
383
|
+
tier: r['tier'],
|
|
384
|
+
agentId: r['agent_id'],
|
|
385
|
+
parentPath: r['parent_path'],
|
|
386
|
+
createdAt: r['created_at'],
|
|
387
|
+
updatedAt: r['updated_at'],
|
|
388
|
+
};
|
|
389
|
+
}
|
|
390
|
+
}
|
|
391
|
+
//# sourceMappingURL=doc-chunk-store.js.map
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* hypermem Document Chunker
|
|
3
|
+
*
|
|
4
|
+
* Splits markdown documents into semantically coherent chunks for L3 indexing.
|
|
5
|
+
*
|
|
6
|
+
* Design principles:
|
|
7
|
+
* - Chunk by logical section (## / ###), NOT by token count
|
|
8
|
+
* - Each chunk is a self-contained policy/operational unit
|
|
9
|
+
* - Preserve section hierarchy for context assembly
|
|
10
|
+
* - Track source file hash for atomic re-indexing
|
|
11
|
+
* - Idempotent: same source produces same chunks (deterministic IDs)
|
|
12
|
+
*
|
|
13
|
+
* Collections (as defined in ACA offload spec):
|
|
14
|
+
* governance/policy — POLICY.md, shared-fleet
|
|
15
|
+
* governance/charter — CHARTER.md, per-tier (council/director)
|
|
16
|
+
* governance/comms — COMMS.md, shared-fleet
|
|
17
|
+
* operations/agents — AGENTS.md, per-tier
|
|
18
|
+
* operations/tools — TOOLS.md, per-agent
|
|
19
|
+
* memory/decisions — MEMORY.md, per-agent
|
|
20
|
+
* memory/daily — memory/YYYY-MM-DD.md, per-agent
|
|
21
|
+
* identity/soul — SOUL.md, per-agent (always-loaded kernel, but still indexed)
|
|
22
|
+
* identity/job — JOB.md, per-agent (demand-loaded during deliberation)
|
|
23
|
+
*/
|
|
24
|
+
/** A single indexed section of a markdown document, as stored in doc_chunks. */
export interface DocChunk {
    /** Unique deterministic ID: sha256(collection + sectionPath + sourceHash) */
    id: string;
    /** Collection path: governance/policy, operations/tools, etc. */
    collection: string;
    /** Full section path: "§3 > Naming > Single-Name Rule" */
    sectionPath: string;
    /** Section depth (0=root, 1=#, 2=##, 3=###) */
    depth: number;
    /** The actual text content of this chunk */
    content: string;
    /** Token estimate (rough: chars / 4) */
    tokenEstimate: number;
    /** SHA-256 of the source file at time of chunking */
    sourceHash: string;
    /** Source file path (relative to workspace) */
    sourcePath: string;
    /** Scope: shared-fleet | per-tier | per-agent */
    scope: 'shared-fleet' | 'per-tier' | 'per-agent';
    /** Tier filter (for per-tier scope): council | director | all */
    tier?: string;
    /** Agent ID (for per-agent scope) */
    agentId?: string;
    /** Parent section path (for hierarchy context) */
    parentPath?: string;
}
|
|
50
|
+
/** Options for chunkMarkdown()/chunkFile(): target collection plus scope metadata. */
export interface ChunkOptions {
    /** Collection path the chunks belong to (e.g. governance/policy). */
    collection: string;
    /** Source file path recorded on every emitted chunk. */
    sourcePath: string;
    /** Visibility scope: shared-fleet | per-tier | per-agent. */
    scope: DocChunk['scope'];
    /** Tier filter for per-tier scope (e.g. council, director). */
    tier?: string;
    /** Agent ID for per-agent scope. */
    agentId?: string;
    /** Minimum content length to emit a chunk (avoids empty section headers) */
    minContentLen?: number;
    /** Whether to include parent context prefix in chunk content */
    includeParentContext?: boolean;
}
|
|
61
|
+
/**
 * Hash a string with SHA-256.
 *
 * Used to fingerprint source files for atomic re-indexing
 * (see DocChunk.sourceHash). Digest format is presumably a hex
 * string — confirm against the implementation in doc-chunker.ts.
 */
export declare function hashContent(content: string): string;
|
|
65
|
+
/**
 * Chunk a markdown document into semantic sections.
 *
 * Approach:
 * - Level 1 (#) headings become top-level section anchors
 * - Level 2 (##) headings become primary chunks
 * - Level 3 (###) headings become sub-chunks under their parent
 * - Content before the first heading becomes a "preamble" chunk
 * - Empty sections (heading only, no content) are skipped unless minContentLen=0
 *
 * For documents with deeply nested content, we group level-3 sections under
 * their parent level-2 section. This keeps related policy sections together.
 *
 * @param content Raw markdown text to split.
 * @param opts    Collection/scope metadata stamped onto every chunk.
 * @returns Chunks in document order, with deterministic IDs (idempotent for
 *          identical input).
 */
export declare function chunkMarkdown(content: string, opts: ChunkOptions): DocChunk[];
|
|
79
|
+
/**
 * Chunk a file from disk.
 *
 * Convenience wrapper around chunkMarkdown(): reads filePath and supplies it
 * as opts.sourcePath (hence the Omit).
 */
export declare function chunkFile(filePath: string, opts: Omit<ChunkOptions, 'sourcePath'>): DocChunk[];
|
|
83
|
+
/**
 * Standard collection definitions for ACA workspace files.
 * Maps file names to collection paths and scope metadata.
 */
export interface CollectionDef {
    /** Collection path (e.g. governance/policy). */
    collection: string;
    /** Visibility scope for chunks from this file. */
    scope: DocChunk['scope'];
    /** Tier filter, when scope is per-tier. */
    tier?: string;
    /** Human-readable description of the collection. */
    description: string;
}
|
|
93
|
+
/** Built-in collection definitions for known ACA workspace files, keyed by file name. */
export declare const ACA_COLLECTIONS: Record<string, CollectionDef>;
|
|
94
|
+
/**
 * Infer the collection definition for a file based on its name.
 * Returns undefined if the file is not a known ACA file.
 *
 * @param fileName File name to look up (e.g. "POLICY.md").
 * @param agentId  Optional agent ID for per-agent collections.
 */
export declare function inferCollection(fileName: string, agentId?: string): CollectionDef | undefined;
|
|
99
|
+
//# sourceMappingURL=doc-chunker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"doc-chunker.d.ts","sourceRoot":"","sources":["../src/doc-chunker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAOH,MAAM,WAAW,QAAQ;IACvB,6EAA6E;IAC7E,EAAE,EAAE,MAAM,CAAC;IACX,iEAAiE;IACjE,UAAU,EAAE,MAAM,CAAC;IACnB,0DAA0D;IAC1D,WAAW,EAAE,MAAM,CAAC;IACpB,+CAA+C;IAC/C,KAAK,EAAE,MAAM,CAAC;IACd,4CAA4C;IAC5C,OAAO,EAAE,MAAM,CAAC;IAChB,wCAAwC;IACxC,aAAa,EAAE,MAAM,CAAC;IACtB,qDAAqD;IACrD,UAAU,EAAE,MAAM,CAAC;IACnB,+CAA+C;IAC/C,UAAU,EAAE,MAAM,CAAC;IACnB,iDAAiD;IACjD,KAAK,EAAE,cAAc,GAAG,UAAU,GAAG,WAAW,CAAC;IACjD,iEAAiE;IACjE,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,qCAAqC;IACrC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,kDAAkD;IAClD,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,YAAY;IAC3B,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC;IACzB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,4EAA4E;IAC5E,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gEAAgE;IAChE,oBAAoB,CAAC,EAAE,OAAO,CAAC;CAChC;AAID;;GAEG;AACH,wBAAgB,WAAW,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAEnD;AAgFD;;;;;;;;;;;;GAYG;AACH,wBAAgB,aAAa,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,YAAY,GAAG,QAAQ,EAAE,CA+I7E;AAED;;GAEG;AACH,wBAAgB,SAAS,CAAC,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,YAAY,EAAE,YAAY,CAAC,GAAG,QAAQ,EAAE,CAG9F;AAID;;;GAGG;AACH,MAAM,WAAW,aAAa;IAC5B,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC;IACzB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,eAAO,MAAM,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,aAAa,CA8CzD,CAAC;AAEF;;;GAGG;AACH,wBAAgB,eAAe,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,GAAG,aAAa,GAAG,SAAS,CAc7F"}
|