@psiclawops/hypermem 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163)
  1. package/ARCHITECTURE.md +12 -3
  2. package/README.md +30 -6
  3. package/bin/hypermem-status.mjs +166 -0
  4. package/dist/background-indexer.d.ts +132 -0
  5. package/dist/background-indexer.d.ts.map +1 -0
  6. package/dist/background-indexer.js +1044 -0
  7. package/dist/cache.d.ts +110 -0
  8. package/dist/cache.d.ts.map +1 -0
  9. package/dist/cache.js +495 -0
  10. package/dist/compaction-fence.d.ts +89 -0
  11. package/dist/compaction-fence.d.ts.map +1 -0
  12. package/dist/compaction-fence.js +153 -0
  13. package/dist/compositor.d.ts +226 -0
  14. package/dist/compositor.d.ts.map +1 -0
  15. package/dist/compositor.js +2558 -0
  16. package/dist/content-type-classifier.d.ts +41 -0
  17. package/dist/content-type-classifier.d.ts.map +1 -0
  18. package/dist/content-type-classifier.js +181 -0
  19. package/dist/cross-agent.d.ts +62 -0
  20. package/dist/cross-agent.d.ts.map +1 -0
  21. package/dist/cross-agent.js +259 -0
  22. package/dist/db.d.ts +131 -0
  23. package/dist/db.d.ts.map +1 -0
  24. package/dist/db.js +402 -0
  25. package/dist/desired-state-store.d.ts +100 -0
  26. package/dist/desired-state-store.d.ts.map +1 -0
  27. package/dist/desired-state-store.js +222 -0
  28. package/dist/doc-chunk-store.d.ts +140 -0
  29. package/dist/doc-chunk-store.d.ts.map +1 -0
  30. package/dist/doc-chunk-store.js +391 -0
  31. package/dist/doc-chunker.d.ts +99 -0
  32. package/dist/doc-chunker.d.ts.map +1 -0
  33. package/dist/doc-chunker.js +324 -0
  34. package/dist/dreaming-promoter.d.ts +86 -0
  35. package/dist/dreaming-promoter.d.ts.map +1 -0
  36. package/dist/dreaming-promoter.js +381 -0
  37. package/dist/episode-store.d.ts +49 -0
  38. package/dist/episode-store.d.ts.map +1 -0
  39. package/dist/episode-store.js +135 -0
  40. package/dist/fact-store.d.ts +75 -0
  41. package/dist/fact-store.d.ts.map +1 -0
  42. package/dist/fact-store.js +236 -0
  43. package/dist/fleet-store.d.ts +144 -0
  44. package/dist/fleet-store.d.ts.map +1 -0
  45. package/dist/fleet-store.js +276 -0
  46. package/dist/fos-mod.d.ts +178 -0
  47. package/dist/fos-mod.d.ts.map +1 -0
  48. package/dist/fos-mod.js +416 -0
  49. package/dist/hybrid-retrieval.d.ts +64 -0
  50. package/dist/hybrid-retrieval.d.ts.map +1 -0
  51. package/dist/hybrid-retrieval.js +344 -0
  52. package/dist/image-eviction.d.ts +49 -0
  53. package/dist/image-eviction.d.ts.map +1 -0
  54. package/dist/image-eviction.js +251 -0
  55. package/dist/index.d.ts +650 -0
  56. package/dist/index.d.ts.map +1 -0
  57. package/dist/index.js +1072 -0
  58. package/dist/keystone-scorer.d.ts +51 -0
  59. package/dist/keystone-scorer.d.ts.map +1 -0
  60. package/dist/keystone-scorer.js +52 -0
  61. package/dist/knowledge-graph.d.ts +110 -0
  62. package/dist/knowledge-graph.d.ts.map +1 -0
  63. package/dist/knowledge-graph.js +305 -0
  64. package/dist/knowledge-lint.d.ts +29 -0
  65. package/dist/knowledge-lint.d.ts.map +1 -0
  66. package/dist/knowledge-lint.js +116 -0
  67. package/dist/knowledge-store.d.ts +72 -0
  68. package/dist/knowledge-store.d.ts.map +1 -0
  69. package/dist/knowledge-store.js +247 -0
  70. package/dist/library-schema.d.ts +22 -0
  71. package/dist/library-schema.d.ts.map +1 -0
  72. package/dist/library-schema.js +1038 -0
  73. package/dist/message-store.d.ts +89 -0
  74. package/dist/message-store.d.ts.map +1 -0
  75. package/dist/message-store.js +323 -0
  76. package/dist/metrics-dashboard.d.ts +114 -0
  77. package/dist/metrics-dashboard.d.ts.map +1 -0
  78. package/dist/metrics-dashboard.js +260 -0
  79. package/dist/obsidian-exporter.d.ts +57 -0
  80. package/dist/obsidian-exporter.d.ts.map +1 -0
  81. package/dist/obsidian-exporter.js +274 -0
  82. package/dist/obsidian-watcher.d.ts +147 -0
  83. package/dist/obsidian-watcher.d.ts.map +1 -0
  84. package/dist/obsidian-watcher.js +403 -0
  85. package/dist/open-domain.d.ts +46 -0
  86. package/dist/open-domain.d.ts.map +1 -0
  87. package/dist/open-domain.js +125 -0
  88. package/dist/preference-store.d.ts +54 -0
  89. package/dist/preference-store.d.ts.map +1 -0
  90. package/dist/preference-store.js +109 -0
  91. package/dist/preservation-gate.d.ts +82 -0
  92. package/dist/preservation-gate.d.ts.map +1 -0
  93. package/dist/preservation-gate.js +150 -0
  94. package/dist/proactive-pass.d.ts +63 -0
  95. package/dist/proactive-pass.d.ts.map +1 -0
  96. package/dist/proactive-pass.js +239 -0
  97. package/dist/profiles.d.ts +44 -0
  98. package/dist/profiles.d.ts.map +1 -0
  99. package/dist/profiles.js +227 -0
  100. package/dist/provider-translator.d.ts +50 -0
  101. package/dist/provider-translator.d.ts.map +1 -0
  102. package/dist/provider-translator.js +403 -0
  103. package/dist/rate-limiter.d.ts +76 -0
  104. package/dist/rate-limiter.d.ts.map +1 -0
  105. package/dist/rate-limiter.js +179 -0
  106. package/dist/repair-tool-pairs.d.ts +38 -0
  107. package/dist/repair-tool-pairs.d.ts.map +1 -0
  108. package/dist/repair-tool-pairs.js +138 -0
  109. package/dist/retrieval-policy.d.ts +51 -0
  110. package/dist/retrieval-policy.d.ts.map +1 -0
  111. package/dist/retrieval-policy.js +77 -0
  112. package/dist/schema.d.ts +15 -0
  113. package/dist/schema.d.ts.map +1 -0
  114. package/dist/schema.js +229 -0
  115. package/dist/secret-scanner.d.ts +51 -0
  116. package/dist/secret-scanner.d.ts.map +1 -0
  117. package/dist/secret-scanner.js +248 -0
  118. package/dist/seed.d.ts +108 -0
  119. package/dist/seed.d.ts.map +1 -0
  120. package/dist/seed.js +177 -0
  121. package/dist/session-flusher.d.ts +53 -0
  122. package/dist/session-flusher.d.ts.map +1 -0
  123. package/dist/session-flusher.js +69 -0
  124. package/dist/session-topic-map.d.ts +41 -0
  125. package/dist/session-topic-map.d.ts.map +1 -0
  126. package/dist/session-topic-map.js +77 -0
  127. package/dist/spawn-context.d.ts +54 -0
  128. package/dist/spawn-context.d.ts.map +1 -0
  129. package/dist/spawn-context.js +159 -0
  130. package/dist/system-store.d.ts +73 -0
  131. package/dist/system-store.d.ts.map +1 -0
  132. package/dist/system-store.js +182 -0
  133. package/dist/temporal-store.d.ts +80 -0
  134. package/dist/temporal-store.d.ts.map +1 -0
  135. package/dist/temporal-store.js +149 -0
  136. package/dist/topic-detector.d.ts +35 -0
  137. package/dist/topic-detector.d.ts.map +1 -0
  138. package/dist/topic-detector.js +249 -0
  139. package/dist/topic-store.d.ts +45 -0
  140. package/dist/topic-store.d.ts.map +1 -0
  141. package/dist/topic-store.js +136 -0
  142. package/dist/topic-synthesizer.d.ts +51 -0
  143. package/dist/topic-synthesizer.d.ts.map +1 -0
  144. package/dist/topic-synthesizer.js +315 -0
  145. package/dist/trigger-registry.d.ts +63 -0
  146. package/dist/trigger-registry.d.ts.map +1 -0
  147. package/dist/trigger-registry.js +163 -0
  148. package/dist/types.d.ts +537 -0
  149. package/dist/types.d.ts.map +1 -0
  150. package/dist/types.js +9 -0
  151. package/dist/vector-store.d.ts +170 -0
  152. package/dist/vector-store.d.ts.map +1 -0
  153. package/dist/vector-store.js +677 -0
  154. package/dist/version.d.ts +34 -0
  155. package/dist/version.d.ts.map +1 -0
  156. package/dist/version.js +34 -0
  157. package/dist/wiki-page-emitter.d.ts +65 -0
  158. package/dist/wiki-page-emitter.d.ts.map +1 -0
  159. package/dist/wiki-page-emitter.js +258 -0
  160. package/dist/work-store.d.ts +112 -0
  161. package/dist/work-store.d.ts.map +1 -0
  162. package/dist/work-store.js +273 -0
  163. package/package.json +4 -1
@@ -0,0 +1,391 @@
1
+ /**
2
+ * hypermem Document Chunk Store
3
+ *
4
+ * Manages doc_chunks in library.db:
5
+ * - Atomic re-indexing by source hash (no stale/fresh coexistence)
6
+ * - FTS5 keyword search fallback
7
+ * - Collection-scoped queries with agent/tier filtering
8
+ * - Source tracking (what's indexed, when, what hash)
9
+ */
10
+ // ─── Store ──────────────────────────────────────────────────────
11
+ export class DocChunkStore {
12
+ db;
13
+ constructor(db) {
14
+ this.db = db;
15
+ }
16
+ /**
17
+ * Index a set of chunks for a source file.
18
+ *
19
+ * Atomic re-indexing:
20
+ * 1. Check if source_hash has changed
21
+ * 2. If unchanged: skip (idempotent)
22
+ * 3. If changed: delete all chunks with old hash, insert new chunks — in one transaction
23
+ *
24
+ * This ensures no window where stale and fresh chunks coexist.
25
+ */
26
+ indexChunks(chunks) {
27
+ if (chunks.length === 0) {
28
+ return { inserted: 0, deleted: 0, reindexed: false, skipped: true };
29
+ }
30
+ const first = chunks[0];
31
+ const { sourcePath, collection, sourceHash, scope, agentId } = first;
32
+ const now = new Date().toISOString();
33
+ // Check current indexed state
34
+ const existing = this.db
35
+ .prepare('SELECT source_hash, chunk_count FROM doc_sources WHERE source_path = ? AND collection = ?')
36
+ .get(sourcePath, collection);
37
+ if (existing && existing.source_hash === sourceHash) {
38
+ // Hash unchanged — no-op
39
+ return { inserted: 0, deleted: 0, reindexed: false, skipped: true };
40
+ }
41
+ // Hash changed (or first index) — atomic swap
42
+ let deleted = 0;
43
+ let inserted = 0;
44
+ // Use a transaction for atomicity
45
+ const run = this.db.prepare('SELECT 1').get; // warm
46
+ try {
47
+ // Begin transaction via exec
48
+ this.db.exec('BEGIN');
49
+ // Delete stale chunks for this source
50
+ if (existing) {
51
+ const result = this.db
52
+ .prepare('DELETE FROM doc_chunks WHERE source_path = ? AND collection = ?')
53
+ .run(sourcePath, collection);
54
+ deleted = result.changes;
55
+ }
56
+ // Insert new chunks
57
+ const insertChunk = this.db.prepare(`
58
+ INSERT OR REPLACE INTO doc_chunks
59
+ (id, collection, section_path, depth, content, token_estimate,
60
+ source_hash, source_path, scope, tier, agent_id, parent_path,
61
+ created_at, updated_at)
62
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
63
+ `);
64
+ for (const chunk of chunks) {
65
+ insertChunk.run(chunk.id, chunk.collection, chunk.sectionPath, chunk.depth, chunk.content, chunk.tokenEstimate, chunk.sourceHash, chunk.sourcePath, chunk.scope, chunk.tier ?? null, chunk.agentId ?? null, chunk.parentPath ?? null, now, now);
66
+ inserted++;
67
+ }
68
+ // Update source tracking
69
+ this.db.prepare(`
70
+ INSERT OR REPLACE INTO doc_sources
71
+ (source_path, collection, scope, agent_id, source_hash, chunk_count, indexed_at)
72
+ VALUES (?, ?, ?, ?, ?, ?, ?)
73
+ `).run(sourcePath, collection, scope, agentId ?? null, sourceHash, inserted, now);
74
+ this.db.exec('COMMIT');
75
+ }
76
+ catch (err) {
77
+ this.db.exec('ROLLBACK');
78
+ throw err;
79
+ }
80
+ return { inserted, deleted, reindexed: !!existing, skipped: false };
81
+ }
82
+ /**
83
+ * Query chunks by collection with optional filters.
84
+ * Falls back to FTS5 keyword search when keyword is provided.
85
+ */
86
+ queryChunks(query) {
87
+ const { collection, scope, agentId, tier, limit = 20, keyword } = query;
88
+ if (keyword) {
89
+ return this.keywordSearch(keyword, query);
90
+ }
91
+ // Build WHERE clause
92
+ const conditions = ['collection = ?'];
93
+ const params = [collection];
94
+ if (scope) {
95
+ conditions.push('scope = ?');
96
+ params.push(scope);
97
+ }
98
+ if (agentId) {
99
+ conditions.push('(agent_id = ? OR agent_id IS NULL)');
100
+ params.push(agentId);
101
+ }
102
+ if (tier) {
103
+ conditions.push('(tier = ? OR tier IS NULL OR tier = \'all\')');
104
+ params.push(tier);
105
+ }
106
+ params.push(limit);
107
+ const rows = this.db
108
+ .prepare(`
109
+ SELECT id, collection, section_path, depth, content, token_estimate,
110
+ source_hash, source_path, scope, tier, agent_id, parent_path,
111
+ created_at, updated_at
112
+ FROM doc_chunks
113
+ WHERE ${conditions.join(' AND ')}
114
+ ORDER BY depth ASC, section_path ASC
115
+ LIMIT ?
116
+ `)
117
+ .all(...params);
118
+ return rows.map(this.mapRow);
119
+ }
120
+ /**
121
+ * FTS5 keyword search across chunks.
122
+ */
123
+ keywordSearch(keyword, query) {
124
+ const { collection, agentId, tier, limit = 20 } = query;
125
+ const hasFilters = !!(agentId || tier);
126
+ const innerLimit = hasFilters ? limit * 4 : limit;
127
+ // Two-phase: FTS in subquery, metadata filter on small result set.
128
+ let sql = `
129
+ SELECT c.id, c.collection, c.section_path, c.depth, c.content, c.token_estimate,
130
+ c.source_hash, c.source_path, c.scope, c.tier, c.agent_id, c.parent_path,
131
+ c.created_at, c.updated_at
132
+ FROM (
133
+ SELECT rowid, rank FROM doc_chunks_fts WHERE doc_chunks_fts MATCH ? ORDER BY rank LIMIT ?
134
+ ) sub
135
+ JOIN doc_chunks c ON c.rowid = sub.rowid
136
+ WHERE c.collection = ?
137
+ `;
138
+ const params = [keyword, innerLimit, collection];
139
+ if (agentId) {
140
+ sql += ' AND (c.agent_id = ? OR c.agent_id IS NULL)';
141
+ params.push(agentId);
142
+ }
143
+ if (tier) {
144
+ sql += " AND (c.tier = ? OR c.tier IS NULL OR c.tier = 'all')";
145
+ params.push(tier);
146
+ }
147
+ sql += ' ORDER BY sub.rank LIMIT ?';
148
+ params.push(limit * 3); // over-fetch to allow dedup
149
+ const rows = this.db.prepare(sql).all(...params);
150
+ // Deduplicate by source_hash to avoid returning identical content
151
+ // from multiple agent-specific copies of shared-fleet docs.
152
+ const seenHashes = new Set();
153
+ const deduped = rows.filter(r => {
154
+ const hash = r['source_hash'];
155
+ if (!hash)
156
+ return true;
157
+ if (seenHashes.has(hash))
158
+ return false;
159
+ seenHashes.add(hash);
160
+ return true;
161
+ });
162
+ return deduped.slice(0, limit).map(this.mapRow);
163
+ }
164
+ /**
165
+ * Get a single chunk by ID.
166
+ */
167
+ getChunk(id) {
168
+ const row = this.db
169
+ .prepare(`
170
+ SELECT id, collection, section_path, depth, content, token_estimate,
171
+ source_hash, source_path, scope, tier, agent_id, parent_path,
172
+ created_at, updated_at
173
+ FROM doc_chunks WHERE id = ?
174
+ `)
175
+ .get(id);
176
+ return row ? this.mapRow(row) : null;
177
+ }
178
+ /**
179
+ * Check if a source file needs re-indexing.
180
+ * Returns true if the file has changed or has never been indexed.
181
+ */
182
+ needsReindex(sourcePath, collection, currentHash) {
183
+ const row = this.db
184
+ .prepare('SELECT source_hash FROM doc_sources WHERE source_path = ? AND collection = ?')
185
+ .get(sourcePath, collection);
186
+ return !row || row.source_hash !== currentHash;
187
+ }
188
+ /**
189
+ * List all indexed sources, optionally filtered by agent or collection.
190
+ */
191
+ listSources(opts) {
192
+ const conditions = [];
193
+ const params = [];
194
+ if (opts?.agentId) {
195
+ conditions.push('agent_id = ?');
196
+ params.push(opts.agentId);
197
+ }
198
+ if (opts?.collection) {
199
+ conditions.push('collection = ?');
200
+ params.push(opts.collection);
201
+ }
202
+ const where = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';
203
+ const rows = this.db
204
+ .prepare(`
205
+ SELECT source_path, collection, scope, agent_id, source_hash, chunk_count, indexed_at
206
+ FROM doc_sources ${where}
207
+ ORDER BY indexed_at DESC
208
+ `)
209
+ .all(...params);
210
+ return rows.map(r => ({
211
+ sourcePath: r['source_path'],
212
+ collection: r['collection'],
213
+ scope: r['scope'],
214
+ agentId: r['agent_id'],
215
+ sourceHash: r['source_hash'],
216
+ chunkCount: r['chunk_count'],
217
+ indexedAt: r['indexed_at'],
218
+ }));
219
+ }
220
+ /**
221
+ * Delete all chunks for a specific source file.
222
+ */
223
+ deleteSource(sourcePath, collection) {
224
+ this.db.exec('BEGIN');
225
+ try {
226
+ const result = this.db
227
+ .prepare('DELETE FROM doc_chunks WHERE source_path = ? AND collection = ?')
228
+ .run(sourcePath, collection);
229
+ this.db.prepare('DELETE FROM doc_sources WHERE source_path = ? AND collection = ?')
230
+ .run(sourcePath, collection);
231
+ this.db.exec('COMMIT');
232
+ return result.changes;
233
+ }
234
+ catch (err) {
235
+ this.db.exec('ROLLBACK');
236
+ throw err;
237
+ }
238
+ }
239
+ /**
240
+ * Index simple string chunks with an optional session key (for ephemeral spawn context).
241
+ *
242
+ * Unlike indexChunks() which works with DocChunk objects and hash-based dedup,
243
+ * this method is designed for ad-hoc session-scoped content: it always inserts fresh
244
+ * rows tagged with the sessionKey, without hash-based skip logic.
245
+ *
246
+ * Chunks stored with a sessionKey are ephemeral — use clearSessionChunks() to remove them.
247
+ */
248
+ indexDocChunks(agentId, source, chunks, options) {
249
+ if (chunks.length === 0)
250
+ return;
251
+ const now = new Date().toISOString();
252
+ const sessionKey = options?.sessionKey ?? null;
253
+ // Use a stable collection name derived from source path
254
+ const collection = `spawn/${agentId}`;
255
+ try {
256
+ this.db.exec('BEGIN');
257
+ const insert = this.db.prepare(`
258
+ INSERT INTO doc_chunks
259
+ (id, collection, section_path, depth, content, token_estimate,
260
+ source_hash, source_path, scope, tier, agent_id, parent_path,
261
+ session_key, created_at, updated_at)
262
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
263
+ `);
264
+ chunks.forEach((chunkContent, idx) => {
265
+ const id = `spawn:${agentId}:${sessionKey ?? 'none'}:${source}:${idx}:${Date.now()}`;
266
+ const tokenEstimate = Math.ceil(chunkContent.length / 4);
267
+ insert.run(id, collection, `${source}#chunk-${idx}`, 2, chunkContent, tokenEstimate, `spawn-${Date.now()}-${idx}`, // non-deduped hash
268
+ source, 'per-agent', null, agentId, null, sessionKey, now, now);
269
+ });
270
+ this.db.exec('COMMIT');
271
+ }
272
+ catch (err) {
273
+ try {
274
+ this.db.exec('ROLLBACK');
275
+ }
276
+ catch { /* ignore */ }
277
+ console.warn('[hypermem:doc-chunk-store] indexDocChunks failed:', err.message);
278
+ }
279
+ }
280
+ /**
281
+ * Query doc chunks by agentId+query string, with optional session key scoping.
282
+ * When sessionKey is provided, only chunks tagged with that session key are returned.
283
+ */
284
+ queryDocChunks(agentId, query, options) {
285
+ const limit = options?.limit ?? 10;
286
+ const sessionKey = options?.sessionKey;
287
+ const collection = `spawn/${agentId}`;
288
+ try {
289
+ if (query.trim() && query.trim().length >= 3) {
290
+ // FTS5 keyword search
291
+ let sql = `
292
+ SELECT c.id, c.collection, c.section_path, c.depth, c.content, c.token_estimate,
293
+ c.source_hash, c.source_path, c.scope, c.tier, c.agent_id, c.parent_path,
294
+ c.created_at, c.updated_at
295
+ FROM (
296
+ SELECT rowid, rank FROM doc_chunks_fts WHERE doc_chunks_fts MATCH ? ORDER BY rank LIMIT ?
297
+ ) sub
298
+ JOIN doc_chunks c ON c.rowid = sub.rowid
299
+ WHERE c.collection = ?
300
+ `;
301
+ const params = [query, limit * 3, collection];
302
+ if (sessionKey !== undefined) {
303
+ sql += ' AND c.session_key = ?';
304
+ params.push(sessionKey);
305
+ }
306
+ sql += ' ORDER BY sub.rank LIMIT ?';
307
+ params.push(limit);
308
+ const rows = this.db.prepare(sql).all(...params);
309
+ return rows.map(this.mapRow);
310
+ }
311
+ else {
312
+ // Fallback: return most recent chunks for this session
313
+ let sql = `
314
+ SELECT id, collection, section_path, depth, content, token_estimate,
315
+ source_hash, source_path, scope, tier, agent_id, parent_path,
316
+ created_at, updated_at
317
+ FROM doc_chunks
318
+ WHERE collection = ?
319
+ `;
320
+ const params = [collection];
321
+ if (sessionKey !== undefined) {
322
+ sql += ' AND session_key = ?';
323
+ params.push(sessionKey);
324
+ }
325
+ sql += ' ORDER BY created_at DESC LIMIT ?';
326
+ params.push(limit);
327
+ const rows = this.db.prepare(sql).all(...params);
328
+ return rows.map(this.mapRow);
329
+ }
330
+ }
331
+ catch (err) {
332
+ console.warn('[hypermem:doc-chunk-store] queryDocChunks failed:', err.message);
333
+ return [];
334
+ }
335
+ }
336
+ /**
337
+ * Delete all doc chunks associated with a specific session key.
338
+ * Call this when a spawn session is complete to release ephemeral storage.
339
+ */
340
+ clearSessionChunks(sessionKey) {
341
+ try {
342
+ const result = this.db
343
+ .prepare('DELETE FROM doc_chunks WHERE session_key = ?')
344
+ .run(sessionKey);
345
+ return result.changes;
346
+ }
347
+ catch (err) {
348
+ console.warn('[hypermem:doc-chunk-store] clearSessionChunks failed:', err.message);
349
+ return 0;
350
+ }
351
+ }
352
+ /**
353
+ * Get chunk stats: count per collection.
354
+ */
355
+ getStats() {
356
+ const rows = this.db.prepare(`
357
+ SELECT collection,
358
+ COUNT(*) as count,
359
+ COUNT(DISTINCT source_path) as sources,
360
+ SUM(token_estimate) as total_tokens
361
+ FROM doc_chunks
362
+ GROUP BY collection
363
+ ORDER BY collection
364
+ `).all();
365
+ return rows.map(r => ({
366
+ collection: r['collection'],
367
+ count: r['count'],
368
+ sources: r['sources'],
369
+ totalTokens: r['total_tokens'] ?? 0,
370
+ }));
371
+ }
372
+ mapRow(r) {
373
+ return {
374
+ id: r['id'],
375
+ collection: r['collection'],
376
+ sectionPath: r['section_path'],
377
+ depth: r['depth'],
378
+ content: r['content'],
379
+ tokenEstimate: r['token_estimate'],
380
+ sourceHash: r['source_hash'],
381
+ sourcePath: r['source_path'],
382
+ scope: r['scope'],
383
+ tier: r['tier'],
384
+ agentId: r['agent_id'],
385
+ parentPath: r['parent_path'],
386
+ createdAt: r['created_at'],
387
+ updatedAt: r['updated_at'],
388
+ };
389
+ }
390
+ }
391
+ //# sourceMappingURL=doc-chunk-store.js.map
/**
 * hypermem Document Chunker
 *
 * Splits markdown documents into semantically coherent chunks for L3 indexing.
 *
 * Design principles:
 * - Chunk by logical section (## / ###), NOT by token count
 * - Each chunk is a self-contained policy/operational unit
 * - Preserve section hierarchy for context assembly
 * - Track source file hash for atomic re-indexing
 * - Idempotent: same source produces same chunks (deterministic IDs)
 *
 * Collections (as defined in ACA offload spec):
 *   governance/policy   — POLICY.md, shared-fleet
 *   governance/charter  — CHARTER.md, per-tier (council/director)
 *   governance/comms    — COMMS.md, shared-fleet
 *   operations/agents   — AGENTS.md, per-tier
 *   operations/tools    — TOOLS.md, per-agent
 *   memory/decisions    — MEMORY.md, per-agent
 *   memory/daily        — memory/YYYY-MM-DD.md, per-agent
 *   identity/soul       — SOUL.md, per-agent (always-loaded kernel, but still indexed)
 *   identity/job        — JOB.md, per-agent (demand-loaded during deliberation)
 */
/** One indexed section of a markdown document. */
export interface DocChunk {
    /** Unique deterministic ID: sha256(collection + sectionPath + sourceHash) */
    id: string;
    /** Collection path: governance/policy, operations/tools, etc. */
    collection: string;
    /** Full section path: "§3 > Naming > Single-Name Rule" */
    sectionPath: string;
    /** Section depth (0=root, 1=#, 2=##, 3=###) */
    depth: number;
    /** The actual text content of this chunk */
    content: string;
    /** Token estimate (rough: chars / 4) */
    tokenEstimate: number;
    /** SHA-256 of the source file at time of chunking */
    sourceHash: string;
    /** Source file path (relative to workspace) */
    sourcePath: string;
    /** Scope: shared-fleet | per-tier | per-agent */
    scope: 'shared-fleet' | 'per-tier' | 'per-agent';
    /** Tier filter (for per-tier scope): council | director | all */
    tier?: string;
    /** Agent ID (for per-agent scope) */
    agentId?: string;
    /** Parent section path (for hierarchy context) */
    parentPath?: string;
}
/** Options controlling how a document is chunked. */
export interface ChunkOptions {
    collection: string;
    sourcePath: string;
    scope: DocChunk['scope'];
    tier?: string;
    agentId?: string;
    /** Minimum content length to emit a chunk (avoids empty section headers) */
    minContentLen?: number;
    /** Whether to include parent context prefix in chunk content */
    includeParentContext?: boolean;
}
/**
 * Hash a string with SHA-256.
 * @returns the hex digest of `content`.
 */
export declare function hashContent(content: string): string;
/**
 * Chunk a markdown document into semantic sections.
 *
 * Approach:
 * - Level 1 (#) headings become top-level section anchors
 * - Level 2 (##) headings become primary chunks
 * - Level 3 (###) headings become sub-chunks under their parent
 * - Content before the first heading becomes a "preamble" chunk
 * - Empty sections (heading only, no content) are skipped unless minContentLen=0
 *
 * For documents with deeply nested content, we group level-3 sections under
 * their parent level-2 section. This keeps related policy sections together.
 */
export declare function chunkMarkdown(content: string, opts: ChunkOptions): DocChunk[];
/**
 * Chunk a file from disk; `sourcePath` is derived from `filePath`.
 */
export declare function chunkFile(filePath: string, opts: Omit<ChunkOptions, 'sourcePath'>): DocChunk[];
/**
 * Standard collection definitions for ACA workspace files.
 * Maps file names to collection paths and scope metadata.
 */
export interface CollectionDef {
    collection: string;
    scope: DocChunk['scope'];
    tier?: string;
    description: string;
}
export declare const ACA_COLLECTIONS: Record<string, CollectionDef>;
/**
 * Infer the collection definition for a file based on its name.
 * Returns undefined if the file is not a known ACA file.
 */
export declare function inferCollection(fileName: string, agentId?: string): CollectionDef | undefined;
//# sourceMappingURL=doc-chunker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"doc-chunker.d.ts","sourceRoot":"","sources":["../src/doc-chunker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAOH,MAAM,WAAW,QAAQ;IACvB,6EAA6E;IAC7E,EAAE,EAAE,MAAM,CAAC;IACX,iEAAiE;IACjE,UAAU,EAAE,MAAM,CAAC;IACnB,0DAA0D;IAC1D,WAAW,EAAE,MAAM,CAAC;IACpB,+CAA+C;IAC/C,KAAK,EAAE,MAAM,CAAC;IACd,4CAA4C;IAC5C,OAAO,EAAE,MAAM,CAAC;IAChB,wCAAwC;IACxC,aAAa,EAAE,MAAM,CAAC;IACtB,qDAAqD;IACrD,UAAU,EAAE,MAAM,CAAC;IACnB,+CAA+C;IAC/C,UAAU,EAAE,MAAM,CAAC;IACnB,iDAAiD;IACjD,KAAK,EAAE,cAAc,GAAG,UAAU,GAAG,WAAW,CAAC;IACjD,iEAAiE;IACjE,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,qCAAqC;IACrC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,kDAAkD;IAClD,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,YAAY;IAC3B,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC;IACzB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,4EAA4E;IAC5E,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gEAAgE;IAChE,oBAAoB,CAAC,EAAE,OAAO,CAAC;CAChC;AAID;;GAEG;AACH,wBAAgB,WAAW,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAEnD;AAgFD;;;;;;;;;;;;GAYG;AACH,wBAAgB,aAAa,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,YAAY,GAAG,QAAQ,EAAE,CA+I7E;AAED;;GAEG;AACH,wBAAgB,SAAS,CAAC,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,YAAY,EAAE,YAAY,CAAC,GAAG,QAAQ,EAAE,CAG9F;AAID;;;GAGG;AACH,MAAM,WAAW,aAAa;IAC5B,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC;IACzB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,eAAO,MAAM,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,aAAa,CA8CzD,CAAC;AAEF;;;GAGG;AACH,wBAAgB,eAAe,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,GAAG,aAAa,GAAG,SAAS,CAc7F"}