clawmem 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/AGENTS.md +660 -0
  2. package/CLAUDE.md +660 -0
  3. package/LICENSE +21 -0
  4. package/README.md +993 -0
  5. package/SKILL.md +717 -0
  6. package/bin/clawmem +75 -0
  7. package/package.json +72 -0
  8. package/src/amem.ts +797 -0
  9. package/src/beads.ts +263 -0
  10. package/src/clawmem.ts +1849 -0
  11. package/src/collections.ts +405 -0
  12. package/src/config.ts +178 -0
  13. package/src/consolidation.ts +123 -0
  14. package/src/directory-context.ts +248 -0
  15. package/src/errors.ts +41 -0
  16. package/src/formatter.ts +427 -0
  17. package/src/graph-traversal.ts +247 -0
  18. package/src/hooks/context-surfacing.ts +317 -0
  19. package/src/hooks/curator-nudge.ts +89 -0
  20. package/src/hooks/decision-extractor.ts +639 -0
  21. package/src/hooks/feedback-loop.ts +214 -0
  22. package/src/hooks/handoff-generator.ts +345 -0
  23. package/src/hooks/postcompact-inject.ts +226 -0
  24. package/src/hooks/precompact-extract.ts +314 -0
  25. package/src/hooks/pretool-inject.ts +79 -0
  26. package/src/hooks/session-bootstrap.ts +324 -0
  27. package/src/hooks/staleness-check.ts +130 -0
  28. package/src/hooks.ts +367 -0
  29. package/src/indexer.ts +327 -0
  30. package/src/intent.ts +294 -0
  31. package/src/limits.ts +26 -0
  32. package/src/llm.ts +1175 -0
  33. package/src/mcp.ts +2138 -0
  34. package/src/memory.ts +336 -0
  35. package/src/mmr.ts +93 -0
  36. package/src/observer.ts +269 -0
  37. package/src/openclaw/engine.ts +283 -0
  38. package/src/openclaw/index.ts +221 -0
  39. package/src/openclaw/plugin.json +83 -0
  40. package/src/openclaw/shell.ts +207 -0
  41. package/src/openclaw/tools.ts +304 -0
  42. package/src/profile.ts +346 -0
  43. package/src/promptguard.ts +218 -0
  44. package/src/retrieval-gate.ts +106 -0
  45. package/src/search-utils.ts +127 -0
  46. package/src/server.ts +783 -0
  47. package/src/splitter.ts +325 -0
  48. package/src/store.ts +4062 -0
  49. package/src/validation.ts +67 -0
  50. package/src/watcher.ts +58 -0
@@ -0,0 +1,325 @@
1
+ /**
2
+ * ClawMem Document Splitter — Granular Fragment Indexing
3
+ *
4
+ * Splits markdown documents into semantic fragments (sections, bullet lists,
5
+ * code blocks, frontmatter facts) for per-fragment embedding. Each fragment
6
+ * gets its own vector, dramatically improving recall for specific facts
7
+ * buried in larger documents.
8
+ */
9
+
10
+ // =============================================================================
11
+ // Types
12
+ // =============================================================================
13
+
14
/**
 * One embeddable unit produced by the splitter.
 */
export interface Fragment {
  /** What kind of unit this is; `'full'` is the (bounded) document body itself. */
  type:
    | 'full'
    | 'section'
    | 'list'
    | 'code'
    | 'frontmatter'
    | 'fact'
    | 'narrative';
  /**
   * Heading text for sections, fence language for code blocks (when present),
   * key name for frontmatter fragments; `null` for every other kind.
   */
  label: string | null;
  /** Text of the fragment. */
  content: string;
  /**
   * 1-based line in the bounded body where the fragment begins. Fragments that
   * do not come from body lines (facts, narrative, frontmatter) use 0.
   */
  startLine: number;
}
20
+
21
+ // =============================================================================
22
+ // Config
23
+ // =============================================================================
24
+
25
+ import { MAX_FRAGMENTS_PER_DOC, MAX_SPLITTER_INPUT_CHARS } from "./limits.ts";
26
+
27
/** Section, list, code, fact and narrative text shorter than this is not emitted as a fragment. */
const MIN_FRAGMENT_CHARS = 50;

/** Fragment content longer than this is cut down by `maybeSplitLarge`. */
const MAX_FRAGMENT_CHARS = 2000;

/** Bodies shorter than this yield only the `'full'` fragment in `splitDocument`. */
const MIN_DOC_CHARS_FOR_SPLIT = 200;
30
+
31
+ // =============================================================================
32
+ // Main Splitter
33
+ // =============================================================================
34
+
35
/**
 * Break a markdown document into fragments for per-fragment embedding.
 *
 * The first fragment is always the `'full'` body, cut to
 * `MAX_SPLITTER_INPUT_CHARS` when the input is longer. Bodies shorter than
 * `MIN_DOC_CHARS_FOR_SPLIT` yield only that fragment. Otherwise section
 * (`#`–`###` headings), list, code-block and frontmatter fragments are
 * appended in that order, stopping once `MAX_FRAGMENTS_PER_DOC` is reached.
 *
 * @param body - Markdown text of the document.
 * @param frontmatter - Optional parsed frontmatter to turn into `'frontmatter'` fragments.
 * @returns The fragments, with the `'full'` fragment first.
 */
export function splitDocument(
  body: string,
  frontmatter?: Record<string, any>
): Fragment[] {
  // Cap the input so a huge document cannot blow up memory downstream.
  const text =
    body.length > MAX_SPLITTER_INPUT_CHARS
      ? body.slice(0, MAX_SPLITTER_INPUT_CHARS)
      : body;

  const out: Fragment[] = [
    { type: 'full', label: null, content: text, startLine: 1 },
  ];

  // Tiny documents gain nothing from finer-grained fragments.
  if (text.length < MIN_DOC_CHARS_FOR_SPLIT) return out;

  const lines = text.split('\n');

  // Slots left before the per-document fragment cap is hit.
  const budget = (): number => MAX_FRAGMENTS_PER_DOC - out.length;

  // Append as many candidates as the remaining budget allows.
  const append = (candidates: Fragment[]): void => {
    out.push(...candidates.slice(0, budget()));
  };

  // Sections are always attempted; later extractors run only while budget remains.
  append(extractSections(lines));

  if (budget() > 0) {
    append(extractLists(lines));
  }

  if (budget() > 0) {
    append(extractCodeBlocks(lines));
  }

  if (frontmatter && budget() > 0) {
    append(extractFrontmatter(frontmatter));
  }

  return out;
}
84
+
85
/**
 * Split an observer-generated observation into fact and narrative fragments.
 * Used for documents that carry structured `facts` and `narrative` fields.
 *
 * The first fragment is always the `'full'` body, cut to
 * `MAX_SPLITTER_INPUT_CHARS` when longer. At most `MAX_FRAGMENTS_PER_DOC`
 * fragments are returned in total.
 *
 * @param body - Text of the observation document.
 * @param meta - `facts` is expected to be a JSON-encoded array of strings;
 *   `narrative` is free text. Both are optional.
 * @returns The fragments, with the `'full'` fragment first. Fact and narrative
 *   fragments use `startLine: 0` because they do not map to a body line.
 */
export function splitObservation(
  body: string,
  meta: { facts?: string; narrative?: string }
): Fragment[] {
  // Bound input size
  const boundedBody =
    body.length > MAX_SPLITTER_INPUT_CHARS
      ? body.slice(0, MAX_SPLITTER_INPUT_CHARS)
      : body;

  const fragments: Fragment[] = [
    { type: 'full', label: null, content: boundedBody, startLine: 1 },
  ];

  // Individual facts
  if (meta.facts && fragments.length < MAX_FRAGMENTS_PER_DOC) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(meta.facts);
    } catch {
      // Malformed JSON: fact extraction is best-effort, so just skip it.
      parsed = undefined;
    }

    // Validate the shape instead of trusting a cast: anything that is not an
    // array is ignored, and non-string entries are skipped individually so one
    // bad element (e.g. null) cannot discard the valid facts that follow it.
    if (Array.isArray(parsed)) {
      for (const fact of parsed as unknown[]) {
        if (fragments.length >= MAX_FRAGMENTS_PER_DOC) break;
        if (typeof fact === 'string' && fact.length >= MIN_FRAGMENT_CHARS) {
          fragments.push({ type: 'fact', label: null, content: fact, startLine: 0 });
        }
      }
    }
  }

  // Narrative
  if (
    meta.narrative &&
    meta.narrative.length >= MIN_FRAGMENT_CHARS &&
    fragments.length < MAX_FRAGMENTS_PER_DOC
  ) {
    fragments.push({ type: 'narrative', label: null, content: meta.narrative, startLine: 0 });
  }

  return fragments;
}
123
+
124
+ // =============================================================================
125
+ // Section Extraction
126
+ // =============================================================================
127
+
128
/**
 * Collect one `'section'` fragment per level 1–3 markdown heading.
 *
 * A section runs from its heading line (included in the content) up to the
 * line before the next level 1–3 heading. Text that precedes the first heading
 * is not emitted. Sections whose trimmed content is shorter than
 * `MIN_FRAGMENT_CHARS` are dropped, and long ones are cut by `maybeSplitLarge`.
 *
 * Lines inside a fenced code block (delimited by lines starting with ```) are
 * never treated as headings, so a `# comment` in a shell snippet does not
 * split the enclosing section.
 *
 * @param lines - The document body split on `\n`.
 * @returns Section fragments in document order; `startLine` is the 1-based
 *   line of the heading.
 */
function extractSections(lines: string[]): Fragment[] {
  const sections: Fragment[] = [];
  let currentHeading: string | null = null;
  let currentLines: string[] = [];
  let currentStartLine = 1;
  let inFence = false;

  // Emit the section being accumulated, if there is one and it is long enough.
  const flush = (): void => {
    if (currentHeading === null || currentLines.length === 0) return;
    const content = currentLines.join('\n').trim();
    if (content.length < MIN_FRAGMENT_CHARS) return;
    sections.push({
      type: 'section',
      label: currentHeading,
      content: maybeSplitLarge(content),
      startLine: currentStartLine,
    });
  };

  for (const [i, line] of lines.entries()) {
    // Fence delimiters toggle code mode and always belong to the current section.
    if (line.startsWith('```')) {
      inFence = !inFence;
      currentLines.push(line);
      continue;
    }

    const headingMatch = inFence ? null : line.match(/^(#{1,3})\s+(.+)/);

    if (headingMatch) {
      flush();
      currentHeading = headingMatch[2]!.trim();
      currentLines = [line];
      currentStartLine = i + 1;
    } else {
      currentLines.push(line);
    }
  }

  // Flush last section
  flush();

  return sections;
}
175
+
176
+ // =============================================================================
177
+ // List Extraction
178
+ // =============================================================================
179
+
180
/**
 * Collect runs of consecutive list lines as `'list'` fragments.
 *
 * A run is made of bullet lines (`-`, `*`, `+` or `1.` style, optionally
 * indented) plus non-blank lines indented by two or more whitespace characters
 * that follow an already-started run. Any other line, including a blank one,
 * ends the run. A run is emitted only when it has at least two lines and its
 * trimmed text reaches `MIN_FRAGMENT_CHARS`; long runs are cut by
 * `maybeSplitLarge`.
 *
 * @param lines - The document body split on `\n`.
 * @returns List fragments in document order; `startLine` is the 1-based line
 *   of the first line in the run.
 */
function extractLists(lines: string[]): Fragment[] {
  const found: Fragment[] = [];
  let run: string[] = [];
  let runStart = 0;

  // Emit the pending run when it is long enough to be worth its own fragment.
  const emitRun = (): void => {
    if (run.length < 2) return;
    const text = run.join('\n').trim();
    if (text.length < MIN_FRAGMENT_CHARS) return;
    found.push({
      type: 'list',
      label: null,
      content: maybeSplitLarge(text),
      startLine: runStart,
    });
  };

  lines.forEach((line, idx) => {
    const bullet = /^\s*[-*+]\s/.test(line) || /^\s*\d+\.\s/.test(line);
    // An indented, non-blank line only counts while a run is already open.
    const continuation =
      run.length > 0 && /^\s{2,}/.test(line) && line.trim().length > 0;

    if (!bullet && !continuation) {
      // Anything else terminates the run, whether or not it gets emitted.
      emitRun();
      run = [];
      return;
    }

    if (run.length === 0) runStart = idx + 1;
    run.push(line);
  });

  // A list that reaches the end of the document still needs emitting.
  emitRun();

  return found;
}
225
+
226
+ // =============================================================================
227
+ // Code Block Extraction
228
+ // =============================================================================
229
+
230
/**
 * Collect fenced code blocks as `'code'` fragments.
 *
 * A block opens on a line starting with ``` and closes on the next line that
 * starts with ```. Both fence lines are part of the content. The label is the
 * run of word characters directly after the opening fence, or `null` when
 * there is none. Blocks whose trimmed text is shorter than
 * `MIN_FRAGMENT_CHARS` are dropped, long ones are cut by `maybeSplitLarge`,
 * and a block that is never closed produces nothing.
 *
 * @param lines - The document body split on `\n`.
 * @returns Code fragments in document order; `startLine` is the 1-based line
 *   of the opening fence.
 */
function extractCodeBlocks(lines: string[]): Fragment[] {
  const out: Fragment[] = [];

  // `null` while outside a fence; otherwise the block being accumulated.
  let open: { lang: string | null; start: number; buf: string[] } | null = null;

  for (const [idx, line] of lines.entries()) {
    if (open === null) {
      const fence = line.match(/^```(\w*)/);
      if (fence) {
        // An empty capture means the fence carried no language tag.
        open = { lang: fence[1] || null, start: idx + 1, buf: [line] };
      }
      continue;
    }

    open.buf.push(line);
    if (!line.startsWith('```')) continue;

    // Closing fence reached: emit the block if it is long enough, then reset.
    const text = open.buf.join('\n').trim();
    if (text.length >= MIN_FRAGMENT_CHARS) {
      out.push({
        type: 'code',
        label: open.lang,
        content: maybeSplitLarge(text),
        startLine: open.start,
      });
    }
    open = null;
  }

  return out;
}
266
+
267
+ // =============================================================================
268
+ // Frontmatter Extraction
269
+ // =============================================================================
270
+
271
/**
 * Turn frontmatter fields into `'frontmatter'` fragments of the form
 * `"key: value"`.
 *
 * - `content_type` and `tags` are skipped (metadata-only fields).
 * - String, number and boolean values are rendered directly.
 * - Arrays are rendered as a comma-separated list of their string, number and
 *   boolean items; other items (objects, `null`, nested arrays) are left out
 *   so they cannot show up as `[object Object]` or empty slots.
 * - Any other value type is skipped, as is a field whose rendered value is
 *   empty (it would carry no information beyond the key).
 * - The rendered `"key: value"` text must be at least 10 characters long.
 *
 * @param fm - Parsed frontmatter of the document.
 * @returns One fragment per usable field, in the object's key order, each
 *   with `label` set to the key and `startLine` 0.
 */
function extractFrontmatter(fm: Record<string, any>): Fragment[] {
  // Shortest "key: value" text considered worth a fragment of its own.
  const MIN_FRONTMATTER_CHARS = 10;

  const isScalar = (v: unknown): v is string | number | boolean =>
    typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean';

  const fragments: Fragment[] = [];

  for (const [key, value] of Object.entries(fm)) {
    if (key === 'content_type' || key === 'tags') continue; // skip metadata-only fields

    const raw: unknown = value;
    let rendered: string;
    if (isScalar(raw)) {
      rendered = String(raw);
    } else if (Array.isArray(raw)) {
      rendered = raw
        .filter(isScalar)
        .map((item) => String(item))
        .join(', ');
    } else {
      continue;
    }

    // Nothing to say about this key (empty string, empty or all-object array).
    if (rendered.trim().length === 0) continue;

    const text = `${key}: ${rendered}`;
    if (text.length >= MIN_FRONTMATTER_CHARS) {
      fragments.push({
        type: 'frontmatter',
        label: key,
        content: text,
        startLine: 0,
      });
    }
  }

  return fragments;
}
300
+
301
+ // =============================================================================
302
+ // Helpers
303
+ // =============================================================================
304
+
305
/**
 * Cap `content` at `maxChars` characters, preferring a clean cut point.
 *
 * Content within the limit is returned unchanged. Otherwise the text is cut
 * at the last paragraph break (`\n\n`) at or before the limit, or failing that
 * the last line break, provided the break lies past the halfway point of the
 * limit. If neither qualifies the text is hard-truncated at the limit. The
 * part after the cut is discarded, not returned.
 *
 * @param content - Text to bound.
 * @param maxChars - Maximum length in UTF-16 code units; defaults to
 *   `MAX_FRAGMENT_CHARS`.
 * @returns `content` itself, or a prefix of it no longer than `maxChars`.
 */
function maybeSplitLarge(content: string, maxChars: number = MAX_FRAGMENT_CHARS): string {
  if (content.length <= maxChars) return content;

  // A break in the first half would throw away too much text, so ignore it.
  const earliestUsefulBreak = maxChars * 0.5;

  // Try to cut at a paragraph boundary
  const paragraphBreak = content.lastIndexOf('\n\n', maxChars);
  if (paragraphBreak > earliestUsefulBreak) {
    return content.slice(0, paragraphBreak);
  }

  // Fall back to a line boundary
  const lineBreak = content.lastIndexOf('\n', maxChars);
  if (lineBreak > earliestUsefulBreak) {
    return content.slice(0, lineBreak);
  }

  // Hard truncate. If the cut would land between the two halves of a
  // surrogate pair, back off by one unit so no lone high surrogate is left
  // dangling at the end of the fragment.
  let end = maxChars;
  const lastUnit = content.charCodeAt(end - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    end -= 1;
  }
  return content.slice(0, end);
}