@totalreclaw/totalreclaw 1.5.0 → 3.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAWHUB.md +134 -0
- package/README.md +407 -64
- package/SKILL.md +1032 -0
- package/api-client.ts +5 -5
- package/claims-helper.ts +686 -0
- package/config.ts +211 -0
- package/consolidation.ts +141 -33
- package/contradiction-sync.ts +1389 -0
- package/crypto.ts +63 -261
- package/digest-sync.ts +516 -0
- package/embedding.ts +69 -46
- package/extractor.ts +1307 -84
- package/hot-cache-wrapper.ts +1 -1
- package/import-adapters/base-adapter.ts +4 -5
- package/import-adapters/chatgpt-adapter.ts +323 -0
- package/import-adapters/claude-adapter.ts +146 -0
- package/import-adapters/gemini-adapter.ts +243 -0
- package/import-adapters/index.ts +9 -0
- package/import-adapters/mcp-memory-adapter.ts +4 -2
- package/import-adapters/mem0-adapter.ts +2 -2
- package/import-adapters/types.ts +25 -2
- package/index.ts +2002 -319
- package/llm-client.ts +106 -53
- package/lsh.ts +21 -210
- package/package.json +20 -7
- package/pin.ts +502 -0
- package/reranker.ts +96 -124
- package/skill.json +213 -0
- package/subgraph-search.ts +112 -5
- package/subgraph-store.ts +559 -275
- package/consolidation.test.ts +0 -356
- package/extractor-dedup.test.ts +0 -168
- package/import-adapters/import-adapters.test.ts +0 -595
- package/lsh.test.ts +0 -463
- package/pocv2-e2e-test.ts +0 -917
- package/porter-stemmer.d.ts +0 -4
- package/reranker.test.ts +0 -594
- package/semantic-dedup.test.ts +0 -392
- package/setup.sh +0 -19
- package/store-dedup-wiring.test.ts +0 -186
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
import { BaseImportAdapter } from './base-adapter.js';
|
|
2
|
+
import type {
|
|
3
|
+
ImportSource,
|
|
4
|
+
AdapterParseResult,
|
|
5
|
+
ConversationChunk,
|
|
6
|
+
ProgressCallback,
|
|
7
|
+
} from './types.js';
|
|
8
|
+
import fs from 'node:fs';
|
|
9
|
+
import os from 'node:os';
|
|
10
|
+
|
|
11
|
+
/** Maximum messages per conversation chunk handed to LLM fact extraction. */
const CHUNK_SIZE = 20;

/**
 * Gap (in minutes) between consecutive entries that starts a new
 * pseudo-session; Takeout HTML has no real session boundaries, so
 * temporal proximity is used as a proxy.
 */
const SESSION_GAP_MINUTES = 30;
|
|
16
|
+
|
|
17
|
+
// ── Timestamp Parsing ────────────────────────────────────────────────────────
|
|
18
|
+
|
|
19
|
+
const MONTHS: Record<string, number> = {
|
|
20
|
+
Jan: 0, Feb: 1, Mar: 2, Apr: 3, May: 4, Jun: 5,
|
|
21
|
+
Jul: 6, Aug: 7, Sep: 8, Oct: 9, Nov: 10, Dec: 11,
|
|
22
|
+
};
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* Parse Gemini timestamp: "1 Apr 2026, 18:39:35 WEST" → ISO 8601.
|
|
26
|
+
* Timezone is treated as UTC (all entries use the same TZ, preserving order).
|
|
27
|
+
*/
|
|
28
|
+
function parseTimestamp(raw: string): string | undefined {
|
|
29
|
+
const m = raw.match(/^(\d{1,2})\s+(\w{3})\s+(\d{4}),\s+(\d{2}):(\d{2}):(\d{2})\s+/);
|
|
30
|
+
if (!m || MONTHS[m[2]] === undefined) return undefined;
|
|
31
|
+
const d = new Date(Date.UTC(+m[3], MONTHS[m[2]], +m[1], +m[4], +m[5], +m[6]));
|
|
32
|
+
return d.toISOString();
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
// ── HTML Helpers ─────────────────────────────────────────────────────────────
|
|
36
|
+
|
|
37
|
+
function decodeEntities(t: string): string {
|
|
38
|
+
return t.replace(/'/g, "'").replace(/"/g, '"').replace(/&/g, '&')
|
|
39
|
+
.replace(/</g, '<').replace(/>/g, '>').replace(/ /g, ' ');
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
function stripHTML(html: string): string {
|
|
43
|
+
return html.replace(/<br\s*\/?>/gi, '\n').replace(/<\/p>/gi, '\n')
|
|
44
|
+
.replace(/<\/li>/gi, '\n').replace(/<\/h[1-6]>/gi, '\n')
|
|
45
|
+
.replace(/<hr\s*\/?>/gi, '\n---\n').replace(/<[^>]+>/g, '')
|
|
46
|
+
.replace(/\n{3,}/g, '\n\n').trim();
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
// ── Entry Types ──────────────────────────────────────────────────────────────
|
|
50
|
+
|
|
51
|
+
/** One user-prompt/AI-response pair parsed from a Takeout activity cell. */
interface GeminiEntry {
  // Plain-text user prompt; may be '' when only a response was recovered.
  userPrompt: string;
  // Plain-text model response; may be '' when only a prompt was recovered.
  aiResponse: string;
  // Entry time as ISO 8601 (exported timezone treated as UTC).
  timestampISO: string;
  // Same instant in Unix seconds; used for sorting and session-gap checks.
  timestampUnix: number;
}
|
|
57
|
+
|
|
58
|
+
// ── Gemini Adapter ───────────────────────────────────────────────────────────
|
|
59
|
+
|
|
60
|
+
/**
 * Import adapter for Google Gemini conversation history exported via
 * Google Takeout ("My Activity.html").
 *
 * Pipeline: parse the activity HTML into prompt/response entries, group
 * entries into pseudo-sessions by time gap, then emit ConversationChunks
 * for downstream LLM fact extraction. `facts` is always empty for this
 * adapter — extraction is delegated to the caller via `chunks`.
 */
export class GeminiAdapter extends BaseImportAdapter {
  readonly source: ImportSource = 'gemini';
  readonly displayName = 'Google Gemini';

  /**
   * Parse Takeout HTML supplied inline (`content`) or via `file_path`
   * (a leading "~" is expanded to the home directory).
   *
   * Never throws for bad input: failures are reported through the
   * `errors`/`warnings` arrays of the returned AdapterParseResult.
   */
  async parse(
    input: { content?: string; file_path?: string },
    onProgress?: ProgressCallback,
  ): Promise<AdapterParseResult> {
    const warnings: string[] = [];
    const errors: string[] = [];

    let content: string;

    if (input.content) {
      content = input.content;
    } else if (input.file_path) {
      try {
        // Expand "~" only at the start of the path.
        const resolved = input.file_path.replace(/^~/, os.homedir());
        content = fs.readFileSync(resolved, 'utf-8');
      } catch (e) {
        errors.push(`Failed to read file: ${e instanceof Error ? e.message : 'Unknown error'}`);
        return { facts: [], chunks: [], totalMessages: 0, warnings, errors };
      }
    } else {
      errors.push(
        'Gemini import requires either content or file_path. ' +
        'Export from Google Takeout: takeout.google.com → select Gemini Apps → export. ' +
        'Provide the "My Activity.html" file path.',
      );
      return { facts: [], chunks: [], totalMessages: 0, warnings, errors };
    }

    if (onProgress) {
      onProgress({ current: 0, total: 0, phase: 'parsing', message: 'Parsing Gemini HTML...' });
    }

    // Parse HTML into chronological prompt/response entries.
    const entries = this.parseHTML(content);
    if (entries.length === 0) {
      // Empty export (or unrecognized markup) is a warning, not an error.
      warnings.push('No conversation entries found in the HTML file.');
      return { facts: [], chunks: [], totalMessages: 0, warnings, errors };
    }

    // Group into pseudo-sessions by temporal proximity (SESSION_GAP_MINUTES).
    const sessions = this.groupSessions(entries);

    if (onProgress) {
      onProgress({
        current: 0,
        total: sessions.length,
        phase: 'parsing',
        message: `Parsed ${entries.length} entries into ${sessions.length} sessions`,
      });
    }

    // Build conversation chunks from sessions.
    const chunks: ConversationChunk[] = [];
    let totalMessages = 0;

    for (const session of sessions) {
      // Flatten each entry into ordered user/assistant messages, skipping
      // whichever side is empty.
      const messages: Array<{ role: 'user' | 'assistant'; text: string }> = [];
      for (const entry of session) {
        if (entry.userPrompt) messages.push({ role: 'user', text: entry.userPrompt });
        if (entry.aiResponse) messages.push({ role: 'assistant', text: entry.aiResponse });
      }
      if (messages.length === 0) continue;

      totalMessages += messages.length;
      // All sub-chunks of a session share the session's start timestamp.
      const timestamp = session[0].timestampISO;

      // Sub-chunk large sessions into batches of CHUNK_SIZE messages.
      for (let i = 0; i < messages.length; i += CHUNK_SIZE) {
        const batch = messages.slice(i, i + CHUNK_SIZE);
        const chunkIdx = Math.floor(i / CHUNK_SIZE) + 1;
        const totalChunks = Math.ceil(messages.length / CHUNK_SIZE);
        const title = totalChunks > 1
          ? `Gemini session (part ${chunkIdx}/${totalChunks})`
          : 'Gemini session';

        chunks.push({ title, messages: batch, timestamp });
      }
    }

    return {
      facts: [],
      chunks,
      totalMessages,
      warnings,
      errors,
      source_metadata: {
        format: 'gemini-takeout-html',
        total_entries: entries.length,
        sessions_count: sessions.length,
        chunks_count: chunks.length,
        total_messages: totalMessages,
        // Entries are sorted chronologically by parseHTML, so first/last
        // give the export's date range.
        date_range: {
          earliest: entries[0]?.timestampISO,
          latest: entries[entries.length - 1]?.timestampISO,
        },
      },
    };
  }

  /**
   * Parse Gemini Takeout HTML into structured entries.
   *
   * Each outer-cell div contains: "Prompted USER_TEXT<br>TIMESTAMP<br>RESPONSE_HTML"
   * all within one content-cell. The "Prompted" marker is followed by a
   * literal non-breaking space (U+00A0) in the Takeout markup; cells without
   * it (canvas, feedback, etc.) are skipped. Entries where both sides are
   * shorter than 3 characters are dropped as noise.
   *
   * Returns entries sorted oldest-first (the HTML lists newest-first).
   */
  private parseHTML(html: string): GeminiEntry[] {
    const entries: GeminiEntry[] = [];
    // Each cell runs from one outer-cell div to the next (or end of input);
    // the lookahead keeps the next cell's opening tag out of the capture.
    const cellPattern = /<div class="outer-cell[^"]*">([\s\S]*?)(?=<div class="outer-cell|$)/g;
    let match: RegExpExecArray | null;

    while ((match = cellPattern.exec(html)) !== null) {
      const cell = match[1];

      // Only process "Prompted" entries (skip canvas, feedback)
      const promptedIdx = cell.indexOf('Prompted\u00a0');
      if (promptedIdx === -1) continue;

      // Extract timestamp (first date-like token anywhere in the cell).
      const tsMatch = cell.match(/(\d{1,2}\s+\w{3}\s+\d{4},\s+\d{2}:\d{2}:\d{2}\s+\w+)/);
      if (!tsMatch) continue;
      const timestampISO = parseTimestamp(tsMatch[1]);
      if (!timestampISO) continue;

      // Split on the timestamp to separate user prompt (before it) from
      // AI response (after it).
      const afterPrompted = cell.substring(promptedIdx + 'Prompted\u00a0'.length);
      const tsPattern = /(\d{1,2}\s+\w{3}\s+\d{4},\s+\d{2}:\d{2}:\d{2}\s+\w+)/;
      const tsIdx = afterPrompted.search(tsPattern);

      let userPrompt = '';
      let aiResponse = '';

      // tsIdx > 0 means there is prompt text between "Prompted" and the
      // timestamp; tsIdx <= 0 leaves both sides empty and the entry is
      // dropped by the length check below.
      if (tsIdx > 0) {
        userPrompt = stripHTML(decodeEntities(afterPrompted.substring(0, tsIdx))).trim();

        const tsInner = afterPrompted.match(tsPattern);
        if (tsInner) {
          // Response starts after the timestamp; drop one leading <br>.
          const afterTs = afterPrompted.substring(tsIdx + tsInner[0].length)
            .replace(/^\s*<br\s*\/?>\s*/i, '');
          // Cut the response off at the next content-cell boundary, if any.
          const endDiv = afterTs.search(/<\/div>\s*<div class="content-cell/);
          const rawResp = endDiv !== -1 ? afterTs.substring(0, endDiv) : afterTs;
          aiResponse = stripHTML(decodeEntities(rawResp)).trim();
        }
      }

      // Drop near-empty entries (both sides under 3 chars).
      if (userPrompt.length < 3 && aiResponse.length < 3) continue;

      entries.push({
        userPrompt,
        aiResponse,
        timestampISO,
        timestampUnix: Math.floor(new Date(timestampISO).getTime() / 1000),
      });
    }

    // Sort chronologically (HTML is newest-first)
    entries.sort((a, b) => a.timestampUnix - b.timestampUnix);
    return entries;
  }

  /**
   * Group chronologically-sorted entries into pseudo-sessions: a gap of
   * more than SESSION_GAP_MINUTES between consecutive entries starts a
   * new session. Returns [] for empty input.
   */
  private groupSessions(entries: GeminiEntry[]): GeminiEntry[][] {
    if (entries.length === 0) return [];
    const sessions: GeminiEntry[][] = [];
    let current: GeminiEntry[] = [entries[0]];

    for (let i = 1; i < entries.length; i++) {
      const gap = entries[i].timestampUnix - entries[i - 1].timestampUnix;
      if (gap > SESSION_GAP_MINUTES * 60) {
        sessions.push(current);
        current = [entries[i]];
      } else {
        current.push(entries[i]);
      }
    }
    // Flush the final in-progress session.
    if (current.length > 0) sessions.push(current);
    return sessions;
  }
}
|
package/import-adapters/index.ts
CHANGED
|
@@ -2,15 +2,24 @@ export { BaseImportAdapter } from './base-adapter.js';
|
|
|
2
2
|
export * from './types.js';
|
|
3
3
|
export { Mem0Adapter } from './mem0-adapter.js';
|
|
4
4
|
export { MCPMemoryAdapter } from './mcp-memory-adapter.js';
|
|
5
|
+
export { ChatGPTAdapter } from './chatgpt-adapter.js';
|
|
6
|
+
export { ClaudeAdapter } from './claude-adapter.js';
|
|
7
|
+
export { GeminiAdapter } from './gemini-adapter.js';
|
|
5
8
|
|
|
6
9
|
import type { ImportSource } from './types.js';
|
|
7
10
|
import { Mem0Adapter } from './mem0-adapter.js';
|
|
8
11
|
import { MCPMemoryAdapter } from './mcp-memory-adapter.js';
|
|
12
|
+
import { ChatGPTAdapter } from './chatgpt-adapter.js';
|
|
13
|
+
import { ClaudeAdapter } from './claude-adapter.js';
|
|
14
|
+
import { GeminiAdapter } from './gemini-adapter.js';
|
|
9
15
|
import type { BaseImportAdapter } from './base-adapter.js';
|
|
10
16
|
|
|
11
17
|
const ADAPTERS: Partial<Record<ImportSource, () => BaseImportAdapter>> = {
|
|
12
18
|
'mem0': () => new Mem0Adapter(),
|
|
13
19
|
'mcp-memory': () => new MCPMemoryAdapter(),
|
|
20
|
+
'chatgpt': () => new ChatGPTAdapter(),
|
|
21
|
+
'claude': () => new ClaudeAdapter(),
|
|
22
|
+
'gemini': () => new GeminiAdapter(),
|
|
14
23
|
};
|
|
15
24
|
|
|
16
25
|
export function getAdapter(source: ImportSource): BaseImportAdapter {
|
|
@@ -67,7 +67,7 @@ export class MCPMemoryAdapter extends BaseImportAdapter {
|
|
|
67
67
|
content = fs.readFileSync(resolvedPath, 'utf-8');
|
|
68
68
|
} catch (e) {
|
|
69
69
|
errors.push(`Failed to read file: ${e instanceof Error ? e.message : 'Unknown error'}`);
|
|
70
|
-
return { facts: [], warnings, errors };
|
|
70
|
+
return { facts: [], chunks: [], totalMessages: 0, warnings, errors };
|
|
71
71
|
}
|
|
72
72
|
} else {
|
|
73
73
|
// Try default MCP memory path
|
|
@@ -80,7 +80,7 @@ export class MCPMemoryAdapter extends BaseImportAdapter {
|
|
|
80
80
|
'No content, file_path, or file at default path (~/.mcp-memory/memory.jsonl). ' +
|
|
81
81
|
'Provide the memory.jsonl content or file path.',
|
|
82
82
|
);
|
|
83
|
-
return { facts: [], warnings, errors };
|
|
83
|
+
return { facts: [], chunks: [], totalMessages: 0, warnings, errors };
|
|
84
84
|
}
|
|
85
85
|
}
|
|
86
86
|
|
|
@@ -170,6 +170,8 @@ export class MCPMemoryAdapter extends BaseImportAdapter {
|
|
|
170
170
|
|
|
171
171
|
return {
|
|
172
172
|
facts,
|
|
173
|
+
chunks: [],
|
|
174
|
+
totalMessages: 0,
|
|
173
175
|
warnings,
|
|
174
176
|
errors,
|
|
175
177
|
source_metadata: {
|
|
@@ -81,7 +81,7 @@ export class Mem0Adapter extends BaseImportAdapter {
|
|
|
81
81
|
);
|
|
82
82
|
} else {
|
|
83
83
|
errors.push('Mem0 import requires either content (export file) or api_key');
|
|
84
|
-
return { facts: [], warnings, errors };
|
|
84
|
+
return { facts: [], chunks: [], totalMessages: 0, warnings, errors };
|
|
85
85
|
}
|
|
86
86
|
|
|
87
87
|
if (onProgress) {
|
|
@@ -110,7 +110,7 @@ export class Mem0Adapter extends BaseImportAdapter {
|
|
|
110
110
|
warnings.push(`${invalidCount} memories had invalid/empty text and were skipped`);
|
|
111
111
|
}
|
|
112
112
|
|
|
113
|
-
return { facts, warnings, errors, source_metadata: { total_from_source: memories.length } };
|
|
113
|
+
return { facts, chunks: [], totalMessages: 0, warnings, errors, source_metadata: { total_from_source: memories.length } };
|
|
114
114
|
}
|
|
115
115
|
|
|
116
116
|
/**
|
package/import-adapters/types.ts
CHANGED
|
@@ -19,7 +19,7 @@ export interface NormalizedFact {
|
|
|
19
19
|
tags?: string[];
|
|
20
20
|
}
|
|
21
21
|
|
|
22
|
-
export type ImportSource = 'mem0' | 'mcp-memory' | 'memoclaw' | 'generic-json' | 'generic-csv';
|
|
22
|
+
export type ImportSource = 'mem0' | 'mcp-memory' | 'chatgpt' | 'claude' | 'gemini' | 'memoclaw' | 'generic-json' | 'generic-csv';
|
|
23
23
|
|
|
24
24
|
/**
|
|
25
25
|
* What the user passes to the import tool.
|
|
@@ -73,15 +73,38 @@ export interface ImportResult {
|
|
|
73
73
|
export type ProgressCallback = (progress: {
|
|
74
74
|
current: number;
|
|
75
75
|
total: number;
|
|
76
|
-
phase: 'fetching' | 'parsing' | 'storing';
|
|
76
|
+
phase: 'fetching' | 'parsing' | 'storing' | 'extracting';
|
|
77
77
|
message: string;
|
|
78
78
|
}) => void;
|
|
79
79
|
|
|
80
|
+
/**
|
|
81
|
+
* A chunk of conversation messages for LLM-based fact extraction.
|
|
82
|
+
* Adapters that parse conversation data (ChatGPT, Claude) return these
|
|
83
|
+
* instead of pre-extracted facts, delegating extraction to the LLM.
|
|
84
|
+
*/
|
|
85
|
+
export interface ConversationChunk {
|
|
86
|
+
/** Human-readable title for progress reporting */
|
|
87
|
+
title: string;
|
|
88
|
+
/** Ordered messages in this chunk */
|
|
89
|
+
messages: Array<{ role: 'user' | 'assistant'; text: string }>;
|
|
90
|
+
/** Original timestamp (ISO 8601) if available */
|
|
91
|
+
timestamp?: string;
|
|
92
|
+
}
|
|
93
|
+
|
|
80
94
|
/**
|
|
81
95
|
* Adapter parse result — returned by each adapter's parse method.
|
|
96
|
+
*
|
|
97
|
+
* Adapters return EITHER `facts` (pre-structured sources like Mem0, MCP Memory)
|
|
98
|
+
* OR `chunks` (conversation-based sources like ChatGPT, Claude) that need
|
|
99
|
+
* LLM extraction. The caller checks which field is populated.
|
|
82
100
|
*/
|
|
83
101
|
export interface AdapterParseResult {
|
|
102
|
+
/** Pre-structured facts (Mem0, MCP Memory adapters) */
|
|
84
103
|
facts: NormalizedFact[];
|
|
104
|
+
/** Conversation chunks needing LLM extraction (ChatGPT, Claude adapters) */
|
|
105
|
+
chunks: ConversationChunk[];
|
|
106
|
+
/** Total message count across all chunks */
|
|
107
|
+
totalMessages: number;
|
|
85
108
|
warnings: string[];
|
|
86
109
|
errors: string[];
|
|
87
110
|
/** Metadata about the source (for logging) */
|