@totalreclaw/totalreclaw 1.4.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,12 +8,11 @@ import type {
8
8
  /**
9
9
  * Abstract base class for import adapters.
10
10
  *
11
- * Each adapter:
12
- * 1. Fetches or reads source data
13
- * 2. Parses into NormalizedFact[]
14
- * 3. Validates each fact
11
+ * Adapters are PARSERS only — they convert raw export data into either:
12
+ * - Pre-structured facts (Mem0, MCP Memory — facts are already atomic)
13
+ * - Conversation chunks (ChatGPT, Claude — need LLM extraction)
15
14
  *
16
- * The caller (import tool) handles encryption + storage.
15
+ * The caller (import tool) handles LLM extraction, encryption, and storage.
17
16
  */
18
17
  export abstract class BaseImportAdapter {
19
18
  abstract readonly source: ImportSource;
@@ -0,0 +1,323 @@
1
+ import { BaseImportAdapter } from './base-adapter.js';
2
+ import type {
3
+ ImportSource,
4
+ AdapterParseResult,
5
+ ConversationChunk,
6
+ ProgressCallback,
7
+ } from './types.js';
8
+ import fs from 'node:fs';
9
+ import os from 'node:os';
10
+
11
+ // ── ChatGPT conversations.json types ────────────────────────────────────────
12
+
13
/** One message payload inside a conversation's mapping tree. */
interface ChatGPTMessage {
  id: string;
  // Only 'user' and 'assistant' messages are collected; 'system'/'tool' are skipped.
  author: { role: 'user' | 'assistant' | 'system' | 'tool'; name?: string };
  content: {
    content_type: string;
    // Parts may mix text with non-text payloads (e.g. image objects);
    // only string parts are extracted.
    parts?: (string | null | Record<string, unknown>)[];
  };
  // Unix epoch seconds (converted with * 1000 before Date construction below).
  create_time?: number;
  metadata?: Record<string, unknown>;
}

/** Node of the conversation tree: parent/children links plus an optional message. */
interface ChatGPTMappingNode {
  id: string;
  // null/absent on structural nodes (e.g. the synthetic root).
  message?: ChatGPTMessage | null;
  parent?: string | null;
  children: string[];
}

/** Top-level conversation record from conversations.json. */
interface ChatGPTConversation {
  id?: string;
  title?: string;
  // Unix epoch seconds; used as the chunk timestamp when present.
  create_time?: number;
  update_time?: number;
  mapping: Record<string, ChatGPTMappingNode>;
}

/** Maximum messages per conversation chunk for LLM extraction. */
const CHUNK_SIZE = 20;
41
+
42
+ // ── ChatGPT Adapter ─────────────────────────────────────────────────────────
43
+
44
+ export class ChatGPTAdapter extends BaseImportAdapter {
45
+ readonly source: ImportSource = 'chatgpt';
46
+ readonly displayName = 'ChatGPT';
47
+
48
+ async parse(
49
+ input: { content?: string; file_path?: string },
50
+ onProgress?: ProgressCallback,
51
+ ): Promise<AdapterParseResult> {
52
+ const warnings: string[] = [];
53
+ const errors: string[] = [];
54
+
55
+ let content: string;
56
+
57
+ if (input.content) {
58
+ content = input.content;
59
+ } else if (input.file_path) {
60
+ try {
61
+ const resolvedPath = input.file_path.replace(/^~/, os.homedir());
62
+ content = fs.readFileSync(resolvedPath, 'utf-8');
63
+ } catch (e) {
64
+ errors.push(`Failed to read file: ${e instanceof Error ? e.message : 'Unknown error'}`);
65
+ return { facts: [], chunks: [], totalMessages: 0, warnings, errors };
66
+ }
67
+ } else {
68
+ errors.push(
69
+ 'ChatGPT import requires either content (pasted text or JSON) or file_path. ' +
70
+ 'Export from ChatGPT: Settings -> Data Controls -> Export Data (conversations.json), ' +
71
+ 'or copy from Settings -> Personalization -> Memory -> Manage.',
72
+ );
73
+ return { facts: [], chunks: [], totalMessages: 0, warnings, errors };
74
+ }
75
+
76
+ // Detect format: JSON array = conversations.json, plain text = memories
77
+ const trimmed = content.trim();
78
+
79
+ if (trimmed.startsWith('[') || trimmed.startsWith('{')) {
80
+ // Try to parse as JSON (conversations.json or memory list)
81
+ return this.parseConversationsJson(trimmed, warnings, errors, onProgress);
82
+ }
83
+
84
+ // Plain text: ChatGPT memories (one per line)
85
+ return this.parseMemoriesText(trimmed, warnings, errors, onProgress);
86
+ }
87
+
88
+ /**
89
+ * Parse ChatGPT conversations.json — full export with mapping tree.
90
+ * Returns conversation chunks for LLM extraction (no pattern matching).
91
+ */
92
+ private parseConversationsJson(
93
+ content: string,
94
+ warnings: string[],
95
+ errors: string[],
96
+ onProgress?: ProgressCallback,
97
+ ): AdapterParseResult {
98
+ let conversations: ChatGPTConversation[];
99
+
100
+ try {
101
+ const data = JSON.parse(content);
102
+
103
+ if (Array.isArray(data)) {
104
+ conversations = data;
105
+ } else if (data.conversations && Array.isArray(data.conversations)) {
106
+ conversations = data.conversations;
107
+ } else if (data.mapping) {
108
+ // Single conversation object
109
+ conversations = [data];
110
+ } else {
111
+ errors.push(
112
+ 'Unrecognized ChatGPT format. Expected an array of conversation objects (conversations.json) ' +
113
+ 'or plain text (ChatGPT memories).',
114
+ );
115
+ return { facts: [], chunks: [], totalMessages: 0, warnings, errors };
116
+ }
117
+ } catch (e) {
118
+ errors.push(`Failed to parse ChatGPT JSON: ${e instanceof Error ? e.message : 'Unknown error'}`);
119
+ return { facts: [], chunks: [], totalMessages: 0, warnings, errors };
120
+ }
121
+
122
+ if (onProgress) {
123
+ onProgress({
124
+ current: 0,
125
+ total: conversations.length,
126
+ phase: 'parsing',
127
+ message: `Parsing ${conversations.length} ChatGPT conversations...`,
128
+ });
129
+ }
130
+
131
+ const chunks: ConversationChunk[] = [];
132
+ let totalMessages = 0;
133
+ let convIndex = 0;
134
+
135
+ for (const conv of conversations) {
136
+ if (!conv.mapping) {
137
+ warnings.push(`Conversation "${conv.title || 'untitled'}" has no mapping — skipped`);
138
+ continue;
139
+ }
140
+
141
+ // Extract user + assistant messages in chronological order
142
+ const messages = this.extractMessages(conv.mapping);
143
+ if (messages.length === 0) continue;
144
+
145
+ totalMessages += messages.length;
146
+
147
+ // Determine timestamp from first message or conversation
148
+ const timestamp = conv.create_time
149
+ ? new Date(conv.create_time * 1000).toISOString()
150
+ : undefined;
151
+
152
+ const title = conv.title || 'Untitled Conversation';
153
+
154
+ // Chunk into batches of CHUNK_SIZE messages
155
+ for (let i = 0; i < messages.length; i += CHUNK_SIZE) {
156
+ const batch = messages.slice(i, i + CHUNK_SIZE);
157
+ const chunkIndex = Math.floor(i / CHUNK_SIZE) + 1;
158
+ const totalChunks = Math.ceil(messages.length / CHUNK_SIZE);
159
+
160
+ chunks.push({
161
+ title: totalChunks > 1 ? `${title} (part ${chunkIndex}/${totalChunks})` : title,
162
+ messages: batch,
163
+ timestamp,
164
+ });
165
+ }
166
+
167
+ convIndex++;
168
+ if (onProgress && convIndex % 50 === 0) {
169
+ onProgress({
170
+ current: convIndex,
171
+ total: conversations.length,
172
+ phase: 'parsing',
173
+ message: `Parsed ${convIndex}/${conversations.length} conversations (${chunks.length} chunks, ${totalMessages} messages)...`,
174
+ });
175
+ }
176
+ }
177
+
178
+ if (chunks.length === 0 && conversations.length > 0) {
179
+ warnings.push(
180
+ `Parsed ${conversations.length} conversations but found no messages with text content.`,
181
+ );
182
+ }
183
+
184
+ return {
185
+ facts: [],
186
+ chunks,
187
+ totalMessages,
188
+ warnings,
189
+ errors,
190
+ source_metadata: {
191
+ format: 'conversations.json',
192
+ conversations_count: conversations.length,
193
+ chunks_count: chunks.length,
194
+ total_messages: totalMessages,
195
+ },
196
+ };
197
+ }
198
+
199
+ /**
200
+ * Parse ChatGPT memories — plain text, one memory per line.
201
+ * Users copy this from Settings -> Personalization -> Memory -> Manage.
202
+ *
203
+ * Each line becomes a single-message conversation chunk for LLM extraction.
204
+ */
205
+ private parseMemoriesText(
206
+ content: string,
207
+ warnings: string[],
208
+ errors: string[],
209
+ onProgress?: ProgressCallback,
210
+ ): AdapterParseResult {
211
+ // Split by newlines and filter empty lines
212
+ const lines = content.split('\n')
213
+ .map((line) => line.trim())
214
+ .filter((line) => line.length > 0)
215
+ // Skip common header lines
216
+ .filter((line) => !/^(?:memories?|chatgpt memories?|my memories?|saved memories?):?\s*$/i.test(line));
217
+
218
+ if (onProgress) {
219
+ onProgress({
220
+ current: 0,
221
+ total: lines.length,
222
+ phase: 'parsing',
223
+ message: `Parsing ${lines.length} ChatGPT memories...`,
224
+ });
225
+ }
226
+
227
+ // Clean lines: strip bullet/dash/number markers
228
+ const cleanedLines = lines.map((line) =>
229
+ line
230
+ .replace(/^[-*\u2022]\s+/, '') // bullet points
231
+ .replace(/^\d+[.)]\s+/, '') // numbered lists
232
+ .trim(),
233
+ ).filter((line) => line.length >= 3);
234
+
235
+ // Group all memories into chunks of CHUNK_SIZE for efficient LLM extraction
236
+ const chunks: ConversationChunk[] = [];
237
+ for (let i = 0; i < cleanedLines.length; i += CHUNK_SIZE) {
238
+ const batch = cleanedLines.slice(i, i + CHUNK_SIZE);
239
+ chunks.push({
240
+ title: `ChatGPT memories (${i + 1}-${Math.min(i + CHUNK_SIZE, cleanedLines.length)})`,
241
+ messages: batch.map((text) => ({ role: 'user' as const, text })),
242
+ });
243
+ }
244
+
245
+ return {
246
+ facts: [],
247
+ chunks,
248
+ totalMessages: cleanedLines.length,
249
+ warnings,
250
+ errors,
251
+ source_metadata: {
252
+ format: 'memories-text',
253
+ total_lines: lines.length,
254
+ chunks_count: chunks.length,
255
+ },
256
+ };
257
+ }
258
+
259
+ /**
260
+ * Traverse the mapping tree and extract user + assistant messages in chronological order.
261
+ * Both roles are included because the assistant's response often provides context
262
+ * that helps the LLM understand what the user meant.
263
+ */
264
+ private extractMessages(
265
+ mapping: Record<string, ChatGPTMappingNode>,
266
+ ): Array<{ role: 'user' | 'assistant'; text: string }> {
267
+ // Find the root node (the one with no parent or parent not in mapping)
268
+ let rootId: string | undefined;
269
+ for (const [id, node] of Object.entries(mapping)) {
270
+ if (!node.parent || !mapping[node.parent]) {
271
+ rootId = id;
272
+ break;
273
+ }
274
+ }
275
+
276
+ if (!rootId) return [];
277
+
278
+ // Walk the tree breadth-first, following children in order (main thread)
279
+ const messages: Array<{ role: 'user' | 'assistant'; text: string }> = [];
280
+ const visited = new Set<string>();
281
+ const queue: string[] = [rootId];
282
+
283
+ while (queue.length > 0) {
284
+ const nodeId = queue.shift()!;
285
+ if (visited.has(nodeId)) continue;
286
+ visited.add(nodeId);
287
+
288
+ const node = mapping[nodeId];
289
+ if (!node) continue;
290
+
291
+ const role = node.message?.author?.role;
292
+ // Only collect user and assistant messages (skip system, tool)
293
+ if (role === 'user' || role === 'assistant') {
294
+ const textParts = this.extractTextFromParts(node.message?.content?.parts);
295
+ if (textParts && textParts.length >= 3) {
296
+ messages.push({ role, text: textParts });
297
+ }
298
+ }
299
+
300
+ // Follow children (add them to queue in order)
301
+ for (const childId of node.children || []) {
302
+ queue.push(childId);
303
+ }
304
+ }
305
+
306
+ return messages;
307
+ }
308
+
309
+ /**
310
+ * Extract plain text from message content parts.
311
+ * Parts can be strings, null, or complex objects (images, etc.) -- we only want strings.
312
+ */
313
+ private extractTextFromParts(parts?: (string | null | Record<string, unknown>)[]): string | null {
314
+ if (!parts || parts.length === 0) return null;
315
+
316
+ const textParts = parts
317
+ .filter((p): p is string => typeof p === 'string' && p.trim().length > 0);
318
+
319
+ if (textParts.length === 0) return null;
320
+
321
+ return textParts.join(' ').trim();
322
+ }
323
+ }
@@ -0,0 +1,146 @@
1
+ import { BaseImportAdapter } from './base-adapter.js';
2
+ import type {
3
+ ImportSource,
4
+ AdapterParseResult,
5
+ ConversationChunk,
6
+ ProgressCallback,
7
+ } from './types.js';
8
+ import fs from 'node:fs';
9
+ import os from 'node:os';
10
+
11
/**
 * Pattern for lines that start with a date prefix.
 * Claude memory entries sometimes have: [2026-03-15] - User prefers TypeScript
 * The capture group (YYYY-MM-DD) is used as the entry's timestamp.
 */
const DATE_PREFIX_RE = /^\[(\d{4}-\d{2}-\d{2})\]\s*[-:]\s*/;

/**
 * Pattern for bullet-prefixed lines: "-", "*", or "\u2022" followed by whitespace.
 */
const BULLET_PREFIX_RE = /^[-*\u2022]\s+/;

/**
 * Pattern for numbered list lines, e.g. "1. " or "2) ".
 */
const NUMBERED_PREFIX_RE = /^\d+[.)]\s+/;

/** Maximum messages per conversation chunk for LLM extraction. */
const CHUNK_SIZE = 20;
29
+
30
+ export class ClaudeAdapter extends BaseImportAdapter {
31
+ readonly source: ImportSource = 'claude';
32
+ readonly displayName = 'Claude';
33
+
34
+ async parse(
35
+ input: { content?: string; file_path?: string },
36
+ onProgress?: ProgressCallback,
37
+ ): Promise<AdapterParseResult> {
38
+ const warnings: string[] = [];
39
+ const errors: string[] = [];
40
+
41
+ let content: string;
42
+
43
+ if (input.content) {
44
+ content = input.content;
45
+ } else if (input.file_path) {
46
+ try {
47
+ const resolvedPath = input.file_path.replace(/^~/, os.homedir());
48
+ content = fs.readFileSync(resolvedPath, 'utf-8');
49
+ } catch (e) {
50
+ errors.push(`Failed to read file: ${e instanceof Error ? e.message : 'Unknown error'}`);
51
+ return { facts: [], chunks: [], totalMessages: 0, warnings, errors };
52
+ }
53
+ } else {
54
+ errors.push(
55
+ 'Claude import requires either content (pasted text) or file_path. ' +
56
+ 'Copy your memories from Claude: Settings -> Memory -> select all and copy.',
57
+ );
58
+ return { facts: [], chunks: [], totalMessages: 0, warnings, errors };
59
+ }
60
+
61
+ // Claude memory export is plain text, one fact per line.
62
+ return this.parseMemoriesText(content.trim(), warnings, errors, onProgress);
63
+ }
64
+
65
+ /**
66
+ * Parse Claude memories — plain text, one memory per line.
67
+ * Returns conversation chunks for LLM extraction (no pattern matching).
68
+ *
69
+ * Each line is cleaned (date prefixes, bullets, numbers stripped) and
70
+ * grouped into chunks for the LLM to process.
71
+ */
72
+ private parseMemoriesText(
73
+ content: string,
74
+ warnings: string[],
75
+ errors: string[],
76
+ onProgress?: ProgressCallback,
77
+ ): AdapterParseResult {
78
+ // Split by newlines and filter
79
+ const lines = content.split('\n')
80
+ .map((line) => line.trim())
81
+ .filter((line) => line.length > 0)
82
+ // Skip common header lines
83
+ .filter((line) => !/^(?:memories?|claude memories?|my memories?|saved memories?):?\s*$/i.test(line));
84
+
85
+ if (onProgress) {
86
+ onProgress({
87
+ current: 0,
88
+ total: lines.length,
89
+ phase: 'parsing',
90
+ message: `Parsing ${lines.length} Claude memories...`,
91
+ });
92
+ }
93
+
94
+ // Clean each line: extract date, strip formatting
95
+ const cleanedEntries: Array<{ text: string; timestamp?: string }> = [];
96
+ for (const line of lines) {
97
+ let cleaned = line;
98
+ let timestamp: string | undefined;
99
+
100
+ // Extract date prefix if present
101
+ const dateMatch = cleaned.match(DATE_PREFIX_RE);
102
+ if (dateMatch) {
103
+ timestamp = dateMatch[1];
104
+ cleaned = cleaned.replace(DATE_PREFIX_RE, '');
105
+ }
106
+
107
+ // Strip bullet/numbering markers
108
+ cleaned = cleaned
109
+ .replace(BULLET_PREFIX_RE, '')
110
+ .replace(NUMBERED_PREFIX_RE, '')
111
+ .trim();
112
+
113
+ if (cleaned.length >= 3) {
114
+ cleanedEntries.push({ text: cleaned, timestamp });
115
+ }
116
+ }
117
+
118
+ // Group memories into chunks of CHUNK_SIZE for efficient LLM extraction
119
+ const chunks: ConversationChunk[] = [];
120
+ for (let i = 0; i < cleanedEntries.length; i += CHUNK_SIZE) {
121
+ const batch = cleanedEntries.slice(i, i + CHUNK_SIZE);
122
+
123
+ // Use the timestamp from the first entry in the batch (if available)
124
+ const batchTimestamp = batch.find((e) => e.timestamp)?.timestamp;
125
+
126
+ chunks.push({
127
+ title: `Claude memories (${i + 1}-${Math.min(i + CHUNK_SIZE, cleanedEntries.length)})`,
128
+ messages: batch.map((entry) => ({ role: 'user' as const, text: entry.text })),
129
+ timestamp: batchTimestamp,
130
+ });
131
+ }
132
+
133
+ return {
134
+ facts: [],
135
+ chunks,
136
+ totalMessages: cleanedEntries.length,
137
+ warnings,
138
+ errors,
139
+ source_metadata: {
140
+ format: 'memories-text',
141
+ total_lines: lines.length,
142
+ chunks_count: chunks.length,
143
+ },
144
+ };
145
+ }
146
+ }