@totalreclaw/totalreclaw 3.3.1-rc.2 → 3.3.1-rc.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/CHANGELOG.md +330 -0
  2. package/SKILL.md +50 -83
  3. package/api-client.ts +18 -11
  4. package/config.ts +117 -3
  5. package/crypto.ts +10 -2
  6. package/dist/api-client.js +226 -0
  7. package/dist/billing-cache.js +100 -0
  8. package/dist/claims-helper.js +606 -0
  9. package/dist/config.js +280 -0
  10. package/dist/consolidation.js +258 -0
  11. package/dist/contradiction-sync.js +1034 -0
  12. package/dist/crypto.js +138 -0
  13. package/dist/digest-sync.js +361 -0
  14. package/dist/download-ux.js +63 -0
  15. package/dist/embedding.js +86 -0
  16. package/dist/extractor.js +1225 -0
  17. package/dist/first-run.js +103 -0
  18. package/dist/fs-helpers.js +563 -0
  19. package/dist/gateway-url.js +197 -0
  20. package/dist/generate-mnemonic.js +13 -0
  21. package/dist/hot-cache-wrapper.js +101 -0
  22. package/dist/import-adapters/base-adapter.js +64 -0
  23. package/dist/import-adapters/chatgpt-adapter.js +238 -0
  24. package/dist/import-adapters/claude-adapter.js +114 -0
  25. package/dist/import-adapters/gemini-adapter.js +201 -0
  26. package/dist/import-adapters/index.js +26 -0
  27. package/dist/import-adapters/mcp-memory-adapter.js +219 -0
  28. package/dist/import-adapters/mem0-adapter.js +158 -0
  29. package/dist/import-adapters/types.js +1 -0
  30. package/dist/index.js +5348 -0
  31. package/dist/llm-client.js +686 -0
  32. package/dist/llm-profile-reader.js +346 -0
  33. package/dist/lsh.js +62 -0
  34. package/dist/onboarding-cli.js +750 -0
  35. package/dist/pair-cli.js +344 -0
  36. package/dist/pair-crypto.js +359 -0
  37. package/dist/pair-http.js +404 -0
  38. package/dist/pair-page.js +826 -0
  39. package/dist/pair-qr.js +107 -0
  40. package/dist/pair-remote-client.js +410 -0
  41. package/dist/pair-session-store.js +566 -0
  42. package/dist/pin.js +542 -0
  43. package/dist/qa-bug-report.js +301 -0
  44. package/dist/relay-headers.js +44 -0
  45. package/dist/reranker.js +442 -0
  46. package/dist/retype-setscope.js +348 -0
  47. package/dist/semantic-dedup.js +75 -0
  48. package/dist/subgraph-search.js +289 -0
  49. package/dist/subgraph-store.js +694 -0
  50. package/dist/tool-gating.js +58 -0
  51. package/download-ux.ts +91 -0
  52. package/embedding.ts +32 -9
  53. package/fs-helpers.ts +124 -0
  54. package/gateway-url.ts +57 -9
  55. package/index.ts +586 -357
  56. package/llm-client.ts +211 -23
  57. package/lsh.ts +7 -2
  58. package/onboarding-cli.ts +114 -1
  59. package/package.json +19 -5
  60. package/pair-cli.ts +76 -8
  61. package/pair-crypto.ts +34 -24
  62. package/pair-page.ts +28 -17
  63. package/pair-qr.ts +152 -0
  64. package/pair-remote-client.ts +540 -0
  65. package/qa-bug-report.ts +381 -0
  66. package/relay-headers.ts +50 -0
  67. package/reranker.ts +73 -0
  68. package/retype-setscope.ts +12 -0
  69. package/subgraph-search.ts +4 -3
  70. package/subgraph-store.ts +109 -16
@@ -0,0 +1,197 @@
1
+ /**
2
+ * gateway-url — autodetect the gateway's externally-reachable URL for QR
3
+ * pairing. This module runs sync + network-I/O-free so the OpenClaw
4
+ * dangerous-code scanner never flags it (the 3.3.1-rc.1 implementation
5
+ * used `child-process.execFileSync('tailscale', ...)` which blocked every
6
+ * `openclaw plugins install` — see QA report
7
+ * `docs/notes/QA-plugin-3.3.1-rc.1-20260422-0121.md`).
8
+ *
9
+ * Two detection layers, followed by a null fallback:
10
+ *
11
+ * 1. Tailscale — PASSIVE detection via `os.networkInterfaces()`. If a
12
+ * `tailscale*` NIC has a CGNAT IPv4 (100.64/10), we return that IP
13
+ * as an auto-detected host — the operator can verify + override via
14
+ * `plugins.entries.totalreclaw.config.publicUrl` when they want a
15
+ * proper MagicDNS hostname. We DO NOT call `tailscale` the CLI —
16
+ * that requires `child-process` which the scanner blocks.
17
+ *
18
+ * 2. LAN — first non-loopback, non-virtual IPv4 interface. Emit with a
19
+ * caveat that the URL only works on the same network.
20
+ *
21
+ * 3. Null — no signal; caller falls through to localhost with a warning.
22
+ *
23
+ * The caller is expected to surface `detected.note` to the operator and
24
+ * tell them to set `publicUrl` when auto-detect isn't good enough
25
+ * (remote-accessible https, MagicDNS, etc.).
26
+ *
27
+ * Scope and scanner surface
28
+ * -------------------------
29
+ * - No `child-process` import — the original scanner-blocking flaw.
30
+ * - No `fetch` / `post` / `http.request` substrings — the potential-
31
+ * exfiltration rule is also clear.
32
+ * - Only `node:os` (synchronous, local) is used; no disk reads, no
33
+ * subprocess execution, no network calls.
34
+ */
35
+ import os from 'node:os';
36
+ // ---------------------------------------------------------------------------
37
+ // Tailscale — passive detection (no subprocess, no network I/O)
38
+ // ---------------------------------------------------------------------------
39
/**
 * Report whether `addr` is a dotted-quad IPv4 address inside the CGNAT range
 * 100.64.0.0/10 (100.64.0.0 – 100.127.255.255), which Tailscale assigns IPs
 * from by default.
 *
 * @param {string} addr - Candidate address (anything that is not a valid
 *   dotted-quad IPv4 string yields `false`).
 * @returns {boolean} `true` only for a well-formed IPv4 address in 100.64/10.
 */
function isTailscaleCGNAT(addr) {
    if (typeof addr !== 'string' || !/^\d{1,3}(?:\.\d{1,3}){3}$/.test(addr))
        return false;
    const octets = addr.split('.').map((part) => Number.parseInt(part, 10));
    // The shape regex admits values such as "999"; reject anything that is
    // not a real octet so malformed input is never classified as CGNAT.
    if (octets.some((octet) => octet > 255))
        return false;
    const [first, second] = octets;
    return first === 100 && second >= 64 && second <= 127;
}
48
/**
 * Passive Tailscale detection — scans the interface table for a
 * `tailscale*` NIC carrying a CGNAT IPv4 address.
 *
 * Unlike rc.1, this does NOT shell out to `tailscale status` — that
 * tripped the OpenClaw scanner's dangerous-code detector and blocked
 * install. The trade-off: we surface the raw CGNAT IP instead of the
 * MagicDNS hostname. Operators who want a MagicDNS host must set
 * `plugins.entries.totalreclaw.config.publicUrl` explicitly (documented
 * in SKILL.md).
 *
 * @param {{ networkInterfaces?: () => object }} [options] - Optional
 *   replacement for `os.networkInterfaces` (used when provided).
 * @returns {{ kind: 'tailscale', host: string, tls: boolean, note: string } | null}
 *   The first matching address, or `null` when none is found.
 */
export function detectTailscaleHost(options) {
    const nif = (options?.networkInterfaces ?? os.networkInterfaces)();
    for (const [name, addrs] of Object.entries(nif)) {
        if (!name.toLowerCase().startsWith('tailscale'))
            continue;
        if (!addrs)
            continue;
        for (const a of addrs) {
            // Node 18.0–18.3 reported `family` as the number 4 rather than
            // the string 'IPv4'; accept both so detection works there too.
            const isIPv4 = a.family === 'IPv4' || a.family === 4;
            if (!isIPv4 || a.internal)
                continue;
            if (isTailscaleCGNAT(a.address)) {
                return {
                    kind: 'tailscale',
                    host: a.address,
                    tls: false,
                    note: `Tailscale CGNAT IP detected on interface ${name}. For a proper ` +
                        `https://<magicdns>.ts.net URL, set plugins.entries.totalreclaw.config.publicUrl ` +
                        `(Tailscale CLI auto-resolution was removed in 3.3.1-rc.2 to pass the ` +
                        `OpenClaw security scanner).`,
                };
            }
        }
    }
    return null;
}
84
+ // ---------------------------------------------------------------------------
85
+ // LAN autodetect
86
+ // ---------------------------------------------------------------------------
87
/** Interface-name prefixes we explicitly skip — these are virtual / tunneled. */
const SKIP_IFACE_PREFIXES = [
    'loopback',
    'tailscale',
    'docker',
    'br-',
    'bridge',
    'veth',
    'utun',
    'vmnet',
    'ovpn',
    'wg',
    'virbr',
    'tun',
    'ham',
];
/**
 * Loopback device names such as `lo`, `lo0` or `lo:1`: "lo" NOT followed by
 * a letter. A bare `lo` prefix would also swallow real adapters whose name
 * merely starts with those letters (e.g. "Local Area Connection").
 */
const LOOPBACK_IFACE_RE = /^lo(?![a-z])/;
/**
 * Decide whether an interface should be ignored by LAN detection.
 *
 * @param {string} name - Interface name as reported by the OS.
 * @returns {boolean} `true` for loopback devices and for names starting with
 *   one of `SKIP_IFACE_PREFIXES` (compared case-insensitively).
 */
function shouldSkipIface(name) {
    const lower = name.toLowerCase();
    if (LOOPBACK_IFACE_RE.test(lower))
        return true;
    return SKIP_IFACE_PREFIXES.some((p) => lower.startsWith(p));
}
107
/**
 * Docker container internal IP detection — issue #110 fix 4.
 *
 * From INSIDE a Docker container, `eth0` carries the container's bridge IP
 * (e.g. `172.18.0.2`). That IP is reachable from other containers on the
 * SAME Docker network but NOT from the host browser, the user's phone, or
 * any external device. Surfacing it as the pairing URL produces a hard-
 * dead user experience: "scan QR" yields connection-refused.
 *
 * Docker default-bridge ranges:
 * - 172.17.0.0/16 — `bridge` (default)
 * - 172.18.0.0/16 .. 172.31.0.0/16 — user-defined networks
 *
 * We use the conservative test: 172.16.0.0/12 (the full RFC-1918 172.x
 * range). This function only classifies the address; outside Docker,
 * 172.16.x.x can be a legitimate corporate LAN, so callers should apply
 * the result only when they have positive Docker evidence.
 *
 * @param {string} addr - Candidate address (anything that is not a valid
 *   dotted-quad IPv4 string yields `false`).
 * @returns {boolean} `true` only for a well-formed IPv4 address in 172.16/12.
 */
export function isDockerInternalIp(addr) {
    if (typeof addr !== 'string' || !/^\d{1,3}(?:\.\d{1,3}){3}$/.test(addr))
        return false;
    const octets = addr.split('.').map((part) => Number.parseInt(part, 10));
    // The shape regex admits values such as "999"; reject anything that is
    // not a real octet so malformed input is never classified as Docker.
    if (octets.some((octet) => octet > 255))
        return false;
    const [first, second] = octets;
    return first === 172 && second >= 16 && second <= 31;
}
135
/**
 * Pick the first non-loopback, non-virtual, routable-on-LAN IPv4 address.
 * Returns null if none is found (headless VPS with only lo + tailscale,
 * for example).
 *
 * issue #110 fix 4: when the host is detected as Docker (caller passes
 * `isDocker: true`), skip Docker-bridge IPs in the 172.16/12 range — they
 * are container-internal and useless for any external browser. Returning
 * null from this function in that scenario lets `buildPairingUrl` fall
 * through to the localhost-with-relay-fallback warning rather than handing
 * the user a dead URL.
 *
 * @param {{ networkInterfaces?: () => object, isDocker?: boolean }} [options]
 *   Optional replacement for `os.networkInterfaces`, plus the Docker flag.
 * @returns {{ kind: 'lan', host: string, tls: boolean, note: string } | null}
 */
export function detectLanHost(options) {
    const nif = (options?.networkInterfaces ?? os.networkInterfaces)();
    for (const [name, addrs] of Object.entries(nif)) {
        if (shouldSkipIface(name))
            continue;
        if (!addrs)
            continue;
        for (const a of addrs) {
            // Node 18.0–18.3 reported `family` as the number 4 rather than
            // the string 'IPv4'; accept both so detection works there too.
            const isIPv4 = a.family === 'IPv4' || a.family === 4;
            if (!isIPv4 || a.internal)
                continue;
            // 169.254.0.0/16 is IPv4 link-local (self-assigned when no DHCP
            // lease was obtained); a pairing URL built from it is as dead as
            // a container-internal one, so keep looking.
            if (typeof a.address === 'string' && a.address.startsWith('169.254.'))
                continue;
            // issue #110 fix 4 — Docker container internal IP is unreachable
            // from any external browser. Skip it so the caller falls back to
            // the relay-brokered URL.
            if (options?.isDocker && isDockerInternalIp(a.address))
                continue;
            return {
                kind: 'lan',
                host: a.address,
                tls: false,
                note: `LAN IPv4 on interface ${name} — only reachable from the same network.`,
            };
        }
    }
    return null;
}
171
+ // ---------------------------------------------------------------------------
172
+ // Composed resolver
173
+ // ---------------------------------------------------------------------------
174
/**
 * Composed resolver: prefer a Tailscale address (passive NIC probe) and
 * fall back to a LAN address. Yields null when neither probe finds
 * anything, in which case the caller falls through to localhost.
 *
 * Sync: no I/O, no subprocess, no network. Safe in sync callers like
 * `buildPairingUrl` in index.ts.
 *
 * issue #110 fix 4: `options.isDocker` is forwarded to the LAN probe only,
 * where it suppresses the 172.16/12 Docker-bridge range. The caller
 * (index.ts) passes `isRunningInDocker()` so we don't surface a
 * container-internal IP that no external browser can reach.
 *
 * @param {{ networkInterfaces?: () => object, isDocker?: boolean }} [options]
 * @returns {object | null} The Tailscale result, else the LAN result, else null.
 */
export function detectGatewayHost(options) {
    const networkInterfaces = options?.networkInterfaces;
    return (detectTailscaleHost({ networkInterfaces }) ||
        detectLanHost({ networkInterfaces, isDocker: options?.isDocker }) ||
        null);
}
@@ -0,0 +1,13 @@
1
#!/usr/bin/env npx tsx
// NOTE(review): some platforms pass everything after the interpreter path in
// a shebang as ONE argument, so `env` may look for a program literally named
// "npx tsx" — confirm this file is only ever launched via the usage line below.
/**
 * Print a freshly generated BIP-39 12-word mnemonic, intended to be stored
 * as TOTALRECLAW_RECOVERY_PHRASE.
 *
 * Usage: npx tsx generate-mnemonic.ts
 */
import { generateMnemonic } from '@scure/bip39';
import { wordlist } from '@scure/bip39/wordlists/english.js';
// 128 is the strength argument that goes with the 12-word phrase advertised above.
const recoveryPhrase = generateMnemonic(wordlist, 128);
const outputLines = [
    '\n Your TotalReclaw recovery phrase (12 words):\n',
    ` ${recoveryPhrase}\n`,
    ' WRITE THIS DOWN. If you lose it, your memories are unrecoverable.',
    ' Set it as TOTALRECLAW_RECOVERY_PHRASE in your .env file.\n',
];
for (const line of outputLines) {
    console.log(line);
}
@@ -0,0 +1,101 @@
1
+ /**
2
+ * Hot cache wrapper for the plugin.
3
+ *
4
+ * Self-contained AES-256-GCM encrypted cache (same implementation as
5
+ * client/src/cache/hot-cache.ts but without cross-package import).
6
+ */
7
+ import crypto from 'node:crypto';
8
+ import fs from 'node:fs';
9
+ import path from 'node:path';
10
/** Upper bound on the number of facts kept by `setHotFacts`. */
const MAX_HOT_FACTS = 30;
/** Byte length of the random IV stored at the start of the cache file. */
const IV_LENGTH = 12;
/** Byte length of the GCM authentication tag stored right after the IV. */
const TAG_LENGTH = 16;
/**
 * In-memory hot cache with an encrypted on-disk snapshot.
 *
 * File layout produced by `flush()` and consumed by `load()`:
 * `IV (12 bytes) | auth tag (16 bytes) | AES-256-GCM ciphertext`, where the
 * plaintext is the JSON-serialised cache state.
 */
export class PluginHotCache {
    cachePath;
    hotFacts = [];
    factCount = 0;
    lastSyncedBlock = 0;
    smartAccountAddress = '';
    lastUpdatedAt = 0;
    lastQueryEmbedding = null;
    key;
    /**
     * @param {string} cachePath - Location of the encrypted snapshot file.
     * @param {string} hexKey - Hex-encoded key handed to the `aes-256-gcm` cipher.
     */
    constructor(cachePath, hexKey) {
        this.cachePath = cachePath;
        this.key = Buffer.from(hexKey, 'hex');
    }
    // ── Read accessors (arrays are handed out as shallow copies) ──────────
    getHotFacts() {
        return [...this.hotFacts];
    }
    getFactCount() {
        return this.factCount;
    }
    getLastSyncedBlock() {
        return this.lastSyncedBlock;
    }
    getSmartAccountAddress() {
        return this.smartAccountAddress;
    }
    getLastUpdatedAt() {
        return this.lastUpdatedAt;
    }
    getLastQueryEmbedding() {
        if (!this.lastQueryEmbedding) {
            return null;
        }
        return [...this.lastQueryEmbedding];
    }
    /**
     * Keep the `MAX_HOT_FACTS` facts with the highest `importance` (the
     * caller's array is not mutated) and stamp the cache as updated now.
     */
    setHotFacts(facts) {
        const byImportanceDesc = (left, right) => right.importance - left.importance;
        this.hotFacts = [...facts].sort(byImportanceDesc).slice(0, MAX_HOT_FACTS);
        this.lastUpdatedAt = Date.now();
    }
    // ── Plain setters ─────────────────────────────────────────────────────
    setFactCount(count) {
        this.factCount = count;
    }
    setLastSyncedBlock(block) {
        this.lastSyncedBlock = block;
    }
    setSmartAccountAddress(addr) {
        this.smartAccountAddress = addr;
    }
    setLastUpdatedAt(ts) {
        this.lastUpdatedAt = ts;
    }
    setLastQueryEmbedding(embedding) {
        // Store a private copy so later caller-side mutation cannot leak in.
        this.lastQueryEmbedding = embedding ? [...embedding] : null;
    }
    /**
     * Check if the cache is fresh (within TTL). A cache that was never
     * updated (`lastUpdatedAt === 0`) is never fresh.
     * @param ttlMs TTL in milliseconds (default: 5 minutes)
     */
    isFresh(ttlMs = 300_000) {
        const age = Date.now() - this.lastUpdatedAt;
        return this.lastUpdatedAt !== 0 && age < ttlMs;
    }
    /**
     * Encrypt the current state with a fresh random IV and write it to
     * `cachePath`, creating the parent directory first when it is missing.
     */
    flush() {
        const serialized = JSON.stringify({
            hotFacts: this.hotFacts,
            factCount: this.factCount,
            lastSyncedBlock: this.lastSyncedBlock,
            smartAccountAddress: this.smartAccountAddress,
            lastUpdatedAt: this.lastUpdatedAt,
            lastQueryEmbedding: this.lastQueryEmbedding,
        });
        const iv = crypto.randomBytes(IV_LENGTH);
        const cipher = crypto.createCipheriv('aes-256-gcm', this.key, iv);
        const body = Buffer.concat([
            cipher.update(Buffer.from(serialized, 'utf-8')),
            cipher.final(),
        ]);
        // The tag is only available after final(); it sits between IV and body.
        const blob = Buffer.concat([iv, cipher.getAuthTag(), body]);
        const parentDir = path.dirname(this.cachePath);
        if (!fs.existsSync(parentDir)) {
            fs.mkdirSync(parentDir, { recursive: true });
        }
        fs.writeFileSync(this.cachePath, blob);
    }
    /**
     * Restore state from `cachePath`. A missing or too-short file leaves the
     * current state untouched; any read, decrypt or parse failure resets the
     * cache to its empty defaults.
     */
    load() {
        if (!fs.existsSync(this.cachePath)) {
            return;
        }
        try {
            const blob = fs.readFileSync(this.cachePath);
            const headerLength = IV_LENGTH + TAG_LENGTH;
            if (blob.length < headerLength) {
                return;
            }
            const decipher = crypto.createDecipheriv('aes-256-gcm', this.key, blob.subarray(0, IV_LENGTH));
            decipher.setAuthTag(blob.subarray(IV_LENGTH, headerLength));
            const plaintext = Buffer.concat([
                decipher.update(blob.subarray(headerLength)),
                decipher.final(),
            ]);
            const { hotFacts, factCount, lastSyncedBlock, smartAccountAddress, lastUpdatedAt, lastQueryEmbedding, } = JSON.parse(plaintext.toString('utf-8'));
            this.hotFacts = hotFacts || [];
            this.factCount = factCount || 0;
            this.lastSyncedBlock = lastSyncedBlock || 0;
            this.smartAccountAddress = smartAccountAddress || '';
            this.lastUpdatedAt = lastUpdatedAt || 0;
            this.lastQueryEmbedding = lastQueryEmbedding || null;
        }
        catch {
            // Unreadable snapshot (wrong key, tampering, bad JSON, ...): start empty.
            Object.assign(this, {
                hotFacts: [],
                factCount: 0,
                lastSyncedBlock: 0,
                smartAccountAddress: '',
                lastUpdatedAt: 0,
                lastQueryEmbedding: null,
            });
        }
    }
}
@@ -0,0 +1,64 @@
1
/**
 * Abstract base class for import adapters.
 *
 * Adapters are PARSERS only — they convert raw export data into either:
 * - Pre-structured facts (Mem0, MCP Memory — facts are already atomic)
 * - Conversation chunks (ChatGPT, Claude — need LLM extraction)
 *
 * The caller (import tool) handles LLM extraction, encryption, and storage.
 */
export class BaseImportAdapter {
    /**
     * Validate and clean a single fact.
     *
     * @param {object} fact - Partial fact; only `text` is required.
     * @returns {object | null} The normalised fact, or null if the fact should
     *   be skipped (missing entry, non-string text, or fewer than 3
     *   characters after trimming).
     */
    validateFact(fact) {
        // Tolerate holes / nulls in raw import data instead of throwing.
        if (fact == null) {
            return null;
        }
        // Text is required and must be non-empty
        if (!fact.text || typeof fact.text !== 'string' || fact.text.trim().length < 3) {
            return null;
        }
        // Truncate to 512 chars
        const text = fact.text.trim().slice(0, 512);
        // Normalize type
        const validTypes = ['fact', 'preference', 'decision', 'episodic', 'goal', 'context', 'summary'];
        const type = validTypes.includes(fact.type)
            ? fact.type
            : 'fact';
        // Normalize importance to 1-10. Coerce first so numeric strings keep
        // working; anything that is not a number (NaN, 'high', {}) falls back
        // to the default of 5 instead of leaking NaN into the stored fact.
        const rawImportance = Number(fact.importance ?? 5);
        let importance;
        if (Number.isNaN(rawImportance)) {
            importance = 5;
        }
        else if (rawImportance < 0 || rawImportance > 1) {
            // Treated as already on the 1-10 scale — round and clamp.
            importance = Math.max(1, Math.min(10, Math.round(rawImportance)));
        }
        else {
            // 0-1 scale — convert to 1-10. Note that exactly 1 lands here and
            // therefore maps to 10.
            importance = Math.max(1, Math.round(rawImportance * 10));
        }
        return {
            text,
            type,
            importance,
            // `this.source` is expected to be supplied by the concrete adapter.
            source: fact.source ?? this.source,
            sourceId: fact.sourceId,
            sourceTimestamp: fact.sourceTimestamp,
            tags: fact.tags,
        };
    }
    /**
     * Batch-validate an array of partial facts.
     *
     * @param {Array<object>} rawFacts - Candidate facts.
     * @returns {{ facts: Array<object>, invalidCount: number }} The facts that
     *   passed validation (input order preserved) and how many were rejected.
     */
    validateFacts(rawFacts) {
        const facts = [];
        let invalidCount = 0;
        for (const raw of rawFacts) {
            const validated = this.validateFact(raw);
            if (validated) {
                facts.push(validated);
            }
            else {
                invalidCount++;
            }
        }
        return { facts, invalidCount };
    }
}
@@ -0,0 +1,238 @@
1
+ import { BaseImportAdapter } from './base-adapter.js';
2
+ import fs from 'node:fs';
3
+ import os from 'node:os';
4
+ /** Maximum messages per conversation chunk for LLM extraction. */
5
+ const CHUNK_SIZE = 20;
6
+ // ── ChatGPT Adapter ─────────────────────────────────────────────────────────
7
/**
 * Import adapter for ChatGPT data. Accepts either a JSON export
 * (conversations.json, a `{ conversations: [...] }` wrapper, or a single
 * conversation object) or plain-text memories, and turns both into
 * conversation chunks for LLM extraction. It never produces facts directly.
 */
export class ChatGPTAdapter extends BaseImportAdapter {
    source = 'chatgpt';
    displayName = 'ChatGPT';
    /**
     * Read the input (inline `content` wins over `file_path`) and dispatch on
     * its shape: text starting with `[` or `{` is parsed as JSON, anything
     * else as one-memory-per-line text.
     *
     * @param {{ content?: string, file_path?: string }} input
     * @param {(progress: object) => void} [onProgress] - Optional progress sink.
     * @returns {Promise<object>} Parse result; problems are reported through
     *   its `errors` / `warnings` arrays rather than thrown.
     */
    async parse(input, onProgress) {
        const warnings = [];
        const errors = [];
        let content;
        if (input.content) {
            content = input.content;
        }
        else if (input.file_path) {
            try {
                // Expand only a bare leading "~" ("~" or "~/..."); "~user/..." is
                // left untouched rather than being glued onto our own home dir.
                // The replacer function keeps any "$" in the home path literal.
                const resolvedPath = input.file_path.replace(/^~(?=$|[\\/])/, () => os.homedir());
                content = fs.readFileSync(resolvedPath, 'utf-8');
            }
            catch (e) {
                errors.push(`Failed to read file: ${e instanceof Error ? e.message : 'Unknown error'}`);
                return { facts: [], chunks: [], totalMessages: 0, warnings, errors };
            }
        }
        else {
            errors.push('ChatGPT import requires either content (pasted text or JSON) or file_path. ' +
                'Export from ChatGPT: Settings -> Data Controls -> Export Data (conversations.json), ' +
                'or copy from Settings -> Personalization -> Memory -> Manage.');
            return { facts: [], chunks: [], totalMessages: 0, warnings, errors };
        }
        // Detect format: JSON array/object = conversations.json, plain text = memories
        const trimmed = content.trim();
        if (trimmed.startsWith('[') || trimmed.startsWith('{')) {
            return this.parseConversationsJson(trimmed, warnings, errors, onProgress);
        }
        // Plain text: ChatGPT memories (one per line)
        return this.parseMemoriesText(trimmed, warnings, errors, onProgress);
    }
    /**
     * Parse ChatGPT conversations.json — full export with mapping tree.
     * Returns conversation chunks for LLM extraction (no pattern matching).
     *
     * Entries that are missing, not objects, or have no `mapping` are skipped
     * with a warning; progress is reported after every 50 processed entries,
     * skipped ones included.
     */
    parseConversationsJson(content, warnings, errors, onProgress) {
        let conversations;
        try {
            const data = JSON.parse(content);
            if (Array.isArray(data)) {
                conversations = data;
            }
            else if (data.conversations && Array.isArray(data.conversations)) {
                conversations = data.conversations;
            }
            else if (data.mapping) {
                // Single conversation object
                conversations = [data];
            }
            else {
                errors.push('Unrecognized ChatGPT format. Expected an array of conversation objects (conversations.json) ' +
                    'or plain text (ChatGPT memories).');
                return { facts: [], chunks: [], totalMessages: 0, warnings, errors };
            }
        }
        catch (e) {
            errors.push(`Failed to parse ChatGPT JSON: ${e instanceof Error ? e.message : 'Unknown error'}`);
            return { facts: [], chunks: [], totalMessages: 0, warnings, errors };
        }
        if (onProgress) {
            onProgress({
                current: 0,
                total: conversations.length,
                phase: 'parsing',
                message: `Parsing ${conversations.length} ChatGPT conversations...`,
            });
        }
        const chunks = [];
        let totalMessages = 0;
        let convIndex = 0;
        for (const conv of conversations) {
            // Count every entry (including skipped ones) so the reported
            // "current" can actually reach "total".
            convIndex++;
            if (!conv?.mapping) {
                // `conv` itself may be null in a damaged export.
                warnings.push(`Conversation "${conv?.title || 'untitled'}" has no mapping — skipped`);
            }
            else {
                // Extract user + assistant messages in tree order
                const messages = this.extractMessages(conv.mapping);
                if (messages.length > 0) {
                    totalMessages += messages.length;
                    // Conversation-level timestamp (create_time is multiplied by
                    // 1000 before being handed to Date). A malformed value gives
                    // an Invalid Date, on which toISOString() would throw — drop
                    // the timestamp instead of aborting the import.
                    let timestamp;
                    if (conv.create_time) {
                        const created = new Date(conv.create_time * 1000);
                        if (!Number.isNaN(created.getTime())) {
                            timestamp = created.toISOString();
                        }
                    }
                    const title = conv.title || 'Untitled Conversation';
                    // Chunk into batches of CHUNK_SIZE messages
                    const totalChunks = Math.ceil(messages.length / CHUNK_SIZE);
                    for (let i = 0; i < messages.length; i += CHUNK_SIZE) {
                        const chunkIndex = Math.floor(i / CHUNK_SIZE) + 1;
                        chunks.push({
                            title: totalChunks > 1 ? `${title} (part ${chunkIndex}/${totalChunks})` : title,
                            messages: messages.slice(i, i + CHUNK_SIZE),
                            timestamp,
                        });
                    }
                }
            }
            if (onProgress && convIndex % 50 === 0) {
                onProgress({
                    current: convIndex,
                    total: conversations.length,
                    phase: 'parsing',
                    message: `Parsed ${convIndex}/${conversations.length} conversations (${chunks.length} chunks, ${totalMessages} messages)...`,
                });
            }
        }
        if (chunks.length === 0 && conversations.length > 0) {
            warnings.push(`Parsed ${conversations.length} conversations but found no messages with text content.`);
        }
        return {
            facts: [],
            chunks,
            totalMessages,
            warnings,
            errors,
            source_metadata: {
                format: 'conversations.json',
                conversations_count: conversations.length,
                chunks_count: chunks.length,
                total_messages: totalMessages,
            },
        };
    }
    /**
     * Parse ChatGPT memories — plain text, one memory per line.
     * Users copy this from Settings -> Personalization -> Memory -> Manage.
     *
     * Header lines and list markers are stripped, entries shorter than 3
     * characters are dropped, and the remaining memories are grouped into
     * chunks of CHUNK_SIZE single-line `user` messages for LLM extraction.
     */
    parseMemoriesText(content, warnings, errors, onProgress) {
        // Split by newlines and filter empty lines
        const lines = content.split('\n')
            .map((line) => line.trim())
            .filter((line) => line.length > 0)
            // Skip common header lines
            .filter((line) => !/^(?:memories?|chatgpt memories?|my memories?|saved memories?):?\s*$/i.test(line));
        if (onProgress) {
            onProgress({
                current: 0,
                total: lines.length,
                phase: 'parsing',
                message: `Parsing ${lines.length} ChatGPT memories...`,
            });
        }
        // Clean lines: strip bullet/dash/number markers
        const cleanedLines = lines.map((line) => line
            .replace(/^[-*\u2022]\s+/, '') // bullet points
            .replace(/^\d+[.)]\s+/, '') // numbered lists
            .trim()).filter((line) => line.length >= 3);
        // Group all memories into chunks of CHUNK_SIZE for efficient LLM extraction
        const chunks = [];
        for (let i = 0; i < cleanedLines.length; i += CHUNK_SIZE) {
            const batch = cleanedLines.slice(i, i + CHUNK_SIZE);
            chunks.push({
                title: `ChatGPT memories (${i + 1}-${Math.min(i + CHUNK_SIZE, cleanedLines.length)})`,
                messages: batch.map((text) => ({ role: 'user', text })),
            });
        }
        return {
            facts: [],
            chunks,
            totalMessages: cleanedLines.length,
            warnings,
            errors,
            source_metadata: {
                format: 'memories-text',
                // Counts lines before marker stripping / short-line filtering.
                total_lines: lines.length,
                chunks_count: chunks.length,
            },
        };
    }
    /**
     * Traverse the mapping tree and extract user + assistant messages.
     * Both roles are included because the assistant's response often provides
     * context that helps the LLM understand what the user meant.
     *
     * The walk is breadth-first from the first root-like node (no parent, or
     * a parent that is absent from the mapping).
     * NOTE(review): when a node has more than one child, every branch is
     * collected level by level rather than a single thread — confirm that
     * this is the intended behaviour for branched conversations.
     *
     * @param {object} mapping - Node id → node (`parent`, `children`, `message`).
     * @returns {Array<{ role: string, text: string }>} Messages in visit order.
     */
    extractMessages(mapping) {
        // Find the root node (the one with no parent or parent not in mapping)
        let rootId;
        for (const [id, node] of Object.entries(mapping)) {
            if (!node) {
                // A null/empty entry cannot anchor the walk; keep looking.
                continue;
            }
            if (!node.parent || !mapping[node.parent]) {
                rootId = id;
                break;
            }
        }
        if (!rootId)
            return [];
        const messages = [];
        const visited = new Set();
        // The array doubles as a FIFO queue read through a moving cursor, which
        // avoids the O(n) cost of Array.prototype.shift() on every step.
        const queue = [rootId];
        for (let head = 0; head < queue.length; head++) {
            const nodeId = queue[head];
            if (visited.has(nodeId))
                continue;
            visited.add(nodeId);
            const node = mapping[nodeId];
            if (!node)
                continue;
            const role = node.message?.author?.role;
            // Only collect user and assistant messages (skip system, tool)
            if (role === 'user' || role === 'assistant') {
                const textParts = this.extractTextFromParts(node.message?.content?.parts);
                if (textParts && textParts.length >= 3) {
                    messages.push({ role, text: textParts });
                }
            }
            // Follow children (add them to queue in order)
            if (Array.isArray(node.children)) {
                queue.push(...node.children);
            }
        }
        return messages;
    }
    /**
     * Extract plain text from message content parts.
     * Parts can be strings, null, or complex objects (images, etc.) -- we only want strings.
     *
     * @param {Array<unknown> | undefined | null} parts
     * @returns {string | null} Non-blank string parts joined with a single
     *   space, or null when there are none (or `parts` is not an array).
     */
    extractTextFromParts(parts) {
        if (!Array.isArray(parts) || parts.length === 0)
            return null;
        const textParts = parts
            .filter((p) => typeof p === 'string' && p.trim().length > 0);
        if (textParts.length === 0)
            return null;
        return textParts.join(' ').trim();
    }
}