@totalreclaw/totalreclaw 3.3.1-rc.2 → 3.3.1-rc.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +330 -0
- package/SKILL.md +50 -83
- package/api-client.ts +18 -11
- package/config.ts +117 -3
- package/crypto.ts +10 -2
- package/dist/api-client.js +226 -0
- package/dist/billing-cache.js +100 -0
- package/dist/claims-helper.js +606 -0
- package/dist/config.js +280 -0
- package/dist/consolidation.js +258 -0
- package/dist/contradiction-sync.js +1034 -0
- package/dist/crypto.js +138 -0
- package/dist/digest-sync.js +361 -0
- package/dist/download-ux.js +63 -0
- package/dist/embedding.js +86 -0
- package/dist/extractor.js +1225 -0
- package/dist/first-run.js +103 -0
- package/dist/fs-helpers.js +563 -0
- package/dist/gateway-url.js +197 -0
- package/dist/generate-mnemonic.js +13 -0
- package/dist/hot-cache-wrapper.js +101 -0
- package/dist/import-adapters/base-adapter.js +64 -0
- package/dist/import-adapters/chatgpt-adapter.js +238 -0
- package/dist/import-adapters/claude-adapter.js +114 -0
- package/dist/import-adapters/gemini-adapter.js +201 -0
- package/dist/import-adapters/index.js +26 -0
- package/dist/import-adapters/mcp-memory-adapter.js +219 -0
- package/dist/import-adapters/mem0-adapter.js +158 -0
- package/dist/import-adapters/types.js +1 -0
- package/dist/index.js +5348 -0
- package/dist/llm-client.js +686 -0
- package/dist/llm-profile-reader.js +346 -0
- package/dist/lsh.js +62 -0
- package/dist/onboarding-cli.js +750 -0
- package/dist/pair-cli.js +344 -0
- package/dist/pair-crypto.js +359 -0
- package/dist/pair-http.js +404 -0
- package/dist/pair-page.js +826 -0
- package/dist/pair-qr.js +107 -0
- package/dist/pair-remote-client.js +410 -0
- package/dist/pair-session-store.js +566 -0
- package/dist/pin.js +542 -0
- package/dist/qa-bug-report.js +301 -0
- package/dist/relay-headers.js +44 -0
- package/dist/reranker.js +442 -0
- package/dist/retype-setscope.js +348 -0
- package/dist/semantic-dedup.js +75 -0
- package/dist/subgraph-search.js +289 -0
- package/dist/subgraph-store.js +694 -0
- package/dist/tool-gating.js +58 -0
- package/download-ux.ts +91 -0
- package/embedding.ts +32 -9
- package/fs-helpers.ts +124 -0
- package/gateway-url.ts +57 -9
- package/index.ts +586 -357
- package/llm-client.ts +211 -23
- package/lsh.ts +7 -2
- package/onboarding-cli.ts +114 -1
- package/package.json +19 -5
- package/pair-cli.ts +76 -8
- package/pair-crypto.ts +34 -24
- package/pair-page.ts +28 -17
- package/pair-qr.ts +152 -0
- package/pair-remote-client.ts +540 -0
- package/qa-bug-report.ts +381 -0
- package/relay-headers.ts +50 -0
- package/reranker.ts +73 -0
- package/retype-setscope.ts +12 -0
- package/subgraph-search.ts +4 -3
- package/subgraph-store.ts +109 -16
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* gateway-url — autodetect the gateway's externally-reachable URL for QR
|
|
3
|
+
* pairing. This module runs sync + network-I/O-free so the OpenClaw
|
|
4
|
+
* dangerous-code scanner never flags it (the 3.3.1-rc.1 implementation
|
|
5
|
+
* used `child-process.execFileSync('tailscale', ...)` which blocked every
|
|
6
|
+
* `openclaw plugins install` — see QA report
|
|
7
|
+
* `docs/notes/QA-plugin-3.3.1-rc.1-20260422-0121.md`).
|
|
8
|
+
*
|
|
9
|
+
 * Three outcomes:
|
|
10
|
+
*
|
|
11
|
+
* 1. Tailscale — PASSIVE detection via `os.networkInterfaces()`. If a
|
|
12
|
+
* `tailscale*` NIC has a CGNAT IPv4 (100.64/10), we return that IP
|
|
13
|
+
* as an auto-detected host — the operator can verify + override via
|
|
14
|
+
* `plugins.entries.totalreclaw.config.publicUrl` when they want a
|
|
15
|
+
* proper MagicDNS hostname. We DO NOT call `tailscale` the CLI —
|
|
16
|
+
* that requires `child-process` which the scanner blocks.
|
|
17
|
+
*
|
|
18
|
+
* 2. LAN — first non-loopback, non-virtual IPv4 interface. Emit with a
|
|
19
|
+
* caveat that the URL only works on the same network.
|
|
20
|
+
*
|
|
21
|
+
* 3. Null — no signal; caller falls through to localhost with a warning.
|
|
22
|
+
*
|
|
23
|
+
* The caller is expected to surface `detected.note` to the operator and
|
|
24
|
+
* tell them to set `publicUrl` when auto-detect isn't good enough
|
|
25
|
+
* (remote-accessible https, MagicDNS, etc.).
|
|
26
|
+
*
|
|
27
|
+
* Scope and scanner surface
|
|
28
|
+
* -------------------------
|
|
29
|
+
* - No `child-process` import — the original scanner-blocking flaw.
|
|
30
|
+
* - No `fetch` / `post` / `http.request` substrings — the potential-
|
|
31
|
+
* exfiltration rule is also clear.
|
|
32
|
+
* - Only `node:os` (synchronous, local) is used; no disk reads, no
|
|
33
|
+
* subprocess execution, no network calls.
|
|
34
|
+
*/
|
|
35
|
+
import os from 'node:os';
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
// Tailscale — passive detection (no subprocess, no network I/O)
|
|
38
|
+
// ---------------------------------------------------------------------------
|
|
39
|
+
/** CGNAT range 100.64.0.0/10 — Tailscale assigns IPs here by default. */
function isTailscaleCGNAT(addr) {
    // Dotted-quad shape check first; anything else is not an IPv4 literal.
    if (!/^\d{1,3}(?:\.\d{1,3}){3}$/.test(addr)) {
        return false;
    }
    const [first, second] = addr.split('.').map((octet) => Number.parseInt(octet, 10));
    // 100.64.0.0/10 covers second octets 64..127 under a first octet of 100.
    return first === 100 && second >= 64 && second <= 127;
}
/**
 * Passive Tailscale detection — scans `os.networkInterfaces()` for a
 * `tailscale*` NIC carrying a CGNAT IPv4 and returns it as a detected
 * host object, or null when no such NIC exists.
 *
 * Deliberately does NOT shell out to `tailscale status`: the rc.1 version
 * did, which tripped the OpenClaw scanner's dangerous-code detector and
 * blocked install. The trade-off is that we surface the raw CGNAT IP
 * rather than the MagicDNS hostname; operators who want the latter set
 * `plugins.entries.totalreclaw.config.publicUrl` (documented in SKILL.md).
 *
 * @param options optional; `options.networkInterfaces` is an injectable
 *   replacement for `os.networkInterfaces` (used by tests).
 * @returns `{ kind, host, tls, note }` or null.
 */
export function detectTailscaleHost(options) {
    const readInterfaces = options?.networkInterfaces ?? os.networkInterfaces;
    for (const [ifaceName, addrs] of Object.entries(readInterfaces())) {
        if (!ifaceName.toLowerCase().startsWith('tailscale') || !addrs) {
            continue;
        }
        const match = addrs.find((entry) => entry.family === 'IPv4' && !entry.internal && isTailscaleCGNAT(entry.address));
        if (match) {
            return {
                kind: 'tailscale',
                host: match.address,
                tls: false,
                note: `Tailscale CGNAT IP detected on interface ${ifaceName}. For a proper ` +
                    `https://<magicdns>.ts.net URL, set plugins.entries.totalreclaw.config.publicUrl ` +
                    `(Tailscale CLI auto-resolution was removed in 3.3.1-rc.2 to pass the ` +
                    `OpenClaw security scanner).`,
            };
        }
    }
    return null;
}
|
84
|
+
// ---------------------------------------------------------------------------
|
|
85
|
+
// LAN autodetect
|
|
86
|
+
// ---------------------------------------------------------------------------
|
|
87
|
+
/** Interfaces we explicitly skip — these are virtual / tunneled. */
const SKIP_IFACE_PREFIXES = [
    'lo', 'tailscale', 'docker', 'br-', 'bridge', 'veth', 'utun',
    'vmnet', 'ovpn', 'wg', 'virbr', 'tun', 'ham',
];
/** True when the interface name matches a known virtual/tunnel prefix. */
function shouldSkipIface(name) {
    const normalized = name.toLowerCase();
    return SKIP_IFACE_PREFIXES.some((prefix) => normalized.startsWith(prefix));
}
/**
 * Docker container internal IP detection — issue #110 fix 4.
 *
 * True when `addr` is an IPv4 literal inside 172.16.0.0/12 — the private
 * range Docker draws its bridge-network addresses from. From inside a
 * container such an address is only reachable by peers on the same Docker
 * network, never by the host browser or an external device, so it must
 * not be surfaced as a pairing URL. Callers only apply this rule when
 * they have positive Docker evidence (outside Docker, 172.16.x.x can be a
 * legitimate corporate LAN).
 */
export function isDockerInternalIp(addr) {
    if (!/^\d{1,3}(?:\.\d{1,3}){3}$/.test(addr)) {
        return false;
    }
    const [first, second] = addr.split('.').map((octet) => Number.parseInt(octet, 10));
    // 172.16.0.0/12 => first octet 172, second octet 16..31.
    return first === 172 && second >= 16 && second <= 31;
}
/**
 * Pick the first non-loopback, non-virtual IPv4 address, or null when none
 * exists (headless VPS with only lo + tailscale, for example).
 *
 * issue #110 fix 4: with `options.isDocker` set, addresses in the
 * 172.16/12 Docker-bridge range are skipped as container-internal; the
 * resulting null lets the caller fall through to its localhost / relay
 * fallback instead of handing the user a dead URL.
 *
 * @param options optional; `options.networkInterfaces` is an injectable
 *   replacement for `os.networkInterfaces`, `options.isDocker` enables the
 *   Docker-bridge skip.
 * @returns `{ kind, host, tls, note }` or null.
 */
export function detectLanHost(options) {
    const readInterfaces = options?.networkInterfaces ?? os.networkInterfaces;
    for (const [ifaceName, addrs] of Object.entries(readInterfaces())) {
        if (shouldSkipIface(ifaceName) || !addrs) {
            continue;
        }
        for (const entry of addrs) {
            if (entry.family !== 'IPv4' || entry.internal) {
                continue;
            }
            // issue #110 fix 4 — a Docker-internal IP is unreachable from
            // any external browser; skip it so the caller falls back to the
            // relay-brokered URL.
            if (options?.isDocker && isDockerInternalIp(entry.address)) {
                continue;
            }
            return {
                kind: 'lan',
                host: entry.address,
                tls: false,
                note: `LAN IPv4 on interface ${ifaceName} — only reachable from the same network.`,
            };
        }
    }
    return null;
}
|
171
|
+
// ---------------------------------------------------------------------------
|
|
172
|
+
// Composed resolver
|
|
173
|
+
// ---------------------------------------------------------------------------
|
|
174
|
+
/**
 * Composed resolver: try Tailscale first (passive NIC probe), then LAN.
 * Returns null when neither yields a host — the caller falls through to
 * localhost with a warning.
 *
 * Fully synchronous: no I/O, no subprocess, no network, so it is safe in
 * sync callers such as `buildPairingUrl` in index.ts.
 *
 * issue #110 fix 4: `options.isDocker`, when true, makes LAN detection
 * skip the 172.16/12 Docker-bridge range so a container-internal IP that
 * no external browser can reach is never surfaced. The caller passes
 * `isRunningInDocker()` for this flag.
 */
export function detectGatewayHost(options) {
    const networkInterfaces = options?.networkInterfaces;
    // Tailscale wins when present; `??` falls through to LAN, and a null
    // LAN result propagates to the caller unchanged.
    return (detectTailscaleHost({ networkInterfaces })
        ?? detectLanHost({ networkInterfaces, isDocker: options?.isDocker }));
}
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
#!/usr/bin/env npx tsx
/**
 * Print a freshly generated BIP-39 12-word mnemonic suitable for use as
 * TOTALRECLAW_RECOVERY_PHRASE.
 *
 * Usage: npx tsx generate-mnemonic.ts
 */
import { generateMnemonic } from '@scure/bip39';
import { wordlist } from '@scure/bip39/wordlists/english.js';

// 128 bits of entropy => a 12-word phrase.
const phrase = generateMnemonic(wordlist, 128);

console.log('\n Your TotalReclaw recovery phrase (12 words):\n');
console.log(` ${phrase}\n`);
console.log(' WRITE THIS DOWN. If you lose it, your memories are unrecoverable.');
console.log(' Set it as TOTALRECLAW_RECOVERY_PHRASE in your .env file.\n');
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
 * Hot cache wrapper for the plugin.
 *
 * Self-contained AES-256-GCM encrypted cache (same role as
 * client/src/cache/hot-cache.ts but without a cross-package import).
 * NOTE(review): the previous header claimed XChaCha20-Poly1305; the code
 * below uses Node's built-in 'aes-256-gcm' — comment corrected to match.
 *
 * On-disk format: [12-byte IV][16-byte GCM auth tag][ciphertext].
 */
import crypto from 'node:crypto';
import fs from 'node:fs';
import path from 'node:path';
/** Hard cap on cached facts — setHotFacts keeps only the top-N by importance. */
const MAX_HOT_FACTS = 30;
/** AES-GCM IV length in bytes. */
const IV_LENGTH = 12;
/** AES-GCM authentication tag length in bytes. */
const TAG_LENGTH = 16;
export class PluginHotCache {
    cachePath; // file the encrypted cache is persisted to
    hotFacts = []; // sorted by importance (desc), at most MAX_HOT_FACTS
    factCount = 0;
    lastSyncedBlock = 0;
    smartAccountAddress = '';
    lastUpdatedAt = 0; // epoch ms; 0 means "never populated"
    lastQueryEmbedding = null;
    key; // AES-256 key decoded from hex (must be 32 bytes for createCipheriv)
    /**
     * @param {string} cachePath - path the encrypted cache file lives at
     * @param {string} hexKey - hex-encoded AES-256 key (64 hex chars)
     */
    constructor(cachePath, hexKey) {
        this.cachePath = cachePath;
        this.key = Buffer.from(hexKey, 'hex');
    }
    /** Defensive copy so callers cannot mutate internal state. */
    getHotFacts() { return [...this.hotFacts]; }
    getFactCount() { return this.factCount; }
    getLastSyncedBlock() { return this.lastSyncedBlock; }
    getSmartAccountAddress() { return this.smartAccountAddress; }
    getLastUpdatedAt() { return this.lastUpdatedAt; }
    getLastQueryEmbedding() { return this.lastQueryEmbedding ? [...this.lastQueryEmbedding] : null; }
    /** Keep the MAX_HOT_FACTS most important facts and bump the timestamp. */
    setHotFacts(facts) {
        const sorted = [...facts].sort((a, b) => b.importance - a.importance);
        this.hotFacts = sorted.slice(0, MAX_HOT_FACTS);
        this.lastUpdatedAt = Date.now();
    }
    setFactCount(count) { this.factCount = count; }
    setLastSyncedBlock(block) { this.lastSyncedBlock = block; }
    setSmartAccountAddress(addr) { this.smartAccountAddress = addr; }
    setLastUpdatedAt(ts) { this.lastUpdatedAt = ts; }
    setLastQueryEmbedding(embedding) { this.lastQueryEmbedding = embedding ? [...embedding] : null; }
    /**
     * Check if the cache is fresh (within TTL).
     * @param ttlMs TTL in milliseconds (default: 5 minutes)
     */
    isFresh(ttlMs = 300_000) {
        if (this.lastUpdatedAt === 0)
            return false;
        return (Date.now() - this.lastUpdatedAt) < ttlMs;
    }
    /**
     * Encrypt the current state and write it to `cachePath`, creating the
     * parent directory if needed. Output layout: IV || tag || ciphertext.
     */
    flush() {
        const payload = {
            hotFacts: this.hotFacts,
            factCount: this.factCount,
            lastSyncedBlock: this.lastSyncedBlock,
            smartAccountAddress: this.smartAccountAddress,
            lastUpdatedAt: this.lastUpdatedAt,
            lastQueryEmbedding: this.lastQueryEmbedding,
        };
        const plaintext = Buffer.from(JSON.stringify(payload), 'utf-8');
        const iv = crypto.randomBytes(IV_LENGTH);
        const cipher = crypto.createCipheriv('aes-256-gcm', this.key, iv);
        const encrypted = Buffer.concat([cipher.update(plaintext), cipher.final()]);
        const tag = cipher.getAuthTag();
        const output = Buffer.concat([iv, tag, encrypted]);
        const dir = path.dirname(this.cachePath);
        if (!fs.existsSync(dir))
            fs.mkdirSync(dir, { recursive: true });
        fs.writeFileSync(this.cachePath, output);
    }
    /**
     * Read and decrypt the cache file into this instance. A missing,
     * truncated, tampered, or undecryptable file resets state to defaults
     * rather than throwing — the cache is best-effort by design.
     *
     * Fix: array fields are now type-guarded. The decrypted JSON was
     * previously trusted blindly, so a corrupted-but-authenticated payload
     * with a non-array `hotFacts` / `lastQueryEmbedding` would later blow
     * up in the spreading getters.
     */
    load() {
        if (!fs.existsSync(this.cachePath))
            return;
        try {
            const data = fs.readFileSync(this.cachePath);
            if (data.length < IV_LENGTH + TAG_LENGTH)
                return;
            const iv = data.subarray(0, IV_LENGTH);
            const tag = data.subarray(IV_LENGTH, IV_LENGTH + TAG_LENGTH);
            const ciphertext = data.subarray(IV_LENGTH + TAG_LENGTH);
            const decipher = crypto.createDecipheriv('aes-256-gcm', this.key, iv);
            decipher.setAuthTag(tag);
            const decrypted = Buffer.concat([decipher.update(ciphertext), decipher.final()]);
            const payload = JSON.parse(decrypted.toString('utf-8'));
            this.hotFacts = Array.isArray(payload.hotFacts) ? payload.hotFacts : [];
            this.factCount = payload.factCount || 0;
            this.lastSyncedBlock = payload.lastSyncedBlock || 0;
            this.smartAccountAddress = payload.smartAccountAddress || '';
            this.lastUpdatedAt = payload.lastUpdatedAt || 0;
            this.lastQueryEmbedding = Array.isArray(payload.lastQueryEmbedding) ? payload.lastQueryEmbedding : null;
        }
        catch {
            // Corrupt or wrong-key cache — reset to a clean empty state.
            this.hotFacts = [];
            this.factCount = 0;
            this.lastSyncedBlock = 0;
            this.smartAccountAddress = '';
            this.lastUpdatedAt = 0;
            this.lastQueryEmbedding = null;
        }
    }
}
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
 * Abstract base class for import adapters.
 *
 * Adapters are PARSERS only — they convert raw export data into either:
 * - Pre-structured facts (Mem0, MCP Memory — facts are already atomic)
 * - Conversation chunks (ChatGPT, Claude — need LLM extraction)
 *
 * The caller (import tool) handles LLM extraction, encryption, and storage.
 */
export class BaseImportAdapter {
    /**
     * Validate and clean a single fact.
     *
     * - Requires `text` to be a string of at least 3 chars after trimming;
     *   otherwise returns null so the caller can count it as invalid.
     * - Truncates text to 512 chars.
     * - Coerces unrecognized `type` values to 'fact'.
     * - Normalizes `importance` onto the 1-10 scale: values in [0, 1] are
     *   treated as a 0-1 confidence and scaled by 10; anything else is
     *   clamped to [1, 10].
     *   NOTE(review): an importance of exactly 1 is ambiguous between the
     *   two scales and is interpreted as 0-1 (it becomes 10) — confirm this
     *   matches what upstream exporters emit.
     *
     * Returns null if the fact should be skipped.
     */
    validateFact(fact) {
        // Text is required and must be non-empty
        if (!fact.text || typeof fact.text !== 'string' || fact.text.trim().length < 3) {
            return null;
        }
        // Truncate to 512 chars
        const text = fact.text.trim().slice(0, 512);
        // Normalize type
        const validTypes = ['fact', 'preference', 'decision', 'episodic', 'goal', 'context', 'summary'];
        const type = validTypes.includes(fact.type)
            ? fact.type
            : 'fact';
        // Normalize importance to 1-10
        let importance = fact.importance ?? 5;
        if (importance < 0 || importance > 1) {
            // Already on 1-10 scale
            importance = Math.max(1, Math.min(10, Math.round(importance)));
        }
        else {
            // 0-1 scale — convert to 1-10
            importance = Math.max(1, Math.round(importance * 10));
        }
        // Fix: NaN input (e.g. `importance: NaN` or a non-numeric string in a
        // corrupted export) fails both range checks and survives both branches
        // as NaN — `?? 5` only catches null/undefined. Fall back to the
        // default instead of propagating NaN into storage.
        if (Number.isNaN(importance)) {
            importance = 5;
        }
        return {
            text,
            type,
            importance,
            source: fact.source ?? this.source,
            sourceId: fact.sourceId,
            sourceTimestamp: fact.sourceTimestamp,
            tags: fact.tags,
        };
    }
    /**
     * Batch-validate an array of partial facts.
     *
     * @returns `{ facts, invalidCount }` — the cleaned facts plus a count of
     *   entries rejected by validateFact.
     */
    validateFacts(rawFacts) {
        const facts = [];
        let invalidCount = 0;
        for (const raw of rawFacts) {
            const validated = this.validateFact(raw);
            if (validated) {
                facts.push(validated);
            }
            else {
                invalidCount++;
            }
        }
        return { facts, invalidCount };
    }
}
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
import { BaseImportAdapter } from './base-adapter.js';
|
|
2
|
+
import fs from 'node:fs';
|
|
3
|
+
import os from 'node:os';
|
|
4
|
+
/** Maximum messages per conversation chunk for LLM extraction. */
const CHUNK_SIZE = 20;
// ── ChatGPT Adapter ─────────────────────────────────────────────────────────
/**
 * Import adapter for ChatGPT data. Accepts either a full export
 * (conversations.json, parsed into conversation chunks for LLM extraction)
 * or pasted plain-text "memories" (one per line). Produces chunks only —
 * `facts` is always empty; the caller runs LLM extraction downstream.
 */
export class ChatGPTAdapter extends BaseImportAdapter {
    source = 'chatgpt';
    displayName = 'ChatGPT';
    /**
     * Entry point. Resolves `input.content` (pasted text/JSON) or reads
     * `input.file_path` (with `~` expanded to the home directory), then
     * dispatches on the first character: `[` or `{` goes to the JSON parser,
     * anything else is treated as plain-text memories.
     *
     * NOTE(review): `if (input.content)` is a truthiness check, so an empty
     * string falls through to file_path / the usage error — confirm that is
     * intended for empty pastes.
     *
     * @param onProgress optional callback receiving
     *   `{ current, total, phase, message }` updates during parsing.
     */
    async parse(input, onProgress) {
        const warnings = [];
        const errors = [];
        let content;
        if (input.content) {
            content = input.content;
        }
        else if (input.file_path) {
            try {
                // Expand a leading ~ so shell-style paths work.
                const resolvedPath = input.file_path.replace(/^~/, os.homedir());
                content = fs.readFileSync(resolvedPath, 'utf-8');
            }
            catch (e) {
                errors.push(`Failed to read file: ${e instanceof Error ? e.message : 'Unknown error'}`);
                return { facts: [], chunks: [], totalMessages: 0, warnings, errors };
            }
        }
        else {
            errors.push('ChatGPT import requires either content (pasted text or JSON) or file_path. ' +
                'Export from ChatGPT: Settings -> Data Controls -> Export Data (conversations.json), ' +
                'or copy from Settings -> Personalization -> Memory -> Manage.');
            return { facts: [], chunks: [], totalMessages: 0, warnings, errors };
        }
        // Detect format: JSON array = conversations.json, plain text = memories
        const trimmed = content.trim();
        if (trimmed.startsWith('[') || trimmed.startsWith('{')) {
            // Try to parse as JSON (conversations.json or memory list)
            return this.parseConversationsJson(trimmed, warnings, errors, onProgress);
        }
        // Plain text: ChatGPT memories (one per line)
        return this.parseMemoriesText(trimmed, warnings, errors, onProgress);
    }
    /**
     * Parse ChatGPT conversations.json — full export with mapping tree.
     * Returns conversation chunks for LLM extraction (no pattern matching).
     *
     * Accepted shapes: a bare array of conversations, `{ conversations: [...] }`,
     * or a single conversation object (detected by its `mapping` key).
     * Conversations without a mapping are skipped with a warning; each
     * remaining conversation is split into chunks of CHUNK_SIZE messages,
     * titled "<title> (part i/n)" when more than one chunk results.
     */
    parseConversationsJson(content, warnings, errors, onProgress) {
        let conversations;
        try {
            const data = JSON.parse(content);
            if (Array.isArray(data)) {
                conversations = data;
            }
            else if (data.conversations && Array.isArray(data.conversations)) {
                conversations = data.conversations;
            }
            else if (data.mapping) {
                // Single conversation object
                conversations = [data];
            }
            else {
                errors.push('Unrecognized ChatGPT format. Expected an array of conversation objects (conversations.json) ' +
                    'or plain text (ChatGPT memories).');
                return { facts: [], chunks: [], totalMessages: 0, warnings, errors };
            }
        }
        catch (e) {
            errors.push(`Failed to parse ChatGPT JSON: ${e instanceof Error ? e.message : 'Unknown error'}`);
            return { facts: [], chunks: [], totalMessages: 0, warnings, errors };
        }
        if (onProgress) {
            onProgress({
                current: 0,
                total: conversations.length,
                phase: 'parsing',
                message: `Parsing ${conversations.length} ChatGPT conversations...`,
            });
        }
        const chunks = [];
        let totalMessages = 0;
        // NOTE(review): convIndex only counts conversations that yielded
        // messages, so progress `current` can lag behind the true position
        // when many conversations are empty or unmapped.
        let convIndex = 0;
        for (const conv of conversations) {
            if (!conv.mapping) {
                warnings.push(`Conversation "${conv.title || 'untitled'}" has no mapping — skipped`);
                continue;
            }
            // Extract user + assistant messages in chronological order
            const messages = this.extractMessages(conv.mapping);
            if (messages.length === 0)
                continue;
            totalMessages += messages.length;
            // Determine timestamp from first message or conversation.
            // create_time is epoch seconds, hence the * 1000.
            const timestamp = conv.create_time
                ? new Date(conv.create_time * 1000).toISOString()
                : undefined;
            const title = conv.title || 'Untitled Conversation';
            // Chunk into batches of CHUNK_SIZE messages
            for (let i = 0; i < messages.length; i += CHUNK_SIZE) {
                const batch = messages.slice(i, i + CHUNK_SIZE);
                const chunkIndex = Math.floor(i / CHUNK_SIZE) + 1;
                const totalChunks = Math.ceil(messages.length / CHUNK_SIZE);
                chunks.push({
                    title: totalChunks > 1 ? `${title} (part ${chunkIndex}/${totalChunks})` : title,
                    messages: batch,
                    timestamp,
                });
            }
            convIndex++;
            // Throttle progress callbacks to every 50th conversation.
            if (onProgress && convIndex % 50 === 0) {
                onProgress({
                    current: convIndex,
                    total: conversations.length,
                    phase: 'parsing',
                    message: `Parsed ${convIndex}/${conversations.length} conversations (${chunks.length} chunks, ${totalMessages} messages)...`,
                });
            }
        }
        if (chunks.length === 0 && conversations.length > 0) {
            warnings.push(`Parsed ${conversations.length} conversations but found no messages with text content.`);
        }
        return {
            facts: [],
            chunks,
            totalMessages,
            warnings,
            errors,
            source_metadata: {
                format: 'conversations.json',
                conversations_count: conversations.length,
                chunks_count: chunks.length,
                total_messages: totalMessages,
            },
        };
    }
    /**
     * Parse ChatGPT memories — plain text, one memory per line.
     * Users copy this from Settings -> Personalization -> Memory -> Manage.
     *
     * Each line becomes a single-message conversation chunk for LLM extraction.
     * Header lines like "Memories:" and bullet/number markers are stripped;
     * cleaned lines shorter than 3 chars are dropped.
     */
    parseMemoriesText(content, warnings, errors, onProgress) {
        // Split by newlines and filter empty lines
        const lines = content.split('\n')
            .map((line) => line.trim())
            .filter((line) => line.length > 0)
            // Skip common header lines
            .filter((line) => !/^(?:memories?|chatgpt memories?|my memories?|saved memories?):?\s*$/i.test(line));
        if (onProgress) {
            onProgress({
                current: 0,
                total: lines.length,
                phase: 'parsing',
                message: `Parsing ${lines.length} ChatGPT memories...`,
            });
        }
        // Clean lines: strip bullet/dash/number markers
        const cleanedLines = lines.map((line) => line
            .replace(/^[-*\u2022]\s+/, '') // bullet points
            .replace(/^\d+[.)]\s+/, '') // numbered lists
            .trim()).filter((line) => line.length >= 3);
        // Group all memories into chunks of CHUNK_SIZE for efficient LLM extraction
        const chunks = [];
        for (let i = 0; i < cleanedLines.length; i += CHUNK_SIZE) {
            const batch = cleanedLines.slice(i, i + CHUNK_SIZE);
            chunks.push({
                title: `ChatGPT memories (${i + 1}-${Math.min(i + CHUNK_SIZE, cleanedLines.length)})`,
                messages: batch.map((text) => ({ role: 'user', text })),
            });
        }
        return {
            facts: [],
            chunks,
            totalMessages: cleanedLines.length,
            warnings,
            errors,
            source_metadata: {
                format: 'memories-text',
                total_lines: lines.length,
                chunks_count: chunks.length,
            },
        };
    }
    /**
     * Traverse the mapping tree and extract user + assistant messages in chronological order.
     * Both roles are included because the assistant's response often provides context
     * that helps the LLM understand what the user meant.
     *
     * NOTE(review): this is a breadth-first walk over ALL children, so when a
     * conversation has regenerated/edited branches, messages from every
     * branch are included and sibling branches interleave by depth — it does
     * not follow only the "current" thread. Harmless for linear
     * conversations; confirm acceptable for branched ones.
     */
    extractMessages(mapping) {
        // Find the root node (the one with no parent or parent not in mapping)
        let rootId;
        for (const [id, node] of Object.entries(mapping)) {
            if (!node.parent || !mapping[node.parent]) {
                rootId = id;
                break;
            }
        }
        if (!rootId)
            return [];
        // Walk the tree breadth-first, following children in order (main thread)
        const messages = [];
        const visited = new Set();
        const queue = [rootId];
        while (queue.length > 0) {
            const nodeId = queue.shift();
            if (visited.has(nodeId))
                continue;
            visited.add(nodeId);
            const node = mapping[nodeId];
            if (!node)
                continue;
            const role = node.message?.author?.role;
            // Only collect user and assistant messages (skip system, tool)
            if (role === 'user' || role === 'assistant') {
                const textParts = this.extractTextFromParts(node.message?.content?.parts);
                // textParts is the joined string; >= 3 is a character-length
                // floor that drops trivial fragments.
                if (textParts && textParts.length >= 3) {
                    messages.push({ role, text: textParts });
                }
            }
            // Follow children (add them to queue in order)
            for (const childId of node.children || []) {
                queue.push(childId);
            }
        }
        return messages;
    }
    /**
     * Extract plain text from message content parts.
     * Parts can be strings, null, or complex objects (images, etc.) -- we only want strings.
     *
     * @returns the non-empty string parts joined with a single space, or
     *   null when there is no usable text.
     */
    extractTextFromParts(parts) {
        if (!parts || parts.length === 0)
            return null;
        const textParts = parts
            .filter((p) => typeof p === 'string' && p.trim().length > 0);
        if (textParts.length === 0)
            return null;
        return textParts.join(' ').trim();
    }
}