@totalreclaw/totalreclaw 1.6.0 → 3.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAWHUB.md +134 -0
- package/README.md +407 -64
- package/SKILL.md +1032 -0
- package/api-client.ts +5 -5
- package/claims-helper.ts +686 -0
- package/config.ts +211 -0
- package/consolidation.ts +141 -33
- package/contradiction-sync.ts +1389 -0
- package/crypto.ts +63 -261
- package/digest-sync.ts +516 -0
- package/embedding.ts +69 -46
- package/extractor.ts +1307 -84
- package/hot-cache-wrapper.ts +1 -1
- package/import-adapters/gemini-adapter.ts +243 -0
- package/import-adapters/index.ts +3 -0
- package/import-adapters/types.ts +1 -1
- package/index.ts +1887 -323
- package/llm-client.ts +106 -53
- package/lsh.ts +21 -210
- package/package.json +20 -7
- package/pin.ts +502 -0
- package/reranker.ts +96 -124
- package/skill.json +213 -0
- package/subgraph-search.ts +112 -5
- package/subgraph-store.ts +559 -275
- package/consolidation.test.ts +0 -356
- package/extractor-dedup.test.ts +0 -168
- package/import-adapters/import-adapters.test.ts +0 -1123
- package/lsh.test.ts +0 -463
- package/pocv2-e2e-test.ts +0 -917
- package/porter-stemmer.d.ts +0 -4
- package/reranker.test.ts +0 -594
- package/semantic-dedup.test.ts +0 -392
- package/setup.sh +0 -19
- package/store-dedup-wiring.test.ts +0 -186
package/hot-cache-wrapper.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Hot cache wrapper for the plugin.
|
|
3
3
|
*
|
|
4
|
-
* Self-contained
|
|
4
|
+
* Self-contained XChaCha20-Poly1305 encrypted cache (same implementation as
|
|
5
5
|
* client/src/cache/hot-cache.ts but without cross-package import).
|
|
6
6
|
*/
|
|
7
7
|
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
import { BaseImportAdapter } from './base-adapter.js';
|
|
2
|
+
import type {
|
|
3
|
+
ImportSource,
|
|
4
|
+
AdapterParseResult,
|
|
5
|
+
ConversationChunk,
|
|
6
|
+
ProgressCallback,
|
|
7
|
+
} from './types.js';
|
|
8
|
+
import fs from 'node:fs';
|
|
9
|
+
import os from 'node:os';
|
|
10
|
+
|
|
11
|
+
/**
 * Maximum number of messages placed in a single conversation chunk handed to
 * LLM extraction. Sessions with more messages are split into several chunks
 * of at most this size.
 */
|
|
12
|
+
const CHUNK_SIZE = 20;
|
|
13
|
+
|
|
14
|
+
/**
 * Gap (in minutes) between consecutive entries that starts a new
 * pseudo-session. Entries whose timestamps are strictly further apart than
 * this are placed in separate sessions.
 */
|
|
15
|
+
const SESSION_GAP_MINUTES = 30;
|
|
16
|
+
|
|
17
|
+
// ── Timestamp Parsing ────────────────────────────────────────────────────────
|
|
18
|
+
|
|
19
|
+
/** Zero-based month index for each three-letter English month abbreviation. */
const MONTHS: Record<string, number> = {
  Jan: 0, Feb: 1, Mar: 2, Apr: 3, May: 4, Jun: 5,
  Jul: 6, Aug: 7, Sep: 8, Oct: 9, Nov: 10, Dec: 11,
};

/**
 * Parse a Gemini timestamp such as "1 Apr 2026, 18:39:35 WEST" into ISO 8601.
 *
 * The trailing timezone abbreviation is required to be present but is not
 * interpreted: the wall-clock fields are treated as UTC. All entries of one
 * export use the same zone, so relative ordering is preserved.
 *
 * @param raw - Text beginning with `D Mon YYYY, HH:MM:SS ` followed by a zone.
 * @returns The ISO 8601 string, or `undefined` when the text does not match,
 *   the month abbreviation is unknown, or any field is out of range
 *   (for example "31 Feb" or hour 25).
 */
function parseTimestamp(raw: string): string | undefined {
  const m = raw.match(/^(\d{1,2})\s+(\w{3})\s+(\d{4}),\s+(\d{2}):(\d{2}):(\d{2})\s+/);
  if (!m) return undefined;

  const month = MONTHS[m[2]];
  if (month === undefined) return undefined;

  const year = Number(m[3]);
  const day = Number(m[1]);
  const hour = Number(m[4]);
  const minute = Number(m[5]);
  const second = Number(m[6]);

  const d = new Date(Date.UTC(year, month, day, hour, minute, second));

  // Date.UTC normalises overflowing fields instead of failing (31 Feb becomes
  // 3 Mar). Round-trip every field so that such input is rejected rather than
  // silently shifted to a different instant.
  const roundTrips =
    d.getUTCFullYear() === year &&
    d.getUTCMonth() === month &&
    d.getUTCDate() === day &&
    d.getUTCHours() === hour &&
    d.getUTCMinutes() === minute &&
    d.getUTCSeconds() === second;

  return roundTrips ? d.toISOString() : undefined;
}
|
|
34
|
+
|
|
35
|
+
// ── HTML Helpers ─────────────────────────────────────────────────────────────
|
|
36
|
+
|
|
37
|
+
/** Named character references recognised by {@link decodeEntities}. */
const NAMED_ENTITIES = new Map<string, string>([
  ['quot', '"'],
  ['apos', "'"],
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['nbsp', ' '],
]);

/**
 * Decode HTML character references in `t`.
 *
 * Handles the named references `&quot;`, `&apos;`, `&amp;`, `&lt;`, `&gt;`
 * and `&nbsp;`, plus decimal (`&#39;`) and hexadecimal (`&#x27;`) numeric
 * references. Non-breaking spaces, whether written as `&nbsp;`, `&#160;` or a
 * literal U+00A0, become a regular space. Unknown or invalid references are
 * left untouched.
 *
 * Decoding happens in a single pass, so text produced by one replacement is
 * never decoded again: `&amp;lt;` yields `&lt;`, not `<`.
 *
 * @param t - Text that may contain character references.
 * @returns The text with recognised references replaced.
 */
function decodeEntities(t: string): string {
  return t.replace(
    /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|([a-zA-Z]+));|\u00a0/g,
    (whole: string, dec?: string, hex?: string, name?: string): string => {
      if (whole === '\u00a0') return ' ';
      if (name !== undefined) return NAMED_ENTITIES.get(name) ?? whole;

      const code = dec !== undefined ? parseInt(dec, 10) : parseInt(hex ?? '', 16);
      if (code === 0xa0) return ' ';

      // Leave references alone when they do not denote a Unicode scalar value
      // (zero, beyond U+10FFFF, or a lone surrogate).
      const isScalar = code > 0 && code <= 0x10ffff && !(code >= 0xd800 && code <= 0xdfff);
      return isScalar ? String.fromCodePoint(code) : whole;
    },
  );
}
|
|
41
|
+
|
|
42
|
+
/**
 * Reduce an HTML fragment to plain text.
 *
 * - `<br>` and the closing tags of block-level elements (`p`, `li`, `h1`-`h6`,
 *   `div`, `tr`, `pre`, `blockquote`) become line breaks, so adjacent blocks
 *   do not run together.
 * - Closing table cells (`td`, `th`) become a space, so cell contents stay
 *   separated.
 * - `<hr>` becomes a `---` line.
 * - Remaining tags and comments are removed. Only `<` followed by a letter
 *   (optionally after `/` or `!`) counts as a tag, so ordinary text such as
 *   `1 < 2 and 3 > 1` is preserved.
 * - Trailing spaces before a line break are dropped, runs of three or more
 *   line breaks collapse to one blank line, and the result is trimmed.
 *
 * @param html - HTML fragment to convert.
 * @returns The plain-text rendering of `html`.
 */
function stripHTML(html: string): string {
  return html
    .replace(/<br\s*\/?>/gi, '\n')
    .replace(/<\/(?:p|li|h[1-6]|div|tr|pre|blockquote)>/gi, '\n')
    .replace(/<\/t[dh]>/gi, ' ')
    .replace(/<hr\s*\/?>/gi, '\n---\n')
    .replace(/<!--[\s\S]*?-->|<[!\/]?[a-zA-Z][^>]*>/g, '')
    // Whitespace-only line endings would otherwise defeat the collapse below.
    .replace(/[ \t]+\n/g, '\n')
    .replace(/\n{3,}/g, '\n\n')
    .trim();
}
|
|
48
|
+
|
|
49
|
+
// ── Entry Types ──────────────────────────────────────────────────────────────
|
|
50
|
+
|
|
51
|
+
/** One prompt/response exchange extracted from a Gemini Takeout HTML cell. */
interface GeminiEntry {
|
|
52
|
+
userPrompt: string; // Plain-text user prompt; may be an empty string.
|
|
53
|
+
aiResponse: string; // Plain-text model response; may be an empty string.
|
|
54
|
+
timestampISO: string; // Entry time as an ISO 8601 string (from parseTimestamp).
|
|
55
|
+
timestampUnix: number; // Same instant as whole seconds since the Unix epoch.
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
// ── Gemini Adapter ───────────────────────────────────────────────────────────
|
|
59
|
+
|
|
60
|
+
/**
 * Import adapter for Google Gemini activity exported as HTML.
 *
 * The adapter extracts prompt/response entries from the HTML, groups them
 * into pseudo-sessions by temporal proximity and emits conversation chunks of
 * at most `CHUNK_SIZE` messages. It never produces facts itself; `facts` is
 * always empty in the result.
 */
export class GeminiAdapter extends BaseImportAdapter {
  readonly source: ImportSource = 'gemini';
  readonly displayName = 'Google Gemini';

  /**
   * Parse a Gemini HTML export into conversation chunks.
   *
   * @param input - Either the HTML text itself (`content`) or a path to the
   *   HTML file (`file_path`). A leading `~` path segment is expanded to the
   *   current user's home directory. `content` wins when both are given.
   * @param onProgress - Optional callback notified when parsing starts and
   *   once entries have been grouped into sessions.
   * @returns Chunks plus counters and metadata. Read failures and missing
   *   input are reported through `errors`; an export without any usable entry
   *   is reported through `warnings`. The method does not throw for these.
   */
  async parse(
    input: { content?: string; file_path?: string },
    onProgress?: ProgressCallback,
  ): Promise<AdapterParseResult> {
    const warnings: string[] = [];
    const errors: string[] = [];

    let content: string;

    if (input.content) {
      content = input.content;
    } else if (input.file_path) {
      try {
        // Expand only a bare "~" segment: "~user/..." names another account
        // and must not be rewritten. The function replacer keeps "$" in the
        // home directory from being read as a replacement pattern.
        const resolved = input.file_path.replace(/^~(?=$|[\\/])/, () => os.homedir());
        content = fs.readFileSync(resolved, 'utf-8');
      } catch (e) {
        errors.push(`Failed to read file: ${e instanceof Error ? e.message : 'Unknown error'}`);
        return { facts: [], chunks: [], totalMessages: 0, warnings, errors };
      }
    } else {
      errors.push(
        'Gemini import requires either content or file_path. ' +
          'Export from Google Takeout: takeout.google.com → select Gemini Apps → export. ' +
          'Provide the "My Activity.html" file path.',
      );
      return { facts: [], chunks: [], totalMessages: 0, warnings, errors };
    }

    if (onProgress) {
      onProgress({ current: 0, total: 0, phase: 'parsing', message: 'Parsing Gemini HTML...' });
    }

    // Parse HTML into entries
    const entries = this.parseHTML(content);
    if (entries.length === 0) {
      warnings.push('No conversation entries found in the HTML file.');
      return { facts: [], chunks: [], totalMessages: 0, warnings, errors };
    }

    // Group into pseudo-sessions by temporal proximity
    const sessions = this.groupSessions(entries);

    if (onProgress) {
      onProgress({
        current: 0,
        total: sessions.length,
        phase: 'parsing',
        message: `Parsed ${entries.length} entries into ${sessions.length} sessions`,
      });
    }

    // Build conversation chunks from sessions
    const chunks: ConversationChunk[] = [];
    let totalMessages = 0;

    for (const session of sessions) {
      const messages: Array<{ role: 'user' | 'assistant'; text: string }> = [];
      for (const entry of session) {
        if (entry.userPrompt) messages.push({ role: 'user', text: entry.userPrompt });
        if (entry.aiResponse) messages.push({ role: 'assistant', text: entry.aiResponse });
      }
      if (messages.length === 0) continue;

      totalMessages += messages.length;
      // Every chunk of a session carries the time of the session's first entry.
      const timestamp = session[0].timestampISO;
      const totalChunks = Math.ceil(messages.length / CHUNK_SIZE);

      // Sub-chunk large sessions
      for (let i = 0; i < messages.length; i += CHUNK_SIZE) {
        const batch = messages.slice(i, i + CHUNK_SIZE);
        const chunkIdx = Math.floor(i / CHUNK_SIZE) + 1;
        const title = totalChunks > 1
          ? `Gemini session (part ${chunkIdx}/${totalChunks})`
          : 'Gemini session';

        chunks.push({ title, messages: batch, timestamp });
      }
    }

    return {
      facts: [],
      chunks,
      totalMessages,
      warnings,
      errors,
      source_metadata: {
        format: 'gemini-takeout-html',
        total_entries: entries.length,
        sessions_count: sessions.length,
        chunks_count: chunks.length,
        total_messages: totalMessages,
        date_range: {
          // parseHTML returns entries sorted oldest-first.
          earliest: entries[0]?.timestampISO,
          latest: entries[entries.length - 1]?.timestampISO,
        },
      },
    };
  }

  /**
   * Parse Gemini Takeout HTML into structured entries.
   *
   * Each outer-cell div contains: "Prompted USER_TEXT<br>TIMESTAMP<br>RESPONSE_HTML"
   * all within one content-cell. Cells without the "Prompted" marker, without
   * a parseable timestamp after it, or whose prompt and response are both
   * shorter than three characters are skipped.
   *
   * @param html - Full HTML document text.
   * @returns Entries sorted by ascending timestamp.
   */
  private parseHTML(html: string): GeminiEntry[] {
    const entries: GeminiEntry[] = [];
    const promptedMarker = 'Prompted\u00a0';
    const cellPattern = /<div class="outer-cell[^"]*">([\s\S]*?)(?=<div class="outer-cell|$)/g;
    const tsPattern = /(\d{1,2}\s+\w{3}\s+\d{4},\s+\d{2}:\d{2}:\d{2}\s+\w+)/;
    let match: RegExpExecArray | null;

    while ((match = cellPattern.exec(html)) !== null) {
      const cell = match[1];

      // Only process "Prompted" entries (skip canvas, feedback)
      const promptedIdx = cell.indexOf(promptedMarker);
      if (promptedIdx === -1) continue;

      // The first timestamp after the marker both dates the entry and
      // separates the user prompt from the AI response. A timestamp directly
      // at the marker leaves no prompt to split on, so the cell is skipped.
      const afterPrompted = cell.substring(promptedIdx + promptedMarker.length);
      const ts = tsPattern.exec(afterPrompted);
      if (!ts || ts.index <= 0) continue;

      const timestampISO = parseTimestamp(ts[1]);
      if (!timestampISO) continue;

      // Tags are stripped BEFORE entities are decoded: the other order turns
      // escaped text such as "&lt;div&gt;" into a tag that is then deleted.
      const userPrompt = decodeEntities(stripHTML(afterPrompted.substring(0, ts.index))).trim();

      const afterTs = afterPrompted
        .substring(ts.index + ts[0].length)
        .replace(/^\s*<br\s*\/?>\s*/i, '');
      // The response ends where the next content-cell of the same entry begins.
      const endDiv = afterTs.search(/<\/div>\s*<div class="content-cell/);
      const rawResp = endDiv !== -1 ? afterTs.substring(0, endDiv) : afterTs;
      const aiResponse = decodeEntities(stripHTML(rawResp)).trim();

      if (userPrompt.length < 3 && aiResponse.length < 3) continue;

      entries.push({
        userPrompt,
        aiResponse,
        timestampISO,
        timestampUnix: Math.floor(new Date(timestampISO).getTime() / 1000),
      });
    }

    // Sort chronologically (HTML is newest-first)
    entries.sort((a, b) => a.timestampUnix - b.timestampUnix);
    return entries;
  }

  /**
   * Group entries into pseudo-sessions by temporal proximity.
   *
   * A new session starts whenever an entry is more than
   * `SESSION_GAP_MINUTES` later than the entry before it.
   *
   * @param entries - Entries in ascending timestamp order.
   * @returns Non-empty sessions in input order; empty when `entries` is empty.
   */
  private groupSessions(entries: GeminiEntry[]): GeminiEntry[][] {
    if (entries.length === 0) return [];

    const maxGapSeconds = SESSION_GAP_MINUTES * 60;
    const sessions: GeminiEntry[][] = [];
    let current: GeminiEntry[] = [entries[0]];

    for (let i = 1; i < entries.length; i++) {
      const gap = entries[i].timestampUnix - entries[i - 1].timestampUnix;
      if (gap > maxGapSeconds) {
        sessions.push(current);
        current = [entries[i]];
      } else {
        current.push(entries[i]);
      }
    }
    sessions.push(current);
    return sessions;
  }
}
|
package/import-adapters/index.ts
CHANGED
|
@@ -4,12 +4,14 @@ export { Mem0Adapter } from './mem0-adapter.js';
|
|
|
4
4
|
export { MCPMemoryAdapter } from './mcp-memory-adapter.js';
|
|
5
5
|
export { ChatGPTAdapter } from './chatgpt-adapter.js';
|
|
6
6
|
export { ClaudeAdapter } from './claude-adapter.js';
|
|
7
|
+
export { GeminiAdapter } from './gemini-adapter.js';
|
|
7
8
|
|
|
8
9
|
import type { ImportSource } from './types.js';
|
|
9
10
|
import { Mem0Adapter } from './mem0-adapter.js';
|
|
10
11
|
import { MCPMemoryAdapter } from './mcp-memory-adapter.js';
|
|
11
12
|
import { ChatGPTAdapter } from './chatgpt-adapter.js';
|
|
12
13
|
import { ClaudeAdapter } from './claude-adapter.js';
|
|
14
|
+
import { GeminiAdapter } from './gemini-adapter.js';
|
|
13
15
|
import type { BaseImportAdapter } from './base-adapter.js';
|
|
14
16
|
|
|
15
17
|
const ADAPTERS: Partial<Record<ImportSource, () => BaseImportAdapter>> = {
|
|
@@ -17,6 +19,7 @@ const ADAPTERS: Partial<Record<ImportSource, () => BaseImportAdapter>> = {
|
|
|
17
19
|
'mcp-memory': () => new MCPMemoryAdapter(),
|
|
18
20
|
'chatgpt': () => new ChatGPTAdapter(),
|
|
19
21
|
'claude': () => new ClaudeAdapter(),
|
|
22
|
+
'gemini': () => new GeminiAdapter(),
|
|
20
23
|
};
|
|
21
24
|
|
|
22
25
|
export function getAdapter(source: ImportSource): BaseImportAdapter {
|
package/import-adapters/types.ts
CHANGED
|
@@ -19,7 +19,7 @@ export interface NormalizedFact {
|
|
|
19
19
|
tags?: string[];
|
|
20
20
|
}
|
|
21
21
|
|
|
22
|
-
export type ImportSource = 'mem0' | 'mcp-memory' | 'chatgpt' | 'claude' | 'memoclaw' | 'generic-json' | 'generic-csv';
|
|
22
|
+
export type ImportSource = 'mem0' | 'mcp-memory' | 'chatgpt' | 'claude' | 'gemini' | 'memoclaw' | 'generic-json' | 'generic-csv';
|
|
23
23
|
|
|
24
24
|
/**
|
|
25
25
|
* What the user passes to the import tool.
|