parallelclaw 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62):
  1. package/CHANGELOG.md +204 -0
  2. package/HELP.md +600 -0
  3. package/LICENSE +21 -0
  4. package/MULTI_MACHINE.md +152 -0
  5. package/README.md +417 -0
  6. package/README.ru.md +740 -0
  7. package/SYNC.md +844 -0
  8. package/bot/README.md +173 -0
  9. package/bot/config.js +66 -0
  10. package/bot/inbox.js +153 -0
  11. package/bot/index.js +294 -0
  12. package/bot/nexara.js +61 -0
  13. package/bot/poll.js +304 -0
  14. package/bot/search.js +155 -0
  15. package/bot/telegram.js +96 -0
  16. package/ingest.js +2712 -0
  17. package/lib/cli/index.js +1987 -0
  18. package/lib/config.js +220 -0
  19. package/lib/db-init.js +158 -0
  20. package/lib/hook/install.js +268 -0
  21. package/lib/import-telegram.js +158 -0
  22. package/lib/ingest-file.js +779 -0
  23. package/lib/notify-click-action.js +281 -0
  24. package/lib/openclaw-channel.js +643 -0
  25. package/lib/parse-cursor.js +172 -0
  26. package/lib/parse-obsidian.js +256 -0
  27. package/lib/parse-telegram-html.js +384 -0
  28. package/lib/parse.js +175 -0
  29. package/lib/render-markdown.js +0 -0
  30. package/lib/store-doc/canonicalize.js +116 -0
  31. package/lib/store-doc/detect.js +209 -0
  32. package/lib/store-doc/extract-title.js +162 -0
  33. package/lib/sync/auth.js +80 -0
  34. package/lib/sync/cert.js +144 -0
  35. package/lib/sync/cli.js +906 -0
  36. package/lib/sync/client.js +138 -0
  37. package/lib/sync/config.js +130 -0
  38. package/lib/sync/pair.js +145 -0
  39. package/lib/sync/pull.js +158 -0
  40. package/lib/sync/push.js +305 -0
  41. package/lib/sync/replicate.js +335 -0
  42. package/lib/sync/server.js +224 -0
  43. package/lib/sync/service.js +726 -0
  44. package/lib/tasks.js +215 -0
  45. package/lib/telegram-decisions.js +165 -0
  46. package/lib/telegram-discovery.js +373 -0
  47. package/lib/telegram-notify.js +272 -0
  48. package/lib/telegram-pending.js +200 -0
  49. package/lib/web/index.js +265 -0
  50. package/lib/web/routes/conversation.js +193 -0
  51. package/lib/web/routes/conversations.js +180 -0
  52. package/lib/web/routes/dashboard.js +175 -0
  53. package/lib/web/routes/pending.js +277 -0
  54. package/lib/web/routes/settings.js +226 -0
  55. package/lib/web/static/style.css +393 -0
  56. package/lib/web/templates.js +234 -0
  57. package/package.json +84 -0
  58. package/server.js +3816 -0
  59. package/skills/install-memex/README.md +109 -0
  60. package/skills/install-memex/SKILL.md +342 -0
  61. package/skills/install-memex/examples.md +294 -0
  62. package/skills/install-memex-claw/SKILL.md +423 -0
@@ -0,0 +1,779 @@
1
+ /**
2
+ * Single-call ingest of an arbitrary file path.
3
+ *
4
+ * Solves the v0.10.x onboarding gap: users put their Telegram/Claude exports
5
+ * in *natural* places like ~/projects/memex/ or ~/Desktop/ rather than the
6
+ * memex-magic paths (~/.memex/inbox/, ~/Downloads/Telegram Desktop/). The
7
+ * old answer was "drop it where memex watches" — but users don't know those
8
+ * paths and AI agents had to fall back to ~10k tokens of bash file-ops
9
+ * (mv to inbox, poll ingest.log, check DB count, …) just to get one file in.
10
+ *
11
+ * This helper lets the agent (or CLI) pass any path and get a single
12
+ * structured response. Auto-detects format, respects Telegram privacy
13
+ * decisions, returns precise insert/duplicate counts.
14
+ *
15
+ * Returns one of:
16
+ * { status: 'imported', ... } — records landed in DB
17
+ * { status: 'needs_consent',... } — new Telegram chat, ask user before forcing
18
+ * { status: 'skipped', ... } — user previously skipped/blocked this chat
19
+ * { status: 'error', ... } — file unreadable / format unknown / parse failed
20
+ */
21
+
22
import { closeSync, existsSync, openSync, readFileSync, readSync, statSync } from 'node:fs';
import { homedir } from 'node:os';
import { basename, isAbsolute, join } from 'node:path';
import { getOrigin } from './config.js';
26
+
27
// Format identifiers accepted through opts.format. The list is also echoed
// in the "unable to detect format" error so callers know what to pass.
const SUPPORTED_FORMATS = [
  'telegram-json',
  'telegram-html',
  'claude-jsonl',
  'cowork-jsonl',
  'openclaw-jsonl',
];
28
+
29
/**
 * Expand a leading `~` to the user's home directory and anchor relative
 * paths at the current working directory. We DON'T realpath here —
 * symlinks in the user's home are valid (e.g. iCloud-synced ~/Downloads).
 *
 * @param {string} p - Raw path as supplied by the user or agent.
 * @returns {string|null} Absolute path, or null when `p` is not a string or
 *   is empty / whitespace-only.
 */
export function resolvePath(p) {
  if (typeof p !== 'string') return null;

  const trimmed = p.trim();
  // A blank path must not fall through to join(cwd, '') — that would
  // silently resolve to the working directory instead of "path required".
  if (trimmed === '') return null;

  let expanded = trimmed;
  if (expanded === '~') {
    expanded = homedir();
  } else if (expanded.startsWith('~/')) {
    expanded = join(homedir(), expanded.slice(2));
  }

  return isAbsolute(expanded) ? expanded : join(process.cwd(), expanded);
}
41
+
42
// Number of leading characters of a .json file inspected for Telegram markers.
const JSON_SNIFF_CHARS = 8192;

/**
 * Read at most `maxChars` leading UTF-16 code units of a UTF-8 file without
 * loading the whole file into memory.
 *
 * A UTF-8 sequence of up to 4 bytes decodes to at least one code unit, so
 * `maxChars * 4` bytes always cover the first `maxChars` code units; the
 * result equals `readFileSync(path, 'utf-8').slice(0, maxChars)`.
 */
function readHeadChars(absPath, maxChars) {
  const buf = Buffer.alloc(maxChars * 4);
  const fd = openSync(absPath, 'r');
  try {
    let filled = 0;
    while (filled < buf.length) {
      const n = readSync(fd, buf, filled, buf.length - filled, null);
      if (n === 0) break; // EOF
      filled += n;
    }
    return buf.toString('utf-8', 0, filled).slice(0, maxChars);
  } finally {
    closeSync(fd);
  }
}

/**
 * Sniff format from path + content. Returns null when we can't tell.
 *
 * Heuristics:
 *  - directory containing messages.html  → telegram-html (Desktop export dir)
 *  - directory containing result.json    → telegram-json-in-dir
 *  - .html / .htm                        → telegram-html
 *  - .json with Telegram markers in the first 8192 chars → telegram-json
 *  - .jsonl named cowork-*               → cowork-jsonl
 *  - .jsonl named openclaw-*, or located under a `.openclaw` directory
 *                                        → openclaw-jsonl
 *  - other .jsonl                        → claude-jsonl
 *
 * Note that 'telegram-json-in-dir' is an internal identifier: ingestFile
 * accepts it, but it is not listed in SUPPORTED_FORMATS.
 *
 * @param {string} absPath - Absolute path to a file or directory.
 * @returns {string|null} Detected format identifier, or null.
 */
export function detectFormat(absPath) {
  if (!existsSync(absPath)) return null;

  let stat;
  try {
    stat = statSync(absPath);
  } catch (_) {
    // Path vanished or became unreadable between existsSync and statSync.
    return null;
  }

  if (stat.isDirectory()) {
    // Telegram Desktop HTML export = directory with messages.html at root
    if (existsSync(join(absPath, 'messages.html'))) return 'telegram-html';
    // Some exports nest result.json inside
    if (existsSync(join(absPath, 'result.json'))) return 'telegram-json-in-dir';
    return null;
  }

  const lower = absPath.toLowerCase();
  if (lower.endsWith('.html') || lower.endsWith('.htm')) return 'telegram-html';

  if (lower.endsWith('.json')) {
    let head;
    try {
      // Only the head is inspected, so only the head is read.
      head = readHeadChars(absPath, JSON_SNIFF_CHARS);
    } catch (_) {
      return null;
    }
    const looksLikeTelegram =
      head.includes('"messages"') ||
      head.includes('"chats"') ||
      head.includes('"personal_information"');
    return looksLikeTelegram ? 'telegram-json' : null;
  }

  if (lower.endsWith('.jsonl')) {
    const name = basename(lower);
    if (name.startsWith('cowork-')) return 'cowork-jsonl';
    if (name.startsWith('openclaw-')) return 'openclaw-jsonl';
    // On-disk OpenClaw sessions live at ~/.openclaw/agents/main/sessions/<uuid>.jsonl
    // with no filename prefix — detect via path instead. Accept either
    // separator so Windows-style paths match as well.
    if (/[\\/]\.openclaw[\\/]/.test(lower)) return 'openclaw-jsonl';
    return 'claude-jsonl';
  }

  return null;
}
91
+
92
/**
 * Main entry. Caller is responsible for opening a writable better-sqlite3 Database
 * handle and passing it in — we don't open/close the DB here.
 *
 * @param {object} db - Writable database handle, passed through to the
 *   format-specific ingest function.
 * @param {string} rawPath - Path as supplied by the user; `~` and relative
 *   paths are resolved via resolvePath.
 * @param {object} [opts]
 * @param {string} [opts.format] - One of SUPPORTED_FORMATS, or 'auto'
 *   (default) to sniff the format with detectFormat.
 * @param {boolean} [opts.force] - Bypass the Telegram privacy gate
 *   (skip/block decisions). Default false.
 * @returns {Promise<object>} Result object carrying a `status` field; this
 *   function itself returns `status: 'error'` for a missing path, a missing
 *   file, an undetectable format, or an unsupported explicit format.
 */
export async function ingestFile(db, rawPath, opts = {}) {
  // Default parameters only apply to `undefined`; tolerate an explicit null.
  const options = opts ?? {};

  const path = resolvePath(rawPath);
  if (!path) return { status: 'error', error: 'path required' };
  if (!existsSync(path)) return { status: 'error', error: `file not found: ${path}` };

  const explicitFormat =
    options.format && options.format !== 'auto' ? options.format : null;
  const format = explicitFormat || detectFormat(path);
  if (!format) {
    return {
      status: 'error',
      error: `unable to detect format; supported: ${SUPPORTED_FORMATS.join(', ')}. ` +
        `Pass an explicit format= if the file has no extension hint.`,
    };
  }

  switch (format) {
    case 'telegram-json':
    case 'telegram-json-in-dir':
    case 'telegram-html':
      return ingestTelegram(db, path, format, options);
    case 'claude-jsonl':
    case 'cowork-jsonl':
    case 'openclaw-jsonl':
      return ingestClaudeJsonl(db, path, format, options);
    default:
      return { status: 'error', error: `unsupported format: ${format}` };
  }
}
122
+
123
+ // ----- Telegram path -----
124
+
125
/**
 * Import a Telegram export: a JSON file, a directory holding result.json,
 * or an HTML export handled by parse-telegram-html.js.
 *
 * Single-chat exports pass through the user's decision state (block / skip /
 * allow) unless `opts.force` is set. Multi-chat exports skip the gate and
 * import everything.
 *
 * @param {object} db - Database handle, passed to importTelegramRaw.
 * @param {string} path - Absolute path of the export file or directory.
 * @param {string} format - 'telegram-json', 'telegram-json-in-dir' or
 *   'telegram-html'.
 * @param {object} [opts]
 * @param {boolean} [opts.force] - Bypass the privacy gate.
 * @returns {Promise<object>} Object with `status` of 'imported',
 *   'needs_consent', 'skipped' or 'error'. An 'imported' result carries a
 *   `warning` string when the import succeeded but the decision state
 *   could not be saved.
 */
async function ingestTelegram(db, path, format, opts = {}) {
  // `opts` may arrive as an explicit null, which default parameters don't cover.
  const force = Boolean(opts?.force);

  // Parse raw → { chats: { list: [...] }, ... }
  let raw;
  try {
    if (format === 'telegram-html') {
      const { parseTelegramHtmlExport } = await import('./parse-telegram-html.js');
      raw = parseTelegramHtmlExport(path);
    } else if (format === 'telegram-json-in-dir') {
      raw = JSON.parse(readFileSync(join(path, 'result.json'), 'utf-8'));
    } else {
      raw = JSON.parse(readFileSync(path, 'utf-8'));
    }
  } catch (e) {
    return { status: 'error', error: `parse failed: ${e.message}` };
  }
  if (!raw) return { status: 'error', error: 'parse returned empty' };

  // Accept three shapes: full archive ({chats: {list}}), bare {list}, or a
  // single chat object that carries its own `messages`.
  let chats = [];
  if (Array.isArray(raw.chats?.list)) chats = raw.chats.list;
  else if (Array.isArray(raw.list)) chats = raw.list;
  else if (raw.messages) chats = [raw];

  if (chats.length === 0) {
    return { status: 'error', error: 'no chats found in file' };
  }

  // Privacy gate — single chat exports route through user's decision state;
  // multi-chat exports (whole-Telegram-archive case, rare) skip the gate and
  // import everything by default (the agent already had to coordinate that).
  const decisionsMod = await import('./telegram-decisions.js');
  const state = decisionsMod.loadDecisions();

  if (chats.length === 1 && !force) {
    const chat = chats[0];
    const title =
      chat.name ||
      (chat.type === 'saved_messages' ? 'Saved Messages' : `Telegram chat ${chat.id}`);

    if (decisionsMod.isBlocked(state, title)) {
      return {
        status: 'skipped',
        format,
        chat_title: title,
        reason: 'matches a block pattern in your decision state',
        message: `Chat "${title}" matches a block pattern. To override, call again with force=true (and consider unblocking via memex telegram unblock <pattern>).`,
      };
    }
    if (decisionsMod.isSkipped(state, title)) {
      return {
        status: 'skipped',
        format,
        chat_title: title,
        reason: 'previously skipped by user',
        message: `Chat "${title}" was previously skipped. To override, call again with force=true.`,
      };
    }
    if (!decisionsMod.isAllowed(state, title)) {
      // New chat — needs consent. Surface preview so the agent can ask the user.
      const preview = summarizeTelegramChat(chat);
      return {
        status: 'needs_consent',
        format,
        chat_title: title,
        chat_type: chat.type || 'unknown',
        message_count: preview.messageCount,
        senders_sample: preview.sendersSample,
        date_first: preview.dateFirst,
        date_last: preview.dateLast,
        message:
          `New Telegram chat "${title}" (${preview.messageCount} msgs). ` +
          `Ask the user before importing — privacy gate. ` +
          `To proceed: call memex_import_file again with force=true. ` +
          `To never auto-import this chat: memex telegram skip "${title}".`,
      };
    }
  }

  // Allowed (or forced, or multi-chat archive) → run the upsert.
  const { importTelegramRaw } = await import('./import-telegram.js');
  let result;
  try {
    result = importTelegramRaw(db, raw);
  } catch (e) {
    return { status: 'error', error: `import failed: ${e.message}` };
  }

  // Mark chats as allowed for future re-exports. The records are already in
  // the DB at this point, so a failure to persist the decision state is
  // reported as a warning rather than turning a successful import into a
  // rejected promise.
  let warning = null;
  try {
    for (const c of result.chats) {
      decisionsMod.allowChat(state, c.title);
    }
    decisionsMod.saveDecisions(state);
  } catch (e) {
    warning = `imported, but saving Telegram decisions failed: ${e.message}`;
  }

  const toDay = (unixSeconds) =>
    unixSeconds ? new Date(unixSeconds * 1000).toISOString().slice(0, 10) : null;

  const response = {
    status: 'imported',
    format,
    chats: result.chats.map((c) => ({
      conversation_id: c.conversation_id,
      title: c.title,
      msg_count: c.msg_count,
      date_first: toDay(c.first_ts),
      date_last: toDay(c.last_ts),
    })),
    total_imported: result.totalImported,
  };
  if (warning) response.warning = warning;
  return response;
}

/**
 * Build the consent preview for a single chat: message count, a small sample
 * of sender names, and the date range covered by the export.
 *
 * Senders are sampled from the first 200 real messages only, but the date
 * range scans every message — otherwise `date_last` would report the 200th
 * message's date for any longer chat.
 */
function summarizeTelegramChat(chat) {
  const SENDER_SAMPLE_WINDOW = 200;
  const SENDER_SAMPLE_SIZE = 5;

  const allMessages = Array.isArray(chat.messages) ? chat.messages : [];
  const realMessages = allMessages.filter((m) => m.type === 'message');

  const senders = new Set();
  let firstTs = Infinity;
  let lastTs = 0;
  realMessages.forEach((m, index) => {
    if (index < SENDER_SAMPLE_WINDOW && m.from) senders.add(m.from);
    const ts = parseInt(m.date_unixtime || '0', 10);
    if (ts) {
      firstTs = Math.min(firstTs, ts);
      lastTs = Math.max(lastTs, ts);
    }
  });

  const toDay = (unixSeconds) => new Date(unixSeconds * 1000).toISOString().slice(0, 10);
  return {
    messageCount: realMessages.length,
    sendersSample: Array.from(senders).slice(0, SENDER_SAMPLE_SIZE),
    dateFirst: Number.isFinite(firstTs) ? toDay(firstTs) : null,
    dateLast: lastTs ? toDay(lastTs) : null,
  };
}
241
+
242
+ // ----- Claude Code / Cowork JSONL path -----
243
+
244
/**
 * Ingest a Claude Code or Cowork JSONL session file into a single
 * conversation whose id is `<source>-<file name without .jsonl>`.
 *
 * OpenClaw files are delegated to ingestOpenclawJsonl, which does
 * channel-aware per-record routing (v0.11+).
 *
 * @param {object} db - better-sqlite3 style handle (`prepare`, `transaction`).
 * @param {string} path - Absolute path of the .jsonl file.
 * @param {string} format - 'claude-jsonl', 'cowork-jsonl' or 'openclaw-jsonl'.
 * @param {object} [_opts] - Only forwarded to the OpenClaw path.
 * @returns {Promise<object>} `{ status: 'imported', format, conversation_id,
 *   total_imported, title }`, or `{ status: 'error', error }` when the file
 *   cannot be read or the transaction fails. `total_imported` counts every
 *   record written, including ones that updated an existing row.
 */
async function ingestClaudeJsonl(db, path, format, _opts) {
  if (format === 'openclaw-jsonl') {
    return ingestOpenclawJsonl(db, path, _opts);
  }

  const source = format === 'cowork-jsonl' ? 'claude-cowork' : 'claude-code';
  const fileName = basename(path, '.jsonl');
  const conversationId = `${source}-${fileName}`;

  // We replicate the inner logic of server.js's importClaudeCodeJsonl so this
  // module stays self-contained. (Extracting that function from server.js is
  // a bigger refactor we don't need right now.)
  const { extractMessageFromRecord, extractCompactBoundary, isContinuationBoilerplate } =
    await import('./parse.js');

  // Origin is bound as a parameter instead of being spliced into the SQL
  // text, so a quote character in it cannot break or alter the statement.
  // String() keeps the value identical to what interpolation produced.
  const origin = String(getOrigin());

  const insertMessage = db.prepare(`
    INSERT INTO messages (source, conversation_id, msg_id, role, sender, text, ts, metadata, edited_at, uuid, origin)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    ON CONFLICT(source, conversation_id, msg_id) DO UPDATE SET
      text = excluded.text,
      uuid = COALESCE(messages.uuid, excluded.uuid),
      origin = COALESCE(messages.origin, excluded.origin)
  `);
  const countMessages = db.prepare(
    'SELECT COUNT(*) AS n FROM messages WHERE conversation_id = ?',
  );
  const upsertConversation = db.prepare(`
    INSERT INTO conversations (conversation_id, source, title, first_ts, last_ts, message_count, parent_conversation_id, project_path)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    ON CONFLICT(conversation_id) DO UPDATE SET
      title = COALESCE(excluded.title, title),
      first_ts = MIN(first_ts, excluded.first_ts),
      last_ts = MAX(last_ts, excluded.last_ts),
      project_path = COALESCE(excluded.project_path, project_path),
      message_count = (
        SELECT COUNT(*) FROM messages WHERE messages.conversation_id = conversations.conversation_id
      )
  `);

  let lines;
  try {
    lines = readFileSync(path, 'utf-8').split('\n').filter(Boolean);
  } catch (e) {
    return { status: 'error', error: `read failed: ${e.message}` };
  }

  let aiTitle = null;
  let firstUserText = null;
  let projectPath = null;
  let first_ts = Infinity;
  let last_ts = 0;
  let imported = 0;

  const toUnixSeconds = (timestamp) =>
    timestamp ? Math.floor(new Date(timestamp).getTime() / 1000) : 0;
  const trackTs = (ts) => {
    if (ts) {
      first_ts = Math.min(first_ts, ts);
      last_ts = Math.max(last_ts, ts);
    }
  };

  const tx = db.transaction(() => {
    for (const line of lines) {
      let obj;
      try { obj = JSON.parse(line); } catch (_) { continue; } // skip malformed lines

      if (obj.type === 'ai-title' && typeof obj.aiTitle === 'string' && obj.aiTitle.trim()) {
        aiTitle = obj.aiTitle.trim();
        continue;
      }
      if (obj.cwd && !projectPath) projectPath = obj.cwd;

      const boundary = extractCompactBoundary(obj);
      if (boundary) {
        const ts = toUnixSeconds(boundary.timestamp);
        trackTs(ts);
        const msgId =
          boundary.id ||
          (boundary.uuid ? `boundary-${boundary.uuid}` : null) ||
          (boundary.timestamp ? `boundary-${boundary.timestamp}` : 'boundary-unknown');
        try {
          insertMessage.run(
            source, conversationId, msgId, 'boundary', 'compact',
            JSON.stringify(boundary.metadata || {}), ts,
            JSON.stringify({ raw_type: 'compact_boundary', parentUuid: boundary.parentUuid || null }),
            null, boundary.uuid || null,
            origin,
          );
          imported++;
        } catch (_) {
          // Best-effort: a record the DB rejects is skipped, not fatal.
          // (Duplicates do not land here — the upsert updates them.)
        }
        continue;
      }

      const m = extractMessageFromRecord(obj);
      if (!m) continue;
      if (!['user', 'assistant', 'summary'].includes(m.role)) continue;

      const ts = toUnixSeconds(m.timestamp);
      trackTs(ts);

      // First non-boilerplate user message doubles as the fallback title.
      if (m.role === 'user' && !firstUserText) {
        const text = m.text.trim().replace(/\s+/g, ' ');
        if (text && !isContinuationBoilerplate(text)) firstUserText = text.slice(0, 80);
      }

      const sender =
        m.role === 'user' ? 'me'
          : m.role === 'summary' ? 'compact-summary'
            : source;

      try {
        insertMessage.run(
          source, conversationId,
          obj.id || `${source}-${fileName}-${(m.uuid || ts).toString().slice(0, 16)}`,
          m.role, sender, m.text, ts,
          JSON.stringify({ raw_type: obj.type || null, parentUuid: m.parentUuid || null }),
          null, m.uuid || null,
          origin,
        );
        imported++;
      } catch (_) {
        // Best-effort: a record the DB rejects is skipped, not fatal.
      }
    }

    const baseTitle = aiTitle || firstUserText || fileName;
    // The recount subquery in the upsert only runs on conflict, so a brand
    // new conversation needs its real message count passed to the INSERT.
    const { n: messageCount } = countMessages.get(conversationId);
    upsertConversation.run(
      conversationId, source, baseTitle,
      isFinite(first_ts) ? first_ts : null,
      last_ts || null,
      messageCount,
      null, projectPath,
    );
  });

  try {
    tx();
  } catch (e) {
    return { status: 'error', error: `transaction failed: ${e.message}` };
  }

  return {
    status: 'imported',
    format,
    conversation_id: conversationId,
    total_imported: imported,
    title: aiTitle || firstUserText || fileName,
  };
}
381
+
382
+ // ----- OpenClaw JSONL — channel-aware per-record routing (v0.11+) -----
383
+
384
/**
 * Ingest an OpenClaw session file with proper channel splitting.
 *
 * Key differences from the claude/cowork path above:
 *   1. Per-record channel detection (kimi-web / telegram / system / null)
 *   2. Telegram "[Queued messages while agent was busy]" records are
 *      UNPACKED into N individual memex messages (each Queued #i block).
 *   3. conversation_id is derived per-channel, not per-file:
 *        telegram  → openclaw-tg-<sender_id>    (per-user thread)
 *        kimi-web  → openclaw-kimi-<file8>      (per-session thread)
 *        system    → openclaw-sys-<file8>
 *        null      → openclaw-<file8>           (fallback)
 *   4. Per-conversation title/first_ts/last_ts/message_count tracked in
 *      a Map and upserted at end. One source file can produce 2-5+ convs.
 *   5. `channel` column populated.
 *   6. Sender metadata (sender_id, username, reply_to_id) for Telegram
 *      stored in `metadata` JSON for future queries.
 *
 * @param {object} db - better-sqlite3 style handle (`prepare`, `transaction`).
 * @param {string} path - Absolute path of the session .jsonl file.
 * @param {object} [_opts]
 * @param {string} [_opts.mode] - 'kimi-claw', 'self-hosted' or 'auto'
 *   (default); forces the session type instead of detecting it.
 * @returns {Promise<object>} `{ status: 'imported', format, total_imported,
 *   conversations, file_uuid8 }`; `{ status: 'skipped', ... }` for a
 *   checkpoint file outside kimi-claw mode; `{ status: 'error', error }`
 *   when the file cannot be read or the transaction fails.
 */
async function ingestOpenclawJsonl(db, path, _opts) {
  const source = 'openclaw';
  const fileName = basename(path, '.jsonl');

  const { extractMessageFromRecord } = await import('./parse.js');
  const channelMod = await import('./openclaw-channel.js');
  const {
    findChannelDef,
    getOrAutoRegister,
    detectChannel,
    findSessionsJson,
    loadSessionsJsonChannelMap,
    lookupChannel,
    detectSessionType,
    isCheckpointFile,
    deriveOpenclawConvId,
    titlePrefixFor,
    baseUuid8,
  } = channelMod;

  // Sessions.json discovery — sibling walk-up + standard OpenClaw path
  // fallback (so backfill from archive can still find it). v0.11.2.
  const sessionsJsonPath = findSessionsJson(path);
  const channelMap = sessionsJsonPath
    ? loadSessionsJsonChannelMap(sessionsJsonPath)
    : new Map();
  // Multi-key lookup: full path → basename → uuid8. The v0.11.2 fix —
  // archive-staged files (openclaw-3824f87a.jsonl) now match against
  // sessions.json entries whose sessionFile is the original path
  // (/root/.openclaw/.../3824f87a-…-c596288bcfe3.jsonl).
  const fileFallbackChannel = lookupChannel(channelMap, path);

  const fileUuid8 = baseUuid8(fileName);

  // === v0.11.2: two-mode routing ===
  // Kimi-Claw and self-hosted OpenClaw have different file layouts and
  // checkpoint semantics. Detect once per file, branch behaviour:
  //   kimi-claw    → ingest checkpoints (they hold busy-arrived TG msgs)
  //   self-hosted  → SKIP checkpoints (they're pure snapshots → would
  //                  duplicate content 30-40×)
  //   unknown      → default to self-hosted (safer for the 90% case)
  //
  // The `--mode` flag on `memex-sync backfill-channels` lets users
  // override (kimi-claw / self-hosted / auto). _opts.mode = 'auto'
  // by default.
  const forcedMode = _opts && _opts.mode && _opts.mode !== 'auto' ? _opts.mode : null;
  const sessionType = forcedMode || detectSessionType(path, channelMap);
  const checkpoint = isCheckpointFile(fileName);
  if (checkpoint && sessionType !== 'kimi-claw') {
    return {
      status: 'skipped',
      format: 'openclaw-jsonl',
      file_uuid8: fileUuid8,
      reason:
        `checkpoint file in ${sessionType} mode — skipped to avoid ` +
        `duplicate explosion. Override with --mode kimi-claw if you ` +
        `know this is a Moonshot Kimi-Claw deployment.`,
      session_type: sessionType,
    };
  }

  let lines;
  try {
    lines = readFileSync(path, 'utf-8').split('\n').filter(Boolean);
  } catch (e) {
    return { status: 'error', error: `read failed: ${e.message}` };
  }

  // Origin is bound as a parameter instead of being spliced into the SQL
  // text, so a quote character in it cannot break or alter the statement.
  // String() keeps the value identical to what interpolation produced.
  const origin = String(getOrigin());

  // Prepared statements with channel column
  const insertMessage = db.prepare(`
    INSERT INTO messages (source, conversation_id, msg_id, role, sender, text, ts, metadata, edited_at, uuid, channel, origin)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    ON CONFLICT(source, conversation_id, msg_id) DO UPDATE SET
      text = excluded.text,
      channel = COALESCE(excluded.channel, messages.channel),
      uuid = COALESCE(messages.uuid, excluded.uuid),
      origin = COALESCE(messages.origin, excluded.origin)
  `);
  const countMessages = db.prepare(
    'SELECT COUNT(*) AS n FROM messages WHERE conversation_id = ?',
  );
  const upsertConversation = db.prepare(`
    INSERT INTO conversations (conversation_id, source, title, first_ts, last_ts, message_count, parent_conversation_id, project_path)
    VALUES (?, ?, ?, ?, ?, ?, NULL, NULL)
    ON CONFLICT(conversation_id) DO UPDATE SET
      title = COALESCE(excluded.title, conversations.title),
      first_ts = MIN(conversations.first_ts, excluded.first_ts),
      last_ts = MAX(conversations.last_ts, excluded.last_ts),
      message_count = (
        SELECT COUNT(*) FROM messages WHERE messages.conversation_id = conversations.conversation_id
      )
  `);

  // Per-conversation stats — title / first_ts / last_ts / channel /
  // message_count. Computed during ingest, applied once via
  // upsertConversation at the end. Channel is remembered so the title
  // prefix ("[Telegram]" / "[Kimi-web]" / "[<auto-registered>]") can be
  // derived even for self-hosted channels whose conv_id doesn't follow
  // the built-in openclaw-tg-* / openclaw-kimi-* / openclaw-sys-*
  // prefixes.
  const convStats = new Map();

  // v0.11.2 (C): system-prompt prefixes that should NOT be used as
  // conversation titles. Subagent sessions in self-hosted OpenClaw
  // start with `[Subagent Context] You are running as a subagent...`
  // which gets picked as title — pollutes the conv list. Filter these
  // and prefer the first REAL user message instead.
  const SYSTEM_PROMPT_PREFIXES = [
    /^\[Subagent Context\]/i,
    /^\[System\]/i,
    /^<system>/i,
    /^You are /i,
    /^System: /i,
  ];
  function isSystemPromptish(text) {
    if (!text || typeof text !== 'string') return false;
    const head = text.trim().slice(0, 120);
    return SYSTEM_PROMPT_PREFIXES.some((re) => re.test(head));
  }

  // Record one written message against its conversation's running stats.
  function bump(convId, ts, candidateTitle, channel) {
    let s = convStats.get(convId);
    if (!s) {
      s = { first_ts: Infinity, last_ts: 0, title: null, channel: null, msgs: 0 };
      convStats.set(convId, s);
    }
    s.msgs += 1;
    if (ts && ts > 0) {
      s.first_ts = Math.min(s.first_ts, ts);
      s.last_ts = Math.max(s.last_ts, ts);
    }
    if (!s.title && candidateTitle && !isSystemPromptish(candidateTitle)) {
      s.title = candidateTitle.trim().replace(/\s+/g, ' ').slice(0, 80);
    }
    if (!s.channel && channel) s.channel = channel;
  }

  let imported = 0;

  // === Sticky channel + conv pointer ===
  //
  // Tracks the channel + conv_id of the most recent DETECTED user
  // message. Used to route two kinds of records that have no channel
  // marker of their own:
  //
  //   1. assistant replies    — semantically belong to the thread
  //                             they answer
  //   2. tool-result records  — role='user' in the OpenClaw schema
  //                             but actually Bash/Read/etc. output;
  //                             these are part of the agent's
  //                             reasoning chain in the same thread
  //
  // The sticky pointer is initialised from the file-level
  // sessions.json fallback (so a pure single-channel file routes even
  // its very first record). It only ADVANCES on detected user
  // messages — never on tool-results or assistants — so those don't
  // shadow the real conversation owner. (v0.11.0 bug: tool-results
  // reset sticky to null and orphaned ~170 messages into a fallback
  // bucket. v0.11.1 fixes this.)
  let lastUserChannel = fileFallbackChannel;
  let lastUserConvId = fileFallbackChannel
    ? deriveOpenclawConvId(fileFallbackChannel, null, fileUuid8)
    : null;

  const tx = db.transaction(() => {
    for (const line of lines) {
      let obj;
      try { obj = JSON.parse(line); } catch (_) { continue; } // skip malformed lines

      // OpenClaw uses {type: "message", message: {role, content}, ...} —
      // extractMessageFromRecord handles this shape (it's the same as
      // Claude Code's nested-content form).
      const m = extractMessageFromRecord(obj);
      if (!m) continue;
      if (!['user', 'assistant'].includes(m.role)) continue;

      const baseTs = m.timestamp
        ? Math.floor(new Date(m.timestamp).getTime() / 1000)
        : 0;

      // === Determine routing ===
      // Text-pattern detection only — file-level fallback handled below
      // so we can distinguish "detected" from "inherited".
      const detectedChannel = m.role === 'user'
        ? detectChannel(m.text, null)
        : null;

      let channel, channelDef;
      if (detectedChannel) {
        // Real user message with a channel marker.
        channel = detectedChannel;
        channelDef = findChannelDef(channel);
      } else if (lastUserConvId) {
        // Synthetic user (tool_result) or assistant — inherit from last
        // detected user message.
        channel = lastUserChannel;
        channelDef = channel
          ? (findChannelDef(channel) || getOrAutoRegister(channel))
          : null;
      } else {
        // Early in file, no sticky yet — use file-level fallback (from
        // sessions.json). For known channels this is null since the
        // pointer was initialised from fileFallbackChannel; this branch
        // mainly covers files with no sessions.json sibling.
        channel = fileFallbackChannel;
        channelDef = channel
          ? (findChannelDef(channel) || getOrAutoRegister(channel))
          : null;
      }

      // === Batched record (Telegram today; pluggable for future) ===
      if (
        channelDef?.parseBatch &&
        m.role === 'user' &&
        /^\[Queued messages while agent was busy\]/i.test(m.text)
      ) {
        const batches = channelDef.parseBatch(m.text);
        for (const b of batches) {
          if (!b.sender_id) continue;
          const convId =
            channelDef.convIdFor({ sender_id: b.sender_id }, { fileUuid8 }) ||
            `openclaw-${fileUuid8}`;
          const msgId = b.message_id ? `tg-${b.message_id}` : `tg-${b.sender_id}-${b.ts || baseTs}`;
          const ts = b.ts || baseTs;
          const metadata = JSON.stringify({
            raw_type: 'openclaw-telegram-batched',
            telegram_message_id: b.message_id,
            sender_id: b.sender_id,
            sender: b.sender,
            username: b.username,
            reply_to_id: b.reply_to_id,
            source_file: fileName,
            parentId: obj.parentId || null,
          });
          try {
            const r = insertMessage.run(
              source, convId, msgId, m.role,
              b.sender || (b.username ? `@${b.username}` : channel),
              b.text || '', ts, metadata, null,
              null,
              channel,
              origin,
            );
            if (r.changes > 0) imported++;
            bump(convId, ts, b.text, channel);
          } catch (_) {
            // Best-effort: a record the DB rejects is skipped, not fatal.
          }
          // Advance sticky to last batched sender
          lastUserChannel = channel;
          lastUserConvId = convId;
        }
        continue;
      }

      // === Single-record metadata extraction (e.g. one TG without batch header) ===
      if (channelDef?.parseSingle && m.role === 'user' && detectedChannel) {
        const parsed = channelDef.parseSingle(m.text);
        if (parsed && parsed.sender_id) {
          const convId =
            channelDef.convIdFor({ sender_id: parsed.sender_id }, { fileUuid8 }) ||
            `openclaw-${fileUuid8}`;
          const msgId = parsed.message_id ? `tg-${parsed.message_id}` : (m.id || `openclaw-${fileName}-${baseTs}`);
          const ts = parsed.ts || baseTs;
          const metadata = JSON.stringify({
            raw_type: 'openclaw-telegram-single',
            telegram_message_id: parsed.message_id,
            sender_id: parsed.sender_id,
            sender: parsed.sender,
            username: parsed.username,
            reply_to_id: parsed.reply_to_id,
            source_file: fileName,
            parentId: obj.parentId || null,
          });
          try {
            const r = insertMessage.run(
              source, convId, msgId, m.role,
              parsed.sender || (parsed.username ? `@${parsed.username}` : channel),
              parsed.text || '', ts, metadata, null,
              m.uuid || null,
              channel,
              origin,
            );
            if (r.changes > 0) imported++;
            bump(convId, ts, parsed.text, channel);
          } catch (_) {
            // Best-effort: a record the DB rejects is skipped, not fatal.
          }
          lastUserChannel = channel;
          lastUserConvId = convId;
          continue;
        }
        // parseSingle failed → fall through to default path
      }

      // === Default path ===
      // - Detected user message on a channel without special parser
      //   (e.g. Kimi-web, system, auto-registered self-hosted channel):
      //   derive a fresh conv_id from the channel def.
      // - Synthetic user (tool_result) or assistant: inherit from
      //   sticky pointer.
      let convId;
      if (detectedChannel && channelDef) {
        convId =
          channelDef.convIdFor({}, { fileUuid8 }) ||
          `openclaw-${fileUuid8}`;
      } else if (lastUserConvId) {
        convId = lastUserConvId;
      } else if (channelDef) {
        convId =
          channelDef.convIdFor({}, { fileUuid8 }) ||
          `openclaw-${fileUuid8}`;
      } else {
        convId = `openclaw-${fileUuid8}`;
      }

      // Channel-specific header strip (Kimi today; pluggable via stripHeader)
      let text = m.text;
      if (channelDef?.stripHeader && m.role === 'user' && detectedChannel) {
        text = channelDef.stripHeader(text);
      }
      if (!text || !text.trim()) continue;

      const sender = m.role === 'user' ? 'me' : source;
      const msgId = m.id || obj.id || `openclaw-${fileName}-${baseTs}`;
      const metadata = JSON.stringify({
        raw_type: obj.type || null,
        parentId: obj.parentId || null,
        parentUuid: m.parentUuid || null,
        source_file: fileName,
        auto_channel: channelDef?.isAutoRegistered || false,
      });
      try {
        const r = insertMessage.run(
          source, convId, msgId, m.role, sender, text, baseTs, metadata, null,
          m.uuid || null,
          channel, // may be null — that's fine
          origin,
        );
        if (r.changes > 0) imported++;
        bump(convId, baseTs, m.role === 'user' ? text : null, channel);
      } catch (_) {
        // Best-effort: a record the DB rejects is skipped, not fatal.
      }

      // Advance sticky ONLY for detected user messages. Tool_results
      // (role='user' without a channel marker) and assistant records
      // do NOT advance — they inherit and pass through.
      if (detectedChannel && m.role === 'user') {
        lastUserChannel = channel;
        lastUserConvId = convId;
      }
    }

    // Upsert all conversations seen in this file. Title prefix comes
    // from the channel def (covers auto-registered self-hosted
    // channels too — e.g. "[discord] What did Mara say?").
    //
    // v0.11.2 (D): skip conv-rows that ended up with 0 actual messages
    // (e.g. subagent system-init files where every record was filtered
    // out as role!='user'/'assistant'). Those produced ghost convs
    // with message_count=0 and "[Subagent Context] You are running…"
    // titles on the test VPS.
    for (const [convId, s] of convStats.entries()) {
      if (s.msgs === 0) continue;
      const prefix = s.channel ? (titlePrefixFor(s.channel) || '') : '';
      const titleText = s.title || fileName;
      const title = prefix ? `${prefix} ${titleText}` : titleText;
      // The recount subquery in the upsert only runs on conflict, so a
      // brand new conversation needs its real message count passed in.
      const { n: messageCount } = countMessages.get(convId);
      upsertConversation.run(
        convId, source, title,
        isFinite(s.first_ts) ? s.first_ts : null,
        s.last_ts || null,
        messageCount,
      );
    }
  });

  try {
    tx();
  } catch (e) {
    return { status: 'error', error: `transaction failed: ${e.message}` };
  }

  return {
    status: 'imported',
    format: 'openclaw-jsonl',
    total_imported: imported,
    conversations: Array.from(convStats.keys()),
    file_uuid8: fileUuid8,
  };
}