parallelclaw 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/CHANGELOG.md +204 -0
  2. package/HELP.md +600 -0
  3. package/LICENSE +21 -0
  4. package/MULTI_MACHINE.md +152 -0
  5. package/README.md +417 -0
  6. package/README.ru.md +740 -0
  7. package/SYNC.md +844 -0
  8. package/bot/README.md +173 -0
  9. package/bot/config.js +66 -0
  10. package/bot/inbox.js +153 -0
  11. package/bot/index.js +294 -0
  12. package/bot/nexara.js +61 -0
  13. package/bot/poll.js +304 -0
  14. package/bot/search.js +155 -0
  15. package/bot/telegram.js +96 -0
  16. package/ingest.js +2712 -0
  17. package/lib/cli/index.js +1987 -0
  18. package/lib/config.js +220 -0
  19. package/lib/db-init.js +158 -0
  20. package/lib/hook/install.js +268 -0
  21. package/lib/import-telegram.js +158 -0
  22. package/lib/ingest-file.js +779 -0
  23. package/lib/notify-click-action.js +281 -0
  24. package/lib/openclaw-channel.js +643 -0
  25. package/lib/parse-cursor.js +172 -0
  26. package/lib/parse-obsidian.js +256 -0
  27. package/lib/parse-telegram-html.js +384 -0
  28. package/lib/parse.js +175 -0
  29. package/lib/render-markdown.js +0 -0
  30. package/lib/store-doc/canonicalize.js +116 -0
  31. package/lib/store-doc/detect.js +209 -0
  32. package/lib/store-doc/extract-title.js +162 -0
  33. package/lib/sync/auth.js +80 -0
  34. package/lib/sync/cert.js +144 -0
  35. package/lib/sync/cli.js +906 -0
  36. package/lib/sync/client.js +138 -0
  37. package/lib/sync/config.js +130 -0
  38. package/lib/sync/pair.js +145 -0
  39. package/lib/sync/pull.js +158 -0
  40. package/lib/sync/push.js +305 -0
  41. package/lib/sync/replicate.js +335 -0
  42. package/lib/sync/server.js +224 -0
  43. package/lib/sync/service.js +726 -0
  44. package/lib/tasks.js +215 -0
  45. package/lib/telegram-decisions.js +165 -0
  46. package/lib/telegram-discovery.js +373 -0
  47. package/lib/telegram-notify.js +272 -0
  48. package/lib/telegram-pending.js +200 -0
  49. package/lib/web/index.js +265 -0
  50. package/lib/web/routes/conversation.js +193 -0
  51. package/lib/web/routes/conversations.js +180 -0
  52. package/lib/web/routes/dashboard.js +175 -0
  53. package/lib/web/routes/pending.js +277 -0
  54. package/lib/web/routes/settings.js +226 -0
  55. package/lib/web/static/style.css +393 -0
  56. package/lib/web/templates.js +234 -0
  57. package/package.json +84 -0
  58. package/server.js +3816 -0
  59. package/skills/install-memex/README.md +109 -0
  60. package/skills/install-memex/SKILL.md +342 -0
  61. package/skills/install-memex/examples.md +294 -0
  62. package/skills/install-memex-claw/SKILL.md +423 -0
@@ -0,0 +1,643 @@
1
+ /**
2
+ * OpenClaw channel detection + Telegram batch unpacking.
3
+ *
4
+ * Background (from disk-surveys by OpenClaw agents on live VPSes,
5
+ * 2026-05-19 → 2026-05-20):
6
+ *
7
+ * OpenClaw stores every channel (Kimi-web, Telegram, future Slack/WA/
8
+ * self-hosted Discord/etc.) in the SAME directory
9
+ * `~/.openclaw/agents/main/sessions/`. There is NO `channel` field at
10
+ * the JSON-record level. The transport is identifiable by two means:
11
+ *
12
+ * 1. `sessions.json` — authoritative routing registry. Maps each
13
+ * sessionFile path to its `deliveryContext.channel` value (e.g.
14
+ * "kimi-claw", "telegram", or any self-hosted name like "discord",
15
+ * "matrix", "custom-web-ui"). USEFUL as the default channel for a
16
+ * whole file — but DOES NOT help with CHECKPOINT files which mix
17
+ * messages from multiple channels interleaved.
18
+ *
19
+ * 2. Text patterns inside `content[0].text` — embedded Markdown
20
+ * preambles + JSON metadata blocks. Reliable signals:
21
+ *
22
+ * "User Message From Kimi:\n[Time: …]?\n…" → kimi-web
23
+ * (Time block is OPTIONAL — short messages skip it)
24
+ * "[Queued messages while agent was busy]\n…" → telegram (batched)
25
+ * "Conversation info (untrusted metadata):" → telegram (single)
26
+ * "System: …" → system
27
+ *
28
+ * v0.11.1: channels live in a `CHANNELS` array (not hardcoded if/else).
29
+ * Adding a new known channel = one entry. Unknown channels from
30
+ * sessions.json get auto-registered with sensible defaults
31
+ * (per-account routing if accountId available, else per-file) — so
32
+ * self-hosted OpenClaw with custom channel names "just works".
33
+ *
34
+ * Telegram messages in checkpoint files are BATCHED — one OpenClaw record
35
+ * may pack 1..N Telegram messages as `Queued #1` / `Queued #2` / … blocks
36
+ * with embedded JSON metadata + the actual user text. This module unpacks
37
+ * those batches into separate logical messages.
38
+ *
39
+ * Public API:
40
+ * • CHANNELS — array of known-channel definitions
41
+ * • findChannelDef(name) — look up by canonical or alias name
42
+ * • getOrAutoRegister(rawChannel) — return def or synthesize generic
43
+ * • detectChannel(text, fallback?) — text-pattern detection
44
+ * • deriveOpenclawConvId(channel, senderInfo, fileUuid8)
45
+ * — conv_id (uses CHANNELS + auto-disc)
46
+ * • titlePrefixFor(channel) — "[Telegram]" / "[Kimi-web]" / etc.
47
+ * • parseBatchedTelegram / parseSingleTelegram / parseTelegramTimestamp
48
+ * • stripKimiHeader(text) — drop "User Message From Kimi:\n…"
49
+ * • loadSessionsJsonChannelMap(path) — file → channel from sessions.json
50
+ * • findSessionsJson(sessionFilePath)— walk up to discover sessions.json
51
+ * • baseUuid8(fileName) — extract 8-char base UUID
52
+ */
53
+
54
+ import { readFileSync, existsSync, openSync, readSync, closeSync } from 'node:fs';
55
+ import { join, dirname, basename } from 'node:path';
56
+
57
+ // ----- Channel registry -----
58
+ //
59
+ // Each entry describes one known channel. Adding a new well-known
60
+ // channel (e.g. "slack" with its own batched format) = one block here +
61
+ // optional spec-parser. Unknown channels surfaced via sessions.json get
62
+ // auto-registered at runtime in `getOrAutoRegister` (see below) — no
63
+ // changes needed for self-hosted custom channel names.
64
+ //
65
+ // Fields:
66
+ // name — canonical identifier stored in `messages.channel`
67
+ // sessionsJsonAlias — strings that sessions.json may use for the same
68
+ // channel (e.g. OpenClaw writes "kimi-claw" while
69
+ // we store "kimi-web" for readability)
70
+ // detect(head) — text-pattern detector; returns true if the first
71
+ // 512 chars of a message look like this channel.
72
+ // Used PER-MESSAGE because checkpoint files mix
73
+ // channels even when sessions.json reports a
74
+ // single channel.
75
+ // parseBatch(text) — optional batched-message parser (Telegram only
76
+ // today). Used when the channel can pack multiple
77
+ // user messages into one OpenClaw record.
78
+ // parseSingle(text) — optional single-message metadata extractor
79
+ // stripHeader(text) — optional preamble-removal for cleaner storage
80
+ // (e.g. drop "User Message From Kimi:\n[Time:…]\n")
81
+ // convIdFor(info, ctx)
82
+ // — function returning the conv_id for a record on
83
+ // this channel. `info` may contain sender_id /
84
+ // accountId / etc. `ctx.fileUuid8` is the 8-char
85
+ // base session UUID. Returns null if not routable;
86
+ // caller then falls back to "openclaw-<file8>".
87
+ // titlePrefix — "[Telegram]" / "[Kimi-web]" — prepended to conv
88
+ // titles so the UI shows channel at a glance.
89
/**
 * Registry of built-in channel definitions, checked in array order by
 * text-pattern detection. Each entry carries the canonical `name`, the
 * aliases sessions.json may use, a `detect(head)` predicate, optional
 * parsers / header stripper, a `convIdFor(info, ctx)` router and the
 * `titlePrefix` shown in conversation titles.
 */
export const CHANNELS = [
  {
    name: 'telegram',
    sessionsJsonAlias: ['telegram'],
    // Batched records open with the "[Queued messages …]" banner; single
    // records carry a "Conversation info" label plus a "sender_id" key.
    detect(head) {
      if (/^\[Queued messages while agent was busy\]/i.test(head)) return true;
      return (
        /Conversation info \(untrusted metadata\):/i.test(head) &&
        /"sender_id"/.test(head)
      );
    },
    parseBatch: parseBatchedTelegram,
    parseSingle: parseSingleTelegram,
    // Routed per sender; null tells the caller to use its own fallback.
    convIdFor(info, _ctx) {
      return info?.sender_id ? `openclaw-tg-${info.sender_id}` : null;
    },
    titlePrefix: '[Telegram]',
  },
  {
    name: 'kimi-web',
    sessionsJsonAlias: ['kimi-claw', 'kimi-web'],
    detect(head) {
      return /^User Message From Kimi:/i.test(head);
    },
    stripHeader(text) {
      return stripKimiHeader(text);
    },
    // Routed per session file.
    convIdFor(_info, ctx) {
      return `openclaw-kimi-${ctx.fileUuid8}`;
    },
    titlePrefix: '[Kimi-web]',
  },
  {
    name: 'system',
    sessionsJsonAlias: ['system'],
    detect(head) {
      return /^System: /i.test(head);
    },
    // Routed per session file.
    convIdFor(_info, ctx) {
      return `openclaw-sys-${ctx.fileUuid8}`;
    },
    titlePrefix: '[System]',
  },
];
118
+
119
/**
 * Resolve a channel definition from CHANNELS.
 *
 * @param {string} name - canonical channel name or a sessions.json alias
 * @returns {object|null} the first definition whose `name` equals `name`
 *   or whose `sessionsJsonAlias` list contains it; null when `name` is
 *   falsy or nothing matches.
 */
export function findChannelDef(name) {
  if (!name) return null;
  for (const def of CHANNELS) {
    if (def.name === name) return def;
    const aliases = def.sessionsJsonAlias || [];
    if (aliases.includes(name)) return def;
  }
  return null;
}
129
+
130
/**
 * Return the definition for a channel name, synthesizing a generic one
 * for names that are not in the registry (e.g. a self-hosted "discord").
 *
 * The synthesized definition is built fresh on each call and is NOT added
 * to CHANNELS. It has no text detector, a `[<name>]` title prefix, and
 * routes by `accountId`, then `sender_id`, then the file's uuid8.
 *
 * @param {string} rawChannelName - channel name as found in sessions.json
 * @returns {object|null} a channel definition, or null when the name is
 *   empty, not a string, or contains no [a-z0-9_-] characters.
 */
export function getOrAutoRegister(rawChannelName) {
  const builtIn = findChannelDef(rawChannelName);
  if (builtIn) return builtIn;
  if (typeof rawChannelName !== 'string' || !rawChannelName) return null;

  // The name ends up inside conv_id and stored values, so keep only
  // [a-z0-9_-] (no spaces, no slashes) and lowercase it.
  const safe = rawChannelName.replace(/[^a-z0-9_-]/gi, '').toLowerCase();
  if (!safe) return null;

  const convIdFor = (info, ctx) => {
    if (info?.accountId) return `openclaw-${safe}-${info.accountId}`;
    if (info?.sender_id) return `openclaw-${safe}-${info.sender_id}`;
    return `openclaw-${safe}-${ctx.fileUuid8}`;
  };

  return {
    name: safe,
    // No text-pattern detector: this channel is only known via sessions.json.
    detect: () => false,
    convIdFor,
    titlePrefix: `[${safe}]`,
    isAutoRegistered: true,
  };
}
162
+
163
+ // ----- Channel detection -----
164
+
165
/**
 * Detect the channel of one message from its text.
 *
 * Only the first 512 characters are inspected. CHANNELS is scanned in
 * order and the first definition whose `detect` accepts the head wins.
 *
 * @param {string} text - raw message text
 * @param {string|null} [fallback=null] - value returned when `text` is
 *   empty / not a string or no detector matches (typically the file-level
 *   channel from sessions.json)
 * @returns {string|null} the matching channel name, else `fallback`
 */
export function detectChannel(text, fallback = null) {
  if (typeof text !== 'string' || text === '') return fallback;
  const head = text.slice(0, 512);
  const hit = CHANNELS.find((ch) => ch.detect && ch.detect(head));
  return hit ? hit.name : fallback;
}
181
+
182
+ // ----- Telegram batch + metadata parsing -----
183
+
184
/**
 * Unpack a "[Queued messages while agent was busy]" record into its
 * individual Telegram messages.
 *
 * The text is split on the "\n---\nQueued #<N>\n" separator. The chunk
 * before the first separator (the banner) is discarded and every other
 * chunk is handed to parseSingleTelegram; chunks it rejects are dropped.
 * When no separator is present the whole text is tried as one message.
 *
 * @param {string} text - raw record text
 * @returns {Array<object>} parsed messages in order; empty when `text`
 *   is empty / not a string or nothing could be parsed.
 */
export function parseBatchedTelegram(text) {
  if (typeof text !== 'string' || text === '') return [];

  const [, ...blocks] = text.split(/\n---\nQueued #\d+\n/);
  if (blocks.length === 0) {
    // No separator at all: possibly a lone message without "Queued #1".
    const only = parseSingleTelegram(text);
    return only ? [only] : [];
  }

  const messages = [];
  for (const chunk of blocks) {
    const parsed = parseSingleTelegram(chunk);
    if (parsed) messages.push(parsed);
  }
  return messages;
}
215
+
216
/**
 * Parse one Telegram-formatted message block.
 *
 * Reads up to two labelled ```json blocks:
 *   "Conversation info (untrusted metadata)" and
 *   "Sender (untrusted metadata)".
 * The user text is whatever follows the closing fence of whichever
 * metadata block ends last, with leading whitespace and trailing
 * whitespace removed.
 *
 * @param {string} block - message block, with or without a Queued header
 * @returns {object|null} `{ text, message_id, sender_id, sender, username,
 *   reply_to_id, ts, raw_timestamp }`, or null when `block` is empty /
 *   not a string or neither metadata block could be parsed.
 */
export function parseSingleTelegram(block) {
  if (typeof block !== 'string' || block === '') return null;

  const convInfo = extractJsonBlock(block, 'Conversation info');
  const senderInfo = extractJsonBlock(block, 'Sender');
  if (!convInfo && !senderInfo) return null;

  const OPEN_FENCE = '```json';
  // Offset just past the closing fence of the labelled block, or 0 when
  // the label / fences are missing. The search for the closing fence must
  // start after the opening one, otherwise it would match the opener.
  const endOfMetadata = (label) => {
    const labelAt = block.indexOf(`${label} (untrusted metadata)`);
    if (labelAt < 0) return 0;
    const openAt = block.indexOf(OPEN_FENCE, labelAt);
    if (openAt < 0) return 0;
    const closeAt = block.indexOf('\n```', openAt + OPEN_FENCE.length);
    return closeAt < 0 ? 0 : closeAt + 4; // 4 === '\n```'.length
  };

  const textStart = Math.max(
    endOfMetadata('Conversation info'),
    endOfMetadata('Sender'),
  );
  const userText = block.slice(textStart).replace(/^[\s\n]+/, '').trimEnd();

  return {
    text: userText,
    message_id: convInfo?.message_id ?? null,
    sender_id: convInfo?.sender_id ?? senderInfo?.id ?? null,
    sender: convInfo?.sender ?? senderInfo?.name ?? null,
    username: senderInfo?.username ?? null,
    reply_to_id: convInfo?.reply_to_id ?? null,
    ts: parseTelegramTimestamp(convInfo?.timestamp),
    raw_timestamp: convInfo?.timestamp ?? null,
  };
}
265
+
266
/**
 * Pull the JSON payload out of a Markdown block of the form
 *   "<label> (untrusted metadata)" … ```json … ```
 *
 * @param {string} text - text to search
 * @param {string} label - label preceding " (untrusted metadata)"
 * @returns {*} the JSON.parse result, or null when the label, either
 *   fence, or valid JSON is missing.
 */
function extractJsonBlock(text, label) {
  const FENCE = '```json';

  const labelAt = text.indexOf(`${label} (untrusted metadata)`);
  if (labelAt < 0) return null;

  const fenceAt = text.indexOf(FENCE, labelAt);
  if (fenceAt < 0) return null;

  // Step over the whitespace / newline that follows the opening fence.
  let start = fenceAt + FENCE.length;
  while (start < text.length && /[\s\n]/.test(text[start])) start += 1;

  const end = text.indexOf('\n```', start);
  if (end < 0) return null;

  try {
    return JSON.parse(text.slice(start, end).trim());
  } catch (_) {
    return null;
  }
}
285
+
286
/**
 * Convert Telegram's text-format timestamp to unix-seconds.
 *
 * Accepted inputs (anything before the date, e.g. the weekday, is ignored):
 *   "Fri 2026-05-01 20:03 GMT+8"     whole-hour offset
 *   "Fri 2026-05-01 20:03 GMT+5:30"  offset with minutes (also "+0530")
 *   "Fri 2026-05-01 20:03 GMT"       bare GMT = zero offset
 *
 * Offsets with minutes matter for zones such as India (+5:30) or
 * Nepal (+5:45); reading only the hour part would shift the result by
 * the dropped minutes.
 *
 * @param {string} s - timestamp string from the metadata block
 * @returns {number} unix epoch seconds (UTC); 0 if `s` is not a
 *   non-empty string or does not match the expected format.
 */
export function parseTelegramTimestamp(s) {
  if (typeof s !== 'string' || !s) return 0;
  const m = s.match(
    /(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2})\s+GMT(?:([+-])(\d{1,2})(?::?(\d{2}))?)?/,
  );
  if (!m) return 0;
  // Unmatched optional groups are undefined, so the defaults give offset 0.
  const [, y, mo, d, h, mi, sign, offH = '0', offM = '0'] = m;
  const offsetMinutes =
    (sign === '-' ? -1 : 1) * (Number(offH) * 60 + Number(offM));
  // Treat the wall-clock fields as if they were UTC, then remove the zone
  // offset: GMT+8 means the wall clock is 8 hours ahead of UTC.
  const wallClockMs = Date.UTC(+y, +mo - 1, +d, +h, +mi);
  return Math.floor((wallClockMs - offsetMinutes * 60_000) / 1000);
}
305
+
306
+ // ----- Kimi-web utilities -----
307
+
308
/**
 * Remove the Kimi preamble so only the user's own content is stored.
 *
 *   "User Message From Kimi:\n[Time: [...]]\nMain text here" → "Main text here"
 *   "User Message From Kimi:\nMain text here"                → "Main text here"
 *
 * The "[Time: …]" line is optional. The result is trimmed whether or not
 * a preamble was present.
 *
 * @param {string} text - raw message text
 * @returns {*} the cleaned text; `text` itself when it is empty or not a
 *   string.
 */
export function stripKimiHeader(text) {
  if (typeof text !== 'string' || text === '') return text;
  const preamble = /^User Message From Kimi:\s*\n(?:\[Time:[^\n]*\]\s*\n)?/i;
  const withoutHeader = text.replace(preamble, '');
  return withoutHeader.trim();
}
327
+
328
+ // ----- sessions.json reader -----
329
+
330
/**
 * Read OpenClaw's sessions.json registry and build a lookup from session
 * file to channel name.
 *
 * Expected shape of sessions.json:
 *   {
 *     "agent:main:main": {
 *       "deliveryContext": { "channel": "kimi-claw", ... },
 *       "sessionFile": "/root/.openclaw/agents/main/sessions/<uuid>.jsonl",
 *       ...
 *     },
 *     ...
 *   }
 *
 * For each entry the channel is the first non-empty string among
 * `deliveryContext.channel`, `lastChannel` and `channel`; "kimi-claw" is
 * stored as "kimi-web", every other name passes through unchanged.
 *
 * Each usable entry is indexed under up to three keys so renamed copies
 * of the file still match:
 *   1. the `sessionFile` value exactly as written
 *   2. its basename
 *   3. "uuid8:<base-uuid>" when baseUuid8 yields exactly 8 characters
 *
 * This is a best-effort reader: entries whose channel or `sessionFile`
 * is missing or not a string are skipped instead of throwing.
 *
 * @param {string} sessionsJsonPath - path to sessions.json
 * @returns {Map<string, string>} key → channel; empty when the path is
 *   falsy, the file does not exist, or it cannot be read / parsed.
 */
export function loadSessionsJsonChannelMap(sessionsJsonPath) {
  const map = new Map();
  if (!sessionsJsonPath || !existsSync(sessionsJsonPath)) return map;

  let raw;
  try {
    raw = JSON.parse(readFileSync(sessionsJsonPath, 'utf-8'));
  } catch (_) {
    return map;
  }

  const isNonEmptyString = (v) => typeof v === 'string' && v !== '';

  for (const entry of Object.values(raw || {})) {
    const ch = [
      entry?.deliveryContext?.channel,
      entry?.lastChannel,
      entry?.channel,
    ].find(isNonEmptyString);
    const file = entry?.sessionFile;
    // basename() throws on non-strings; skip malformed entries instead.
    if (!ch || !isNonEmptyString(file)) continue;

    // Normalise OpenClaw's channel names to the stored names.
    const normalized = ch === 'kimi-claw' ? 'kimi-web' : ch;

    map.set(file, normalized);
    const bn = basename(file);
    if (bn && bn !== file) map.set(bn, normalized);
    const uid = baseUuid8(bn);
    if (uid && uid.length === 8) map.set(`uuid8:${uid}`, normalized);
  }
  return map;
}
382
+
383
/**
 * Look a session file up in a channel map, trying progressively looser
 * keys:
 *   1. the path exactly as given
 *   2. its basename
 *   3. "uuid8:<id>" where <id> comes from baseUuid8(basename)
 *
 * @param {Map<string, string>} channelMap - key → channel map
 * @param {string} filePath - session file path or name
 * @returns {string|null} the first truthy hit, or null when either
 *   argument is falsy or no key matches.
 */
export function lookupChannel(channelMap, filePath) {
  if (!channelMap || !filePath) return null;

  const byPath = channelMap.get(filePath);
  if (byPath) return byPath;

  const name = basename(filePath);
  const byName = channelMap.get(name);
  if (byName) return byName;

  const uid = baseUuid8(name);
  if (!uid) return null;
  return channelMap.get(`uuid8:${uid}`) || null;
}
404
+
405
/**
 * Classify which kind of OpenClaw deployment a session file belongs to.
 *
 *   'kimi-claw'   — the file's channel resolves to 'kimi-web' (or raw
 *                   'kimi-claw'), or its content carries both a Kimi
 *                   marker and a Telegram marker.
 *   'self-hosted' — any other channel value, or content with a single
 *                   kind of marker.
 *   'unknown'     — no channel entry and no markers / no readable file.
 *                   What to do with 'unknown' is the caller's decision.
 *
 * Resolution order:
 *   1. Channel map lookup via lookupChannel.
 *   2. Content scan via detectSessionTypeFromContent. For a checkpoint
 *      file the scan prefers the matching MAIN session file in the same
 *      directory, because a checkpoint on its own may hold only Telegram
 *      messages and would then look self-hosted. Both main-file namings
 *      are tried:
 *        "openclaw-<base8>.jsonl"  (inbox-staged)
 *        "<uuid>.jsonl"            (source naming, derived from
 *                                   "<uuid>.checkpoint.<id>.jsonl")
 *      If neither exists the checkpoint itself is scanned.
 *
 * @param {string} filePath - session file path
 * @param {Map<string, string>} channelMap - key → channel map
 * @returns {'kimi-claw'|'self-hosted'|'unknown'}
 */
export function detectSessionType(filePath, channelMap) {
  // 1. Channel map lookup.
  const ch = lookupChannel(channelMap, filePath);
  if (ch === 'kimi-web' || ch === 'kimi-claw') return 'kimi-claw';
  if (ch) return 'self-hosted';

  // 2. Content-based fallback for files the map does not know about.
  if (!filePath) return 'unknown';

  let probeFile = filePath;
  const name = basename(filePath);
  if (isCheckpointFile(name)) {
    const dir = dirname(filePath);
    const candidates = [];

    const uid = baseUuid8(name);
    if (uid) candidates.push(join(dir, `openclaw-${uid}.jsonl`)); // inbox-staged

    // Source naming: everything before ".checkpoint." is the main file's stem.
    const sourceStem = name.match(/^(.+?)\.checkpoint\./i);
    if (sourceStem) candidates.push(join(dir, `${sourceStem[1]}.jsonl`));

    const mainFile = candidates.find((c) => existsSync(c));
    if (mainFile) probeFile = mainFile;
  }
  return detectSessionTypeFromContent(probeFile);
}
467
+
468
/**
 * Classify a session file from the channel markers found in its first
 * 64 KB:
 *
 *   Kimi marker AND a Telegram marker → 'kimi-claw'
 *   exactly one kind of marker        → 'self-hosted'
 *   no markers                        → 'unknown'
 *
 * Markers: "User Message From Kimi:", "[Queued messages while agent was
 * busy]" and "Conversation info (untrusted metadata)".
 *
 * The descriptor is closed in a `finally` so it is released even when
 * the read fails (for example when the path is a directory).
 *
 * @param {string} filePath - file to inspect
 * @returns {'kimi-claw'|'self-hosted'|'unknown'} 'unknown' also covers a
 *   falsy path, a missing file, a read error and an empty file.
 */
export function detectSessionTypeFromContent(filePath) {
  if (!filePath || !existsSync(filePath)) return 'unknown';

  let head = '';
  try {
    const fd = openSync(filePath, 'r');
    try {
      const buf = Buffer.alloc(64 * 1024);
      const n = readSync(fd, buf, 0, buf.length, 0);
      head = buf.toString('utf-8', 0, n);
    } finally {
      closeSync(fd);
    }
  } catch (_) {
    return 'unknown';
  }
  if (!head) return 'unknown';

  const hasKimi = /User Message From Kimi:/i.test(head);
  const hasBatchedTg = /\[Queued messages while agent was busy\]/i.test(head);
  const hasSingleTg = /Conversation info \(untrusted metadata\)/i.test(head);
  const hasTelegram = hasBatchedTg || hasSingleTg;

  if (hasKimi && hasTelegram) return 'kimi-claw';
  if (hasKimi || hasTelegram) return 'self-hosted';
  return 'unknown';
}
502
+
503
/**
 * Tell whether a file name denotes a checkpoint file.
 *
 * After removing a trailing ".jsonl" (any case), two namings count:
 *   "<uuid>.checkpoint.<chkpt-uuid>"      — contains ".checkpoint."
 *   "openclaw-<base8>-ckpt-<chkpt8>"      — ends with "-ckpt-<hex>"
 *
 * @param {string} fileName - bare file name
 * @returns {boolean} false for a falsy name
 */
export function isCheckpointFile(fileName) {
  if (!fileName) return false;
  const stem = fileName.replace(/\.jsonl$/i, '');
  if (/\.checkpoint\./i.test(stem)) return true;
  return /-ckpt-[0-9a-f]+$/i.test(stem);
}
514
+
515
/**
 * Locate sessions.json for a given session file.
 *
 *   1. Starting at the file's directory, check up to 5 directories
 *      (the directory itself and its ancestors) for a `sessions.json`.
 *   2. Otherwise try the standard location
 *      `<home>/.openclaw/agents/main/sessions/sessions.json`, where
 *      <home> is `options.home`, else the HOME environment variable.
 *
 * @param {string} sessionFilePath - path of a JSONL session file
 * @param {{home?: string}} [options] - optional home-directory override
 * @returns {string|null} path of the first sessions.json found, or null
 */
export function findSessionsJson(sessionFilePath, options = {}) {
  if (!sessionFilePath) return null;

  // 1. Walk up from the file's directory.
  const MAX_LEVELS = 5;
  let current = dirname(sessionFilePath);
  let level = 0;
  while (level < MAX_LEVELS) {
    const candidate = join(current, 'sessions.json');
    if (existsSync(candidate)) return candidate;
    const up = dirname(current);
    if (up === current) break; // reached the filesystem root
    current = up;
    level += 1;
  }

  // 2. Standard OpenClaw install location.
  const home = options.home || process.env.HOME || '';
  if (!home) return null;
  const standard = join(home, '.openclaw', 'agents', 'main', 'sessions', 'sessions.json');
  return existsSync(standard) ? standard : null;
}
555
+
556
+ // ----- conversation_id derivation -----
557
+
558
/**
 * Pick the conversation_id for an OpenClaw message from its channel and
 * any sender information.
 *
 * The channel is resolved through findChannelDef, then getOrAutoRegister,
 * and the resulting definition's `convIdFor` decides the id.
 *
 * Examples (built-in channels):
 *   telegram + sender_id  → "openclaw-tg-<sender_id>"
 *   kimi-web              → "openclaw-kimi-<fileUuid8>"
 *   system                → "openclaw-sys-<fileUuid8>"
 * Unknown channel names (e.g. "discord"):
 *   discord + accountId   → "openclaw-discord-<accountId>"
 *   discord (no info)     → "openclaw-discord-<fileUuid8>"
 * Fallback:
 *   no channel / no definition / convIdFor returned nothing
 *                         → "openclaw-<fileUuid8>"
 *
 * @param {string|null} channel - detected channel name
 * @param {string|number|bigint|object|null} senderInfoOrId - either a
 *   bare sender id or an object such as `{ sender_id, accountId, … }`.
 *   Numeric ids are accepted because ids parsed from JSON metadata may
 *   arrive as numbers; they are treated exactly like string ids.
 * @param {string} fileUuid8 - 8-char base uuid of the session file
 * @returns {string} the conversation id
 */
export function deriveOpenclawConvId(channel, senderInfoOrId, fileUuid8) {
  const fallbackId = `openclaw-${fileUuid8}`;
  if (!channel) return fallbackId;

  const def = findChannelDef(channel) || getOrAutoRegister(channel);
  if (!def) return fallbackId;

  let info;
  if (senderInfoOrId == null) {
    info = {};
  } else if (
    typeof senderInfoOrId === 'string' ||
    typeof senderInfoOrId === 'number' ||
    typeof senderInfoOrId === 'bigint'
  ) {
    // Bare id: wrap it so convIdFor can read `info.sender_id`.
    info = { sender_id: senderInfoOrId };
  } else {
    info = senderInfoOrId;
  }

  return def.convIdFor(info, { fileUuid8 }) || fallbackId;
}
595
+
596
/**
 * Title prefix for a channel, e.g. "[Telegram]" / "[Kimi-web]" /
 * "[discord]".
 *
 * @param {string|null} channel - channel name
 * @returns {string} the definition's `titlePrefix`, or "" when `channel`
 *   is falsy, no definition can be resolved, or the prefix is empty.
 */
export function titlePrefixFor(channel) {
  if (!channel) return '';
  const def = findChannelDef(channel) || getOrAutoRegister(channel);
  if (!def || !def.titlePrefix) return '';
  return def.titlePrefix;
}
605
+
606
/**
 * Extract the base session uuid8 from an OpenClaw inbox/source filename:
 *   "openclaw-3824f87a.jsonl"                   → "3824f87a"
 *   "openclaw-3824f87a-ckpt-e6c37ac7.jsonl"     → "3824f87a"
 *   "openclaw-3824f87a-reset-deadbeef.jsonl"    → "3824f87a"
 *   "3824f87a-ea6e-4e08-…-c596288bcfe3.jsonl"   → "3824f87a"
 *   "3824f87a.checkpoint.e6c37ac7.jsonl"        → "3824f87a"
 *   "3824f87a.reset.deadbeef.jsonl"             → "3824f87a"
 *
 * The ".jsonl" extension is matched case-insensitively and the
 * "-ckpt-" / "-reset-" suffix may be any number of hex digits, matching
 * what isCheckpointFile / isResetFile accept. Without that, such names
 * would fall through to the fallback and yield the literal "openclaw".
 *
 * @param {string} fileName - bare file name (no directory part)
 * @returns {string} the 8-char base id (case preserved). For names in
 *   neither known form, the first 8 alphanumeric characters of the stem.
 *   "" when `fileName` is empty or not a string.
 */
export function baseUuid8(fileName) {
  if (typeof fileName !== 'string' || !fileName) return '';
  const stem = fileName.replace(/\.jsonl$/i, '');

  // Inbox-staged forms: "openclaw-<base8>" with optional ckpt/reset suffix.
  const staged = stem.match(
    /^openclaw-([0-9a-f]{8})(?:-(?:ckpt|reset)-[0-9a-f]+)?$/i,
  );
  if (staged) return staged[1];

  // Source-file form: the name starts with the uuid.
  const source = stem.match(/^([0-9a-f]{8})/i);
  if (source) return source[1];

  // Fallback: first 8 alphanumeric characters.
  return stem.replace(/[^0-9a-z]/gi, '').slice(0, 8);
}
626
+
627
/**
 * Tell whether a file name denotes a reset file.
 *
 * After removing a trailing ".jsonl" (any case), two namings count:
 *   "<uuid>.reset.<reset-uuid>"           — contains ".reset."
 *   "openclaw-<base8>-reset-<reset8>"     — ends with "-reset-<hex>"
 *
 * @param {string} fileName - bare file name
 * @returns {boolean} false for a falsy name
 */
export function isResetFile(fileName) {
  if (!fileName) return false;
  const stem = fileName.replace(/\.jsonl$/i, '');
  if (/\.reset\./i.test(stem)) return true;
  return /-reset-[0-9a-f]+$/i.test(stem);
}