parallelclaw 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +204 -0
- package/HELP.md +600 -0
- package/LICENSE +21 -0
- package/MULTI_MACHINE.md +152 -0
- package/README.md +417 -0
- package/README.ru.md +740 -0
- package/SYNC.md +844 -0
- package/bot/README.md +173 -0
- package/bot/config.js +66 -0
- package/bot/inbox.js +153 -0
- package/bot/index.js +294 -0
- package/bot/nexara.js +61 -0
- package/bot/poll.js +304 -0
- package/bot/search.js +155 -0
- package/bot/telegram.js +96 -0
- package/ingest.js +2712 -0
- package/lib/cli/index.js +1987 -0
- package/lib/config.js +220 -0
- package/lib/db-init.js +158 -0
- package/lib/hook/install.js +268 -0
- package/lib/import-telegram.js +158 -0
- package/lib/ingest-file.js +779 -0
- package/lib/notify-click-action.js +281 -0
- package/lib/openclaw-channel.js +643 -0
- package/lib/parse-cursor.js +172 -0
- package/lib/parse-obsidian.js +256 -0
- package/lib/parse-telegram-html.js +384 -0
- package/lib/parse.js +175 -0
- package/lib/render-markdown.js +0 -0
- package/lib/store-doc/canonicalize.js +116 -0
- package/lib/store-doc/detect.js +209 -0
- package/lib/store-doc/extract-title.js +162 -0
- package/lib/sync/auth.js +80 -0
- package/lib/sync/cert.js +144 -0
- package/lib/sync/cli.js +906 -0
- package/lib/sync/client.js +138 -0
- package/lib/sync/config.js +130 -0
- package/lib/sync/pair.js +145 -0
- package/lib/sync/pull.js +158 -0
- package/lib/sync/push.js +305 -0
- package/lib/sync/replicate.js +335 -0
- package/lib/sync/server.js +224 -0
- package/lib/sync/service.js +726 -0
- package/lib/tasks.js +215 -0
- package/lib/telegram-decisions.js +165 -0
- package/lib/telegram-discovery.js +373 -0
- package/lib/telegram-notify.js +272 -0
- package/lib/telegram-pending.js +200 -0
- package/lib/web/index.js +265 -0
- package/lib/web/routes/conversation.js +193 -0
- package/lib/web/routes/conversations.js +180 -0
- package/lib/web/routes/dashboard.js +175 -0
- package/lib/web/routes/pending.js +277 -0
- package/lib/web/routes/settings.js +226 -0
- package/lib/web/static/style.css +393 -0
- package/lib/web/templates.js +234 -0
- package/package.json +84 -0
- package/server.js +3816 -0
- package/skills/install-memex/README.md +109 -0
- package/skills/install-memex/SKILL.md +342 -0
- package/skills/install-memex/examples.md +294 -0
- package/skills/install-memex-claw/SKILL.md +423 -0
|
@@ -0,0 +1,643 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenClaw channel detection + Telegram batch unpacking.
|
|
3
|
+
*
|
|
4
|
+
* Background (from disk-surveys by OpenClaw agents on live VPSes,
|
|
5
|
+
* 2026-05-19 → 2026-05-20):
|
|
6
|
+
*
|
|
7
|
+
* OpenClaw stores every channel (Kimi-web, Telegram, future Slack/WA/
|
|
8
|
+
* self-hosted Discord/etc.) in the SAME directory
|
|
9
|
+
* `~/.openclaw/agents/main/sessions/`. There is NO `channel` field at
|
|
10
|
+
* the JSON-record level. The transport is identifiable by two means:
|
|
11
|
+
*
|
|
12
|
+
* 1. `sessions.json` — authoritative routing registry. Maps each
|
|
13
|
+
* sessionFile path to its `deliveryContext.channel` value (e.g.
|
|
14
|
+
* "kimi-claw", "telegram", or any self-hosted name like "discord",
|
|
15
|
+
* "matrix", "custom-web-ui"). USEFUL as the default channel for a
|
|
16
|
+
* whole file — but DOES NOT help with CHECKPOINT files which mix
|
|
17
|
+
* messages from multiple channels interleaved.
|
|
18
|
+
*
|
|
19
|
+
* 2. Text patterns inside `content[0].text` — embedded Markdown
|
|
20
|
+
* preambles + JSON metadata blocks. Reliable signals:
|
|
21
|
+
*
|
|
22
|
+
* "User Message From Kimi:\n[Time: …]?\n…" → kimi-web
|
|
23
|
+
* (Time block is OPTIONAL — short messages skip it)
|
|
24
|
+
* "[Queued messages while agent was busy]\n…" → telegram (batched)
|
|
25
|
+
* "Conversation info (untrusted metadata):" → telegram (single)
|
|
26
|
+
* "System: …" → system
|
|
27
|
+
*
|
|
28
|
+
* v0.11.1: channels live in a `CHANNELS` array (not hardcoded if/else).
|
|
29
|
+
* Adding a new known channel = one entry. Unknown channels from
|
|
30
|
+
* sessions.json get auto-registered with sensible defaults
|
|
31
|
+
* (per-account routing if accountId available, else per-file) — so
|
|
32
|
+
* self-hosted OpenClaw with custom channel names "just works".
|
|
33
|
+
*
|
|
34
|
+
* Telegram messages in checkpoint files are BATCHED — one OpenClaw record
|
|
35
|
+
* may pack 1..N Telegram messages as `Queued #1` / `Queued #2` / … blocks
|
|
36
|
+
* with embedded JSON metadata + the actual user text. This module unpacks
|
|
37
|
+
* those batches into separate logical messages.
|
|
38
|
+
*
|
|
39
|
+
* Public API:
|
|
40
|
+
* • CHANNELS — array of known-channel definitions
|
|
41
|
+
* • findChannelDef(name) — look up by canonical or alias name
|
|
42
|
+
* • getOrAutoRegister(rawChannel) — return def or synthesize generic
|
|
43
|
+
* • detectChannel(text, fallback?) — text-pattern detection
|
|
44
|
+
* • deriveOpenclawConvId(channel, senderInfo, fileUuid8)
|
|
45
|
+
* — conv_id (uses CHANNELS + auto-disc)
|
|
46
|
+
* • titlePrefixFor(channel) — "[Telegram]" / "[Kimi-web]" / etc.
|
|
47
|
+
* • parseBatchedTelegram / parseSingleTelegram / parseTelegramTimestamp
|
|
48
|
+
* • stripKimiHeader(text) — drop "User Message From Kimi:\n…"
|
|
49
|
+
* • loadSessionsJsonChannelMap(path) — file → channel from sessions.json
|
|
50
|
+
* • findSessionsJson(sessionFilePath)— walk up to discover sessions.json
|
|
51
|
+
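 *   • baseUuid8(fileName)             — extract 8-char base UUID
 *   • lookupChannel(map, filePath)    — multi-key lookup in the sessions.json channel map
 *   • detectSessionType(filePath, map) / detectSessionTypeFromContent(filePath)
 *                                     — 'kimi-claw' / 'self-hosted' / 'unknown'
 *   • isCheckpointFile(fileName) / isResetFile(fileName)
 *                                     — classify session files by name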
|
|
52
|
+
*/
|
|
53
|
+
|
|
54
|
+
import { readFileSync, existsSync, openSync, readSync, closeSync } from 'node:fs';
|
|
55
|
+
import { join, dirname, basename } from 'node:path';
|
|
56
|
+
|
|
57
|
+
// ----- Channel registry -----
|
|
58
|
+
//
|
|
59
|
+
// Each entry describes one known channel. Adding a new well-known
|
|
60
|
+
// channel (e.g. "slack" with its own batched format) = one block here +
|
|
61
|
+
// optional spec-parser. Unknown channels surfaced via sessions.json get
|
|
62
|
+
// auto-registered at runtime in `getOrAutoRegister` (see below) — no
|
|
63
|
+
// changes needed for self-hosted custom channel names.
|
|
64
|
+
//
|
|
65
|
+
// Fields:
|
|
66
|
+
// name — canonical identifier stored in `messages.channel`
|
|
67
|
+
// sessionsJsonAlias — strings that sessions.json may use for the same
|
|
68
|
+
// channel (e.g. OpenClaw writes "kimi-claw" while
|
|
69
|
+
// we store "kimi-web" for readability)
|
|
70
|
+
// detect(head) — text-pattern detector; returns true if the first
|
|
71
|
+
// 512 chars of a message look like this channel.
|
|
72
|
+
// Used PER-MESSAGE because checkpoint files mix
|
|
73
|
+
// channels even when sessions.json reports a
|
|
74
|
+
// single channel.
|
|
75
|
+
// parseBatch(text) — optional batched-message parser (Telegram only
|
|
76
|
+
// today). Used when the channel can pack multiple
|
|
77
|
+
// user messages into one OpenClaw record.
|
|
78
|
+
// parseSingle(text) — optional single-message metadata extractor
|
|
79
|
+
// stripHeader(text) — optional preamble-removal for cleaner storage
|
|
80
|
+
// (e.g. drop "User Message From Kimi:\n[Time:…]\n")
|
|
81
|
+
// convIdFor(info, ctx)
|
|
82
|
+
// — function returning the conv_id for a record on
|
|
83
|
+
// this channel. `info` may contain sender_id /
|
|
84
|
+
// accountId / etc. `ctx.fileUuid8` is the 8-char
|
|
85
|
+
// base session UUID. Returns null if not routable;
|
|
86
|
+
// caller then falls back to "openclaw-<file8>".
|
|
87
|
+
// titlePrefix — "[Telegram]" / "[Kimi-web]" — prepended to conv
|
|
88
|
+
// titles so the UI shows channel at a glance.
|
|
89
|
+
export const CHANNELS = [
  // Telegram: either a batched "[Queued messages …]" record, or a single
  // message whose head carries the "Conversation info" label together with
  // a "sender_id" key. Routed per sender; not routable without a sender_id.
  {
    name: 'telegram',
    sessionsJsonAlias: ['telegram'],
    detect(head) {
      if (/^\[Queued messages while agent was busy\]/i.test(head)) return true;
      return (
        /Conversation info \(untrusted metadata\):/i.test(head) &&
        /"sender_id"/.test(head)
      );
    },
    parseBatch: parseBatchedTelegram,
    parseSingle: parseSingleTelegram,
    convIdFor(info, _ctx) {
      return info?.sender_id ? `openclaw-tg-${info.sender_id}` : null;
    },
    titlePrefix: '[Telegram]',
  },
  // Kimi-web: sessions.json calls it "kimi-claw"; messages start with the
  // "User Message From Kimi:" preamble. Routed per session file.
  {
    name: 'kimi-web',
    sessionsJsonAlias: ['kimi-claw', 'kimi-web'],
    detect(head) {
      return /^User Message From Kimi:/i.test(head);
    },
    stripHeader(text) {
      return stripKimiHeader(text);
    },
    convIdFor(_info, ctx) {
      return `openclaw-kimi-${ctx.fileUuid8}`;
    },
    titlePrefix: '[Kimi-web]',
  },
  // System: messages whose text starts with "System: ". Routed per session file.
  {
    name: 'system',
    sessionsJsonAlias: ['system'],
    detect(head) {
      return /^System: /i.test(head);
    },
    convIdFor(_info, ctx) {
      return `openclaw-sys-${ctx.fileUuid8}`;
    },
    titlePrefix: '[System]',
  },
];
|
|
118
|
+
|
|
119
|
+
/**
 * Resolve a channel definition from the CHANNELS registry.
 *
 * A definition matches when `name` equals its canonical `name` or is listed
 * in its `sessionsJsonAlias` array.
 *
 * @param {string} name - canonical channel name or sessions.json alias
 * @returns {object|null} the first matching definition, or null when `name`
 *   is falsy or nothing matches
 */
export function findChannelDef(name) {
  if (!name) return null;
  for (const def of CHANNELS) {
    if (def.name === name) return def;
    const aliases = def.sessionsJsonAlias || [];
    if (aliases.includes(name)) return def;
  }
  return null;
}
|
|
129
|
+
|
|
130
|
+
/**
 * Auto-discovery for self-hosted OpenClaw setups.
 *
 * Returns the built-in definition when the name (or one of its aliases) is
 * known. Otherwise synthesises a generic definition on the fly so custom
 * channel names reported by sessions.json (e.g. "discord") work without
 * code changes:
 *   • conv_id: per-account if `info.accountId` is set, else per-sender if
 *     `info.sender_id` is set, else per-file (`ctx.fileUuid8`)
 *   • title prefix: `[<name>]`
 *   • `detect` always returns false — there is no text pattern for it
 *
 * The name is sanitised to lower-case `[a-z0-9_-]` because it becomes part
 * of conv_id and stored column values. The sanitised form is looked up in
 * the registry again, so a differently-cased or decorated spelling of a
 * built-in channel ("Telegram", "Kimi-Claw") resolves to the built-in
 * definition instead of a generic one that merely shares its name.
 *
 * @param {string} rawChannelName - channel name as found in sessions.json
 * @returns {object|null} channel definition; null if the name is empty,
 *   not a string, or sanitises to nothing
 */
export function getOrAutoRegister(rawChannelName) {
  const known = findChannelDef(rawChannelName);
  if (known) return known;
  if (!rawChannelName || typeof rawChannelName !== 'string') return null;

  // Only [a-z0-9_-] survives: no spaces, no slashes.
  const safe = rawChannelName.replace(/[^a-z0-9_-]/gi, '').toLowerCase();
  if (!safe) return null;

  // The exact-match lookup above is case-sensitive; retry with the
  // normalised name before falling back to a generic definition.
  const knownAfterSanitising = findChannelDef(safe);
  if (knownAfterSanitising) return knownAfterSanitising;

  return {
    name: safe,
    detect: () => false, // no text-pattern detector — relies on sessions.json
    convIdFor: (info, ctx) => {
      if (info?.accountId) return `openclaw-${safe}-${info.accountId}`;
      if (info?.sender_id) return `openclaw-${safe}-${info.sender_id}`;
      return `openclaw-${safe}-${ctx.fileUuid8}`;
    },
    titlePrefix: `[${safe}]`,
    isAutoRegistered: true,
  };
}
|
|
162
|
+
|
|
163
|
+
// ----- Channel detection -----
|
|
164
|
+
|
|
165
|
+
/**
 * Text-pattern channel detection.
 *
 * Runs each registry entry's `detect` against the first 512 characters of
 * `text`, in CHANNELS order, and reports the first entry that matches.
 *
 * `fallback` is typically the file-level channel from sessions.json, so a
 * message without any preamble still gets a channel.
 *
 * @param {string} text - raw message text
 * @param {string|null} [fallback=null] - value returned when `text` is not
 *   a non-empty string or no detector matches
 * @returns {string|null} canonical channel name, or `fallback`
 */
export function detectChannel(text, fallback = null) {
  if (typeof text !== 'string' || text.length === 0) return fallback;
  const head = text.slice(0, 512);
  const hit = CHANNELS.find((def) => def.detect && def.detect(head));
  return hit ? hit.name : fallback;
}
|
|
181
|
+
|
|
182
|
+
// ----- Telegram batch + metadata parsing -----
|
|
183
|
+
|
|
184
|
+
/**
 * Unpack a "[Queued messages while agent was busy]" record into individual
 * Telegram messages.
 *
 * The record is split on the "\n---\nQueued #<N>\n" separator. Whatever
 * precedes the first separator (the batch header) is discarded and every
 * remaining chunk is handed to `parseSingleTelegram`; chunks it cannot
 * parse are dropped. When no separator is present the whole text is tried
 * as one single Telegram message.
 *
 * @param {string} text - raw record text
 * @returns {Array<object>} parsed messages in `parseSingleTelegram`'s
 *   shape; empty when `text` is not a non-empty string or nothing parses
 */
export function parseBatchedTelegram(text) {
  if (typeof text !== 'string' || !text) return [];

  // First element is the batch header — not a message.
  const [, ...queued] = text.split(/\n---\nQueued #\d+\n/);

  if (queued.length === 0) {
    // No separator at all: maybe a lone message without "Queued #1".
    const lone = parseSingleTelegram(text);
    return lone ? [lone] : [];
  }

  const messages = [];
  for (const chunk of queued) {
    const parsed = parseSingleTelegram(chunk);
    if (parsed) messages.push(parsed);
  }
  return messages;
}
|
|
215
|
+
|
|
216
|
+
/**
 * Parse one Telegram-formatted message block (the `Queued #N` line, if any,
 * must already be stripped).
 *
 * Two labelled ```json blocks are looked for:
 *   "Conversation info (untrusted metadata)" — message_id, sender_id,
 *                                               sender, reply_to_id, timestamp
 *   "Sender (untrusted metadata)"            — id, name, username
 *
 * The user's text is everything after the closing fence of whichever
 * metadata block ends last, with surrounding whitespace removed.
 *
 * @param {string} block - text of a single message
 * @returns {{text: string, message_id: *, sender_id: *, sender: *,
 *   username: *, reply_to_id: *, ts: number, raw_timestamp: *}|null}
 *   null when `block` is not a non-empty string or neither metadata block
 *   yields parseable JSON; missing fields are null
 */
export function parseSingleTelegram(block) {
  if (typeof block !== 'string' || !block) return null;

  const convInfo = extractJsonBlock(block, 'Conversation info');
  const senderInfo = extractJsonBlock(block, 'Sender');
  if (!convInfo && !senderInfo) return null;

  const OPEN_FENCE = '```json';
  const CLOSE_FENCE = '\n```';

  // For each label, find where its fenced block ends (0 when absent). The
  // search for the closing fence starts AFTER the opening fence, otherwise
  // '\n```' would match the opening fence itself.
  const metadataEnds = ['Conversation info', 'Sender'].map((label) => {
    const labelAt = block.indexOf(`${label} (untrusted metadata)`);
    if (labelAt < 0) return 0;
    const openAt = block.indexOf(OPEN_FENCE, labelAt);
    if (openAt < 0) return 0;
    const closeAt = block.indexOf(CLOSE_FENCE, openAt + OPEN_FENCE.length);
    return closeAt < 0 ? 0 : closeAt + CLOSE_FENCE.length;
  });
  const textStart = Math.max(0, ...metadataEnds);
  const userText = block.slice(textStart).replace(/^[\s\n]+/, '').trimEnd();

  const rawTimestamp = convInfo?.timestamp;
  return {
    text: userText,
    message_id: convInfo?.message_id ?? null,
    sender_id: convInfo?.sender_id ?? senderInfo?.id ?? null,
    sender: convInfo?.sender ?? senderInfo?.name ?? null,
    username: senderInfo?.username ?? null,
    reply_to_id: convInfo?.reply_to_id ?? null,
    ts: parseTelegramTimestamp(rawTimestamp),
    raw_timestamp: rawTimestamp ?? null,
  };
}
|
|
265
|
+
|
|
266
|
+
/**
 * Pull the JSON payload out of a Markdown section of the form
 *
 *   <label> (untrusted metadata):
 *   ```json
 *   { … }
 *   ```
 *
 * @param {string} text - text to search
 * @param {string} label - section label without the " (untrusted metadata)" suffix
 * @returns {*} the parsed JSON value, or null when the label, the opening
 *   fence or the closing fence is missing, or the payload is not valid JSON
 */
function extractJsonBlock(text, label) {
  const FENCE_OPEN = '```json';

  const labelAt = text.indexOf(`${label} (untrusted metadata)`);
  if (labelAt < 0) return null;

  const fenceAt = text.indexOf(FENCE_OPEN, labelAt);
  if (fenceAt < 0) return null;

  // Advance past the fence and any whitespace/newlines that follow it.
  let payloadAt = fenceAt + FENCE_OPEN.length;
  for (; payloadAt < text.length; payloadAt += 1) {
    if (!/\s/.test(text[payloadAt])) break;
  }

  const closeAt = text.indexOf('\n```', payloadAt);
  if (closeAt < 0) return null;

  const payload = text.slice(payloadAt, closeAt).trim();
  try {
    return JSON.parse(payload);
  } catch (_) {
    // Malformed metadata is treated the same as missing metadata.
    return null;
  }
}
|
|
285
|
+
|
|
286
|
+
/**
 * Convert Telegram's text-format timestamp to unix-seconds.
 *
 * Input examples: "Fri 2026-05-01 20:03 GMT+8"
 *                 "Wed 2026-05-20 02:24 GMT+5:30"
 *                 "Wed 2026-05-20 02:24 GMT-0330"
 *                 "Wed 2026-05-20 02:24 GMT"        (treated as UTC)
 *
 * The offset after "GMT" is optional and may carry minutes, written either
 * as "H:MM"/"HH:MM" or as "HHMM". Earlier versions read the offset as a
 * whole number of hours, which dropped the minutes of "GMT+5:30" and
 * misread "GMT+0530" as 530 hours.
 *
 * @param {string} s - timestamp text from the metadata block
 * @returns {number} unix epoch seconds (UTC); 0 if `s` is not a non-empty
 *   string or does not contain a recognisable timestamp
 */
export function parseTelegramTimestamp(s) {
  if (typeof s !== 'string' || !s) return 0;
  const m = s.match(
    /(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2})\s+GMT(?:([+-])(\d{1,2})(?::?(\d{2}))?)?/,
  );
  if (!m) return 0;
  // Unmatched optional groups are undefined, so the defaults yield offset 0.
  const [, y, mo, d, h, mi, sign, tzHours = '0', tzMinutes = '0'] = m;
  const magnitudeMinutes = Number(tzHours) * 60 + Number(tzMinutes);
  const offsetMinutes = sign === '-' ? -magnitudeMinutes : magnitudeMinutes;
  // Interpret the wall-clock fields as if they were UTC, then remove the
  // zone offset: GMT+8 means local time is 8 hours AHEAD of UTC.
  const wallClockAsUtc = Date.UTC(+y, +mo - 1, +d, +h, +mi);
  return Math.floor((wallClockAsUtc - offsetMinutes * 60_000) / 1000);
}
|
|
305
|
+
|
|
306
|
+
// ----- Kimi-web utilities -----
|
|
307
|
+
|
|
308
|
+
/**
 * Remove the Kimi preamble so the stored text is the user's content rather
 * than the daemon's framing.
 *
 *   "User Message From Kimi:\n[Time: …]\nMain text here" → "Main text here"
 *   "User Message From Kimi:\nMain text here"            → "Main text here"
 *
 * The `[Time: …]` line is optional (v0.11.1: production OpenClaw omits it
 * for short messages). The result is trimmed whether or not a preamble was
 * present.
 *
 * @param {string} text - raw message text
 * @returns {*} cleaned text; non-string or empty input is returned as-is
 */
export function stripKimiHeader(text) {
  const isUsableString = typeof text === 'string' && text.length > 0;
  if (!isUsableString) return text;

  const kimiPreamble = /^User Message From Kimi:\s*\n(?:\[Time:[^\n]*\]\s*\n)?/i;
  const withoutPreamble = text.replace(kimiPreamble, '');
  return withoutPreamble.trim();
}
|
|
327
|
+
|
|
328
|
+
// ----- sessions.json reader -----
|
|
329
|
+
|
|
330
|
+
/**
 * Read OpenClaw's sessions.json registry and build a lookup Map from
 * session file → channel name (e.g. "telegram", "kimi-web").
 *
 * Expected schema (per OpenClaw maintainer):
 *   {
 *     "agent:main:main": {
 *       "deliveryContext": { "channel": "kimi-claw", ... },
 *       "sessionFile": "/root/.openclaw/agents/main/sessions/<uuid>.jsonl",
 *       ...
 *     },
 *     ...
 *   }
 *
 * The channel is taken from `deliveryContext.channel`, then `lastChannel`,
 * then `channel`. Names that are an alias of a built-in channel are stored
 * under that channel's canonical name ("kimi-claw" → "kimi-web"); anything
 * else passes through unchanged.
 *
 * v0.11.2: each entry is indexed by THREE keys so archive-staged files
 * (whose names differ from the original sessionFile path) still match:
 *   1. the sessionFile path exactly as written in sessions.json
 *   2. its basename
 *   3. `uuid8:<base-uuid>` (via baseUuid8 — survives renaming into the
 *      inbox-staged "openclaw-<uuid8>.jsonl" form)
 *
 * Entries whose channel or sessionFile is missing or not a string are
 * skipped, so one malformed entry cannot abort the whole load.
 *
 * @param {string} sessionsJsonPath - path to sessions.json
 * @returns {Map<string, string>} lookup map; empty if the file is missing,
 *   unreadable, not valid JSON, or not a JSON object
 */
export function loadSessionsJsonChannelMap(sessionsJsonPath) {
  const map = new Map();
  if (!sessionsJsonPath || !existsSync(sessionsJsonPath)) return map;

  let registry;
  try {
    registry = JSON.parse(readFileSync(sessionsJsonPath, 'utf-8'));
  } catch (_) {
    // Best-effort: an unreadable registry just means "no file-level channel".
    return map;
  }
  if (registry === null || typeof registry !== 'object') return map;

  for (const entry of Object.values(registry)) {
    const ch = entry?.deliveryContext?.channel || entry?.lastChannel || entry?.channel;
    const file = entry?.sessionFile;
    // basename() throws on non-strings, so validate before using either value.
    if (typeof ch !== 'string' || !ch || typeof file !== 'string' || !file) continue;

    // Normalise OpenClaw's channel names through the registry aliases.
    const normalized = findChannelDef(ch)?.name ?? ch;

    map.set(file, normalized);
    const bn = basename(file);
    if (bn && bn !== file) map.set(bn, normalized);
    const uid = baseUuid8(bn);
    if (uid && uid.length === 8) map.set(`uuid8:${uid}`, normalized);
  }
  return map;
}
|
|
382
|
+
|
|
383
|
+
/**
 * Multi-key lookup in a channel map built by `loadSessionsJsonChannelMap`.
 *
 * Keys are tried in this order, and the first truthy hit wins:
 *   1. `filePath` exactly as given  — live ingest from ~/.openclaw/...
 *   2. basename of `filePath`       — partial-path scenarios
 *   3. `uuid8:<baseUuid8(basename)>` — archive-staged or inbox-renamed files
 *
 * @param {Map<string, string>} channelMap - lookup map
 * @param {string} filePath - session file path
 * @returns {string|null} channel name (e.g. "telegram" / "kimi-web"), or
 *   null when either argument is falsy or no key matches
 */
export function lookupChannel(channelMap, filePath) {
  if (!channelMap || !filePath) return null;

  const byPath = channelMap.get(filePath);
  if (byPath) return byPath;

  const fileName = basename(filePath);
  const byName = channelMap.get(fileName);
  if (byName) return byName;

  const uid = baseUuid8(fileName);
  if (!uid) return null;
  return channelMap.get(`uuid8:${uid}`) || null;
}
|
|
404
|
+
|
|
405
|
+
/**
 * Classify the OpenClaw deployment a session file belongs to.
 *
 *   'kimi-claw'   — Moonshot's hosted Kimi-Claw service: ONE merged session
 *                   file packs several channels, and .checkpoint files hold
 *                   NEW Telegram messages, so callers ingest them.
 *   'self-hosted' — standard OpenClaw: one channel per <uuid>.jsonl, and
 *                   .checkpoint files are pure snapshots.
 *   'unknown'     — nothing could be determined; the caller decides (it
 *                   treats unknown like self-hosted and skips checkpoints).
 *
 * Decision order:
 *   1. sessions.json lookup. Channel 'kimi-web' / 'kimi-claw' → 'kimi-claw';
 *      any other channel → 'self-hosted'.
 *   2. v0.11.3 content fallback for files sessions.json does not list
 *      (archived / rotated sessions): scan the file head for channel
 *      markers. For a checkpoint file, the inbox-staged main file
 *      "openclaw-<uuid8>.jsonl" in the same directory is scanned instead
 *      when it exists — a checkpoint may contain only Telegram messages,
 *      which on its own would look self-hosted.
 *
 * @param {string} filePath - session file path
 * @param {Map<string, string>} channelMap - map from loadSessionsJsonChannelMap
 * @returns {'kimi-claw'|'self-hosted'|'unknown'}
 */
export function detectSessionType(filePath, channelMap) {
  // 1. Registry lookup (fast, authoritative for active sessions).
  const registered = lookupChannel(channelMap, filePath);
  if (registered) {
    const isKimi = registered === 'kimi-web' || registered === 'kimi-claw';
    return isKimi ? 'kimi-claw' : 'self-hosted';
  }

  // 2. Content-based fallback.
  if (!filePath) return 'unknown';

  const fileName = basename(filePath);
  let probeFile = filePath;
  if (isCheckpointFile(fileName)) {
    const uid = baseUuid8(fileName);
    if (uid) {
      // Known main-file names to try, in the checkpoint's own directory.
      const mainCandidates = [
        join(dirname(filePath), `openclaw-${uid}.jsonl`), // inbox-staged
      ];
      const existing = mainCandidates.find((candidate) => existsSync(candidate));
      if (existing) probeFile = existing;
    }
  }
  return detectSessionTypeFromContent(probeFile);
}
|
|
467
|
+
|
|
468
|
+
/**
 * Inspect the first 64 KB of a session file and decide its type from the
 * channel markers present:
 *
 *   Kimi marker AND a Telegram marker → 'kimi-claw'   (merged file)
 *   any single kind of marker         → 'self-hosted' (one channel per file)
 *   no markers at all                 → 'unknown'
 *
 * Markers (case-insensitive, anywhere in the head):
 *   Kimi     — "User Message From Kimi:"
 *   Telegram — "[Queued messages while agent was busy]" or
 *              "Conversation info (untrusted metadata)"
 *
 * Used as the fallback when sessions.json does not know about the file.
 *
 * @param {string} filePath - path of the session file to probe
 * @returns {'kimi-claw'|'self-hosted'|'unknown'} 'unknown' also covers a
 *   missing, unreadable or empty file
 */
export function detectSessionTypeFromContent(filePath) {
  if (!filePath || !existsSync(filePath)) return 'unknown';

  let head = '';
  try {
    const fd = openSync(filePath, 'r');
    try {
      const buf = Buffer.alloc(64 * 1024);
      const n = readSync(fd, buf, 0, buf.length, 0);
      head = buf.toString('utf-8', 0, n);
    } finally {
      // Close even when readSync throws, so a failed read does not leak the
      // descriptor.
      closeSync(fd);
    }
  } catch (_) {
    // Best-effort probe: any I/O failure means "cannot tell".
    return 'unknown';
  }
  if (!head) return 'unknown';

  const hasKimi = /User Message From Kimi:/i.test(head);
  const hasBatchedTg = /\[Queued messages while agent was busy\]/i.test(head);
  const hasSingleTg = /Conversation info \(untrusted metadata\)/i.test(head);
  const hasTelegram = hasBatchedTg || hasSingleTg;

  if (hasKimi && hasTelegram) return 'kimi-claw';
  if (hasKimi || hasTelegram) return 'self-hosted';
  return 'unknown';
}
|
|
502
|
+
|
|
503
|
+
/**
 * Tell whether a file name denotes a checkpoint file. Two naming schemes
 * are recognised (case-insensitive, with or without a ".jsonl" extension):
 *   "<uuid>.checkpoint.<chkpt-uuid>.jsonl"   — on-disk OpenClaw source
 *   "openclaw-<base8>-ckpt-<chkpt8>.jsonl"   — inbox-staged (renamed by memex-sync)
 *
 * @param {string} fileName - file name without directory
 * @returns {boolean} false for a falsy name
 */
export function isCheckpointFile(fileName) {
  if (!fileName) return false;
  const stem = fileName.replace(/\.jsonl$/i, '');
  if (/\.checkpoint\./i.test(stem)) return true;
  return /-ckpt-[0-9a-f]+$/i.test(stem);
}
|
|
514
|
+
|
|
515
|
+
/**
 * Locate sessions.json for a given JSONL session file.
 *
 *   1. Starting in the file's own directory, look for a `sessions.json`
 *      there and in up to four ancestor directories (five directories in
 *      total). Covers live ingest from
 *      ~/.openclaw/agents/main/sessions/<uuid>.jsonl.
 *
 *   2. v0.11.2: archived files have no sessions.json beside them, so fall
 *      back to the standard install location
 *      <home>/.openclaw/agents/main/sessions/sessions.json
 *      where <home> is `options.home`, else $HOME.
 *
 * @param {string} sessionFilePath - path of the session file
 * @param {{home?: string}} [options] - `home` overrides $HOME for step 2
 * @returns {string|null} path of the sessions.json found, or null
 */
export function findSessionsJson(sessionFilePath, options = {}) {
  if (!sessionFilePath) return null;

  // 1. Walk up from the session file's directory.
  for (let depth = 0, dir = dirname(sessionFilePath); depth < 5; depth += 1) {
    const candidate = join(dir, 'sessions.json');
    if (existsSync(candidate)) return candidate;
    const parent = dirname(dir);
    if (parent === dir) break; // reached the filesystem root
    dir = parent;
  }

  // 2. Standard OpenClaw locations under the home directory.
  const home = options.home || process.env.HOME || '';
  if (!home) return null;
  const standardLocations = [
    join(home, '.openclaw', 'agents', 'main', 'sessions', 'sessions.json'),
  ];
  const found = standardLocations.find((location) => existsSync(location));
  return found ?? null;
}
|
|
555
|
+
|
|
556
|
+
// ----- conversation_id derivation -----
|
|
557
|
+
|
|
558
|
+
/**
 * Pick the conversation_id for an OpenClaw message given its detected
 * channel and any extracted sender info.
 *
 * v0.11.1: routes through the CHANNELS registry and falls back to
 * `getOrAutoRegister` for unknown self-hosted channels.
 *
 * The second argument accepts either an info object
 * `{sender_id, accountId, …}` or a bare sender id (the v0.11.0 calling
 * convention). A bare id may be a string, number or bigint: sender ids
 * parsed from JSON metadata can be numeric, and a number used to be treated
 * as an info object, which lost the per-sender routing.
 *
 * Examples (built-in channels):
 *   telegram + sender_id → "openclaw-tg-<sender_id>"
 *   kimi-web             → "openclaw-kimi-<fileUuid8>"
 *   system               → "openclaw-sys-<fileUuid8>"
 *
 * Auto-discovered (self-hosted; e.g. channel="discord"):
 *   discord + accountId  → "openclaw-discord-<accountId>"
 *   discord (no info)    → "openclaw-discord-<fileUuid8>"
 *
 * Fallback:
 *   no channel / no definition / definition declines → "openclaw-<fileUuid8>"
 *
 * @param {string|null} channel - detected channel name
 * @param {object|string|number|bigint|null} senderInfoOrId - info object or bare sender id
 * @param {string} fileUuid8 - first 8 hex chars of the SOURCE FILE uuid
 *   (not the inbox-staged name), stable across checkpoints
 * @returns {string} conversation id
 */
export function deriveOpenclawConvId(channel, senderInfoOrId, fileUuid8) {
  const fileLevelId = `openclaw-${fileUuid8}`;
  if (!channel) return fileLevelId;

  const def = findChannelDef(channel) || getOrAutoRegister(channel);
  if (!def) return fileLevelId;

  let info;
  if (senderInfoOrId == null) {
    info = {};
  } else if (
    typeof senderInfoOrId === 'string' ||
    typeof senderInfoOrId === 'number' ||
    typeof senderInfoOrId === 'bigint'
  ) {
    // Bare sender id, possibly numeric.
    info = { sender_id: senderInfoOrId };
  } else {
    info = senderInfoOrId;
  }

  return def.convIdFor(info, { fileUuid8 }) || fileLevelId;
}
|
|
595
|
+
|
|
596
|
+
/**
 * Title prefix shown in front of conversation titles for a channel —
 * "[Telegram]" / "[Kimi-web]" / "[discord]".
 *
 * @param {string|null} channel - canonical name, alias, or custom channel name
 * @returns {string} the definition's `titlePrefix`, or "" when `channel` is
 *   falsy or no known/auto-registered definition exists
 */
export function titlePrefixFor(channel) {
  if (!channel) return '';
  const def = findChannelDef(channel) || getOrAutoRegister(channel);
  if (!def) return '';
  return def.titlePrefix || '';
}
|
|
605
|
+
|
|
606
|
+
/**
 * Extract the base session uuid8 from an OpenClaw inbox/source file name:
 *   "openclaw-3824f87a.jsonl"                  → "3824f87a"
 *   "openclaw-3824f87a-ckpt-e6c37ac7.jsonl"    → "3824f87a"
 *   "openclaw-3824f87a-reset-deadbeef.jsonl"   → "3824f87a"  (v0.11.4)
 *   "3824f87a-ea6e-4e08-…-c596288bcfe3.jsonl"  → "3824f87a"
 *   "3824f87a.checkpoint.e6c37ac7.jsonl"       → "3824f87a"
 *   "3824f87a.reset.deadbeef.jsonl"            → "3824f87a"  (v0.11.4)
 *
 * The ".jsonl" extension is stripped case-insensitively, in line with
 * `isCheckpointFile` / `isResetFile`. Names matching none of the known
 * forms fall back to their first 8 alphanumeric characters (possibly
 * fewer than 8).
 *
 * @param {string} fileName - file name without directory
 * @returns {string} the uuid8, or "" when `fileName` is not a non-empty
 *   string (callers guard with `if (uid)`)
 */
export function baseUuid8(fileName) {
  if (typeof fileName !== 'string' || !fileName) return '';
  const stem = fileName.replace(/\.jsonl$/i, '');

  // Inbox-staged forms: openclaw-<uuid8>[-ckpt-<id8> | -reset-<id8>]
  const staged = stem.match(/^openclaw-([0-9a-f]{8})(?:-(?:ckpt|reset)-[0-9a-f]{8})?$/i);
  if (staged) return staged[1];

  // Source-file forms: the name starts with the session uuid.
  const source = stem.match(/^([0-9a-f]{8})/i);
  if (source) return source[1];

  // Fallback: first 8 alphanumeric characters.
  return stem.replace(/[^0-9a-z]/gi, '').slice(0, 8);
}
|
|
626
|
+
|
|
627
|
+
/**
 * Tell whether a file name denotes a reset file. Two naming schemes are
 * recognised (case-insensitive, with or without a ".jsonl" extension):
 *   "<uuid>.reset.<reset-uuid>.jsonl"        — source naming
 *   "openclaw-<base8>-reset-<reset8>.jsonl"  — inbox-staged naming
 *
 * Reset files are FULL archives of a session at the moment of reset — not
 * snapshots like checkpoints — so they are always ingested regardless of
 * session-type detection.
 *
 * @param {string} fileName - file name without directory
 * @returns {boolean} false for a falsy name
 */
export function isResetFile(fileName) {
  if (!fileName) return false;
  const stem = fileName.replace(/\.jsonl$/i, '');
  if (/\.reset\./i.test(stem)) return true;
  return /-reset-[0-9a-f]+$/i.test(stem);
}
|