@cecwxf/wtt 0.1.13 → 0.1.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,420 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { promises as fs } from "node:fs";
4
+ import { createWriteStream } from "node:fs";
5
+ import os from "node:os";
6
+ import path from "node:path";
7
+ import { randomBytes } from "node:crypto";
8
+
9
// Largest media payload, in bytes, accepted per download unless --max-bytes overrides it (15 MiB).
const DEFAULT_MAX_BYTES = 15 * 2 ** 20;
// Per-media download timeout, in milliseconds, unless --timeout-ms overrides it.
const DEFAULT_TIMEOUT_MS = 20 * 1000;
11
+
12
/**
 * Parse the command-line tokens of the backfill CLI into an options object.
 *
 * Unknown tokens are ignored. A value-taking option that has no following
 * token is ignored as well. Numeric options must be positive; anything else
 * (negative, zero, fractional below one, non-numeric) falls back to the
 * option's default so a typo cannot silently disable downloads.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{home: string, dir: string, download: boolean, dryRun: boolean,
 *   help: boolean, maxBytes: number, timeoutMs: number, limit: number,
 *   topic: string, verbose: boolean}} Parsed options; `home` and `dir` are
 *   absolute paths, `dir` defaults to `<home>/topic-memory`.
 */
function parseArgs(argv) {
  const args = {
    home: process.env.OPENCLAW_HOME?.trim() || path.join(os.homedir(), ".openclaw"),
    dir: "",
    download: true,
    dryRun: false,
    help: false,
    maxBytes: DEFAULT_MAX_BYTES,
    timeoutMs: DEFAULT_TIMEOUT_MS,
    limit: 0,
    topic: "",
    verbose: false,
  };

  // Accept only positive whole numbers; everything else yields the fallback.
  const toPositiveInt = (raw, fallback) => {
    const value = Math.floor(Number(raw));
    return Number.isFinite(value) && value > 0 ? value : fallback;
  };

  for (let i = 0; i < argv.length; i += 1) {
    const token = argv[i];
    const next = argv[i + 1];

    switch (token) {
      case "--help":
      case "-h":
        args.help = true;
        break;
      case "--dry-run":
        args.dryRun = true;
        break;
      case "--no-download":
        args.download = false;
        break;
      case "--verbose":
        args.verbose = true;
        break;
      case "--home":
        if (next) {
          args.home = next;
          i += 1;
        }
        break;
      case "--dir":
        if (next) {
          args.dir = next;
          i += 1;
        }
        break;
      case "--topic":
        if (next) {
          args.topic = next;
          i += 1;
        }
        break;
      case "--limit":
        if (next) {
          // 0 means "no limit", which is also the fallback for invalid input.
          args.limit = toPositiveInt(next, 0);
          i += 1;
        }
        break;
      case "--max-bytes":
        if (next) {
          args.maxBytes = toPositiveInt(next, DEFAULT_MAX_BYTES);
          i += 1;
        }
        break;
      case "--timeout-ms":
        if (next) {
          args.timeoutMs = toPositiveInt(next, DEFAULT_TIMEOUT_MS);
          i += 1;
        }
        break;
      default:
        // Unrecognized tokens are skipped, matching the original lenient parser.
        break;
    }
  }

  args.home = path.resolve(args.home);
  args.dir = args.dir ? path.resolve(args.dir) : path.join(args.home, "topic-memory");
  return args;
}
81
+
82
/**
 * Print the CLI usage text to stdout.
 */
function printHelp() {
  const usageLines = [
    "Usage: openclaw-wtt-topic-memory-backfill [options]",
    "",
    "Options:",
    " --home <path> OpenClaw home (default: ~/.openclaw)",
    " --dir <path> Topic-memory dir (default: <home>/topic-memory)",
    " --topic <topicId> Backfill only one topic file",
    " --limit <n> Process at most n files",
    " --dry-run Preview without writing files",
    " --no-download Don't download media to local paths",
    " --max-bytes <n> Max bytes per media file (default: 15728640)",
    " --timeout-ms <n> Per-media timeout in ms (default: 20000)",
    " --verbose Print per-file details",
    " -h, --help Show help",
    "", // the usage text ends with a newline of its own
  ];
  console.log(usageLines.join("\n"));
}
85
+
86
/**
 * Reduce a raw discussion message to compact plain text.
 *
 * Steps, in order: normalize CR/CRLF line endings to LF, drop the boxed
 * "来源标识" block, drop the "[回复上下文]" section (up to the next `---` or
 * the end of the text), turn `<br>` and `</p>` into newlines, strip all other
 * HTML tags, rewrite Markdown images as `[image:<url>]`, collapse runs of
 * three or more newlines to two, and trim.
 *
 * @param {unknown} raw - Message text; falsy values are treated as "".
 * @returns {string} The compacted text.
 */
function compactDiscussionContent(raw) {
  const source = String(raw || "");
  return source
    // Normalize line endings first so the newline-based patterns below also
    // work on CRLF input and no stray "\r" survives in the output.
    .replace(/\r\n?/g, "\n")
    .replace(/┌─\s*来源标识[\s\S]*?└[^\n]*\n?/g, "")
    .replace(/\[回复上下文\][\s\S]*?(?:---|$)/g, "")
    .replace(/<br\s*\/?>/gi, "\n")
    .replace(/<\/p>/gi, "\n")
    .replace(/<[^>]+>/g, "")
    .replace(/!\[[^\]]*\]\(([^)]+)\)/g, "[image:$1]")
    .replace(/\n{3,}/g, "\n\n")
    .trim();
}
98
+
99
/**
 * Collect the distinct media references found in a message text.
 *
 * Three sources are scanned, in this order:
 *  1. `[image:<target>]` tags,
 *  2. Markdown images `![alt](<target>)` — an optional title after the URL
 *     (`![alt](url "title")`) is discarded so only the URL is reported,
 *  3. bare http(s) URLs that end in a known image extension (optionally
 *     followed by a query string or fragment) or contain `/media/`.
 *
 * @param {unknown} rawText - Message text; falsy values are treated as "".
 * @returns {string[]} Unique references in first-seen order.
 */
function detectMediaUrls(rawText) {
  const text = String(rawText || "");
  const found = new Set();

  for (const tag of text.matchAll(/\[image:([^\]]+)\]/gi)) {
    const target = String(tag[1] || "").trim();
    if (target) found.add(target);
  }

  for (const image of text.matchAll(/!\[[^\]]*\]\(([^)]+)\)/gi)) {
    // Keep only the first whitespace-delimited token: Markdown permits a
    // quoted title after the URL, which is not part of the address.
    const target = String(image[1] || "").trim().split(/\s+/)[0];
    if (target) found.add(target);
  }

  const imageExtRe = /\.(?:png|jpe?g|gif|webp|bmp|svg|heic|heif)(?:[?#]|$)/i;
  for (const link of text.matchAll(/https?:\/\/[^\s)\]]+/gi)) {
    // Trailing sentence punctuation is not part of the URL.
    const url = String(link[0] || "").replace(/[),.;]+$/, "").trim();
    if (imageExtRe.test(url) || /\/media\//i.test(url)) {
      found.add(url);
    }
  }

  return Array.from(found);
}
126
+
127
/**
 * Parse a topic-memory Markdown file into its topic name and message list.
 *
 * An entry starts with a header line of the form
 * `- [<createdAt>] <senderType>:<senderId>(<displayName>) id=<id> reply_to=<id> ...`
 * (display name, reply_to and trailing attributes are optional) and is
 * followed by indented body lines. Recognized body fields are `text:`,
 * `media_paths:`, `media_urls:` (both `|`-separated) and `reply_excerpt:`;
 * any other indented line is appended to the text, joined by a space.
 * When an entry lists no media URLs they are detected from its text.
 *
 * Body lines are accepted with any leading space/tab indentation and are
 * trimmed as a whole, so the parser does not depend on the exact indent
 * width. LF and CRLF line endings are both supported.
 *
 * @param {unknown} raw - File contents; falsy values are treated as "".
 * @returns {{topicName: string, messages: Array<{id: string, senderId: string,
 *   senderDisplayName: string, senderType: string, createdAt: string,
 *   replyTo: string, text: string, mediaPaths: string[], mediaUrls: string[],
 *   replyExcerpt: string}>}} Parsed topic data.
 */
function parseTopicFile(raw) {
  const lines = String(raw || "").split(/\r?\n/);
  const topicNameLine = lines.find((line) => /^topic_name:\s*/.test(line));
  const topicName = topicNameLine ? topicNameLine.replace(/^topic_name:\s*/, "").trim() : "";

  const entryRe = /^- \[([^\]]*)\]\s+([\w]+):(\S+?)(?:\(([^)]*)\))?\s+id=(\S+?)(?:\s+reply_to=(\S+))?(?:\s+.*)?$/;

  // Split a "a | b | c" payload into its non-empty, trimmed parts.
  const splitList = (payload) => payload.split("|").map((v) => v.trim()).filter(Boolean);

  const messages = [];
  let i = 0;
  while (i < lines.length) {
    const match = lines[i].match(entryRe);
    i += 1;
    if (!match) continue;

    const [, createdAt, senderType, senderId, displayName, id, replyTo] = match;

    // Gather the indented lines that belong to this entry. Trimming (instead
    // of cutting a fixed number of characters) keeps the first character of
    // the field name intact whatever the indent width is.
    const bodyLines = [];
    while (i < lines.length && !entryRe.test(lines[i])) {
      if (/^[ \t]/.test(lines[i])) bodyLines.push(lines[i].trim());
      i += 1;
    }

    let text = "";
    const mediaPaths = [];
    const mediaUrls = [];
    let replyExcerpt = "";

    for (const line of bodyLines) {
      if (!line) continue;

      if (line.startsWith("text:")) {
        text = line.slice("text:".length).trim();
      } else if (line.startsWith("media_paths:")) {
        mediaPaths.push(...splitList(line.slice("media_paths:".length)));
      } else if (line.startsWith("media_urls:")) {
        mediaUrls.push(...splitList(line.slice("media_urls:".length)));
      } else if (line.startsWith("reply_excerpt:")) {
        replyExcerpt = line.slice("reply_excerpt:".length).trim();
      } else if (!text) {
        // Unlabelled line: treat it as (the continuation of) the text.
        text = line;
      } else {
        text += ` ${line}`;
      }
    }

    if (!mediaUrls.length && text) {
      mediaUrls.push(...detectMediaUrls(text));
    }

    messages.push({
      id,
      senderId,
      senderDisplayName: displayName || "",
      senderType: senderType || "unknown",
      createdAt: createdAt || "",
      replyTo: replyTo || "",
      text: compactDiscussionContent(text),
      mediaPaths: Array.from(new Set(mediaPaths)),
      mediaUrls: Array.from(new Set(mediaUrls)),
      replyExcerpt: replyExcerpt || "",
    });
  }

  return { topicName, messages };
}
204
+
205
/**
 * Map a Content-Type value to a file extension.
 *
 * Matching is a case-insensitive substring test, checked in the listed order.
 *
 * @param {unknown} contentType - Content-Type header value (may be empty).
 * @returns {string} Extension including the leading dot, or "" when unknown.
 */
function extensionFromContentType(contentType) {
  const mime = String(contentType || "").toLowerCase();
  if (mime === "") return "";

  // [substrings that identify the type, resulting extension]
  const table = [
    [["jpeg", "jpg"], ".jpg"],
    [["png"], ".png"],
    [["gif"], ".gif"],
    [["webp"], ".webp"],
    [["bmp"], ".bmp"],
    [["svg"], ".svg"],
    [["heic"], ".heic"],
    [["heif"], ".heif"],
    [["mp4"], ".mp4"],
  ];

  for (const [needles, extension] of table) {
    if (needles.some((needle) => mime.includes(needle))) {
      return extension;
    }
  }
  return "";
}
219
+
220
/**
 * Derive a file extension from the path component of a URL.
 *
 * @param {string} urlRaw - Absolute URL.
 * @returns {string} Lower-cased extension (dot plus 1–8 alphanumerics), or ""
 *   when the URL cannot be parsed or has no such extension.
 */
function extensionFromUrl(urlRaw) {
  let pathname;
  try {
    pathname = new URL(urlRaw).pathname || "";
  } catch {
    // Not a parseable absolute URL.
    return "";
  }

  const suffix = path.extname(pathname).toLowerCase();
  return /^\.[a-z0-9]{1,8}$/.test(suffix) ? suffix : "";
}
230
+
231
/**
 * Download one media URL into `inboundDir` and return the local file path.
 *
 * The request is aborted after `opts.timeoutMs` (at least 1000 ms). The file
 * extension comes from the response Content-Type, then from the URL, then
 * falls back to ".bin". Every chunk is fully written before the next one is
 * read, and the file is closed before the path is returned, so the caller
 * never sees a half-flushed file. If anything fails after the file has been
 * created (size limit, abort, network or disk error) the partial file is
 * removed.
 *
 * @param {string} url - http(s) URL to fetch.
 * @param {string} inboundDir - Destination directory; created if missing.
 * @param {{maxBytes: number, timeoutMs: number}} opts - Size and time limits.
 * @returns {Promise<string>} Path of the written file.
 * @throws {Error} `http_<status>` for non-2xx responses, `empty_body` when the
 *   response has no body, `media_too_large_<bytes>` when the size limit is
 *   exceeded; fetch, abort and file-system errors propagate unchanged.
 */
async function downloadOneMedia(url, inboundDir, opts) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), Math.max(1000, opts.timeoutMs));

  try {
    const resp = await fetch(url, {
      method: "GET",
      redirect: "follow",
      signal: controller.signal,
      headers: { Accept: "image/*,*/*;q=0.8" },
    });

    if (!resp.ok) {
      throw new Error(`http_${resp.status}`);
    }
    if (!resp.body) {
      throw new Error("empty_body");
    }

    // Reject early when the server already declares an oversized payload.
    const declaredLength = Number(resp.headers.get("content-length"));
    if (Number.isFinite(declaredLength) && declaredLength > opts.maxBytes) {
      await resp.body.cancel().catch(() => {});
      throw new Error(`media_too_large_${declaredLength}`);
    }

    const contentType = (resp.headers.get("content-type") || "").split(";")[0].trim().toLowerCase();
    const ext = extensionFromContentType(contentType) || extensionFromUrl(url) || ".bin";

    const fileName = `wtt-backfill-${Date.now()}-${randomBytes(6).toString("hex")}${ext}`;
    const filePath = path.join(inboundDir, fileName);

    await fs.mkdir(inboundDir, { recursive: true });

    const handle = await fs.open(filePath, "w");
    let completed = false;
    try {
      let total = 0;
      for await (const chunk of resp.body) {
        const buf = Buffer.from(chunk);
        total += buf.length;
        if (total > opts.maxBytes) {
          throw new Error(`media_too_large_${total}`);
        }
        // A single write may be partial; loop until the chunk is on disk.
        let offset = 0;
        while (offset < buf.length) {
          const { bytesWritten } = await handle.write(buf, offset, buf.length - offset);
          offset += bytesWritten;
        }
      }
      completed = true;
    } finally {
      await handle.close();
      if (!completed) {
        // Best-effort cleanup: never leave a truncated download behind.
        await fs.rm(filePath, { force: true }).catch(() => {});
      }
    }

    return filePath;
  } finally {
    clearTimeout(timer);
  }
}
274
+
275
/**
 * Render a topic and its messages back into topic-memory Markdown.
 *
 * Output layout: `# topic_id_<id>`, optional `topic_name:`, `updated_at:` with
 * the current time, a blank line, then one header line per message followed
 * by indented `text:`, `media_paths:`, `media_urls:` and `reply_excerpt:`
 * lines (the media and excerpt lines only when non-empty).
 *
 * Text and reply excerpts may contain newlines after compaction. Their
 * continuation lines are indented like the field lines so they stay part of
 * the entry when the file is parsed again, instead of being dropped or
 * mistaken for a new entry header.
 *
 * @param {string} topicId - Topic identifier used in the title line.
 * @param {string} topicName - Human-readable topic name; may be empty.
 * @param {Array<object>} messages - Messages with `id`, `senderId`,
 *   `senderType`, `senderDisplayName`, `createdAt`, `replyTo`, `text`,
 *   `mediaPaths`, `mediaUrls`, `replyExcerpt`.
 * @returns {string} File contents ending with a newline.
 */
function renderTopicFile(topicId, topicName, messages) {
  // Indent every continuation line (blank runs collapsed) of a field value.
  const indentContinuation = (value) => value.replace(/\n+/g, "\n ");

  const lines = [];
  lines.push(`# topic_id_${topicId}`);
  if (topicName && topicName.trim()) lines.push(`topic_name: ${topicName.trim()}`);
  lines.push(`updated_at: ${new Date().toISOString()}`);
  lines.push("");

  for (const msg of messages) {
    const nameTag = msg.senderDisplayName ? `(${msg.senderDisplayName})` : "";
    const who = `${msg.senderType || "unknown"}:${msg.senderId}${nameTag}`;
    const mediaCount = (msg.mediaPaths?.length || 0) + (msg.mediaUrls?.length || 0);
    const replyTag = msg.replyTo ? ` reply_to=${msg.replyTo}` : "";
    const mediaTag = mediaCount > 0 ? ` media_count=${mediaCount}` : "";
    lines.push(`- [${msg.createdAt || ""}] ${who} id=${msg.id}${replyTag}${mediaTag}`);

    lines.push(` text: ${indentContinuation(compactDiscussionContent(msg.text || ""))}`);
    if (msg.mediaPaths && msg.mediaPaths.length > 0) {
      lines.push(` media_paths: ${msg.mediaPaths.join(" | ")}`);
    }
    if (msg.mediaUrls && msg.mediaUrls.length > 0) {
      lines.push(` media_urls: ${msg.mediaUrls.join(" | ")}`);
    }
    if (msg.replyExcerpt) {
      const excerpt = compactDiscussionContent(msg.replyExcerpt).slice(0, 220);
      lines.push(` reply_excerpt: ${indentContinuation(excerpt)}`);
    }
  }

  return `${lines.join("\n")}\n`;
}
302
+
303
/**
 * CLI entry point: backfill reply excerpts, media URLs and local media paths
 * into every `topic_id_*.md` file of the topic-memory directory.
 *
 * For each selected file the messages are parsed, missing reply excerpts are
 * derived from the replied-to message in the same file, missing media URLs
 * are detected from the text, and (unless disabled) media is downloaded into
 * `<home>/media/inbound`. A file is rewritten only when its rendered content
 * differs from what is on disk.
 *
 * `--dry-run` writes nothing at all: topic files are not rewritten and media
 * is not downloaded, since a download creates files in the inbound directory.
 *
 * Sets `process.exitCode = 1` when the topic-memory directory cannot be read.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));
  if (args.help) {
    printHelp();
    return;
  }

  const topicDir = args.dir;
  const inboundDir = path.join(args.home, "media", "inbound");
  // Downloads write files, so they are part of what --dry-run must suppress.
  const shouldDownload = args.download && !args.dryRun;

  let files;
  try {
    files = await fs.readdir(topicDir);
  } catch (err) {
    const code = err && typeof err === "object" ? err.code : undefined;
    if (!code || code === "ENOENT") {
      console.error(`[backfill] topic-memory dir not found: ${topicDir}`);
    } else {
      // Distinguish e.g. permission problems from a missing directory.
      console.error(`[backfill] topic-memory dir not readable: ${topicDir} (${code})`);
    }
    process.exitCode = 1;
    return;
  }

  let targets = files
    .filter((name) => /^topic_id_.+\.md$/.test(name))
    .map((name) => path.join(topicDir, name))
    .sort();

  if (args.topic) {
    const wanted = path.join(topicDir, `topic_id_${args.topic}.md`);
    targets = targets.filter((p) => p === wanted);
  }

  if (args.limit > 0) {
    targets = targets.slice(0, args.limit);
  }

  // url -> local path, or "" for a URL whose download already failed, so each
  // URL is attempted at most once per run.
  const downloadCache = new Map();
  let scanned = 0;
  let updated = 0;
  let downloaded = 0;

  for (const filePath of targets) {
    scanned += 1;
    const fileName = path.basename(filePath);
    const topicId = fileName.replace(/^topic_id_/, "").replace(/\.md$/, "");

    const raw = await fs.readFile(filePath, "utf8");
    const parsed = parseTopicFile(raw);

    if (!parsed.messages.length) {
      if (args.verbose) console.log(`[backfill] skip empty ${fileName}`);
      continue;
    }

    const messageById = new Map(parsed.messages.map((m) => [m.id, m]));

    for (const msg of parsed.messages) {
      if (msg.replyTo && !msg.replyExcerpt) {
        const target = messageById.get(msg.replyTo);
        if (target) {
          msg.replyExcerpt = compactDiscussionContent(target.text || "").slice(0, 220);
        }
      }

      if (!Array.isArray(msg.mediaUrls) || msg.mediaUrls.length === 0) {
        msg.mediaUrls = detectMediaUrls(msg.text || "");
      }

      if (!shouldDownload || !msg.mediaUrls.length) continue;

      const nextPaths = Array.isArray(msg.mediaPaths) ? [...msg.mediaPaths] : [];
      for (const url of msg.mediaUrls) {
        if (!/^https?:\/\//i.test(url)) continue;
        if (downloadCache.has(url)) {
          const cached = downloadCache.get(url);
          if (cached) nextPaths.push(cached);
          continue;
        }

        try {
          const localPath = await downloadOneMedia(url, inboundDir, {
            maxBytes: args.maxBytes,
            timeoutMs: args.timeoutMs,
          });
          downloaded += 1;
          downloadCache.set(url, localPath);
          nextPaths.push(localPath);
        } catch (err) {
          // A failed download is not fatal; remember it and move on.
          downloadCache.set(url, "");
          if (args.verbose) {
            const reason = err instanceof Error ? err.message : String(err);
            console.log(`[backfill] media download failed topic=${topicId} url=${url} reason=${reason}`);
          }
        }
      }

      msg.mediaPaths = Array.from(new Set(nextPaths.filter(Boolean)));
    }

    const nextContent = renderTopicFile(topicId, parsed.topicName, parsed.messages);
    if (nextContent !== raw) {
      updated += 1;
      if (!args.dryRun) {
        await fs.writeFile(filePath, nextContent, "utf8");
      }
      if (args.verbose || args.dryRun) {
        console.log(`[backfill] updated ${fileName}`);
      }
    } else if (args.verbose) {
      console.log(`[backfill] unchanged ${fileName}`);
    }
  }

  console.log(`[backfill] done scanned=${scanned} updated=${updated} downloaded=${downloaded} dry_run=${args.dryRun}`);
}
415
+
416
// Run the CLI; any uncaught failure is reported on stderr and turns into a
// non-zero exit code without forcing an immediate exit.
main().catch((error) => {
  let detail;
  if (error instanceof Error) {
    detail = error.stack || error.message;
  } else {
    detail = String(error);
  }
  console.error(`[backfill] fatal: ${detail}`);
  process.exitCode = 1;
});
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cecwxf/wtt",
3
- "version": "0.1.13",
3
+ "version": "0.1.14",
4
4
  "description": "WTT channel plugin for OpenClaw — real-time Agent communication via Topics",
5
5
  "license": "MIT",
6
6
  "type": "module",
@@ -13,7 +13,8 @@
13
13
  }
14
14
  },
15
15
  "bin": {
16
- "openclaw-wtt-bootstrap": "bin/openclaw-wtt-bootstrap.mjs"
16
+ "openclaw-wtt-bootstrap": "bin/openclaw-wtt-bootstrap.mjs",
17
+ "openclaw-wtt-topic-memory-backfill": "bin/openclaw-wtt-topic-memory-backfill.mjs"
17
18
  },
18
19
  "files": [
19
20
  "dist",
@@ -22,6 +23,7 @@
22
23
  "index.ts",
23
24
  "openclaw.plugin.json",
24
25
  "bin/openclaw-wtt-bootstrap.mjs",
26
+ "bin/openclaw-wtt-topic-memory-backfill.mjs",
25
27
  "scripts/install-bootstrap-cli.sh"
26
28
  ],
27
29
  "scripts": {