openclaw-threema 0.6.4 → 0.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,27 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.6.6 (2026-05-04)
4
+
5
+ ### Added
6
+ - **Idempotency Cache for Webhook Replay Protection**: Implements replay-attack protection against Threema webhook retries.
7
+ - Message-ID deduplication now uses a 24h TTL (previously 15 minutes) and a cache capped at 500 entries (previously 5000).
8
+ - Expired entries are pruned automatically on every duplicate check.
9
+ - Disk persistence via `~/.openclaw/extensions/threema/.idempotency-cache/messageids.json` to survive plugin reloads.
10
+ - Throttled writes (max 1 per 5 seconds) to prevent excessive I/O.
11
+ - Prevents duplicate processing when Threema Gateway retries a failed webhook delivery.
12
+ - Fixes an issue where a plugin reload (e.g. during `npm publish`) combined with a temporary 5xx error could cause the same message to be processed twice.
13
+
14
+ ## 0.6.5 (2026-05-04)
15
+
16
+ ### Added
17
+ - **Voice-Reply Function**: Threema plugin now supports sending voice notes (audio messages).
18
+ - New `sendVoiceNote(toId, audioBuffer, mimeType, caption)` method on ThreemaClient for E2E encrypted voice messages.
19
+ - When an agent reply contains `audioAsVoice: true` together with a `mediaUrl` (e.g., TTS or Whisper output), the plugin automatically sends it as a voice message instead of text.
20
+ - Audio detection works in both text-inbound and file-inbound reply pipelines.
21
+ - Falls back to text delivery when the audio file is not found or when E2E mode is disabled (voice notes require E2E).
22
+ - Supports multiple audio MIME types: audio/aac, audio/mpeg, audio/wav, audio/ogg, audio/m4a, audio/webm.
23
+ - Error handling: logs errors and gracefully falls back to text delivery if voice send fails.
24
+
3
25
  ## 0.6.4 (2026-05-04)
4
26
 
5
27
  ### Fixed
package/dist/index.js CHANGED
@@ -157,10 +157,12 @@ function composeBodyForAgent(userText, cfg) {
157
157
  }
158
158
  // Allowed base directory for local media files (exfiltration protection)
159
159
  const MEDIA_ALLOWED_BASE = path.join(process.env.HOME || "/tmp", ".openclaw", "media");
160
+ // Extension state directory for persistent caches
161
+ const EXTENSION_STATE_DIR = path.join(process.env.HOME || "/tmp", ".openclaw", "extensions", "threema");
160
162
  // Message-ID dedup cache (replay protection): messageId -> timestamp
161
163
  const seenMsgIds = new Map();
162
- const MSG_ID_TTL_MS = 15 * 60 * 1000; // 15 minutes
163
- const MSG_ID_CACHE_MAX = 5000;
164
+ const MSG_ID_TTL_MS = 24 * 60 * 60 * 1000; // 24 hours
165
+ const MSG_ID_CACHE_MAX = 500;
164
166
  // Audio MIME types that should be transcribed
165
167
  const AUDIO_MIME_TYPES = [
166
168
  "audio/aac",
@@ -327,6 +329,79 @@ class ThreemaClient {
327
329
  const decrypted = nacl.secretbox.open(encryptedBlob, nonce, key);
328
330
  return decrypted || null;
329
331
  }
332
+ /**
333
+ * Send a voice note message (audio file with voice message rendering type).
334
+ * Type 0x17 file message with j=1 (media rendering) and audio MIME type.
335
+ * Suitable for Whisper transcriptions or agent-generated TTS audio.
336
+ */
337
+ async sendVoiceNote(to, audioBuffer, mimeType = "audio/aac", caption) {
338
+ if (!this.privateKey) {
339
+ throw new Error("E2E mode requires privateKey configuration");
340
+ }
341
+ const recipientPubKey = await this.getPublicKey(to);
342
+ // Generate random symmetric key for file encryption
343
+ const fileKey = nacl.randomBytes(32);
344
+ // Threema FILE_NONCE: 23 zero bytes + 0x01
345
+ const fileNonce = new Uint8Array(24);
346
+ fileNonce[23] = 0x01;
347
+ // Encrypt the audio with secretbox
348
+ const encryptedAudio = nacl.secretbox(new Uint8Array(audioBuffer), fileNonce, fileKey);
349
+ // Upload encrypted blob
350
+ const blobId = await this.uploadBlob(encryptedAudio);
351
+ // Create file message JSON for voice note
352
+ // j=1 marks it as media (voice message bubble in UI)
353
+ const fileMsg = {
354
+ b: blobId,
355
+ k: bytesToHex(fileKey),
356
+ m: mimeType,
357
+ n: `voice.${this.getMimeExtension(mimeType)}`,
358
+ s: audioBuffer.length,
359
+ j: 1, // 1 = render as media (voice message bubble)
360
+ i: 1, // deprecated but needed for older clients
361
+ };
362
+ if (caption) {
363
+ fileMsg.d = caption;
364
+ }
365
+ const fileMsgJson = JSON.stringify(fileMsg);
366
+ const fileMsgBytes = decodeUTF8(fileMsgJson);
367
+ // Create E2E payload (type 0x17 = file message)
368
+ const payload = buildE2EPayload(0x17, fileMsgBytes);
369
+ // Generate nonce and encrypt with NaCl box
370
+ const nonce = nacl.randomBytes(24);
371
+ const box = nacl.box(payload, nonce, recipientPubKey, this.privateKey);
372
+ const params = new URLSearchParams({
373
+ from: this.gatewayId,
374
+ to,
375
+ nonce: bytesToHex(nonce),
376
+ box: bytesToHex(box),
377
+ secret: this.secretKey,
378
+ });
379
+ const url = `${THREEMA_API_BASE}/send_e2e`;
380
+ const res = await fetch(url, {
381
+ method: "POST",
382
+ headers: { "Content-Type": "application/x-www-form-urlencoded" },
383
+ body: params.toString(),
384
+ });
385
+ if (!res.ok) {
386
+ // Don't log response body (may contain secrets)
387
+ throw new Error(`Threema E2E API error ${res.status}`);
388
+ }
389
+ return res.text();
390
+ }
391
+ /**
392
+ * Get file extension for MIME type
393
+ */
394
+ getMimeExtension(mimeType) {
395
+ const mimeMap = {
396
+ "audio/aac": "aac",
397
+ "audio/mpeg": "mp3",
398
+ "audio/wav": "wav",
399
+ "audio/ogg": "ogg",
400
+ "audio/m4a": "m4a",
401
+ "audio/webm": "webm",
402
+ };
403
+ return mimeMap[mimeType.toLowerCase()] || "m4a";
404
+ }
330
405
  /**
331
406
  * Send a text message (Basic mode - server-side encryption)
332
407
  */
@@ -481,15 +556,85 @@ function buildE2EPayload(type, inner) {
481
556
  * Check if a message ID has been seen recently (replay protection)
482
557
  * Returns true if duplicate (should be ignored)
483
558
  */
559
+ // Idempotency cache directory
560
+ const CACHE_DIR = path.join(EXTENSION_STATE_DIR, ".idempotency-cache");
561
+ const CACHE_FILE = path.join(CACHE_DIR, "messageids.json");
562
+ let lastCacheSave = 0;
563
+ const CACHE_SAVE_THROTTLE_MS = 5000; // Max 1 write per 5 sec
564
+ /**
565
+ * Load idempotency cache from disk if available and fresh
566
+ */
567
+ function loadIdempotencyCache() {
568
+ try {
569
+ if (!fs.existsSync(CACHE_FILE))
570
+ return;
571
+ const data = fs.readFileSync(CACHE_FILE, "utf-8");
572
+ const parsed = JSON.parse(data);
573
+ if (!parsed || typeof parsed !== "object")
574
+ return;
575
+ const now = Date.now();
576
+ for (const [id, ts] of Object.entries(parsed)) {
577
+ const timestamp = Number(ts);
578
+ // Only load entries that are still within TTL
579
+ if (!isNaN(timestamp) && now - timestamp < MSG_ID_TTL_MS) {
580
+ seenMsgIds.set(id, timestamp);
581
+ }
582
+ }
583
+ }
584
+ catch (err) {
585
+ // Silently skip if cache file is corrupted or unreadable
586
+ // Next write will overwrite it
587
+ }
588
+ }
589
+ /**
590
+ * Save idempotency cache to disk (throttled)
591
+ */
592
+ function saveIdempotencyCache() {
593
+ const now = Date.now();
594
+ if (now - lastCacheSave < CACHE_SAVE_THROTTLE_MS) {
595
+ return; // Skip this write, within throttle window
596
+ }
597
+ lastCacheSave = now;
598
+ try {
599
+ if (!fs.existsSync(CACHE_DIR)) {
600
+ fs.mkdirSync(CACHE_DIR, { recursive: true });
601
+ }
602
+ const obj = {};
603
+ for (const [id, ts] of seenMsgIds) {
604
+ obj[id] = ts;
605
+ }
606
+ fs.writeFileSync(CACHE_FILE, JSON.stringify(obj, null, 2), "utf-8");
607
+ }
608
+ catch (err) {
609
+ // Silently skip if write fails; in-memory cache is still valid
610
+ }
611
+ }
612
+ /**
613
+ * Check if message has been seen before (idempotency check)
614
+ * Returns true if duplicate (should skip), false if new (should process)
615
+ */
484
616
  function isDuplicateMsgId(messageId) {
485
617
  const now = Date.now();
486
- // Cleanup old entries if cache is too large
487
- if (seenMsgIds.size > MSG_ID_CACHE_MAX) {
618
+ // Prune entries older than TTL
619
+ for (const [id, ts] of seenMsgIds) {
620
+ if (now - ts > MSG_ID_TTL_MS) {
621
+ seenMsgIds.delete(id);
622
+ }
623
+ }
624
+ // If cache is still too large, evict oldest entries
625
+ if (seenMsgIds.size >= MSG_ID_CACHE_MAX) {
626
+ // Find and remove the oldest entry
627
+ let oldest = messageId;
628
+ let oldestTs = now;
488
629
  for (const [id, ts] of seenMsgIds) {
489
- if (now - ts > MSG_ID_TTL_MS) {
490
- seenMsgIds.delete(id);
630
+ if (ts < oldestTs) {
631
+ oldest = id;
632
+ oldestTs = ts;
491
633
  }
492
634
  }
635
+ if (oldest !== messageId) {
636
+ seenMsgIds.delete(oldest);
637
+ }
493
638
  }
494
639
  // Check if seen
495
640
  const seenAt = seenMsgIds.get(messageId);
@@ -498,6 +643,7 @@ function isDuplicateMsgId(messageId) {
498
643
  }
499
644
  // Mark as seen
500
645
  seenMsgIds.set(messageId, now);
646
+ saveIdempotencyCache(); // Throttled write
501
647
  return false;
502
648
  }
503
649
  /**
@@ -1369,10 +1515,13 @@ const threemaChannel = {
1369
1515
  // ============================================================================
1370
1516
  export const id = "threema";
1371
1517
  export const name = "Threema Gateway";
1372
- export const version = "0.6.0";
1518
+ export const version = "0.6.6";
1373
1519
  export const description = "Threema messaging channel via Threema Gateway API (E2E encrypted, with media support)";
1374
1520
  export default function register(api) {
1375
1521
  try {
1522
+ // Load idempotency cache from disk (if available)
1523
+ loadIdempotencyCache();
1524
+ api.logger?.debug?.("Threema: idempotency cache loaded from disk");
1376
1525
  const config = api.config;
1377
1526
  const threemaCfg = getThreemaConfig(config);
1378
1527
  const runtime = api.runtime;
@@ -1554,40 +1703,89 @@ export default function register(api) {
1554
1703
  cfg: currentCfg,
1555
1704
  dispatcherOptions: {
1556
1705
  deliver: async (payload) => {
1557
- const text = payload.text ?? payload.body;
1558
- if (!text)
1559
- return;
1560
- // Chunk long replies if needed
1561
- const limit = getThreemaConfig(currentCfg)?.textChunkLimit ?? 3500;
1562
- if (text.length <= limit) {
1563
- if (replyClient.isE2EEnabled) {
1564
- await replyClient.sendE2E(from, text);
1706
+ // Check if reply contains audio that should be sent as voice note
1707
+ const isAudioAsVoice = payload.audioAsVoice === true && payload.mediaUrl;
1708
+ if (isAudioAsVoice && payload.mediaUrl) {
1709
+ // Send as voice note
1710
+ try {
1711
+ if (replyClient.isE2EEnabled) {
1712
+ // Load audio from mediaUrl and send via sendVoiceNote
1713
+ const audioPath = payload.mediaUrl;
1714
+ if (fs.existsSync(audioPath)) {
1715
+ const audioBuffer = fs.readFileSync(audioPath);
1716
+ const mimeType = payload.mediaMimeType ?? "audio/aac";
1717
+ const caption = payload.text ?? payload.body ?? undefined;
1718
+ await replyClient.sendVoiceNote(from, audioBuffer, mimeType, caption);
1719
+ }
1720
+ else {
1721
+ api.logger?.warn?.(`Threema: audio file not found at ${audioPath}, falling back to text`);
1722
+ // Fallback to text if audio file not found
1723
+ const text = payload.text ?? payload.body;
1724
+ if (text) {
1725
+ await replyClient.sendE2E(from, text);
1726
+ }
1727
+ }
1728
+ }
1729
+ else {
1730
+ // Voice notes only work in E2E mode; fallback to text in basic mode
1731
+ api.logger?.info?.(`Threema: voice notes require E2E mode, sending text instead`);
1732
+ const text = payload.text ?? payload.body;
1733
+ if (text) {
1734
+ await replyClient.sendSimple(from, text);
1735
+ }
1736
+ }
1565
1737
  }
1566
- else {
1567
- await replyClient.sendSimple(from, text);
1738
+ catch (audioErr) {
1739
+ api.logger?.error?.(`Threema: error sending voice note: ${audioErr.message}`);
1740
+ // Fallback to text on error
1741
+ const text = payload.text ?? payload.body;
1742
+ if (text) {
1743
+ if (replyClient.isE2EEnabled) {
1744
+ await replyClient.sendE2E(from, text);
1745
+ }
1746
+ else {
1747
+ await replyClient.sendSimple(from, text);
1748
+ }
1749
+ }
1568
1750
  }
1569
1751
  }
1570
1752
  else {
1571
- // Split into chunks at newline boundaries
1572
- const chunks = [];
1573
- let remaining = text;
1574
- while (remaining.length > 0) {
1575
- if (remaining.length <= limit) {
1576
- chunks.push(remaining);
1577
- break;
1578
- }
1579
- let splitIdx = remaining.lastIndexOf("\n", limit);
1580
- if (splitIdx <= 0)
1581
- splitIdx = limit;
1582
- chunks.push(remaining.slice(0, splitIdx));
1583
- remaining = remaining.slice(splitIdx).replace(/^\n/, "");
1584
- }
1585
- for (const chunk of chunks) {
1753
+ // Send as text (existing logic)
1754
+ const text = payload.text ?? payload.body;
1755
+ if (!text)
1756
+ return;
1757
+ // Chunk long replies if needed
1758
+ const limit = getThreemaConfig(currentCfg)?.textChunkLimit ?? 3500;
1759
+ if (text.length <= limit) {
1586
1760
  if (replyClient.isE2EEnabled) {
1587
- await replyClient.sendE2E(from, chunk);
1761
+ await replyClient.sendE2E(from, text);
1588
1762
  }
1589
1763
  else {
1590
- await replyClient.sendSimple(from, chunk);
1764
+ await replyClient.sendSimple(from, text);
1765
+ }
1766
+ }
1767
+ else {
1768
+ // Split into chunks at newline boundaries
1769
+ const chunks = [];
1770
+ let remaining = text;
1771
+ while (remaining.length > 0) {
1772
+ if (remaining.length <= limit) {
1773
+ chunks.push(remaining);
1774
+ break;
1775
+ }
1776
+ let splitIdx = remaining.lastIndexOf("\n", limit);
1777
+ if (splitIdx <= 0)
1778
+ splitIdx = limit;
1779
+ chunks.push(remaining.slice(0, splitIdx));
1780
+ remaining = remaining.slice(splitIdx).replace(/^\n/, "");
1781
+ }
1782
+ for (const chunk of chunks) {
1783
+ if (replyClient.isE2EEnabled) {
1784
+ await replyClient.sendE2E(from, chunk);
1785
+ }
1786
+ else {
1787
+ await replyClient.sendSimple(from, chunk);
1788
+ }
1591
1789
  }
1592
1790
  }
1593
1791
  }
@@ -1730,38 +1928,87 @@ export default function register(api) {
1730
1928
  cfg: currentCfg,
1731
1929
  dispatcherOptions: {
1732
1930
  deliver: async (payload) => {
1733
- const text = payload.text ?? payload.body;
1734
- if (!text)
1735
- return;
1736
- const limit = getThreemaConfig(currentCfg)?.textChunkLimit ?? 3500;
1737
- if (text.length <= limit) {
1738
- if (replyClient.isE2EEnabled) {
1739
- await replyClient.sendE2E(from, text);
1931
+ // Check if reply contains audio that should be sent as voice note
1932
+ const isAudioAsVoice = payload.audioAsVoice === true && payload.mediaUrl;
1933
+ if (isAudioAsVoice && payload.mediaUrl) {
1934
+ // Send as voice note
1935
+ try {
1936
+ if (replyClient.isE2EEnabled) {
1937
+ // Load audio from mediaUrl and send via sendVoiceNote
1938
+ const audioPath = payload.mediaUrl;
1939
+ if (fs.existsSync(audioPath)) {
1940
+ const audioBuffer = fs.readFileSync(audioPath);
1941
+ const mimeType = payload.mediaMimeType ?? "audio/aac";
1942
+ const caption = payload.text ?? payload.body ?? undefined;
1943
+ await replyClient.sendVoiceNote(from, audioBuffer, mimeType, caption);
1944
+ }
1945
+ else {
1946
+ api.logger?.warn?.(`Threema: audio file not found at ${audioPath}, falling back to text`);
1947
+ // Fallback to text if audio file not found
1948
+ const text = payload.text ?? payload.body;
1949
+ if (text) {
1950
+ await replyClient.sendE2E(from, text);
1951
+ }
1952
+ }
1953
+ }
1954
+ else {
1955
+ // Voice notes only work in E2E mode; fallback to text in basic mode
1956
+ api.logger?.info?.(`Threema: voice notes require E2E mode, sending text instead`);
1957
+ const text = payload.text ?? payload.body;
1958
+ if (text) {
1959
+ await replyClient.sendSimple(from, text);
1960
+ }
1961
+ }
1740
1962
  }
1741
- else {
1742
- await replyClient.sendSimple(from, text);
1963
+ catch (audioErr) {
1964
+ api.logger?.error?.(`Threema: error sending voice note: ${audioErr.message}`);
1965
+ // Fallback to text on error
1966
+ const text = payload.text ?? payload.body;
1967
+ if (text) {
1968
+ if (replyClient.isE2EEnabled) {
1969
+ await replyClient.sendE2E(from, text);
1970
+ }
1971
+ else {
1972
+ await replyClient.sendSimple(from, text);
1973
+ }
1974
+ }
1743
1975
  }
1744
1976
  }
1745
1977
  else {
1746
- const chunks = [];
1747
- let remaining = text;
1748
- while (remaining.length > 0) {
1749
- if (remaining.length <= limit) {
1750
- chunks.push(remaining);
1751
- break;
1752
- }
1753
- let splitIdx = remaining.lastIndexOf("\n", limit);
1754
- if (splitIdx <= 0)
1755
- splitIdx = limit;
1756
- chunks.push(remaining.slice(0, splitIdx));
1757
- remaining = remaining.slice(splitIdx).replace(/^\n/, "");
1758
- }
1759
- for (const chunk of chunks) {
1978
+ // Send as text (existing logic)
1979
+ const text = payload.text ?? payload.body;
1980
+ if (!text)
1981
+ return;
1982
+ const limit = getThreemaConfig(currentCfg)?.textChunkLimit ?? 3500;
1983
+ if (text.length <= limit) {
1760
1984
  if (replyClient.isE2EEnabled) {
1761
- await replyClient.sendE2E(from, chunk);
1985
+ await replyClient.sendE2E(from, text);
1762
1986
  }
1763
1987
  else {
1764
- await replyClient.sendSimple(from, chunk);
1988
+ await replyClient.sendSimple(from, text);
1989
+ }
1990
+ }
1991
+ else {
1992
+ const chunks = [];
1993
+ let remaining = text;
1994
+ while (remaining.length > 0) {
1995
+ if (remaining.length <= limit) {
1996
+ chunks.push(remaining);
1997
+ break;
1998
+ }
1999
+ let splitIdx = remaining.lastIndexOf("\n", limit);
2000
+ if (splitIdx <= 0)
2001
+ splitIdx = limit;
2002
+ chunks.push(remaining.slice(0, splitIdx));
2003
+ remaining = remaining.slice(splitIdx).replace(/^\n/, "");
2004
+ }
2005
+ for (const chunk of chunks) {
2006
+ if (replyClient.isE2EEnabled) {
2007
+ await replyClient.sendE2E(from, chunk);
2008
+ }
2009
+ else {
2010
+ await replyClient.sendSimple(from, chunk);
2011
+ }
1765
2012
  }
1766
2013
  }
1767
2014
  }
package/index.ts CHANGED
@@ -311,10 +311,18 @@ const MEDIA_ALLOWED_BASE = path.join(
311
311
  "media"
312
312
  );
313
313
 
314
+ // Extension state directory for persistent caches
315
+ const EXTENSION_STATE_DIR = path.join(
316
+ process.env.HOME || "/tmp",
317
+ ".openclaw",
318
+ "extensions",
319
+ "threema"
320
+ );
321
+
314
322
  // Message-ID dedup cache (replay protection): messageId -> timestamp
315
323
  const seenMsgIds = new Map<string, number>();
316
- const MSG_ID_TTL_MS = 15 * 60 * 1000; // 15 minutes
317
- const MSG_ID_CACHE_MAX = 5000;
324
+ const MSG_ID_TTL_MS = 24 * 60 * 60 * 1000; // 24 hours
325
+ const MSG_ID_CACHE_MAX = 500;
318
326
 
319
327
  // Audio MIME types that should be transcribed
320
328
  const AUDIO_MIME_TYPES = [
@@ -525,6 +533,98 @@ class ThreemaClient {
525
533
  return decrypted || null;
526
534
  }
527
535
 
536
+ /**
537
+ * Send a voice note message (audio file with voice message rendering type).
538
+ * Type 0x17 file message with j=1 (media rendering) and audio MIME type.
539
+ * Suitable for Whisper transcriptions or agent-generated TTS audio.
540
+ */
541
+ async sendVoiceNote(
542
+ to: string,
543
+ audioBuffer: Buffer,
544
+ mimeType: string = "audio/aac",
545
+ caption?: string
546
+ ): Promise<string> {
547
+ if (!this.privateKey) {
548
+ throw new Error("E2E mode requires privateKey configuration");
549
+ }
550
+
551
+ const recipientPubKey = await this.getPublicKey(to);
552
+
553
+ // Generate random symmetric key for file encryption
554
+ const fileKey = nacl.randomBytes(32);
555
+ // Threema FILE_NONCE: 23 zero bytes + 0x01
556
+ const fileNonce = new Uint8Array(24);
557
+ fileNonce[23] = 0x01;
558
+
559
+ // Encrypt the audio with secretbox
560
+ const encryptedAudio = nacl.secretbox(new Uint8Array(audioBuffer), fileNonce, fileKey);
561
+
562
+ // Upload encrypted blob
563
+ const blobId = await this.uploadBlob(encryptedAudio);
564
+
565
+ // Create file message JSON for voice note
566
+ // j=1 marks it as media (voice message bubble in UI)
567
+ const fileMsg: ThreemaFileMessage = {
568
+ b: blobId,
569
+ k: bytesToHex(fileKey),
570
+ m: mimeType,
571
+ n: `voice.${this.getMimeExtension(mimeType)}`,
572
+ s: audioBuffer.length,
573
+ j: 1, // 1 = render as media (voice message bubble)
574
+ i: 1, // deprecated but needed for older clients
575
+ };
576
+ if (caption) {
577
+ fileMsg.d = caption;
578
+ }
579
+
580
+ const fileMsgJson = JSON.stringify(fileMsg);
581
+ const fileMsgBytes = decodeUTF8(fileMsgJson);
582
+
583
+ // Create E2E payload (type 0x17 = file message)
584
+ const payload = buildE2EPayload(0x17, fileMsgBytes);
585
+
586
+ // Generate nonce and encrypt with NaCl box
587
+ const nonce = nacl.randomBytes(24);
588
+ const box = nacl.box(payload, nonce, recipientPubKey, this.privateKey);
589
+
590
+ const params = new URLSearchParams({
591
+ from: this.gatewayId,
592
+ to,
593
+ nonce: bytesToHex(nonce),
594
+ box: bytesToHex(box),
595
+ secret: this.secretKey,
596
+ });
597
+
598
+ const url = `${THREEMA_API_BASE}/send_e2e`;
599
+ const res = await fetch(url, {
600
+ method: "POST",
601
+ headers: { "Content-Type": "application/x-www-form-urlencoded" },
602
+ body: params.toString(),
603
+ });
604
+
605
+ if (!res.ok) {
606
+ // Don't log response body (may contain secrets)
607
+ throw new Error(`Threema E2E API error ${res.status}`);
608
+ }
609
+
610
+ return res.text();
611
+ }
612
+
613
+ /**
614
+ * Get file extension for MIME type
615
+ */
616
+ private getMimeExtension(mimeType: string): string {
617
+ const mimeMap: Record<string, string> = {
618
+ "audio/aac": "aac",
619
+ "audio/mpeg": "mp3",
620
+ "audio/wav": "wav",
621
+ "audio/ogg": "ogg",
622
+ "audio/m4a": "m4a",
623
+ "audio/webm": "webm",
624
+ };
625
+ return mimeMap[mimeType.toLowerCase()] || "m4a";
626
+ }
627
+
528
628
  /**
529
629
  * Send a text message (Basic mode - server-side encryption)
530
630
  */
@@ -718,16 +818,89 @@ function buildE2EPayload(type: number, inner: Uint8Array): Uint8Array {
718
818
  * Check if a message ID has been seen recently (replay protection)
719
819
  * Returns true if duplicate (should be ignored)
720
820
  */
821
+ // Idempotency cache directory
822
+ const CACHE_DIR = path.join(EXTENSION_STATE_DIR, ".idempotency-cache");
823
+ const CACHE_FILE = path.join(CACHE_DIR, "messageids.json");
824
+ let lastCacheSave = 0;
825
+ const CACHE_SAVE_THROTTLE_MS = 5000; // Max 1 write per 5 sec
826
+
827
+ /**
828
+ * Load idempotency cache from disk if available and fresh
829
+ */
830
+ function loadIdempotencyCache(): void {
831
+ try {
832
+ if (!fs.existsSync(CACHE_FILE)) return;
833
+
834
+ const data = fs.readFileSync(CACHE_FILE, "utf-8");
835
+ const parsed = JSON.parse(data);
836
+ if (!parsed || typeof parsed !== "object") return;
837
+
838
+ const now = Date.now();
839
+ for (const [id, ts] of Object.entries(parsed)) {
840
+ const timestamp = Number(ts);
841
+ // Only load entries that are still within TTL
842
+ if (!isNaN(timestamp) && now - timestamp < MSG_ID_TTL_MS) {
843
+ seenMsgIds.set(id, timestamp);
844
+ }
845
+ }
846
+ } catch (err: any) {
847
+ // Silently skip if cache file is corrupted or unreadable
848
+ // Next write will overwrite it
849
+ }
850
+ }
851
+
852
+ /**
853
+ * Save idempotency cache to disk (throttled)
854
+ */
855
+ function saveIdempotencyCache(): void {
856
+ const now = Date.now();
857
+ if (now - lastCacheSave < CACHE_SAVE_THROTTLE_MS) {
858
+ return; // Skip this write, within throttle window
859
+ }
860
+ lastCacheSave = now;
861
+
862
+ try {
863
+ if (!fs.existsSync(CACHE_DIR)) {
864
+ fs.mkdirSync(CACHE_DIR, { recursive: true });
865
+ }
866
+ const obj: Record<string, number> = {};
867
+ for (const [id, ts] of seenMsgIds) {
868
+ obj[id] = ts;
869
+ }
870
+ fs.writeFileSync(CACHE_FILE, JSON.stringify(obj, null, 2), "utf-8");
871
+ } catch (err: any) {
872
+ // Silently skip if write fails; in-memory cache is still valid
873
+ }
874
+ }
875
+
876
+ /**
877
+ * Check if message has been seen before (idempotency check)
878
+ * Returns true if duplicate (should skip), false if new (should process)
879
+ */
721
880
  function isDuplicateMsgId(messageId: string): boolean {
722
881
  const now = Date.now();
723
882
 
724
- // Cleanup old entries if cache is too large
725
- if (seenMsgIds.size > MSG_ID_CACHE_MAX) {
883
+ // Prune entries older than TTL
884
+ for (const [id, ts] of seenMsgIds) {
885
+ if (now - ts > MSG_ID_TTL_MS) {
886
+ seenMsgIds.delete(id);
887
+ }
888
+ }
889
+
890
+ // If cache is still too large, evict oldest entries
891
+ if (seenMsgIds.size >= MSG_ID_CACHE_MAX) {
892
+ // Find and remove the oldest entry
893
+ let oldest = messageId;
894
+ let oldestTs = now;
726
895
  for (const [id, ts] of seenMsgIds) {
727
- if (now - ts > MSG_ID_TTL_MS) {
728
- seenMsgIds.delete(id);
896
+ if (ts < oldestTs) {
897
+ oldest = id;
898
+ oldestTs = ts;
729
899
  }
730
900
  }
901
+ if (oldest !== messageId) {
902
+ seenMsgIds.delete(oldest);
903
+ }
731
904
  }
732
905
 
733
906
  // Check if seen
@@ -738,6 +911,7 @@ function isDuplicateMsgId(messageId: string): boolean {
738
911
 
739
912
  // Mark as seen
740
913
  seenMsgIds.set(messageId, now);
914
+ saveIdempotencyCache(); // Throttled write
741
915
  return false;
742
916
  }
743
917
 
@@ -1780,12 +1954,16 @@ const threemaChannel = {
1780
1954
 
1781
1955
  export const id = "threema";
1782
1956
  export const name = "Threema Gateway";
1783
- export const version = "0.6.0";
1957
+ export const version = "0.6.6";
1784
1958
  export const description =
1785
1959
  "Threema messaging channel via Threema Gateway API (E2E encrypted, with media support)";
1786
1960
 
1787
1961
  export default function register(api: any) {
1788
1962
  try {
1963
+ // Load idempotency cache from disk (if available)
1964
+ loadIdempotencyCache();
1965
+ api.logger?.debug?.("Threema: idempotency cache loaded from disk");
1966
+
1789
1967
  const config = api.config as OpenClawConfig;
1790
1968
  const threemaCfg = getThreemaConfig(config);
1791
1969
  const runtime = api.runtime;
@@ -2000,35 +2178,80 @@ export default function register(api: any) {
2000
2178
  cfg: currentCfg,
2001
2179
  dispatcherOptions: {
2002
2180
  deliver: async (payload: any) => {
2003
- const text = payload.text ?? payload.body;
2004
- if (!text) return;
2005
- // Chunk long replies if needed
2006
- const limit = getThreemaConfig(currentCfg)?.textChunkLimit ?? 3500;
2007
- if (text.length <= limit) {
2008
- if (replyClient.isE2EEnabled) {
2009
- await replyClient.sendE2E(from, text);
2010
- } else {
2011
- await replyClient.sendSimple(from, text);
2012
- }
2013
- } else {
2014
- // Split into chunks at newline boundaries
2015
- const chunks: string[] = [];
2016
- let remaining = text;
2017
- while (remaining.length > 0) {
2018
- if (remaining.length <= limit) {
2019
- chunks.push(remaining);
2020
- break;
2181
+ // Check if reply contains audio that should be sent as voice note
2182
+ const isAudioAsVoice = payload.audioAsVoice === true && payload.mediaUrl;
2183
+
2184
+ if (isAudioAsVoice && payload.mediaUrl) {
2185
+ // Send as voice note
2186
+ try {
2187
+ if (replyClient.isE2EEnabled) {
2188
+ // Load audio from mediaUrl and send via sendVoiceNote
2189
+ const audioPath = payload.mediaUrl;
2190
+ if (fs.existsSync(audioPath)) {
2191
+ const audioBuffer = fs.readFileSync(audioPath);
2192
+ const mimeType = payload.mediaMimeType ?? "audio/aac";
2193
+ const caption = payload.text ?? payload.body ?? undefined;
2194
+ await replyClient.sendVoiceNote(from, audioBuffer, mimeType, caption);
2195
+ } else {
2196
+ api.logger?.warn?.(`Threema: audio file not found at ${audioPath}, falling back to text`);
2197
+ // Fallback to text if audio file not found
2198
+ const text = payload.text ?? payload.body;
2199
+ if (text) {
2200
+ await replyClient.sendE2E(from, text);
2201
+ }
2202
+ }
2203
+ } else {
2204
+ // Voice notes only work in E2E mode; fallback to text in basic mode
2205
+ api.logger?.info?.(`Threema: voice notes require E2E mode, sending text instead`);
2206
+ const text = payload.text ?? payload.body;
2207
+ if (text) {
2208
+ await replyClient.sendSimple(from, text);
2209
+ }
2210
+ }
2211
+ } catch (audioErr: any) {
2212
+ api.logger?.error?.(`Threema: error sending voice note: ${audioErr.message}`);
2213
+ // Fallback to text on error
2214
+ const text = payload.text ?? payload.body;
2215
+ if (text) {
2216
+ if (replyClient.isE2EEnabled) {
2217
+ await replyClient.sendE2E(from, text);
2218
+ } else {
2219
+ await replyClient.sendSimple(from, text);
2220
+ }
2021
2221
  }
2022
- let splitIdx = remaining.lastIndexOf("\n", limit);
2023
- if (splitIdx <= 0) splitIdx = limit;
2024
- chunks.push(remaining.slice(0, splitIdx));
2025
- remaining = remaining.slice(splitIdx).replace(/^\n/, "");
2026
2222
  }
2027
- for (const chunk of chunks) {
2223
+ } else {
2224
+ // Send as text (existing logic)
2225
+ const text = payload.text ?? payload.body;
2226
+ if (!text) return;
2227
+ // Chunk long replies if needed
2228
+ const limit = getThreemaConfig(currentCfg)?.textChunkLimit ?? 3500;
2229
+ if (text.length <= limit) {
2028
2230
  if (replyClient.isE2EEnabled) {
2029
- await replyClient.sendE2E(from, chunk);
2231
+ await replyClient.sendE2E(from, text);
2030
2232
  } else {
2031
- await replyClient.sendSimple(from, chunk);
2233
+ await replyClient.sendSimple(from, text);
2234
+ }
2235
+ } else {
2236
+ // Split into chunks at newline boundaries
2237
+ const chunks: string[] = [];
2238
+ let remaining = text;
2239
+ while (remaining.length > 0) {
2240
+ if (remaining.length <= limit) {
2241
+ chunks.push(remaining);
2242
+ break;
2243
+ }
2244
+ let splitIdx = remaining.lastIndexOf("\n", limit);
2245
+ if (splitIdx <= 0) splitIdx = limit;
2246
+ chunks.push(remaining.slice(0, splitIdx));
2247
+ remaining = remaining.slice(splitIdx).replace(/^\n/, "");
2248
+ }
2249
+ for (const chunk of chunks) {
2250
+ if (replyClient.isE2EEnabled) {
2251
+ await replyClient.sendE2E(from, chunk);
2252
+ } else {
2253
+ await replyClient.sendSimple(from, chunk);
2254
+ }
2032
2255
  }
2033
2256
  }
2034
2257
  }
@@ -2179,33 +2402,78 @@ export default function register(api: any) {
2179
2402
  cfg: currentCfg,
2180
2403
  dispatcherOptions: {
2181
2404
  deliver: async (payload: any) => {
2182
- const text = payload.text ?? payload.body;
2183
- if (!text) return;
2184
- const limit = getThreemaConfig(currentCfg)?.textChunkLimit ?? 3500;
2185
- if (text.length <= limit) {
2186
- if (replyClient.isE2EEnabled) {
2187
- await replyClient.sendE2E(from, text);
2188
- } else {
2189
- await replyClient.sendSimple(from, text);
2190
- }
2191
- } else {
2192
- const chunks: string[] = [];
2193
- let remaining = text;
2194
- while (remaining.length > 0) {
2195
- if (remaining.length <= limit) {
2196
- chunks.push(remaining);
2197
- break;
2405
+ // Check if reply contains audio that should be sent as voice note
2406
+ const isAudioAsVoice = payload.audioAsVoice === true && payload.mediaUrl;
2407
+
2408
+ if (isAudioAsVoice && payload.mediaUrl) {
2409
+ // Send as voice note
2410
+ try {
2411
+ if (replyClient.isE2EEnabled) {
2412
+ // Load audio from mediaUrl and send via sendVoiceNote
2413
+ const audioPath = payload.mediaUrl;
2414
+ if (fs.existsSync(audioPath)) {
2415
+ const audioBuffer = fs.readFileSync(audioPath);
2416
+ const mimeType = payload.mediaMimeType ?? "audio/aac";
2417
+ const caption = payload.text ?? payload.body ?? undefined;
2418
+ await replyClient.sendVoiceNote(from, audioBuffer, mimeType, caption);
2419
+ } else {
2420
+ api.logger?.warn?.(`Threema: audio file not found at ${audioPath}, falling back to text`);
2421
+ // Fallback to text if audio file not found
2422
+ const text = payload.text ?? payload.body;
2423
+ if (text) {
2424
+ await replyClient.sendE2E(from, text);
2425
+ }
2426
+ }
2427
+ } else {
2428
+ // Voice notes only work in E2E mode; fallback to text in basic mode
2429
+ api.logger?.info?.(`Threema: voice notes require E2E mode, sending text instead`);
2430
+ const text = payload.text ?? payload.body;
2431
+ if (text) {
2432
+ await replyClient.sendSimple(from, text);
2433
+ }
2434
+ }
2435
+ } catch (audioErr: any) {
2436
+ api.logger?.error?.(`Threema: error sending voice note: ${audioErr.message}`);
2437
+ // Fallback to text on error
2438
+ const text = payload.text ?? payload.body;
2439
+ if (text) {
2440
+ if (replyClient.isE2EEnabled) {
2441
+ await replyClient.sendE2E(from, text);
2442
+ } else {
2443
+ await replyClient.sendSimple(from, text);
2444
+ }
2198
2445
  }
2199
- let splitIdx = remaining.lastIndexOf("\n", limit);
2200
- if (splitIdx <= 0) splitIdx = limit;
2201
- chunks.push(remaining.slice(0, splitIdx));
2202
- remaining = remaining.slice(splitIdx).replace(/^\n/, "");
2203
2446
  }
2204
- for (const chunk of chunks) {
2447
+ } else {
2448
+ // Send as text (existing logic)
2449
+ const text = payload.text ?? payload.body;
2450
+ if (!text) return;
2451
+ const limit = getThreemaConfig(currentCfg)?.textChunkLimit ?? 3500;
2452
+ if (text.length <= limit) {
2205
2453
  if (replyClient.isE2EEnabled) {
2206
- await replyClient.sendE2E(from, chunk);
2454
+ await replyClient.sendE2E(from, text);
2207
2455
  } else {
2208
- await replyClient.sendSimple(from, chunk);
2456
+ await replyClient.sendSimple(from, text);
2457
+ }
2458
+ } else {
2459
+ const chunks: string[] = [];
2460
+ let remaining = text;
2461
+ while (remaining.length > 0) {
2462
+ if (remaining.length <= limit) {
2463
+ chunks.push(remaining);
2464
+ break;
2465
+ }
2466
+ let splitIdx = remaining.lastIndexOf("\n", limit);
2467
+ if (splitIdx <= 0) splitIdx = limit;
2468
+ chunks.push(remaining.slice(0, splitIdx));
2469
+ remaining = remaining.slice(splitIdx).replace(/^\n/, "");
2470
+ }
2471
+ for (const chunk of chunks) {
2472
+ if (replyClient.isE2EEnabled) {
2473
+ await replyClient.sendE2E(from, chunk);
2474
+ } else {
2475
+ await replyClient.sendSimple(from, chunk);
2476
+ }
2209
2477
  }
2210
2478
  }
2211
2479
  }
@@ -2,7 +2,7 @@
2
2
  "id": "threema",
3
3
  "name": "Threema Gateway",
4
4
  "description": "Threema messaging channel via Threema Gateway API (E2E encrypted)",
5
- "version": "0.6.4",
5
+ "version": "0.6.6",
6
6
  "channels": [
7
7
  "threema"
8
8
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "openclaw-threema",
3
- "version": "0.6.4",
3
+ "version": "0.6.6",
4
4
  "description": "Threema Gateway channel plugin for OpenClaw",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",