whatsapp_notifier 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,458 @@
1
+ // Inbound media store (v0.7.0)
2
+ //
3
+ // Downloads customer media (images, voice notes, documents) to disk and serves
4
+ // it back to the host over GET /media/:userId/:messageId. Kept separate from
5
+ // index.ts (which calls Bun.serve() at import time) so every piece — id
6
+ // sanitization, the size/type policy, the TTL sweep, the download pipeline and
7
+ // the route responses — can be unit-tested without booting Chromium.
8
+ //
9
+ // Layout: <media root>/<safeUser>/<safeMessageId> holds the raw bytes and
10
+ // <safeMessageId>~meta.json the sidecar { mime, filename, size, capturedAt }.
11
+ // The '~' sits OUTSIDE the sanitize charset, so a hostile message id ending in
12
+ // ".json" can never name-collide with (or overwrite) another message's sidecar
13
+ // — data files and sidecars live in disjoint namespaces by construction. The
14
+ // media root is <SESSION_DIR>/media in production (survives restarts that wipe
15
+ // the in-memory inbound queues); tests point it at a tmp dir via
16
+ // configureMedia, mirroring configureInbound.
17
+
18
+ import {
19
+ existsSync,
20
+ mkdirSync,
21
+ readFileSync,
22
+ writeFileSync,
23
+ rmSync,
24
+ readdirSync,
25
+ statSync
26
+ } from 'fs';
27
+ import { join, resolve, sep } from 'path';
28
+ import { createHash, timingSafeEqual } from 'crypto';
29
+
30
// Sidecar record stored beside each payload as <messageId>~meta.json.
export interface MediaMeta {
  // MIME type recorded when the payload was written.
  mime: string;
  // Original filename, or null when none was supplied.
  filename: string | null;
  // Payload length in bytes.
  size: number;
  // Time of the write, in epoch milliseconds.
  capturedAt: number;
}
36
+
37
+ export type MediaSkipReason = 'unsupported_type' | 'too_large' | 'disk_full';
38
+ export type MediaFailureReason = MediaSkipReason | 'expired' | 'download_failed' | 'invalid_id';
39
+
40
+ // The verdict captureInbound merges into the inbound payload. Structurally
41
+ // compatible with inbound.ts's optional media fields — every failure mode
42
+ // still surfaces the message itself, just without bytes.
43
// Media verdict attached to an inbound message. `mediaError` is filled in by
// the 'unavailable' results; the mime/filename/size fields by the 'available'
// ones.
export interface MediaResolution {
  // Whether the bytes can be fetched from the store.
  mediaStatus: 'available' | 'unavailable';
  // Why the bytes are missing.
  mediaError?: MediaFailureReason;
  // MIME type of the stored payload.
  mediaMime?: string;
  // Filename of the stored payload, when one is known.
  mediaFilename?: string;
  // Stored payload length in bytes.
  mediaSize?: number;
}
50
+
51
+ export type MediaPolicy = { download: true } | { download: false; reason: MediaSkipReason };
52
+
53
+ // Inline media (image / voice note) caps at WhatsApp's own 16MB ceiling;
54
+ // documents get a separate, env-tunable cap. Envs are read lazily (not frozen
55
+ // at import) so the limits can be retuned per deployment and per test.
56
+ export const INLINE_MEDIA_MAX_BYTES = 16 * 1024 * 1024;
57
+ export const MEDIA_DOWNLOAD_TIMEOUT_MS = 30000;
58
+
59
+ // Malformed env values ("50GB", "2 days") parse to NaN, and every NaN
60
+ // comparison is false — the TTL sweep and the disk cap would silently
61
+ // disable themselves. Fall back to the default instead.
62
// Reads a numeric limit from the environment.
//
// Returns `fallback` when the variable is unset, blank, non-numeric,
// non-finite or negative. An explicit "0" is honoured as 0.
function envLimit(name: string, fallback: number): number {
  const raw = process.env[name]?.trim();
  // Number('') and Number('   ') are 0, not NaN: a whitespace-only value must
  // not silently turn a TTL or a cap into zero.
  if (!raw) return fallback;
  const parsed = Number(raw);
  // A negative byte count or duration is meaningless and would invert every
  // comparison made against it.
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
}
66
+
67
// Retention window for stored media in milliseconds, read from
// WHATSAPP_MEDIA_TTL_MS on every call.
export function mediaTtlMs(): number {
  const defaultTtlMs = 48 * 60 * 60 * 1000; // 48h
  return envLimit('WHATSAPP_MEDIA_TTL_MS', defaultTtlMs);
}
70
+
71
// Per-file size cap for documents in bytes, read from
// WHATSAPP_MEDIA_MAX_BYTES on every call.
export function maxDocumentBytes(): number {
  const defaultCapBytes = 25 * 1024 * 1024; // 25MB
  return envLimit('WHATSAPP_MEDIA_MAX_BYTES', defaultCapBytes);
}
74
+
75
// Total on-disk budget for the media store in bytes, read from
// WHATSAPP_MEDIA_MAX_DISK_BYTES on every call.
export function maxDiskBytes(): number {
  const defaultBudgetBytes = 5 * 1024 * 1024 * 1024; // 5GB
  return envLimit('WHATSAPP_MEDIA_MAX_DISK_BYTES', defaultBudgetBytes);
}
78
+
79
+ // ── Root + cap accounting ──
80
+
81
+ // index.ts wires this to <SESSION_BASE_DIR>/media; tests to a tmp dir.
82
+ let mediaRootResolver: () => string = () => './media';
83
+ // Total payload bytes on disk, kept incrementally by write/delete and
84
+ // recomputed by the sweep, so downloadPolicy's disk-full check is O(1).
85
+ let cachedDiskBytes: number | null = null;
86
+
87
// Points the store at a new media root. The cached disk total belongs to the
// previous root, so it is dropped and recomputed on the next read.
export function configureMedia(rootResolver: () => string) {
  cachedDiskBytes = null;
  mediaRootResolver = rootResolver;
}
91
+
92
// Payload bytes currently on disk. Served from the cache when it is warm,
// otherwise computed by walking the media root and cached for later calls.
export function mediaDiskBytes(): number {
  if (cachedDiskBytes !== null) return cachedDiskBytes;
  cachedDiskBytes = computeDiskBytes();
  return cachedDiskBytes;
}
96
+
97
// Walks <media root>/<user>/ and sums the size of every payload file.
// Sidecars are skipped.
//
// Returns 0 when the media root cannot be listed (e.g. not created yet). A
// user directory that vanishes or cannot be read is skipped on its own, so
// one bad directory no longer makes the total under-count every directory
// listed after it.
function computeDiskBytes(): number {
  let userDirs: string[];
  try {
    const root = mediaRootResolver();
    userDirs = readdirSync(root, { withFileTypes: true })
      .filter((entry) => entry.isDirectory())
      .map((entry) => join(root, entry.name));
  } catch (_) {
    return 0; // media root not created yet → nothing stored
  }

  let total = 0;
  for (const dir of userDirs) {
    let files: string[];
    try {
      files = readdirSync(dir);
    } catch (_) {
      continue; // directory vanished or is unreadable — keep counting the rest
    }
    for (const file of files) {
      if (isSidecarName(file)) continue; // sidecars are negligible
      try {
        const stats = statSync(join(dir, file));
        // Payloads are regular files; a stray subdirectory is not payload.
        if (stats.isFile()) total += stats.size;
      } catch (_) { /* raced a delete */ }
    }
  }
  return total;
}
112
+
113
+ // ── Id sanitization + path layout ──
114
+
115
+ // Both route params become path segments, so they must be reduced to a safe
116
+ // charset. WhatsApp message ids ("true_9199...@c.us_ABC") and our numeric user
117
+ // ids fit [A-Za-z0-9@._-] untouched; anything else is hostile or garbage.
118
// Reduces a route parameter to the path-safe charset [A-Za-z0-9@._-].
//
// Returns the stripped string, or null when the input is not a string, when
// nothing survives stripping, when the result is longer than 200 characters,
// or when it consists only of dots.
export function sanitizeId(raw: unknown): string | null {
  if (typeof raw !== 'string') return null;

  const stripped = raw.replace(/[^A-Za-z0-9@._-]/g, '');
  const lengthOk = stripped.length >= 1 && stripped.length <= 200;
  // '.', '..', … are path-segment hazards
  const dotsOnly = stripped.split('').every((ch) => ch === '.');

  return lengthOk && !dotsOnly ? stripped : null;
}
125
+
126
+ // Sidecar names end with a suffix whose '~' is outside the sanitize charset:
127
+ // no sanitized message id can ever produce (or overwrite) a sidecar name, so
128
+ // the accounting/sweep/orphan logic can tell the two apart by name alone.
129
+ const SIDECAR_SUFFIX = '~meta.json';
130
+
131
// True when a directory entry's name ends with the sidecar suffix.
function isSidecarName(file: string): boolean {
  const tail = file.slice(-SIDECAR_SUFFIX.length);
  return tail === SIDECAR_SUFFIX;
}
134
+
135
// Maps a (userId, messageId) pair to absolute store paths: the per-user
// directory, the payload file and its sidecar.
//
// Returns null when either id fails sanitizeId, or when a resolved path is
// not strictly inside its parent.
export function mediaPaths(
  userId: string,
  messageId: string
): { dir: string; dataPath: string; metaPath: string } | null {
  const userSegment = sanitizeId(userId);
  const messageSegment = sanitizeId(messageId);
  if (!userSegment || !messageSegment) return null;

  const root = resolve(mediaRootResolver());
  const dir = resolve(root, userSegment);
  const dataPath = resolve(dir, messageSegment);

  // Second line of defence: the sanitizer alone is not trusted to keep the
  // paths under the media root.
  const dirInsideRoot = dir.startsWith(root + sep);
  const fileInsideDir = dataPath.startsWith(dir + sep);
  if (!(dirInsideRoot && fileInsideDir)) return null;

  const metaPath = `${dataPath}${SIDECAR_SUFFIX}`;
  return { dir, dataPath, metaPath };
}
151
+
152
+ // ── Store primitives ──
153
+
154
// Persists a payload and its sidecar for (userId, messageId).
//
// Returns true when both files were written, and false when the ids are
// invalid or any filesystem step failed. The failure is logged, never thrown.
//
// Accounting: when the write replaces a payload already on disk, the cached
// total moves by the size difference instead of growing by the full new size.
// On failure the half-written pair is removed best-effort and the cache is
// invalidated, so the next read recounts from disk.
export function writeMedia(
  userId: string,
  messageId: string,
  data: Uint8Array,
  meta: { mime: string; filename?: string | null }
): boolean {
  const paths = mediaPaths(userId, messageId);
  if (!paths) return false;

  // Size of any payload this write is about to replace (0 when none).
  let replacedBytes = 0;
  try {
    replacedBytes = statSync(paths.dataPath).size;
  } catch (_) { /* nothing stored under this id yet */ }

  try {
    mkdirSync(paths.dir, { recursive: true });
    writeFileSync(paths.dataPath, data);
    const sidecar: MediaMeta = {
      mime: meta.mime,
      filename: meta.filename ?? null,
      size: data.byteLength,
      capturedAt: Date.now()
    };
    writeFileSync(paths.metaPath, JSON.stringify(sidecar));
    if (cachedDiskBytes !== null) {
      cachedDiskBytes = Math.max(0, cachedDiskBytes + data.byteLength - replacedBytes);
    }
    return true;
  } catch (e) {
    console.error(`Failed to persist media ${messageId} for ${userId}`, e);
    // A payload without its sidecar reads as absent (mediaExists needs both),
    // so drop whatever half of the pair made it to disk.
    try {
      rmSync(paths.dataPath, { force: true });
      rmSync(paths.metaPath, { force: true });
    } catch (_) { /* best effort */ }
    // The on-disk state is uncertain now; force a recount on the next read.
    cachedDiskBytes = null;
    return false;
  }
}
179
+
180
+ // Returns the sidecar when BOTH the bytes and the sidecar are present —
181
+ // captureInbound uses this to skip re-downloading on a reconnect backfill.
182
// Looks up the sidecar for (userId, messageId).
//
// Returns the parsed metadata only when the payload and the sidecar are both
// on disk and the sidecar parses as JSON; otherwise null. Missing or wrongly
// typed fields are defaulted rather than rejected.
export function mediaExists(userId: string, messageId: string): MediaMeta | null {
  const paths = mediaPaths(userId, messageId);
  if (!paths) return null;

  try {
    const bothPresent = existsSync(paths.dataPath) && existsSync(paths.metaPath);
    if (!bothPresent) return null;

    const sidecar = JSON.parse(readFileSync(paths.metaPath, 'utf8'));
    const mime = typeof sidecar?.mime === 'string' ? sidecar.mime : 'application/octet-stream';
    const filename = typeof sidecar?.filename === 'string' ? sidecar.filename : null;
    const size = Number(sidecar?.size) || 0;
    const capturedAt = Number(sidecar?.capturedAt) || 0;
    return { mime, filename, size, capturedAt };
  } catch (_) {
    // Unreadable or corrupt sidecar is reported the same as "not stored".
    return null;
  }
}
198
+
199
// Loads a stored payload together with its sidecar metadata.
//
// Returns null when the media is not stored (see mediaExists) or when the
// payload cannot be read.
export function readMedia(userId: string, messageId: string): { data: Buffer; meta: MediaMeta } | null {
  const meta = mediaExists(userId, messageId);
  if (!meta) return null;

  try {
    const paths = mediaPaths(userId, messageId);
    if (!paths) return null;
    const data = readFileSync(paths.dataPath);
    return { data, meta };
  } catch (_) {
    // The file can disappear between the existence check and this read.
    return null;
  }
}
208
+
209
+ // Logout privacy contract: stored media belongs to the OLD pairing. POST
210
+ // /logout wipes the session dir and the inbound queue, but without this the
211
+ // customer photos/documents stayed on disk — fetchable via GET /media — for
212
+ // up to the 48h TTL after the operator severed the pairing. Same sanitize +
213
+ // containment rules as mediaPaths; recomputing the cached disk total keeps
214
+ // downloadPolicy's cap check honest after a bulk removal.
215
// Removes every stored payload and sidecar for one user by deleting that
// user's directory, then recounts the cached disk total.
//
// Returns false when the id is invalid, when the resolved directory is not
// inside the media root, or when the removal throws (logged); true otherwise.
export function clearUserMedia(userId: string): boolean {
  const userSegment = sanitizeId(userId);
  if (!userSegment) return false;

  const root = resolve(mediaRootResolver());
  const target = resolve(root, userSegment);
  const contained = target.startsWith(root + sep);
  if (!contained) return false;

  let wiped = true;
  try {
    rmSync(target, { recursive: true, force: true });
  } catch (e) {
    console.error(`Failed to clear media dir for ${userId}`, e);
    wiped = false;
  }
  if (wiped) cachedDiskBytes = computeDiskBytes();
  return wiped;
}
230
+
231
+ // Idempotent: deleting media that was never stored (or already swept) is fine.
232
// Deletes the payload and sidecar for (userId, messageId).
//
// Returns true when both removals completed, including when nothing was
// stored. Returns false when the ids are invalid or a removal threw (logged).
//
// Accounting uses the payload's real size on disk, not the sidecar's `size`
// field, so a payload whose sidecar is missing or corrupt is still subtracted
// from the cached total.
export function deleteMedia(userId: string, messageId: string): boolean {
  const paths = mediaPaths(userId, messageId);
  if (!paths) return false;

  // Measure before removing; 0 when there is no payload to remove.
  let payloadBytes = 0;
  try {
    payloadBytes = statSync(paths.dataPath).size;
  } catch (_) { /* already gone or never stored */ }

  try {
    rmSync(paths.dataPath, { force: true });
    rmSync(paths.metaPath, { force: true });
    if (cachedDiskBytes !== null) {
      cachedDiskBytes = Math.max(0, cachedDiskBytes - payloadBytes);
    }
    return true;
  } catch (e) {
    console.error(`Failed to delete media ${messageId} for ${userId}`, e);
    // One of the two removals may have gone through; recount on the next read.
    cachedDiskBytes = null;
    return false;
  }
}
248
+
249
+ // ── Download policy ──
250
+
251
+ // Stickers and videos are deliberately not downloaded (no CMS rendering need,
252
+ // videos routinely blow the cap); view-once media must not be persisted at
253
+ // all — the sender chose ephemerality.
254
+ const DOWNLOADABLE_TYPES = new Set(['image', 'audio', 'ptt', 'document']);
255
+
256
// Decides whether a media item of the given type and size may be downloaded.
//
// Checks run in a fixed order: type/view-once, then the per-file cap, then
// the store-wide disk budget. The first failing check supplies the reason.
export function downloadPolicy(type: string, size: number, viewOnce = false): MediaPolicy {
  const refuse = (reason: MediaSkipReason): MediaPolicy => ({ download: false, reason });

  if (viewOnce || !DOWNLOADABLE_TYPES.has(type)) return refuse('unsupported_type');

  const perFileCap = type === 'document' ? maxDocumentBytes() : INLINE_MEDIA_MAX_BYTES;
  if (size > perFileCap) return refuse('too_large');

  const projectedBytes = mediaDiskBytes() + size;
  if (projectedBytes > maxDiskBytes()) return refuse('disk_full');

  return { download: true };
}
263
+
264
+ // ── TTL sweep ──
265
+
266
+ // Remove media older than the TTL (the host attaches what it wants well within
267
+ // 48h; everything else is abandoned) plus orphaned sidecars, then refresh the
268
+ // disk-cap accounting. index.ts runs this on the existing reaper interval.
269
// Removes every payload older than the TTL, together with its sidecar, and
// every sidecar whose payload is gone. Then recounts the cached disk total.
//
// `nowMs` is the reference time, defaulting to the current time. Returns the
// number of expired payloads removed.
//
// Failures are contained: an entry that cannot be removed, or a user
// directory that cannot be listed, is logged and skipped, so one bad entry no
// longer stops the sweep for every other user.
export function sweepExpired(nowMs = Date.now()): number {
  const ttl = mediaTtlMs();

  let userDirs: string[] = [];
  try {
    const root = mediaRootResolver();
    userDirs = readdirSync(root, { withFileTypes: true })
      .filter((entry) => entry.isDirectory())
      .map((entry) => join(root, entry.name));
  } catch (_) { /* media root not created yet → nothing to sweep */ }

  let removed = 0;
  for (const dir of userDirs) {
    try {
      removed += sweepUserDir(dir, nowMs, ttl);
    } catch (e) {
      console.error(`Failed to sweep media dir ${dir}`, e);
    }
  }

  cachedDiskBytes = computeDiskBytes();
  return removed;
}

// Sweeps one user directory: expired payloads first, then orphaned sidecars.
// Returns how many expired payloads were removed.
function sweepUserDir(dir: string, nowMs: number, ttl: number): number {
  let removed = 0;

  for (const file of readdirSync(dir)) {
    if (isSidecarName(file)) continue;
    const dataPath = join(dir, file);
    const metaPath = `${dataPath}${SIDECAR_SUFFIX}`;
    if (nowMs - capturedAtFor(dataPath, metaPath) > ttl) {
      try {
        rmSync(dataPath, { force: true });
        rmSync(metaPath, { force: true });
        removed += 1;
      } catch (e) {
        console.error(`Failed to sweep media file ${dataPath}`, e);
      }
    }
  }

  // Sidecars whose payload is already gone are garbage regardless of age.
  for (const file of readdirSync(dir)) {
    if (!isSidecarName(file)) continue;
    const payloadName = file.slice(0, -SIDECAR_SUFFIX.length);
    if (existsSync(join(dir, payloadName))) continue;
    try {
      rmSync(join(dir, file), { force: true });
    } catch (e) {
      console.error(`Failed to remove orphaned sidecar ${join(dir, file)}`, e);
    }
  }

  return removed;
}
297
+
298
// Capture time (epoch ms) used to age a payload.
//
// Tries the sidecar's `capturedAt` first, accepted only when it is finite and
// positive. Falls back to the payload's mtime, and to 0 when the payload
// cannot be stat'ed, which makes the entry look expired.
function capturedAtFor(dataPath: string, metaPath: string): number {
  const fromSidecar = (): number | null => {
    try {
      const parsed = JSON.parse(readFileSync(metaPath, 'utf8'));
      const stamp = Number(parsed?.capturedAt);
      return Number.isFinite(stamp) && stamp > 0 ? stamp : null;
    } catch (_) {
      return null; // missing or corrupt sidecar
    }
  };

  const recorded = fromSidecar();
  if (recorded !== null) return recorded;

  try {
    return statSync(dataPath).mtimeMs;
  } catch (_) {
    return 0;
  }
}
310
+
311
+ // ── Download pipeline ──
312
+
313
+ // Policy pre-check on the declared size → bounded downloadMedia() → policy
314
+ // re-check on the actual bytes → persist. Every failure mode returns a typed
315
+ // 'unavailable' verdict instead of throwing: the message itself must always
316
+ // reach the host, with or without its bytes.
317
// Produces the media verdict for one inbound message.
//
// Steps, in order: derive the message id, reuse a copy already on disk,
// policy-check the declared size, download with a timeout, policy-check the
// decoded bytes, persist. Every failure comes back as an 'unavailable'
// verdict; nothing here is expected to throw to the caller.
//
// `deps.timeoutMs` overrides MEDIA_DOWNLOAD_TIMEOUT_MS.
export async function resolveMediaForMessage(
  userId: string,
  msg: any,
  deps: { timeoutMs?: number } = {}
): Promise<MediaResolution> {
  const unavailable = (mediaError: MediaFailureReason): MediaResolution => ({
    mediaStatus: 'unavailable',
    mediaError
  });

  // Serialized WhatsApp id when present, otherwise "<from>-<timestamp>".
  const messageId = (msg?.id && msg.id._serialized) || `${msg?.from || ''}-${msg?.timestamp}`;
  if (!mediaPaths(userId, messageId)) return unavailable('invalid_id');

  // Already stored: answer from disk without downloading again.
  const stored = mediaExists(userId, messageId);
  if (stored) return availableResolution(stored.mime, stored.filename, stored.size);

  const kind = msg?.type || 'chat';
  const ephemeral = !!(msg?._data?.isViewOnce);
  // 0 stands for "size not declared"; the real size is checked after download.
  const advertisedBytes = Number(msg?._data?.size) || 0;
  const preCheck = downloadPolicy(kind, advertisedBytes, ephemeral);
  if (!preCheck.download) return unavailable(preCheck.reason);

  let fetched: any;
  try {
    // The async wrapper turns a synchronous throw into a rejection.
    fetched = await withTimeout(
      (async () => msg.downloadMedia())(),
      deps.timeoutMs ?? MEDIA_DOWNLOAD_TIMEOUT_MS
    );
  } catch (e) {
    console.error(`Media download failed for ${userId}/${messageId}`, e);
    return unavailable('download_failed');
  }
  // No result, or a result without data, is reported as 'expired'.
  if (!fetched?.data) return unavailable('expired');

  const bytes = Buffer.from(fetched.data, 'base64');
  // Same policy again, this time against the decoded length.
  const postCheck = downloadPolicy(kind, bytes.byteLength, ephemeral);
  if (!postCheck.download) return unavailable(postCheck.reason);

  const mime = fetched.mimetype || msg?._data?.mimetype || 'application/octet-stream';
  const filename = fetched.filename || msg?._data?.filename || null;
  const persisted = writeMedia(userId, messageId, bytes, { mime, filename });
  if (!persisted) return unavailable('download_failed');

  return availableResolution(mime, filename, bytes.byteLength);
}
366
+
367
// Builds the 'available' verdict. `mediaFilename` is included only when a
// non-empty filename is given.
function availableResolution(mime: string, filename: string | null, size: number): MediaResolution {
  const verdict: MediaResolution = { mediaStatus: 'available', mediaMime: mime };
  if (filename) verdict.mediaFilename = filename;
  verdict.mediaSize = size;
  return verdict;
}
375
+
376
// Settles with `promise`, unless `ms` milliseconds pass first, in which case
// it rejects with a timeout Error. The timer is cleared once `promise`
// settles. The underlying operation is not cancelled.
function withTimeout<T>(promise: Promise<T>, ms: number): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const deadline = new Promise<never>((_resolve, reject) => {
    timer = setTimeout(
      () => reject(new Error(`media download timed out after ${ms}ms`)),
      ms
    );
  });
  return Promise.race([promise, deadline]).finally(() => clearTimeout(timer));
}
388
+
389
+ // ── Route responses ──
390
+ //
391
+ // Full Response builders for GET/DELETE /media/:userId/:messageId so index.ts
392
+ // stays glue-only and the route contract is unit-testable. Neither handler may
393
+ // ever create a WhatsApp client (same fast-reject rule as GET /inbound): they
394
+ // touch only the on-disk store.
395
+
396
+ // X-WA-Token check shared by both /media routes — ENFORCED ONLY when the
397
+ // service has WHATSAPP_WEBHOOK_TOKEN set (mirrors the host's webhook receiver,
398
+ // which reuses the same shared secret in the other direction). Hashing both
399
+ // sides first gives constant-length inputs for the timing-safe comparison.
400
// Checks the caller's token against the configured one.
//
// With no expected token configured, every request passes. Otherwise the
// SHA-256 digests of both values are compared with timingSafeEqual, which
// needs equal-length inputs. A missing `provided` is treated as ''.
export function verifyMediaToken(provided: string | undefined, expected: string | undefined): boolean {
  if (!expected) return true;
  const digest = (value: string): Buffer => createHash('sha256').update(value).digest();
  return timingSafeEqual(digest(provided ?? ''), digest(expected));
}
406
+
407
+ // Keep stored filenames from smuggling header syntax (quotes, CR/LF) into
408
+ // Content-Disposition.
409
// Replaces every UTF-16 code unit outside [A-Za-z0-9@. _-] with '_', so the
// result can sit inside a quoted Content-Disposition filename.
function headerSafeFilename(name: string): string {
  const unsafeChar = /[^A-Za-z0-9@. _-]/g;
  return name.replace(unsafeChar, () => '_');
}
412
+
413
// A well-formed media type with optional parameters, e.g. "image/jpeg" or
// "audio/ogg; codecs=opus". Printable ASCII only, so CR/LF and other control
// characters can never match.
const SAFE_MEDIA_TYPE =
  /^[\w.+-]+\/[\w.+-]+(?:[ \t]*;[ \t]*[\w.-]+=(?:[\w.+-]+|"[\x20\x21\x23-\x5B\x5D-\x7E]*"))*$/;

// Returns the stored MIME when it is safe to emit as a Content-Type header
// value, otherwise the generic binary type. The MIME originates from the
// message sender, so it gets the same distrust as the stored filename.
function headerSafeMime(mime: string): string {
  return SAFE_MEDIA_TYPE.test(mime) ? mime : 'application/octet-stream';
}

// Builds the response for GET /media/:userId/:messageId.
//
// 401 when the token check fails, 404 when readMedia finds nothing (unknown,
// removed and invalid ids are indistinguishable to the caller), otherwise 200
// with the payload as an attachment.
export function mediaGetResponse(
  userId: string,
  messageId: string,
  token: string | undefined,
  expectedToken: string | undefined
): Response {
  if (!verifyMediaToken(token, expectedToken)) {
    return Response.json({ error: 'unauthorized' }, { status: 401 });
  }
  const found = readMedia(userId, messageId); // sanitizes both ids itself
  if (!found) {
    return Response.json({ error: 'not_found' }, { status: 404 });
  }
  return new Response(found.data, {
    status: 200,
    headers: {
      // A malformed stored MIME would make the header constructor throw and
      // fail the whole request; fall back to octet-stream instead.
      'Content-Type': headerSafeMime(found.meta.mime),
      'Content-Length': String(found.data.byteLength),
      'Content-Disposition': found.meta.filename
        ? `attachment; filename="${headerSafeFilename(found.meta.filename)}"`
        : 'attachment'
    }
  });
}
439
+
440
+ // Idempotent by contract: the host calls this after attaching the bytes, and a
441
+ // retry (or a TTL sweep racing it) must not turn into an error.
442
// Builds the response for DELETE /media/:userId/:messageId.
//
// 401 when the token check fails. Otherwise the delete is attempted and the
// answer is { success: true } whatever deleteMedia returned.
export function mediaDeleteResponse(
  userId: string,
  messageId: string,
  token: string | undefined,
  expectedToken: string | undefined
): Response {
  const authorized = verifyMediaToken(token, expectedToken);
  if (authorized) {
    deleteMedia(userId, messageId);
    return Response.json({ success: true });
  }
  return Response.json({ error: 'unauthorized' }, { status: 401 });
}
454
+
455
+ // Test helper: wipe in-memory state between examples (mirrors resetInboundState).
456
// Drops the cached disk total so the next mediaDiskBytes() call recounts from
// disk. The configured media root is left as it is.
export function resetMediaState(): void {
  cachedDiskBytes = null;
}
@@ -1,4 +1,4 @@
1
1
  module WhatsAppNotifier
2
- VERSION = "0.6.0"
2
+ VERSION = "0.7.0"
3
3
 
4
4
  end
@@ -6,6 +6,30 @@ module WhatsAppNotifier
6
6
  class WebAdapter
7
7
  DEFAULT_OPEN_TIMEOUT = 5
8
8
  DEFAULT_READ_TIMEOUT = 30
9
+ # Media bytes can be tens of MB over a slow link — give the binary fetch a
10
+ # longer read window than the JSON control plane.
11
+ MEDIA_OPEN_TIMEOUT = 5
12
+ MEDIA_READ_TIMEOUT = 60
13
+
14
+ HTTP_CLASSES = {
15
+ post: Net::HTTP::Post,
16
+ get: Net::HTTP::Get,
17
+ delete: Net::HTTP::Delete
18
+ }.freeze
19
+
20
+ # Optional inbound keys introduced by the 0.7.0 service (media verdict +
21
+ # sender display name). Mapped ONLY when the wire payload carries them, so
22
+ # hosts can key-gate on has_media presence: a missing key means "0.6.0
23
+ # service, no media support", while has_media: false means "text message".
24
+ INBOUND_OPTIONAL_KEYS = {
25
+ has_media: %w[hasMedia has_media],
26
+ media_status: %w[mediaStatus media_status],
27
+ media_error: %w[mediaError media_error],
28
+ media_mime: %w[mediaMime media_mime],
29
+ media_filename: %w[mediaFilename media_filename],
30
+ media_size: %w[mediaSize media_size],
31
+ sender_name: %w[senderName sender_name]
32
+ }.freeze
9
33
 
10
34
  def self.default_base_url
11
35
  ENV["WHATSAPP_NOTIFIER_SERVICE_URL"] || ENV["WHATSAPP_SERVICE_URL"] || "http://127.0.0.1:3001"
@@ -60,15 +84,40 @@ module WhatsAppNotifier
60
84
  user_id = user_id_from(metadata)
61
85
  response = request(:get, "/inbound/#{user_id}")
62
86
  raw = response.is_a?(Hash) ? response["messages"] : response
63
- Array(raw).map do |m|
64
- {
65
- from: m["from"],
66
- body: m["body"],
67
- message_id: m["messageId"] || m["message_id"],
68
- timestamp: m["timestamp"],
69
- type: m["type"]
70
- }
71
- end
87
+ Array(raw).map { |m| map_inbound_message(m) }
88
+ end
89
+
90
+ # Fetches the raw bytes of a downloaded inbound media file. Returns
91
+ # { body:, mime:, filename:, size: } or nil when the service has no copy
92
+ # (never downloaded, swept by TTL, or already deleted).
93
+ #
94
+ # Deliberately NOT routed through #request: that path JSON-parses the
95
+ # response body (and host apps are known to patch it further), which would
96
+ # corrupt binary payloads.
97
+ def fetch_media(message_id:, metadata: {})
98
+ user_id = user_id_from(metadata)
99
+ res = binary_get("/media/#{user_id}/#{path_id(message_id)}")
100
+ return nil if res.code.to_s == "404"
101
+ raise "service request failed (#{res.code}): #{res.body}" unless res.is_a?(Net::HTTPSuccess)
102
+
103
+ body = res.body.to_s
104
+ {
105
+ body: body,
106
+ mime: res["Content-Type"],
107
+ filename: filename_from(res["Content-Disposition"]),
108
+ size: body.bytesize
109
+ }
110
+ end
111
+
112
+ # Removes the service's copy after the host has attached the bytes.
113
+ # Idempotent on the service side: deleting absent media still succeeds.
114
+ # A 0.6.0 service mid-rollout has no /media routes and answers 404 —
115
+ # degrade to { success: false } instead of raising, mirroring
116
+ # fetch_media's nil-on-404.
117
+ def delete_media(message_id:, metadata: {})
118
+ user_id = user_id_from(metadata)
119
+ response = request(:delete, "/media/#{user_id}/#{path_id(message_id)}", allow_404: true)
120
+ { success: response.fetch("success", false) }
72
121
  end
73
122
 
74
123
  # Logs the user out of WhatsApp and clears their saved session on the service.
@@ -84,18 +133,75 @@ module WhatsAppNotifier
84
133
  (metadata[:user_id] || metadata["user_id"] || "default").to_s
85
134
  end
86
135
 
87
- def request(method, path, body: nil)
136
+ def map_inbound_message(message)
137
+ mapped = {
138
+ from: message["from"],
139
+ body: message["body"],
140
+ message_id: message["messageId"] || message["message_id"],
141
+ timestamp: message["timestamp"],
142
+ type: message["type"]
143
+ }
144
+ INBOUND_OPTIONAL_KEYS.each do |key, wire_keys|
145
+ wire = wire_keys.find { |candidate| message.key?(candidate) }
146
+ mapped[key] = message[wire] if wire
147
+ end
148
+ mapped
149
+ end
150
+
151
+ # Mirror the service-side sanitizeId charset so a hostile message_id can
152
+ # never smuggle path separators or a query string into the request URL.
153
+ def path_id(message_id)
154
+ message_id.to_s.gsub(/[^A-Za-z0-9@._-]/, "")
155
+ end
156
+
157
+ def filename_from(content_disposition)
158
+ content_disposition.to_s[/filename="([^"]*)"/, 1]
159
+ end
160
+
161
+ # The /media routes are token-gated when the service has
162
+ # WHATSAPP_WEBHOOK_TOKEN set — the same shared secret the service uses to
163
+ # sign its webhook pushes, reused in the other direction.
164
+ def webhook_token
165
+ token = ENV["WHATSAPP_WEBHOOK_TOKEN"].to_s
166
+ token.empty? ? nil : token
167
+ end
168
+
169
+ # Net::HTTP does NOT infer TLS from the URL scheme — without an explicit
170
+ # use_ssl a https:// service URL would silently speak plaintext to port
171
+ # 443. Both request paths (JSON control plane + binary media fetch) must
172
+ # honor the scheme.
173
+ def use_ssl?(uri)
174
+ uri.scheme == "https"
175
+ end
176
+
177
+ def binary_get(path)
178
+ uri = URI.parse("#{@base_url}#{path}")
179
+ req = Net::HTTP::Get.new(uri.request_uri)
180
+ req["X-WA-Token"] = webhook_token if webhook_token
181
+
182
+ Net::HTTP.start(uri.host, uri.port,
183
+ use_ssl: use_ssl?(uri),
184
+ open_timeout: MEDIA_OPEN_TIMEOUT,
185
+ read_timeout: MEDIA_READ_TIMEOUT) { |http| http.request(req) }
186
+ end
187
+
188
+ def request(method, path, body: nil, allow_404: false)
88
189
  uri = URI.parse("#{@base_url}#{path}")
89
- klass = method == :post ? Net::HTTP::Post : Net::HTTP::Get
90
- req = klass.new(uri.request_uri)
190
+ req = HTTP_CLASSES.fetch(method).new(uri.request_uri)
91
191
  req["Content-Type"] = "application/json"
192
+ req["X-WA-Token"] = webhook_token if webhook_token
92
193
  req.body = JSON.generate(body) if body
93
194
 
94
195
  res = Net::HTTP.start(uri.host, uri.port,
196
+ use_ssl: use_ssl?(uri),
95
197
  open_timeout: @open_timeout,
96
198
  read_timeout: @read_timeout) { |http| http.request(req) }
97
199
  parsed = parse_body(res.body)
98
200
  return parsed if res.is_a?(Net::HTTPSuccess)
201
+ # Callers opting in treat "route/resource not there" as a soft miss
202
+ # (e.g. delete_media against a 0.6.0 service) — the parsed error body
203
+ # carries no "success" key, so they degrade rather than raise.
204
+ return parsed if allow_404 && res.code.to_s == "404"
99
205
 
100
206
  raise "service request failed (#{res.code}): #{parsed["error"] || res.body}"
101
207
  end
@@ -66,6 +66,14 @@ module WhatsAppNotifier
66
66
  client.fetch_inbound(provider: provider, metadata: metadata)
67
67
  end
68
68
 
69
+ def fetch_media(message_id:, provider: nil, metadata: {})
70
+ client.fetch_media(message_id: message_id, provider: provider, metadata: metadata)
71
+ end
72
+
73
+ def delete_media(message_id:, provider: nil, metadata: {})
74
+ client.delete_media(message_id: message_id, provider: provider, metadata: metadata)
75
+ end
76
+
69
77
  def logout(provider: nil, metadata: {})
70
78
  client.logout(provider: provider, metadata: metadata)
71
79
  end
data/spec/client_spec.rb CHANGED
@@ -67,6 +67,27 @@ RSpec.describe WhatsAppNotifier::Client do
67
67
  end
68
68
  end
69
69
 
70
+ it "delegates fetch_media and delete_media to the provider" do
71
+ Dir.mktmpdir do |dir|
72
+ config.provider = :web_automation
73
+ config.web_automation_enabled = true
74
+ config.web_session_path = File.join(dir, "session.json")
75
+ config.web_adapter = double(
76
+ send_message: { success: true, session: {} },
77
+ fetch_qr_code: "qr",
78
+ connection_status: { state: "AUTHENTICATED", authenticated: true },
79
+ fetch_media: { body: "bytes", mime: "image/jpeg", filename: nil, size: 5 },
80
+ delete_media: { success: true }
81
+ )
82
+ client = described_class.new(configuration: config)
83
+
84
+ expect(client.fetch_media(message_id: "m1", provider: :web_automation, metadata: { user_id: 1 }))
85
+ .to include(body: "bytes", size: 5)
86
+ expect(client.delete_media(message_id: "m1", provider: :web_automation, metadata: { user_id: 1 }))
87
+ .to eq(success: true)
88
+ end
89
+ end
90
+
70
91
  it "delegates logout to the provider" do
71
92
  Dir.mktmpdir do |dir|
72
93
  config.provider = :web_automation