whatsapp_notifier 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +20 -3
- data/lib/generators/whatsapp_notifier/install_service_generator.rb +1 -0
- data/lib/whatsapp_notifier/client.rb +8 -0
- data/lib/whatsapp_notifier/providers/web_automation.rb +21 -0
- data/lib/whatsapp_notifier/services/web_automation/inbound.test.ts +149 -1
- data/lib/whatsapp_notifier/services/web_automation/inbound.ts +90 -2
- data/lib/whatsapp_notifier/services/web_automation/index.ts +42 -29
- data/lib/whatsapp_notifier/services/web_automation/media.test.ts +585 -0
- data/lib/whatsapp_notifier/services/web_automation/media.ts +458 -0
- data/lib/whatsapp_notifier/version.rb +1 -1
- data/lib/whatsapp_notifier/web_adapter.rb +118 -12
- data/lib/whatsapp_notifier.rb +8 -0
- data/spec/client_spec.rb +21 -0
- data/spec/generators/install_service_generator_spec.rb +12 -1
- data/spec/providers/web_automation_spec.rb +39 -0
- data/spec/web_adapter_spec.rb +176 -0
- data/spec/whatsapp_notifier_spec.rb +6 -0
- metadata +3 -1
|
@@ -0,0 +1,458 @@
|
|
|
1
|
+
// Inbound media store (v0.7.0)
|
|
2
|
+
//
|
|
3
|
+
// Downloads customer media (images, voice notes, documents) to disk and serves
|
|
4
|
+
// it back to the host over GET /media/:userId/:messageId. Kept separate from
|
|
5
|
+
// index.ts (which calls Bun.serve() at import time) so every piece — id
|
|
6
|
+
// sanitization, the size/type policy, the TTL sweep, the download pipeline and
|
|
7
|
+
// the route responses — can be unit-tested without booting Chromium.
|
|
8
|
+
//
|
|
9
|
+
// Layout: <media root>/<safeUser>/<safeMessageId> holds the raw bytes and
|
|
10
|
+
// <safeMessageId>~meta.json the sidecar { mime, filename, size, capturedAt }.
|
|
11
|
+
// The '~' sits OUTSIDE the sanitize charset, so a hostile message id ending in
|
|
12
|
+
// ".json" can never name-collide with (or overwrite) another message's sidecar
|
|
13
|
+
// — data files and sidecars live in disjoint namespaces by construction. The
|
|
14
|
+
// media root is <SESSION_DIR>/media in production (survives restarts that wipe
|
|
15
|
+
// the in-memory inbound queues); tests point it at a tmp dir via
|
|
16
|
+
// configureMedia, mirroring configureInbound.
|
|
17
|
+
|
|
18
|
+
import {
|
|
19
|
+
existsSync,
|
|
20
|
+
mkdirSync,
|
|
21
|
+
readFileSync,
|
|
22
|
+
writeFileSync,
|
|
23
|
+
rmSync,
|
|
24
|
+
readdirSync,
|
|
25
|
+
statSync
|
|
26
|
+
} from 'fs';
|
|
27
|
+
import { join, resolve, sep } from 'path';
|
|
28
|
+
import { createHash, timingSafeEqual } from 'crypto';
|
|
29
|
+
|
|
30
|
+
export interface MediaMeta {
|
|
31
|
+
mime: string;
|
|
32
|
+
filename: string | null;
|
|
33
|
+
size: number;
|
|
34
|
+
capturedAt: number; // epoch ms
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
export type MediaSkipReason = 'unsupported_type' | 'too_large' | 'disk_full';
|
|
38
|
+
export type MediaFailureReason = MediaSkipReason | 'expired' | 'download_failed' | 'invalid_id';
|
|
39
|
+
|
|
40
|
+
// The verdict captureInbound merges into the inbound payload. Structurally
|
|
41
|
+
// compatible with inbound.ts's optional media fields — every failure mode
|
|
42
|
+
// still surfaces the message itself, just without bytes.
|
|
43
|
+
export interface MediaResolution {
|
|
44
|
+
mediaStatus: 'available' | 'unavailable';
|
|
45
|
+
mediaError?: MediaFailureReason;
|
|
46
|
+
mediaMime?: string;
|
|
47
|
+
mediaFilename?: string;
|
|
48
|
+
mediaSize?: number;
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
export type MediaPolicy = { download: true } | { download: false; reason: MediaSkipReason };
|
|
52
|
+
|
|
53
|
+
// Inline media (image / voice note) caps at WhatsApp's own 16MB ceiling;
|
|
54
|
+
// documents get a separate, env-tunable cap. Envs are read lazily (not frozen
|
|
55
|
+
// at import) so the limits can be retuned per deployment and per test.
|
|
56
|
+
export const INLINE_MEDIA_MAX_BYTES = 16 * 1024 * 1024;
|
|
57
|
+
export const MEDIA_DOWNLOAD_TIMEOUT_MS = 30000;
|
|
58
|
+
|
|
59
|
+
// Reads a numeric limit from the environment.
//
// Falls back to `fallback` whenever the variable is unset, blank
// (including whitespace-only), non-numeric ("50GB", "2 days"), non-finite or
// negative. A NaN limit would make every comparison false and silently
// disable the TTL sweep / disk cap; a whitespace-only value would coerce to 0
// and a negative one would make every file "expired" or every download
// "too large" — all of those are configuration mistakes, not intent.
// An explicit "0" is still honoured.
function envLimit(name: string, fallback: number): number {
  const raw = process.env[name]?.trim();
  if (!raw) return fallback;
  const parsed = Number(raw);
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
}
|
|
66
|
+
|
|
67
|
+
// Retention window for stored media, in milliseconds. Read from
// WHATSAPP_MEDIA_TTL_MS on every call; defaults to 48 hours.
export function mediaTtlMs(): number {
  const defaultTtlMs = 48 * 60 * 60 * 1000; // 48h
  return envLimit('WHATSAPP_MEDIA_TTL_MS', defaultTtlMs);
}
|
|
70
|
+
|
|
71
|
+
// Per-file size cap for documents, in bytes. Read from
// WHATSAPP_MEDIA_MAX_BYTES on every call; defaults to 25MB.
export function maxDocumentBytes(): number {
  const defaultCapBytes = 25 * 1024 * 1024; // 25MB
  return envLimit('WHATSAPP_MEDIA_MAX_BYTES', defaultCapBytes);
}
|
|
74
|
+
|
|
75
|
+
// Total on-disk budget for the media store, in bytes. Read from
// WHATSAPP_MEDIA_MAX_DISK_BYTES on every call; defaults to 5GB.
export function maxDiskBytes(): number {
  const defaultBudgetBytes = 5 * 1024 * 1024 * 1024; // 5GB
  return envLimit('WHATSAPP_MEDIA_MAX_DISK_BYTES', defaultBudgetBytes);
}
|
|
78
|
+
|
|
79
|
+
// ── Root + cap accounting ──
|
|
80
|
+
|
|
81
|
+
// index.ts wires this to <SESSION_BASE_DIR>/media; tests to a tmp dir.
|
|
82
|
+
let mediaRootResolver: () => string = () => './media';
|
|
83
|
+
// Total payload bytes on disk, kept incrementally by write/delete and
|
|
84
|
+
// recomputed by the sweep, so downloadPolicy's disk-full check is O(1).
|
|
85
|
+
let cachedDiskBytes: number | null = null;
|
|
86
|
+
|
|
87
|
+
// Installs the callback that yields the media root directory and discards the
// cached disk total, since it described whatever root was configured before.
export function configureMedia(rootResolver: () => string) {
  cachedDiskBytes = null;
  mediaRootResolver = rootResolver;
}
|
|
91
|
+
|
|
92
|
+
// Total payload bytes currently stored. Served from the cache when one is
// present; otherwise measured from disk once and remembered.
export function mediaDiskBytes(): number {
  if (cachedDiskBytes !== null) return cachedDiskBytes;
  const measured = computeDiskBytes();
  cachedDiskBytes = measured;
  return measured;
}
|
|
96
|
+
|
|
97
|
+
// Walks <root>/<user>/* and sums the sizes of every payload file. Sidecars are
// skipped (negligible), non-directory entries under the root are ignored, and
// a file that vanishes between listing and stat simply contributes nothing.
// Any listing failure (typically: the root does not exist yet) ends the walk
// and returns whatever was summed so far.
function computeDiskBytes(): number {
  let sum = 0;
  try {
    const root = mediaRootResolver();
    const userEntries = readdirSync(root, { withFileTypes: true }).filter((entry) => entry.isDirectory());
    for (const entry of userEntries) {
      const userDir = join(root, entry.name);
      for (const name of readdirSync(userDir)) {
        if (isSidecarName(name)) continue; // sidecars are negligible
        try {
          sum += statSync(join(userDir, name)).size;
        } catch (_) {
          /* raced a delete */
        }
      }
    }
  } catch (_) {
    /* media root not created yet → nothing stored */
  }
  return sum;
}
|
|
112
|
+
|
|
113
|
+
// ── Id sanitization + path layout ──
|
|
114
|
+
|
|
115
|
+
// Reduces a route parameter to a string that is safe to use as a single path
// segment. Characters outside [A-Za-z0-9@._-] are stripped (WhatsApp message
// ids such as "true_9199...@c.us_ABC" and numeric user ids pass through
// untouched). Returns null for non-strings, for input that is empty after
// stripping, for results longer than 200 characters, and for dot-only results
// ('.', '..', …), which are path-segment hazards.
export function sanitizeId(raw: unknown): string | null {
  if (typeof raw !== 'string') return null;

  const kept = raw.replace(/[^A-Za-z0-9@._-]/g, '');
  const isEmpty = kept.length === 0;
  const isTooLong = kept.length > 200;
  const isDotsOnly = /^\.+$/.test(kept);

  return isEmpty || isTooLong || isDotsOnly ? null : kept;
}
|
|
125
|
+
|
|
126
|
+
// Suffix appended to a payload's file name to form its sidecar name. The '~'
// is not in the sanitizeId charset, so a sanitized message id can never equal
// (or overwrite) a sidecar name — payloads and sidecars are distinguishable by
// name alone.
const SIDECAR_SUFFIX = '~meta.json';

// True when `file` is a sidecar file name, i.e. it ends with SIDECAR_SUFFIX.
function isSidecarName(file: string): boolean {
  const tail = file.slice(-SIDECAR_SUFFIX.length);
  return tail === SIDECAR_SUFFIX;
}
|
|
134
|
+
|
|
135
|
+
// Maps (userId, messageId) to the absolute locations used by the store:
// the per-user directory, the payload file and its sidecar. Returns null when
// either id fails sanitizeId, or when the resolved paths would not sit
// strictly inside the media root / the user directory.
export function mediaPaths(
  userId: string,
  messageId: string
): { dir: string; dataPath: string; metaPath: string } | null {
  const userSegment = sanitizeId(userId);
  if (!userSegment) return null;
  const messageSegment = sanitizeId(messageId);
  if (!messageSegment) return null;

  const root = resolve(mediaRootResolver());
  const dir = resolve(root, userSegment);
  const dataPath = resolve(dir, messageSegment);

  // Belt and braces: even a sanitizer bug must never escape the media root.
  const dirInsideRoot = dir.startsWith(root + sep);
  const dataInsideDir = dataPath.startsWith(dir + sep);
  if (!(dirInsideRoot && dataInsideDir)) return null;

  const metaPath = `${dataPath}${SIDECAR_SUFFIX}`;
  return { dir, dataPath, metaPath };
}
|
|
151
|
+
|
|
152
|
+
// ── Store primitives ──
|
|
153
|
+
|
|
154
|
+
// Persists a payload and its sidecar ({ mime, filename, size, capturedAt })
// for (userId, messageId), creating the user directory if needed.
//
// Returns true on success; false when the ids are invalid or any filesystem
// step fails (the error is logged, never thrown).
//
// Disk accounting:
//  - If a payload already exists at the target path (e.g. a re-download that
//    heals a corrupt sidecar), its size is subtracted before the new size is
//    added, so an overwrite is not counted twice.
//  - On failure, the partially written files are removed on a best-effort
//    basis and the cached total is invalidated, so the next cap check
//    re-measures the disk instead of trusting a stale number.
export function writeMedia(
  userId: string,
  messageId: string,
  data: Uint8Array,
  meta: { mime: string; filename?: string | null }
): boolean {
  const paths = mediaPaths(userId, messageId);
  if (!paths) return false;

  // Size of the payload we are about to replace; 0 when nothing is stored.
  let replacedBytes = 0;
  try {
    replacedBytes = statSync(paths.dataPath).size;
  } catch (_) {
    /* no previous payload */
  }

  try {
    mkdirSync(paths.dir, { recursive: true });
    writeFileSync(paths.dataPath, data);
    const sidecar: MediaMeta = {
      mime: meta.mime,
      filename: meta.filename ?? null,
      size: data.byteLength,
      capturedAt: Date.now()
    };
    writeFileSync(paths.metaPath, JSON.stringify(sidecar));
    if (cachedDiskBytes !== null) {
      cachedDiskBytes = Math.max(0, cachedDiskBytes - replacedBytes) + data.byteLength;
    }
    return true;
  } catch (e) {
    console.error(`Failed to persist media ${messageId} for ${userId}`, e);
    // Do not leave a payload without a sidecar (or a truncated payload) behind.
    try {
      rmSync(paths.dataPath, { force: true });
      rmSync(paths.metaPath, { force: true });
    } catch (_) {
      /* best effort — the TTL sweep will retry */
    }
    // The on-disk state is uncertain now; force a recount on the next read.
    cachedDiskBytes = null;
    return false;
  }
}
|
|
179
|
+
|
|
180
|
+
// Looks up stored media for (userId, messageId) and returns its sidecar
// metadata, or null. A hit requires BOTH the payload file and the sidecar to
// be present. Fields missing or mistyped in the sidecar are defaulted
// (mime → 'application/octet-stream', filename → null, size/capturedAt → 0).
// An unreadable or unparsable sidecar counts as "absent" so that a later
// re-download can heal it.
export function mediaExists(userId: string, messageId: string): MediaMeta | null {
  const paths = mediaPaths(userId, messageId);
  if (!paths) return null;

  try {
    const bothPresent = existsSync(paths.dataPath) && existsSync(paths.metaPath);
    if (!bothPresent) return null;

    const sidecar = JSON.parse(readFileSync(paths.metaPath, 'utf8'));
    const mime = typeof sidecar?.mime === 'string' ? sidecar.mime : 'application/octet-stream';
    const filename = typeof sidecar?.filename === 'string' ? sidecar.filename : null;
    const size = Number(sidecar?.size) || 0;
    const capturedAt = Number(sidecar?.capturedAt) || 0;
    return { mime, filename, size, capturedAt };
  } catch (_) {
    return null; // corrupt sidecar → treat as absent (a re-download heals it)
  }
}
|
|
198
|
+
|
|
199
|
+
// Loads the stored payload together with its sidecar metadata. Returns null
// when mediaExists reports nothing stored, or when the payload cannot be read
// (for example because a sweep/delete removed it after the existence check).
export function readMedia(userId: string, messageId: string): { data: Buffer; meta: MediaMeta } | null {
  const meta = mediaExists(userId, messageId);
  if (!meta) return null;

  try {
    const paths = mediaPaths(userId, messageId);
    if (!paths) return null;
    const data = readFileSync(paths.dataPath);
    return { data, meta };
  } catch (_) {
    return null; // raced a sweep/delete between the exists check and the read
  }
}
|
|
208
|
+
|
|
209
|
+
// Logout privacy contract: stored media belongs to the OLD pairing, so it must
// not outlive it. Without this step, customer photos/documents would remain on
// disk — and fetchable via GET /media — for up to the TTL after the operator
// severed the pairing.
//
// Recursively removes the user's media directory, applying the same sanitize +
// containment rules as mediaPaths. Returns false for an invalid user id, a
// path that would escape the media root, or a removal error (logged). On
// success the cached disk total is recomputed so the cap check stays honest
// after the bulk removal.
export function clearUserMedia(userId: string): boolean {
  const userSegment = sanitizeId(userId);
  if (!userSegment) return false;

  const root = resolve(mediaRootResolver());
  const userDir = resolve(root, userSegment);
  const insideRoot = userDir.startsWith(root + sep);
  if (!insideRoot) return false;

  try {
    rmSync(userDir, { recursive: true, force: true });
  } catch (e) {
    console.error(`Failed to clear media dir for ${userId}`, e);
    return false;
  }

  cachedDiskBytes = computeDiskBytes();
  return true;
}
|
|
230
|
+
|
|
231
|
+
// Removes the payload and sidecar for (userId, messageId).
//
// Idempotent: deleting media that was never stored (or was already swept)
// still returns true. Returns false only for invalid ids or when a removal
// fails (logged, never thrown).
//
// Disk accounting uses the payload's actual on-disk size, measured before the
// removal, rather than the size recorded in the sidecar: a payload whose
// sidecar is missing or corrupt still occupies space and must still be
// subtracted. If a removal fails part-way the cached total is invalidated so
// the next cap check re-measures the disk.
export function deleteMedia(userId: string, messageId: string): boolean {
  const paths = mediaPaths(userId, messageId);
  if (!paths) return false;

  // 0 when no payload is stored — nothing to subtract in that case.
  let storedBytes = 0;
  try {
    storedBytes = statSync(paths.dataPath).size;
  } catch (_) {
    /* nothing stored */
  }

  try {
    rmSync(paths.dataPath, { force: true });
    rmSync(paths.metaPath, { force: true });
    if (cachedDiskBytes !== null) {
      cachedDiskBytes = Math.max(0, cachedDiskBytes - storedBytes);
    }
    return true;
  } catch (e) {
    console.error(`Failed to delete media ${messageId} for ${userId}`, e);
    // One of the two files may already be gone; recount on the next read.
    cachedDiskBytes = null;
    return false;
  }
}
|
|
248
|
+
|
|
249
|
+
// ── Download policy ──
|
|
250
|
+
|
|
251
|
+
// Message types whose bytes are fetched and stored. Stickers and videos are
// deliberately left out (no CMS rendering need; videos routinely blow the
// cap).
const DOWNLOADABLE_TYPES = new Set(['image', 'audio', 'ptt', 'document']);

// Decides whether a media message of the given type and size may be
// downloaded. Checks run in this order, first failure wins:
//   1. view-once media or a non-downloadable type → 'unsupported_type'
//      (view-once must never be persisted — the sender chose ephemerality)
//   2. size above the per-file cap (documents: maxDocumentBytes(); everything
//      else: INLINE_MEDIA_MAX_BYTES) → 'too_large'
//   3. size would push the store past maxDiskBytes() → 'disk_full'
export function downloadPolicy(type: string, size: number, viewOnce = false): MediaPolicy {
  const refuse = (reason: MediaSkipReason): MediaPolicy => ({ download: false, reason });

  if (viewOnce) return refuse('unsupported_type');
  if (!DOWNLOADABLE_TYPES.has(type)) return refuse('unsupported_type');

  const perFileCap = type === 'document' ? maxDocumentBytes() : INLINE_MEDIA_MAX_BYTES;
  if (size > perFileCap) return refuse('too_large');

  const projectedTotal = mediaDiskBytes() + size;
  if (projectedTotal > maxDiskBytes()) return refuse('disk_full');

  return { download: true };
}
|
|
263
|
+
|
|
264
|
+
// ── TTL sweep ──
|
|
265
|
+
|
|
266
|
+
// TTL sweep. Removes every payload (and its sidecar) whose capture time is
// more than mediaTtlMs() before `nowMs`, then removes sidecars whose payload
// no longer exists, then recomputes the cached disk total. Returns the number
// of payloads removed. index.ts runs this on the existing reaper interval.
//
// Failures are isolated per entry: a file that cannot be removed, or a user
// directory that cannot be listed, is logged and skipped so that it cannot
// stop the sweep for every other file and user. Only a missing/unreadable
// media root ends the sweep early (nothing to do).
export function sweepExpired(nowMs = Date.now()): number {
  const ttl = mediaTtlMs();
  let removed = 0;

  let userDirs: string[] = [];
  try {
    const root = mediaRootResolver();
    userDirs = readdirSync(root, { withFileTypes: true })
      .filter((entry) => entry.isDirectory())
      .map((entry) => join(root, entry.name));
  } catch (_) {
    /* media root not created yet → nothing to sweep */
  }

  for (const dir of userDirs) {
    let files: string[];
    try {
      files = readdirSync(dir);
    } catch (e) {
      console.error(`Failed to list media dir ${dir}`, e);
      continue;
    }

    // Pass 1: expired payloads, together with their sidecars.
    for (const file of files) {
      if (isSidecarName(file)) continue;
      const dataPath = join(dir, file);
      const metaPath = `${dataPath}${SIDECAR_SUFFIX}`;
      if (nowMs - capturedAtFor(dataPath, metaPath) > ttl) {
        try {
          rmSync(dataPath, { force: true });
          rmSync(metaPath, { force: true });
          removed += 1;
        } catch (e) {
          console.error(`Failed to sweep media ${dataPath}`, e);
        }
      }
    }

    // Pass 2: sidecars whose payload is already gone are garbage regardless
    // of age. Sidecars removed in pass 1 are simply no-ops here (force: true).
    for (const file of files) {
      if (!isSidecarName(file)) continue;
      const payloadPath = join(dir, file.slice(0, -SIDECAR_SUFFIX.length));
      if (existsSync(payloadPath)) continue;
      try {
        rmSync(join(dir, file), { force: true });
      } catch (e) {
        console.error(`Failed to remove orphaned sidecar ${join(dir, file)}`, e);
      }
    }
  }

  cachedDiskBytes = computeDiskBytes();
  return removed;
}
|
|
297
|
+
|
|
298
|
+
// Best-available capture time (epoch ms) for a stored payload:
//   1. the sidecar's `capturedAt`, when it is a finite number greater than 0;
//   2. otherwise the payload file's mtime;
//   3. otherwise 0, so an unstattable entry looks ancient to the caller.
function capturedAtFor(dataPath: string, metaPath: string): number {
  let recorded = NaN;
  try {
    const sidecar = JSON.parse(readFileSync(metaPath, 'utf8'));
    recorded = Number(sidecar?.capturedAt);
  } catch (_) {
    /* missing/corrupt sidecar → fall back to the file clock */
  }
  if (Number.isFinite(recorded) && recorded > 0) return recorded;

  try {
    const { mtimeMs } = statSync(dataPath);
    return mtimeMs;
  } catch (_) {
    return 0; // unstattable → looks ancient → swept
  }
}
|
|
310
|
+
|
|
311
|
+
// ── Download pipeline ──
|
|
312
|
+
|
|
313
|
+
// Download pipeline for one inbound message:
//   id check → reuse of an existing stored copy → policy check on the declared
//   size → time-bounded msg.downloadMedia() → policy re-check on the real
//   bytes → persist.
// Every failure mode resolves to a typed 'unavailable' verdict rather than
// throwing, so the message itself can always be surfaced with or without its
// bytes. `deps.timeoutMs` overrides MEDIA_DOWNLOAD_TIMEOUT_MS.
export async function resolveMediaForMessage(
  userId: string,
  msg: any,
  deps: { timeoutMs?: number } = {}
): Promise<MediaResolution> {
  const unavailable = (mediaError: MediaFailureReason): MediaResolution => ({
    mediaStatus: 'unavailable',
    mediaError
  });

  // Must mirror normalizeInbound's messageId fallback (inbound.ts) so the
  // stored file is addressable by the id the host received.
  const serializedId = msg?.id && msg.id._serialized;
  const messageId = serializedId || `${msg?.from || ''}-${msg?.timestamp}`;
  if (!mediaPaths(userId, messageId)) return unavailable('invalid_id');

  // Reconnect backfill replays recent messages — serve the copy already on
  // disk instead of re-downloading (and re-counting against the disk cap).
  const stored = mediaExists(userId, messageId);
  if (stored) return availableResolution(stored.mime, stored.filename, stored.size);

  const type = msg?.type || 'chat';
  const viewOnce = !!(msg?._data?.isViewOnce);
  // 0 = unknown → the caps are re-applied to the real bytes after download.
  const declaredSize = Number(msg?._data?.size) || 0;
  const preCheck = downloadPolicy(type, declaredSize, viewOnce);
  if (!preCheck.download) return unavailable(preCheck.reason);

  let media: any;
  try {
    const budgetMs = deps.timeoutMs ?? MEDIA_DOWNLOAD_TIMEOUT_MS;
    // The async wrapper turns a synchronous throw from downloadMedia (or a
    // missing msg) into a rejection that the catch below handles.
    media = await withTimeout((async () => msg.downloadMedia())(), budgetMs);
  } catch (e) {
    console.error(`Media download failed for ${userId}/${messageId}`, e);
    return unavailable('download_failed');
  }

  // whatsapp-web.js resolves undefined when the media is no longer on
  // WhatsApp's servers (old message, sender deleted it, …).
  if (!media || !media.data) return unavailable('expired');

  const bytes = Buffer.from(media.data, 'base64');
  // The declared size is advisory — re-apply the caps to the real bytes.
  const postCheck = downloadPolicy(type, bytes.byteLength, viewOnce);
  if (!postCheck.download) return unavailable(postCheck.reason);

  const mime = media.mimetype || msg?._data?.mimetype || 'application/octet-stream';
  const filename = media.filename || msg?._data?.filename || null;
  const persisted = writeMedia(userId, messageId, bytes, { mime, filename });
  if (!persisted) return unavailable('download_failed');

  return availableResolution(mime, filename, bytes.byteLength);
}
|
|
366
|
+
|
|
367
|
+
// Builds the 'available' verdict. `mediaFilename` is included only when a
// non-empty filename is known; key order is status, mime, filename, size.
function availableResolution(mime: string, filename: string | null, size: number): MediaResolution {
  const verdict: MediaResolution = { mediaStatus: 'available', mediaMime: mime };
  if (filename) verdict.mediaFilename = filename;
  verdict.mediaSize = size;
  return verdict;
}
|
|
375
|
+
|
|
376
|
+
// Settles with the outcome of `promise`, unless `ms` milliseconds elapse
// first, in which case it rejects with a timeout Error. The timer is cleared
// as soon as either side settles. The underlying operation is not cancelled —
// its eventual outcome is simply ignored after a timeout.
function withTimeout<T>(promise: Promise<T>, ms: number): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const deadline = new Promise<never>((_resolve, reject) => {
    timer = setTimeout(() => reject(new Error(`media download timed out after ${ms}ms`)), ms);
  });
  return Promise.race([promise, deadline]).finally(() => clearTimeout(timer));
}
|
|
388
|
+
|
|
389
|
+
// ── Route responses ──
|
|
390
|
+
//
|
|
391
|
+
// Full Response builders for GET/DELETE /media/:userId/:messageId so index.ts
|
|
392
|
+
// stays glue-only and the route contract is unit-testable. Neither handler may
|
|
393
|
+
// ever create a WhatsApp client (same fast-reject rule as GET /inbound): they
|
|
394
|
+
// touch only the on-disk store.
|
|
395
|
+
|
|
396
|
+
// X-WA-Token check shared by both /media routes. ENFORCED ONLY when an
// expected token is configured: with no expected token every request passes.
// Otherwise both values are SHA-256 hashed first, which gives the timing-safe
// comparison two inputs of identical length regardless of what was provided.
// A missing provided token is compared as the empty string.
export function verifyMediaToken(provided: string | undefined, expected: string | undefined): boolean {
  if (!expected) return true;

  const sha256 = (value: string): Buffer => createHash('sha256').update(value).digest();
  return timingSafeEqual(sha256(provided ?? ''), sha256(expected));
}
|
|
406
|
+
|
|
407
|
+
// Makes a stored filename safe to embed in a quoted Content-Disposition
// value: every UTF-16 code unit outside [A-Za-z0-9@. _-] (quotes, CR/LF,
// non-ASCII, …) is replaced with '_'.
function headerSafeFilename(name: string): string {
  const allowed = /^[A-Za-z0-9@. _-]$/;
  return name
    .split('')
    .map((unit) => (allowed.test(unit) ? unit : '_'))
    .join('');
}
|
|
412
|
+
|
|
413
|
+
// Response builder for GET /media/:userId/:messageId.
//   401 { error: 'unauthorized' } — token check failed
//   404 { error: 'not_found' }    — nothing readable for these ids
//   200 raw bytes                 — with Content-Type, Content-Length and an
//                                   attachment Content-Disposition (carrying a
//                                   header-safe filename when one is stored)
export function mediaGetResponse(
  userId: string,
  messageId: string,
  token: string | undefined,
  expectedToken: string | undefined
): Response {
  const authorized = verifyMediaToken(token, expectedToken);
  if (!authorized) return Response.json({ error: 'unauthorized' }, { status: 401 });

  // readMedia sanitizes both ids itself. Unknown, swept, deleted AND invalid
  // ids all answer the same 404 — the route must not reveal which.
  const found = readMedia(userId, messageId);
  if (!found) return Response.json({ error: 'not_found' }, { status: 404 });

  const { data, meta } = found;
  const disposition = meta.filename
    ? `attachment; filename="${headerSafeFilename(meta.filename)}"`
    : 'attachment';

  return new Response(data, {
    status: 200,
    headers: {
      'Content-Type': meta.mime || 'application/octet-stream',
      'Content-Length': String(data.byteLength),
      'Content-Disposition': disposition
    }
  });
}
|
|
439
|
+
|
|
440
|
+
// Response builder for DELETE /media/:userId/:messageId.
// Idempotent by contract: the host calls this after attaching the bytes, and a
// retry (or a TTL sweep racing it) must not turn into an error — so once the
// token check passes, the answer is { success: true } whatever deleteMedia
// reports.
export function mediaDeleteResponse(
  userId: string,
  messageId: string,
  token: string | undefined,
  expectedToken: string | undefined
): Response {
  const authorized = verifyMediaToken(token, expectedToken);
  if (!authorized) return Response.json({ error: 'unauthorized' }, { status: 401 });

  deleteMedia(userId, messageId);
  return Response.json({ success: true });
}
|
|
454
|
+
|
|
455
|
+
// Test helper (mirrors resetInboundState): forgets the cached disk total so
// the next mediaDiskBytes() call measures the disk afresh. The configured
// root resolver is left as it is.
export function resetMediaState(): void {
  cachedDiskBytes = null;
}
|
|
@@ -6,6 +6,30 @@ module WhatsAppNotifier
|
|
|
6
6
|
class WebAdapter
|
|
7
7
|
DEFAULT_OPEN_TIMEOUT = 5
|
|
8
8
|
DEFAULT_READ_TIMEOUT = 30
|
|
9
|
+
# Media bytes can be tens of MB over a slow link — give the binary fetch a
|
|
10
|
+
# longer read window than the JSON control plane.
|
|
11
|
+
MEDIA_OPEN_TIMEOUT = 5
|
|
12
|
+
MEDIA_READ_TIMEOUT = 60
|
|
13
|
+
|
|
14
|
+
HTTP_CLASSES = {
|
|
15
|
+
post: Net::HTTP::Post,
|
|
16
|
+
get: Net::HTTP::Get,
|
|
17
|
+
delete: Net::HTTP::Delete
|
|
18
|
+
}.freeze
|
|
19
|
+
|
|
20
|
+
# Optional inbound keys introduced by the 0.7.0 service (media verdict +
|
|
21
|
+
# sender display name). Mapped ONLY when the wire payload carries them, so
|
|
22
|
+
# hosts can key-gate on has_media presence: a missing key means "0.6.0
|
|
23
|
+
# service, no media support", while has_media: false means "text message".
|
|
24
|
+
INBOUND_OPTIONAL_KEYS = {
|
|
25
|
+
has_media: %w[hasMedia has_media],
|
|
26
|
+
media_status: %w[mediaStatus media_status],
|
|
27
|
+
media_error: %w[mediaError media_error],
|
|
28
|
+
media_mime: %w[mediaMime media_mime],
|
|
29
|
+
media_filename: %w[mediaFilename media_filename],
|
|
30
|
+
media_size: %w[mediaSize media_size],
|
|
31
|
+
sender_name: %w[senderName sender_name]
|
|
32
|
+
}.freeze
|
|
9
33
|
|
|
10
34
|
def self.default_base_url
|
|
11
35
|
ENV["WHATSAPP_NOTIFIER_SERVICE_URL"] || ENV["WHATSAPP_SERVICE_URL"] || "http://127.0.0.1:3001"
|
|
@@ -60,15 +84,40 @@ module WhatsAppNotifier
|
|
|
60
84
|
user_id = user_id_from(metadata)
|
|
61
85
|
response = request(:get, "/inbound/#{user_id}")
|
|
62
86
|
raw = response.is_a?(Hash) ? response["messages"] : response
|
|
63
|
-
Array(raw).map
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
87
|
+
Array(raw).map { |m| map_inbound_message(m) }
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
# Fetches the raw bytes of a downloaded inbound media file. Returns
|
|
91
|
+
# { body:, mime:, filename:, size: } or nil when the service has no copy
|
|
92
|
+
# (never downloaded, swept by TTL, or already deleted).
|
|
93
|
+
#
|
|
94
|
+
# Deliberately NOT routed through #request: that path JSON-parses the
|
|
95
|
+
# response body (and host apps are known to patch it further), which would
|
|
96
|
+
# corrupt binary payloads.
|
|
97
|
+
def fetch_media(message_id:, metadata: {})
|
|
98
|
+
user_id = user_id_from(metadata)
|
|
99
|
+
res = binary_get("/media/#{user_id}/#{path_id(message_id)}")
|
|
100
|
+
return nil if res.code.to_s == "404"
|
|
101
|
+
raise "service request failed (#{res.code}): #{res.body}" unless res.is_a?(Net::HTTPSuccess)
|
|
102
|
+
|
|
103
|
+
body = res.body.to_s
|
|
104
|
+
{
|
|
105
|
+
body: body,
|
|
106
|
+
mime: res["Content-Type"],
|
|
107
|
+
filename: filename_from(res["Content-Disposition"]),
|
|
108
|
+
size: body.bytesize
|
|
109
|
+
}
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
# Removes the service's copy after the host has attached the bytes.
|
|
113
|
+
# Idempotent on the service side: deleting absent media still succeeds.
|
|
114
|
+
# A 0.6.0 service mid-rollout has no /media routes and answers 404 —
|
|
115
|
+
# degrade to { success: false } instead of raising, mirroring
|
|
116
|
+
# fetch_media's nil-on-404.
|
|
117
|
+
def delete_media(message_id:, metadata: {})
|
|
118
|
+
user_id = user_id_from(metadata)
|
|
119
|
+
response = request(:delete, "/media/#{user_id}/#{path_id(message_id)}", allow_404: true)
|
|
120
|
+
{ success: response.fetch("success", false) }
|
|
72
121
|
end
|
|
73
122
|
|
|
74
123
|
# Logs the user out of WhatsApp and clears their saved session on the service.
|
|
@@ -84,18 +133,75 @@ module WhatsAppNotifier
|
|
|
84
133
|
(metadata[:user_id] || metadata["user_id"] || "default").to_s
|
|
85
134
|
end
|
|
86
135
|
|
|
87
|
-
def
|
|
136
|
+
def map_inbound_message(message)
|
|
137
|
+
mapped = {
|
|
138
|
+
from: message["from"],
|
|
139
|
+
body: message["body"],
|
|
140
|
+
message_id: message["messageId"] || message["message_id"],
|
|
141
|
+
timestamp: message["timestamp"],
|
|
142
|
+
type: message["type"]
|
|
143
|
+
}
|
|
144
|
+
INBOUND_OPTIONAL_KEYS.each do |key, wire_keys|
|
|
145
|
+
wire = wire_keys.find { |candidate| message.key?(candidate) }
|
|
146
|
+
mapped[key] = message[wire] if wire
|
|
147
|
+
end
|
|
148
|
+
mapped
|
|
149
|
+
end
|
|
150
|
+
|
|
151
|
+
# Mirror the service-side sanitizeId charset so a hostile message_id can
|
|
152
|
+
# never smuggle path separators or a query string into the request URL.
|
|
153
|
+
def path_id(message_id)
|
|
154
|
+
message_id.to_s.gsub(/[^A-Za-z0-9@._-]/, "")
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
def filename_from(content_disposition)
|
|
158
|
+
content_disposition.to_s[/filename="([^"]*)"/, 1]
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
# The /media routes are token-gated when the service has
|
|
162
|
+
# WHATSAPP_WEBHOOK_TOKEN set — the same shared secret the service uses to
|
|
163
|
+
# sign its webhook pushes, reused in the other direction.
|
|
164
|
+
def webhook_token
|
|
165
|
+
token = ENV["WHATSAPP_WEBHOOK_TOKEN"].to_s
|
|
166
|
+
token.empty? ? nil : token
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
# Net::HTTP does NOT infer TLS from the URL scheme — without an explicit
|
|
170
|
+
# use_ssl a https:// service URL would silently speak plaintext to port
|
|
171
|
+
# 443. Both request paths (JSON control plane + binary media fetch) must
|
|
172
|
+
# honor the scheme.
|
|
173
|
+
def use_ssl?(uri)
|
|
174
|
+
uri.scheme == "https"
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
def binary_get(path)
|
|
178
|
+
uri = URI.parse("#{@base_url}#{path}")
|
|
179
|
+
req = Net::HTTP::Get.new(uri.request_uri)
|
|
180
|
+
req["X-WA-Token"] = webhook_token if webhook_token
|
|
181
|
+
|
|
182
|
+
Net::HTTP.start(uri.host, uri.port,
|
|
183
|
+
use_ssl: use_ssl?(uri),
|
|
184
|
+
open_timeout: MEDIA_OPEN_TIMEOUT,
|
|
185
|
+
read_timeout: MEDIA_READ_TIMEOUT) { |http| http.request(req) }
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
def request(method, path, body: nil, allow_404: false)
|
|
88
189
|
uri = URI.parse("#{@base_url}#{path}")
|
|
89
|
-
|
|
90
|
-
req = klass.new(uri.request_uri)
|
|
190
|
+
req = HTTP_CLASSES.fetch(method).new(uri.request_uri)
|
|
91
191
|
req["Content-Type"] = "application/json"
|
|
192
|
+
req["X-WA-Token"] = webhook_token if webhook_token
|
|
92
193
|
req.body = JSON.generate(body) if body
|
|
93
194
|
|
|
94
195
|
res = Net::HTTP.start(uri.host, uri.port,
|
|
196
|
+
use_ssl: use_ssl?(uri),
|
|
95
197
|
open_timeout: @open_timeout,
|
|
96
198
|
read_timeout: @read_timeout) { |http| http.request(req) }
|
|
97
199
|
parsed = parse_body(res.body)
|
|
98
200
|
return parsed if res.is_a?(Net::HTTPSuccess)
|
|
201
|
+
# Callers opting in treat "route/resource not there" as a soft miss
|
|
202
|
+
# (e.g. delete_media against a 0.6.0 service) — the parsed error body
|
|
203
|
+
# carries no "success" key, so they degrade rather than raise.
|
|
204
|
+
return parsed if allow_404 && res.code.to_s == "404"
|
|
99
205
|
|
|
100
206
|
raise "service request failed (#{res.code}): #{parsed["error"] || res.body}"
|
|
101
207
|
end
|
data/lib/whatsapp_notifier.rb
CHANGED
|
@@ -66,6 +66,14 @@ module WhatsAppNotifier
|
|
|
66
66
|
client.fetch_inbound(provider: provider, metadata: metadata)
|
|
67
67
|
end
|
|
68
68
|
|
|
69
|
+
def fetch_media(message_id:, provider: nil, metadata: {})
|
|
70
|
+
client.fetch_media(message_id: message_id, provider: provider, metadata: metadata)
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def delete_media(message_id:, provider: nil, metadata: {})
|
|
74
|
+
client.delete_media(message_id: message_id, provider: provider, metadata: metadata)
|
|
75
|
+
end
|
|
76
|
+
|
|
69
77
|
def logout(provider: nil, metadata: {})
|
|
70
78
|
client.logout(provider: provider, metadata: metadata)
|
|
71
79
|
end
|
data/spec/client_spec.rb
CHANGED
|
@@ -67,6 +67,27 @@ RSpec.describe WhatsAppNotifier::Client do
|
|
|
67
67
|
end
|
|
68
68
|
end
|
|
69
69
|
|
|
70
|
+
it "delegates fetch_media and delete_media to the provider" do
|
|
71
|
+
Dir.mktmpdir do |dir|
|
|
72
|
+
config.provider = :web_automation
|
|
73
|
+
config.web_automation_enabled = true
|
|
74
|
+
config.web_session_path = File.join(dir, "session.json")
|
|
75
|
+
config.web_adapter = double(
|
|
76
|
+
send_message: { success: true, session: {} },
|
|
77
|
+
fetch_qr_code: "qr",
|
|
78
|
+
connection_status: { state: "AUTHENTICATED", authenticated: true },
|
|
79
|
+
fetch_media: { body: "bytes", mime: "image/jpeg", filename: nil, size: 5 },
|
|
80
|
+
delete_media: { success: true }
|
|
81
|
+
)
|
|
82
|
+
client = described_class.new(configuration: config)
|
|
83
|
+
|
|
84
|
+
expect(client.fetch_media(message_id: "m1", provider: :web_automation, metadata: { user_id: 1 }))
|
|
85
|
+
.to include(body: "bytes", size: 5)
|
|
86
|
+
expect(client.delete_media(message_id: "m1", provider: :web_automation, metadata: { user_id: 1 }))
|
|
87
|
+
.to eq(success: true)
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
|
|
70
91
|
it "delegates logout to the provider" do
|
|
71
92
|
Dir.mktmpdir do |dir|
|
|
72
93
|
config.provider = :web_automation
|