@daboss2003/liveness-web 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/engine.ts ADDED
@@ -0,0 +1,913 @@
1
/** Optional hooks the host UI can register to follow session progress. */
export type LivenessCallbacks = {
  /** Fired when a new challenge step becomes active. */
  onChallengeChanged?: (stepIndex: number, stepLabel: string) => void;
  /** Fired once when the session fails; `reason` is a human-readable message. */
  onFailure?: (reason: string) => void;
  /** Fired once on success with the captured selfie as base64 JPEG (no data-URL prefix). */
  onSuccess?: (imageBase64: string) => void;
  /** Fired whenever the face enters/leaves the guide oval; `reason` explains why when outside. */
  onFaceInOval?: (inside: boolean, reason?: string) => void;
  /** Per-frame debug info; `metrics` is null when no face is detected. */
  onDebugFrame?: (info: { hasFace: boolean; metrics: Metrics | null; step: string }) => void;
};
8
+
9
/**
 * Audio cue configuration. Each named key is a full URL override for that cue;
 * when a key is not set but `baseUrl` is, the URL is derived as
 * `baseUrl + key + ".mp3"`. With neither set, that cue is silent.
 */
export type LivenessSoundOptions = {
  baseUrl?: string;
  left?: string;
  blink?: string;
  right?: string;
  nod?: string;
  mouth?: string;
  good?: string;
  capture?: string;
};
19
+
20
/** Construction options for {@link LivenessEngine}. */
export type LivenessOptions = {
  /** Live camera preview element; the engine attaches the stream and reads frames from it. */
  videoElement: HTMLVideoElement;
  /** Canvas used to grab the final still image. */
  canvasElement: HTMLCanvasElement;
  /** Override for the face-landmarker model URL (defaults to DEFAULT_MODEL_URL). */
  modelUrl?: string;
  /** Override for the tasks-vision WASM base URL (defaults to DEFAULT_WASM_URL). */
  wasmUrl?: string;
  callbacks?: LivenessCallbacks;
  sounds?: LivenessSoundOptions;
};
28
+
29
+ export const DEFAULT_MODEL_URL =
30
+ "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task";
31
+
32
+ export const DEFAULT_WASM_URL =
33
+ "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm";
34
+
35
+ /** Error code when CDN/assets are unavailable after retries (internet confirmed). */
36
+ export const LIVENESS_ERROR_CDN_NOT_AVAILABLE = "cdnNotAvailable" as const;
37
+ /** Error code when the user has no internet connection. */
38
+ export const LIVENESS_ERROR_OFFLINE = "offline" as const;
39
+
40
+ export function isCdnNotAvailableError(reason: string): boolean {
41
+ return reason === LIVENESS_ERROR_CDN_NOT_AVAILABLE;
42
+ }
43
+ export function isOfflineError(reason: string): boolean {
44
+ return reason === LIVENESS_ERROR_OFFLINE;
45
+ }
46
+
47
+ export class LivenessError extends Error {
48
+ constructor(
49
+ public readonly code: typeof LIVENESS_ERROR_CDN_NOT_AVAILABLE | typeof LIVENESS_ERROR_OFFLINE,
50
+ message: string
51
+ ) {
52
+ super(message);
53
+ this.name = "LivenessError";
54
+ Object.setPrototypeOf(this, LivenessError.prototype);
55
+ }
56
+ }
57
+
58
/** Lightweight probe endpoint (returns 204, no body) used only to distinguish "offline" from "CDN down". */
const CONNECTIVITY_CHECK_URL = "https://www.gstatic.com/generate_204";
const CONNECTIVITY_CHECK_TIMEOUT_MS = 5000;
/** Per-attempt budget for downloading the tasks-vision module, WASM runtime and model. */
const LOAD_ATTEMPT_TIMEOUT_MS = 45000;
/** Maximum load attempts before failing with LIVENESS_ERROR_CDN_NOT_AVAILABLE. */
const MAX_CDN_RETRIES = 5;
62
+
63
+ async function checkConnectivity(): Promise<boolean> {
64
+ if (typeof navigator !== "undefined" && !navigator.onLine) return false;
65
+ try {
66
+ const res = await fetch(CONNECTIVITY_CHECK_URL, {
67
+ method: "HEAD",
68
+ signal: AbortSignal.timeout(CONNECTIVITY_CHECK_TIMEOUT_MS),
69
+ });
70
+ return res.ok;
71
+ } catch {
72
+ return false;
73
+ }
74
+ }
75
+
76
+ function isRetriableCdnError(error: unknown): boolean {
77
+ if (error instanceof TypeError) return true;
78
+ const msg = error instanceof Error ? error.message : String(error);
79
+ const lower = msg.toLowerCase();
80
+ const retriablePatterns = [
81
+ "fetch",
82
+ "network",
83
+ "wasm",
84
+ "webassembly",
85
+ "load",
86
+ "404",
87
+ "503",
88
+ "502",
89
+ "500",
90
+ "timeout",
91
+ "failed to load",
92
+ ];
93
+ if (retriablePatterns.some((p) => lower.includes(p))) return true;
94
+ const status = (error as { status?: number })?.status;
95
+ if (typeof status === "number" && [404, 502, 503, 500].includes(status)) return true;
96
+ return false;
97
+ }
98
+
99
// ── Minimal structural typings for @mediapipe/tasks-vision ───────────────────
// The module is imported dynamically from a CDN, so these local shapes stand
// in for the real declarations.

type NormalizedLandmark = { x: number; y: number; z: number };
type BlendshapeCategory = { categoryName: string; score: number };

type FaceLandmarkerResult = {
  faceLandmarks: NormalizedLandmark[][];
  // Pose matrix per face; may arrive as a raw number[] or a MatrixData-like object.
  facialTransformationMatrixes?: Array<{ data?: number[] | Float32Array; layout?: number; rows?: number; cols?: number } | number[]>;
  faceBlendshapes?: Array<{ categories: BlendshapeCategory[] }>;
};

type FaceLandmarker = {
  detectForVideo: (video: HTMLVideoElement, timestampMs: number) => FaceLandmarkerResult;
  close: () => void;
};

type FilesetResolver = { forVisionTasks: (wasmUrl: string) => Promise<unknown> };
type TasksVisionModule = {
  FaceLandmarker: {
    createFromOptions: (vision: unknown, opts: Record<string, unknown>) => Promise<FaceLandmarker>;
  };
  FilesetResolver: FilesetResolver;
};

/** One challenge in the session; `index` is the position in the shuffled order. */
type LivenessStep = { index: number; label: string };
122
+
123
/** Per-frame face measurements consumed by the challenge state machine. */
export type Metrics = {
  yaw: number; // degrees, negative=left positive=right
  pitch: number; // degrees, negative=up positive=down
  ear: number; // eye aspect ratio (geometric fallback when blendshapes absent)
  mar: number; // mouth aspect ratio (geometric fallback when blendshapes absent)
  blinkScore: number; // 0=open → 1=closed
  mouthScore: number; // 0=closed → 1=open
  faceCx: number; // normalised face centre x (mean of all landmarks)
  faceCy: number; // normalised face centre y (mean of all landmarks)
  faceSize: number; // normalised inter-eye distance
};
134
+
135
/** The pool of challenge prompts; a session runs a shuffled ordering of these. */
const STEP_LABELS = [
  "Turn your head LEFT",
  "Blink",
  "Turn your head RIGHT",
  "Nod your head",
  "Open your mouth",
] as const;

/** Step label → sound key (filename without .mp3). Used so the correct sound plays regardless of randomized step order. */
const STEP_LABEL_TO_SOUND: Record<string, string> = {
  "Turn your head LEFT": "left",
  "Blink": "blink",
  "Turn your head RIGHT": "right",
  "Nod your head": "nod",
  "Open your mouth": "mouth",
};
151
+
152
+ function shuffleArray<T>(array: readonly T[]): T[] {
153
+ const out = [...array];
154
+ for (let i = out.length - 1; i > 0; i--) {
155
+ const j = Math.floor(Math.random() * (i + 1));
156
+ [out[i], out[j]] = [out[j], out[i]];
157
+ }
158
+ return out;
159
+ }
160
+
161
// NOTE: challenge order is randomized once per module load, not per session —
// calling start() again reuses the same shuffled order.
const steps: LivenessStep[] = shuffleArray(STEP_LABELS).map((label, index) => ({ index, label }));

/** Total number of challenge steps in a session. */
export const LIVENESS_STEP_COUNT = steps.length;
164
+
165
// ─────────────────────────────────────────────────────────────────────────────
// KEY DESIGN: RELATIVE MEASUREMENT
//
// Rather than fixed absolute thresholds, the engine samples the user's
// resting yaw/pitch at the start of each step (during the readyMs window)
// and measures CHANGE FROM THAT BASELINE.
//
// This fixes the core UX problem: someone sitting slightly turned or with
// a slightly tilted monitor should not need to fight their natural position.
//
// Head turn LEFT: yaw delta < -12° from baseline (a natural glance)
// Head turn RIGHT: yaw delta > +12° from baseline
// Nod down: pitch delta > +8° from baseline (chin dips toward chest)
// Nod return: back within min(40% of peak depth, 5°) of baseline to complete
//
// Blink and mouth use blendshapes which are already camera-relative.
// ─────────────────────────────────────────────────────────────────────────────
/** All tunables for the state machine, oval gating and the final capture. */
const config = {
  readyMs: 1800, // ms to sample baseline before evaluating
  sessionTimeoutMs: 120000,

  // ── Baseline sampling ──────────────────────────────────────────────────────
  // Number of frames averaged to produce the resting baseline per step
  baselineFrames: 8,

  // ── Head turns (relative to baseline) ─────────────────────────────────────
  yawTurnDelta: 12, // degrees of YAW change needed from rest
  yawWrongDirDelta: 16, // block if turned clearly the WRONG way
  headTurnHoldMs: 120, // sustain the turned pose for this long

  // ── Nod (relative to baseline) ────────────────────────────────────────────
  nodDownDelta: 8, // chin must DROP by this many degrees from baseline
  nodReturnFraction: 0.40, // return to 40% of peak nod depth to complete
  nodReturnMaxDelta: 5, // cap: never require returning past 5° from baseline
  maxYawDuringNod: 22,

  // ── Blink ──────────────────────────────────────────────────────────────────
  blinkClosedThreshold: 0.35, // blendshape score = eyes closed
  blinkOpenThreshold: 0.20, // blendshape score = eyes open
  earClosedThreshold: 0.20,
  earOpenThreshold: 0.25,
  blinkMaxDurationMs: 4000,
  maxYawDuringBlink: 25,
  maxPitchDuringBlink: 25,

  // ── Mouth ──────────────────────────────────────────────────────────────────
  mouthOpenThreshold: 0.28, // jawOpen blendshape
  mouthOpenMarThreshold: 0.28,
  mouthHoldMs: 120,
  maxYawDuringMouth: 25,
  maxPitchDuringMouth: 25,

  // ── Face-in-oval ───────────────────────────────────────────────────────────
  ovalCx: 0.50,
  ovalCy: 0.42,
  ovalRx: 0.32,
  ovalRy: 0.40,
  minFaceSize: 0.10,
  maxFaceSize: 0.62,
  headTurnSteps: new Set(["Turn your head LEFT", "Turn your head RIGHT"]),

  // ── Capture ────────────────────────────────────────────────────────────────
  captureDelayMs: 700,
  captureMaxAttempts: 90,
  captureMaxYaw: 18,
  captureMaxPitch: 18,
  captureMaxMouthScore: 0.20,
  captureMaxBlinkScore: 0.25,
  captureMinEar: 0.22,
  captureMaxMar: 0.22,
} as const;
236
+
237
+ // ─────────────────────────────────────────────────────────────────────────────
238
+
239
+ export class LivenessEngine {
240
+ private landmarker: FaceLandmarker | null = null;
241
+ private running = false;
242
+ private rafId: number | null = null;
243
+ private stream: MediaStream | null = null;
244
+
245
+ private stepIndex = 0;
246
+ private stepStart = 0;
247
+
248
+ // ── Baseline (sampled during readyMs window) ───────────────────────────────
249
+ private baselineYaw: number | null = null;
250
+ private baselinePitch: number | null = null;
251
+ private baselineSamples: Array<{ yaw: number; pitch: number }> = [];
252
+
253
+ // ── Per-step sub-state ─────────────────────────────────────────────────────
254
+ private blinkState: "waitingClose" | "closed" = "waitingClose";
255
+ private blinkCloseTs = 0;
256
+ private nodState: "neutral" | "down" = "neutral";
257
+ private holdStart: number | null = null;
258
+
259
+ private latestMetrics: Metrics | null = null;
260
+ private nodPeakDPitch: number = 0;
261
+ private lastDetectTs = -1;
262
+ private lastOvalState: boolean | null = null;
263
+ private stepSoundPlayedForCurrentStep = false;
264
+ private currentStepAudio: HTMLAudioElement | null = null;
265
+ private currentStepAudioCleanup: (() => void) | null = null;
266
+ private sessionTimeoutId: number | null = null;
267
+
268
+ constructor(private opts: LivenessOptions) {}
269
+
270
+ private playSound(url: string, onEnded?: () => void): void {
271
+ const a = new Audio(url);
272
+ if (onEnded) {
273
+ const done = () => {
274
+ a.removeEventListener("ended", done);
275
+ a.removeEventListener("error", done);
276
+ onEnded();
277
+ };
278
+ a.addEventListener("ended", done);
279
+ a.addEventListener("error", done);
280
+ }
281
+ a.play().catch(() => onEnded?.());
282
+ }
283
+
284
+ private getSoundUrl(key: string): string | undefined {
285
+ const s = this.opts.sounds;
286
+ if (!s) return undefined;
287
+ const override = (s as Record<string, string | undefined>)[key];
288
+ if (override) return override;
289
+ const base = s.baseUrl;
290
+ if (!base) return undefined;
291
+ const baseNorm = base.replace(/\/?$/, "/");
292
+ return baseNorm + key + ".mp3";
293
+ }
294
+
295
+ private playStepSound(stepLabel: string): void {
296
+ const key = STEP_LABEL_TO_SOUND[stepLabel];
297
+ if (!key) return;
298
+ const url = this.getSoundUrl(key);
299
+ if (!url) return;
300
+ this.stopStepSound();
301
+ const a = new Audio(url);
302
+ const done = () => {
303
+ a.removeEventListener("ended", done);
304
+ a.removeEventListener("error", done);
305
+ if (this.currentStepAudio === a) this.currentStepAudio = null;
306
+ if (this.currentStepAudioCleanup === cleanup) this.currentStepAudioCleanup = null;
307
+ };
308
+ const cleanup = () => {
309
+ a.removeEventListener("ended", done);
310
+ a.removeEventListener("error", done);
311
+ };
312
+ this.currentStepAudio = a;
313
+ this.currentStepAudioCleanup = cleanup;
314
+ a.addEventListener("ended", done);
315
+ a.addEventListener("error", done);
316
+ a.play().catch(() => done());
317
+ }
318
+
319
+ private stopStepSound(): void {
320
+ if (this.currentStepAudio) {
321
+ this.currentStepAudio.pause();
322
+ this.currentStepAudio.currentTime = 0;
323
+ }
324
+ this.currentStepAudioCleanup?.();
325
+ this.currentStepAudioCleanup = null;
326
+ this.currentStepAudio = null;
327
+ }
328
+
329
+ private clearSessionTimeout(): void {
330
+ if (this.sessionTimeoutId != null) {
331
+ clearTimeout(this.sessionTimeoutId);
332
+ this.sessionTimeoutId = null;
333
+ }
334
+ }
335
+
336
+ private playGoodSound(onEnded?: () => void): void {
337
+ const url = this.getSoundUrl("good");
338
+ if (url) this.playSound(url, onEnded);
339
+ else onEnded?.();
340
+ }
341
+
342
+ private playCaptureSound(onEnded?: () => void): void {
343
+ const url = this.getSoundUrl("capture");
344
+ if (url) this.playSound(url, onEnded);
345
+ else onEnded?.();
346
+ }
347
+
348
+ // ── Public ─────────────────────────────────────────────────────────────────
349
+
350
+ async start(): Promise<void> {
351
+ this.stopDetectionOnly();
352
+ this.running = true;
353
+ this.stepIndex = 0;
354
+ this.lastDetectTs = -1;
355
+ this.lastOvalState = null;
356
+ const now = performance.now();
357
+ this.stepStart = now + config.readyMs;
358
+ this.resetStepState();
359
+ this.stepSoundPlayedForCurrentStep = false;
360
+ this.clearSessionTimeout();
361
+ this.sessionTimeoutId = setTimeout(() => {
362
+ if (this.running) this.fail("Timed out. Please try again.");
363
+ }, config.sessionTimeoutMs);
364
+ this.opts.callbacks?.onChallengeChanged?.(steps[0].index, steps[0].label);
365
+ await this.ensureVideo();
366
+ this.landmarker = await createLandmarkerWithRetry(this.opts, MAX_CDN_RETRIES);
367
+ this.loop();
368
+ }
369
+
370
+ stop(): void {
371
+ this.stopDetectionOnly();
372
+ this.stream?.getTracks().forEach(t => t.stop());
373
+ this.stream = null;
374
+ }
375
+
376
+ private stopDetectionOnly(): void {
377
+ this.running = false;
378
+ this.clearSessionTimeout();
379
+ this.stopStepSound();
380
+ if (this.rafId != null) { cancelAnimationFrame(this.rafId); this.rafId = null; }
381
+ if (this.landmarker) { this.landmarker.close(); this.landmarker = null; }
382
+ }
383
+
384
+ // ── Video ──────────────────────────────────────────────────────────────────
385
+
386
+ private async ensureVideo(): Promise<void> {
387
+ const video = this.opts.videoElement;
388
+ if (!video.srcObject) {
389
+ this.stream = await navigator.mediaDevices.getUserMedia({
390
+ video: { facingMode: "user", width: { ideal: 640 }, height: { ideal: 480 } },
391
+ audio: false,
392
+ });
393
+ video.srcObject = this.stream;
394
+ } else {
395
+ this.stream = video.srcObject as MediaStream;
396
+ }
397
+ video.playsInline = true;
398
+ await video.play();
399
+ await new Promise<void>(resolve => {
400
+ const check = () =>
401
+ video.readyState >= 2 && video.videoWidth > 0
402
+ ? resolve()
403
+ : requestAnimationFrame(check);
404
+ check();
405
+ });
406
+ }
407
+
408
+ // ── Landmarker ─────────────────────────────────────────────────────────────
409
+
410
+ private async createLandmarker(): Promise<FaceLandmarker> {
411
+ return loadLandmarkerOnce(
412
+ this.opts.modelUrl ?? DEFAULT_MODEL_URL,
413
+ this.opts.wasmUrl ?? DEFAULT_WASM_URL
414
+ );
415
+ }
416
+
417
+ // ── Loop ───────────────────────────────────────────────────────────────────
418
+
419
+ private loop(): void {
420
+ if (!this.running || !this.landmarker) return;
421
+
422
+ const now = performance.now();
423
+ const ts = now > this.lastDetectTs ? now : this.lastDetectTs + 1;
424
+ this.lastDetectTs = ts;
425
+
426
+ const result = this.landmarker.detectForVideo(this.opts.videoElement, ts);
427
+ const faceCount = result.faceLandmarks?.length ?? 0;
428
+ if (faceCount > 1) {
429
+ this.fail("Multiple faces detected. Please ensure only one person is in view.");
430
+ return;
431
+ }
432
+ const hasFace = faceCount > 0;
433
+
434
+ if (hasFace) {
435
+ const metrics = extractMetrics(result);
436
+ this.latestMetrics = metrics;
437
+
438
+ const { inside, reason } = this.checkFaceInOval(metrics);
439
+ if (inside !== this.lastOvalState) {
440
+ this.lastOvalState = inside;
441
+ this.opts.callbacks?.onFaceInOval?.(inside, reason);
442
+ }
443
+
444
+ // ── Sample baseline (keep sampling until captured) ─────────────────────
445
+ if (this.baselineYaw === null && inside) {
446
+ this.baselineSamples.push({ yaw: metrics.yaw, pitch: metrics.pitch });
447
+ if (this.baselineSamples.length >= config.baselineFrames) {
448
+ const yaws = this.baselineSamples.map(s => s.yaw).sort((a, b) => a - b);
449
+ const pitches = this.baselineSamples.map(s => s.pitch).sort((a, b) => a - b);
450
+ const mid = Math.floor(yaws.length / 2);
451
+ this.baselineYaw = yaws[mid];
452
+ this.baselinePitch = pitches[mid];
453
+ }
454
+ }
455
+
456
+ this.opts.callbacks?.onDebugFrame?.({
457
+ hasFace: true, metrics,
458
+ step: steps[this.stepIndex]?.label ?? "done",
459
+ });
460
+
461
+ if (inside) {
462
+ if (!this.stepSoundPlayedForCurrentStep && this.stepIndex < steps.length) {
463
+ this.stepSoundPlayedForCurrentStep = true;
464
+ this.playStepSound(steps[this.stepIndex].label);
465
+ }
466
+ if (this.updateState(metrics, now) === "passed") {
467
+ this.scheduleCapture();
468
+ return;
469
+ }
470
+ }
471
+ } else {
472
+ if (this.lastOvalState !== false) {
473
+ this.lastOvalState = false;
474
+ this.opts.callbacks?.onFaceInOval?.(false, "No face detected");
475
+ }
476
+ this.opts.callbacks?.onDebugFrame?.({
477
+ hasFace: false, metrics: null,
478
+ step: steps[this.stepIndex]?.label ?? "done",
479
+ });
480
+ }
481
+
482
+ this.rafId = requestAnimationFrame(() => this.loop());
483
+ }
484
+
485
+ // ── Oval check ─────────────────────────────────────────────────────────────
486
+
487
+ private checkFaceInOval(m: Metrics): { inside: boolean; reason?: string } {
488
+ const isHeadTurn = config.headTurnSteps.has(steps[this.stepIndex]?.label ?? "");
489
+ const mx = 1 - m.faceCx; // mirror x to match CSS scaleX(-1)
490
+ const dy = (m.faceCy - config.ovalCy) / config.ovalRy;
491
+ const dx = (mx - config.ovalCx) / config.ovalRx;
492
+
493
+ // During head turns only check vertical position — x drifts intentionally
494
+ const inEllipse = isHeadTurn
495
+ ? Math.abs(dy) <= 1
496
+ : dx * dx + dy * dy <= 1;
497
+
498
+ if (!inEllipse) {
499
+ if (Math.abs(dy) >= Math.abs(dx)) {
500
+ return { inside: false, reason: dy < 0 ? "Move down slightly" : "Move up slightly" };
501
+ }
502
+ return { inside: false, reason: dx < 0 ? "Move right" : "Move left" };
503
+ }
504
+
505
+ if (m.faceSize < config.minFaceSize) return { inside: false, reason: "Move closer to the camera" };
506
+ if (m.faceSize > config.maxFaceSize) return { inside: false, reason: "Move back a little" };
507
+
508
+ return { inside: true };
509
+ }
510
+
511
+ // ── State machine ──────────────────────────────────────────────────────────
512
+
513
+ private resetStepState(): void {
514
+ this.blinkState = "waitingClose";
515
+ this.blinkCloseTs = 0;
516
+ this.nodState = "neutral";
517
+ this.holdStart = null;
518
+ this.baselineYaw = null;
519
+ this.baselinePitch = null;
520
+ this.baselineSamples = [];
521
+ }
522
+
523
+ private updateState(metrics: Metrics, now: number): "passed" | "none" {
524
+ if (now < this.stepStart) return "none"; // in ready countdown
525
+ if (this.baselineYaw === null || this.baselinePitch === null) return "none";
526
+
527
+ const bYaw = this.baselineYaw ?? metrics.yaw;
528
+ const bPitch = this.baselinePitch ?? metrics.pitch;
529
+ const dYaw = metrics.yaw - bYaw;
530
+ const dPitch = metrics.pitch - bPitch;
531
+
532
+ switch (steps[this.stepIndex].label) {
533
+
534
+ // ── LEFT turn (negative yaw delta = turning left from rest) ─────────────
535
+ case "Turn your head LEFT": {
536
+ if (dYaw > config.yawWrongDirDelta) { this.holdStart = null; return "none"; }
537
+ if (dYaw < -config.yawTurnDelta) {
538
+ if (this.holdStart === null) this.holdStart = now;
539
+ if (now - this.holdStart >= config.headTurnHoldMs) return this.advanceStep(now);
540
+ } else {
541
+ this.holdStart = null;
542
+ }
543
+ break;
544
+ }
545
+
546
+ // ── RIGHT turn (positive yaw delta = turning right from rest) ──────────
547
+ case "Turn your head RIGHT": {
548
+ if (dYaw < -config.yawWrongDirDelta) { this.holdStart = null; return "none"; }
549
+ if (dYaw > config.yawTurnDelta) {
550
+ if (this.holdStart === null) this.holdStart = now;
551
+ if (now - this.holdStart >= config.headTurnHoldMs) return this.advanceStep(now);
552
+ } else {
553
+ this.holdStart = null;
554
+ }
555
+ break;
556
+ }
557
+
558
+ // ── BLINK ──────────────────────────────────────────────────────────────
559
+ case "Blink": {
560
+ if (Math.abs(metrics.yaw) > config.maxYawDuringBlink ||
561
+ Math.abs(metrics.pitch) > config.maxPitchDuringBlink) return "none";
562
+
563
+ const isEyeClosed = metrics.blinkScore > 0
564
+ ? metrics.blinkScore > config.blinkClosedThreshold
565
+ : metrics.ear < config.earClosedThreshold;
566
+
567
+ const isEyeOpen = metrics.blinkScore > 0
568
+ ? metrics.blinkScore < config.blinkOpenThreshold
569
+ : metrics.ear > config.earOpenThreshold;
570
+
571
+ if (this.blinkState === "waitingClose" && isEyeClosed) {
572
+ this.blinkState = "closed";
573
+ this.blinkCloseTs = now;
574
+ } else if (this.blinkState === "closed" && isEyeOpen) {
575
+ if (now - this.blinkCloseTs <= config.blinkMaxDurationMs) return this.advanceStep(now);
576
+ this.blinkState = "waitingClose";
577
+ }
578
+ break;
579
+ }
580
+
581
+ // ── NOD (dPitch > 0 = chin dropping; completion = back within nodReturnDelta) ─
582
+ case "Nod your head": {
583
+ if (Math.abs(dYaw) > config.maxYawDuringNod) return "none";
584
+
585
+ if (this.nodState === "neutral") {
586
+ if (dPitch > config.nodDownDelta) {
587
+ this.nodState = "down";
588
+ this.nodPeakDPitch = dPitch;
589
+ }
590
+ } else if (this.nodState === "down") {
591
+ // Keep updating peak in case they nod deeper
592
+ if (dPitch > this.nodPeakDPitch) this.nodPeakDPitch = dPitch;
593
+
594
+ // Return target: proportional to how deep they nodded,
595
+ // capped so a very deep nod doesn't need a huge return
596
+ const returnTarget = Math.min(
597
+ this.nodPeakDPitch * config.nodReturnFraction,
598
+ config.nodReturnMaxDelta
599
+ );
600
+
601
+ if (dPitch < returnTarget) return this.advanceStep(now);
602
+ }
603
+ break;
604
+ }
605
+
606
+ // ── OPEN MOUTH ─────────────────────────────────────────────────────────
607
+ case "Open your mouth": {
608
+ if (Math.abs(metrics.yaw) > config.maxYawDuringMouth ||
609
+ Math.abs(metrics.pitch) > config.maxPitchDuringMouth) return "none";
610
+
611
+ const isMouthOpen = metrics.mouthScore > 0
612
+ ? metrics.mouthScore > config.mouthOpenThreshold
613
+ : metrics.mar > config.mouthOpenMarThreshold;
614
+
615
+ if (isMouthOpen) {
616
+ if (this.holdStart === null) this.holdStart = now;
617
+ // Short hold prevents accidental trigger from talking/yawning
618
+ if (now - this.holdStart >= config.mouthHoldMs) return this.advanceStep(now);
619
+ } else {
620
+ this.holdStart = null;
621
+ }
622
+ break;
623
+ }
624
+ }
625
+
626
+ return "none";
627
+ }
628
+
629
+ private advanceStep(now: number): "passed" | "none" {
630
+ this.stopStepSound();
631
+ this.stepIndex += 1;
632
+ if (this.stepIndex >= steps.length) {
633
+ this.playGoodSound();
634
+ return "passed";
635
+ }
636
+ this.stepStart = now + config.readyMs;
637
+ this.resetStepState();
638
+ this.stepSoundPlayedForCurrentStep = true;
639
+ const step = steps[this.stepIndex];
640
+ this.opts.callbacks?.onChallengeChanged?.(step.index, step.label);
641
+ this.playGoodSound(() => this.playStepSound(step.label));
642
+ return "none";
643
+ }
644
+
645
+ private fail(reason: string): void {
646
+ this.opts.callbacks?.onFailure?.(reason);
647
+ this.stopDetectionOnly();
648
+ }
649
+
650
+ // ── Capture ────────────────────────────────────────────────────────────────
651
+
652
+ private scheduleCapture(): void {
653
+ let attempts = 0;
654
+
655
+ // Tell the UI to prompt the user to relax their face
656
+ this.opts.callbacks?.onChallengeChanged?.(-1, "Relax and look at the camera");
657
+
658
+ const tryCapture = () => {
659
+ if (!this.running || !this.landmarker) return;
660
+ attempts++;
661
+
662
+ const now = performance.now();
663
+ const ts = now > this.lastDetectTs ? now : this.lastDetectTs + 1;
664
+ this.lastDetectTs = ts;
665
+
666
+ const result = this.landmarker.detectForVideo(this.opts.videoElement, ts);
667
+ const faceCount = result.faceLandmarks?.length ?? 0;
668
+ if (faceCount > 1) {
669
+ this.fail("Multiple faces detected. Please ensure only one person is in view.");
670
+ return;
671
+ }
672
+ if (faceCount > 0) {
673
+ const metrics = extractMetrics(result);
674
+ this.latestMetrics = metrics;
675
+
676
+ // ── Neutral face check ─────────────────────────────────────────────
677
+ // Head must be roughly forward
678
+ const headFrontal =
679
+ Math.abs(metrics.yaw) <= config.captureMaxYaw &&
680
+ Math.abs(metrics.pitch) <= config.captureMaxPitch;
681
+
682
+ // Eyes must be open (not blinking or squinting)
683
+ const eyesOpen = metrics.blinkScore > 0
684
+ ? metrics.blinkScore < config.captureMaxBlinkScore
685
+ : metrics.ear >= config.captureMinEar;
686
+
687
+ // Mouth must be closed — this is the key fix
688
+ const mouthClosed = metrics.mouthScore > 0
689
+ ? metrics.mouthScore < config.captureMaxMouthScore
690
+ : metrics.mar < config.captureMaxMar;
691
+
692
+ if (headFrontal && eyesOpen && mouthClosed) {
693
+ this.captureImage();
694
+ return;
695
+ }
696
+ }
697
+
698
+ if (attempts >= config.captureMaxAttempts) {
699
+ this.fail("Please look straight at the camera with a relaxed expression.");
700
+ return;
701
+ }
702
+
703
+ this.rafId = requestAnimationFrame(tryCapture);
704
+ };
705
+
706
+ const startCaptureLoop = () => {
707
+ // Short delay so the user has time to close their mouth after the last step
708
+ setTimeout(() => { this.rafId = requestAnimationFrame(tryCapture); }, config.captureDelayMs);
709
+ };
710
+
711
+ // Play capture sound and only start the capture loop after it finishes
712
+ this.playCaptureSound(startCaptureLoop);
713
+ }
714
+
715
+ private captureImage(): void {
716
+ const canvas = this.opts.canvasElement;
717
+ const video = this.opts.videoElement;
718
+ canvas.width = video.videoWidth;
719
+ canvas.height = video.videoHeight;
720
+ const ctx = canvas.getContext("2d");
721
+ if (!ctx) { this.fail("Canvas unavailable"); return; }
722
+ ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
723
+ const base64 = canvas.toDataURL("image/jpeg", 0.95).split(",")[1] ?? "";
724
+ this.opts.callbacks?.onSuccess?.(base64);
725
+ this.stop();
726
+ }
727
+ }
728
+
729
+ // ── Metric extraction ────────────────────────────────────────────────────────
730
+
731
+ function extractMetrics(result: FaceLandmarkerResult): Metrics {
732
+ const lks = result.faceLandmarks[0];
733
+ const { yaw, pitch } = extractPose(result, lks);
734
+ const { leftEar, rightEar } = computeEar(lks);
735
+ const mar = computeMar(lks);
736
+
737
+ const bs = result.faceBlendshapes?.[0]?.categories ?? [];
738
+ const getBS = (name: string) => bs.find(c => c.categoryName === name)?.score ?? 0;
739
+
740
+ const blinkL = getBS("eyeBlinkLeft"), blinkR = getBS("eyeBlinkRight");
741
+ const blinkScore = (blinkL > 0 || blinkR > 0) ? (blinkL + blinkR) / 2 : 0;
742
+ const mouthScore = getBS("jawOpen");
743
+
744
+ // Face centre: mean of all landmarks
745
+ let sumX = 0, sumY = 0;
746
+ for (const lm of lks) { sumX += lm.x; sumY += lm.y; }
747
+ const faceCx = sumX / lks.length;
748
+ const faceCy = sumY / lks.length;
749
+
750
+ // Face size: normalised inter-eye distance
751
+ const faceSize = dist(lks[33], lks[263]);
752
+
753
+ return {
754
+ yaw, pitch,
755
+ ear: (leftEar + rightEar) / 2,
756
+ mar, blinkScore, mouthScore,
757
+ faceCx, faceCy, faceSize,
758
+ };
759
+ }
760
+
761
+ function extractPose(result: FaceLandmarkerResult, lks: NormalizedLandmark[]) {
762
+ const mats = result.facialTransformationMatrixes;
763
+ const first = Array.isArray(mats) ? mats[0] : undefined;
764
+ const data = Array.isArray(first) ? first
765
+ : first && "data" in (first as object)
766
+ ? (first as { data?: number[] | Float32Array }).data
767
+ : undefined;
768
+ const layout = !Array.isArray(first) && first && "layout" in (first as object)
769
+ ? (first as { layout?: number }).layout
770
+ : undefined;
771
+
772
+ if (data && data.length >= 16) {
773
+ // MatrixData is column-major by default; handle row-major if provided.
774
+ const rowMajor = layout === 1;
775
+ const r00 = rowMajor ? data[0] : data[0];
776
+ const r02 = rowMajor ? data[2] : data[8];
777
+ const r10 = rowMajor ? data[4] : data[1];
778
+ const r12 = rowMajor ? data[6] : data[9];
779
+ const r20 = rowMajor ? data[8] : data[2];
780
+ const r22 = rowMajor ? data[10] : data[10];
781
+
782
+ // Use the face forward vector (column 2) for stable yaw/pitch.
783
+ let fx = r02, fy = r12, fz = r22;
784
+ const fLen = Math.hypot(fx, fy, fz) || 1;
785
+ fx /= fLen; fy /= fLen; fz /= fLen;
786
+
787
+ const yaw = -Math.atan2(fx, fz); // negative=left, positive=right
788
+ const pitch = Math.atan2(-fy, Math.hypot(fx, fz)); // negative=up, positive=down
789
+
790
+ // Approximate roll from the right vector (column 0).
791
+ const rLen = Math.hypot(r00, r10, r20) || 1;
792
+ const roll = Math.atan2(r10 / rLen, r00 / rLen);
793
+ return {
794
+ yaw: toDeg(yaw),
795
+ pitch: toDeg(pitch),
796
+ roll: toDeg(roll),
797
+ };
798
+ }
799
+
800
+ // Landmark fallback
801
+ const le = lks[33], re = lks[263], n = lks[1], ch = lks[152];
802
+ return {
803
+ yaw: toDeg(Math.atan2(re.z - le.z, re.x - le.x)),
804
+ pitch: toDeg(Math.atan2(ch.y - n.y, ch.z - n.z)),
805
+ roll: toDeg(Math.atan2(re.y - le.y, re.x - le.x)),
806
+ };
807
+ }
808
+
809
+ function computeEar(lks: NormalizedLandmark[]) {
810
+ return {
811
+ leftEar: ear(lks[33], lks[133], lks[160], lks[158], lks[153], lks[144]),
812
+ rightEar: ear(lks[362],lks[263], lks[385], lks[387], lks[373], lks[380]),
813
+ };
814
+ }
815
+
816
+ function computeMar(lks: NormalizedLandmark[]) {
817
+ const h = dist(lks[61], lks[291]);
818
+ return h === 0 ? 0 : dist(lks[13], lks[14]) / h;
819
+ }
820
+
821
+ function ear(o: NormalizedLandmark, i: NormalizedLandmark, t1: NormalizedLandmark, t2: NormalizedLandmark, b1: NormalizedLandmark, b2: NormalizedLandmark) {
822
+ const h = dist(o, i);
823
+ return h === 0 ? 0 : (dist(t1, b1) + dist(t2, b2)) / (2 * h);
824
+ }
825
+
826
+ function dist(a: NormalizedLandmark, b: NormalizedLandmark) {
827
+ return Math.hypot(a.x - b.x, a.y - b.y);
828
+ }
829
+
830
/** Convert radians to degrees. */
function toDeg(r: number) {
  return (r * 180) / Math.PI;
}
831
+
832
+ async function loadTasksVision(): Promise<TasksVisionModule> {
833
+ return (await import("https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest")) as unknown as TasksVisionModule;
834
+ }
835
+
836
+ async function loadLandmarkerOnce(modelUrl: string, wasmUrl: string): Promise<FaceLandmarker> {
837
+ const module = await loadTasksVision();
838
+ const vision = await module.FilesetResolver.forVisionTasks(wasmUrl);
839
+ return module.FaceLandmarker.createFromOptions(vision, {
840
+ baseOptions: {
841
+ modelAssetPath: modelUrl,
842
+ delegate: "GPU",
843
+ },
844
+ runningMode: "VIDEO",
845
+ numFaces: 2,
846
+ outputFaceBlendshapes: true,
847
+ outputFacialTransformationMatrixes: true,
848
+ });
849
+ }
850
+
851
+ function withTimeout<T>(p: Promise<T>, ms: number): Promise<T> {
852
+ return new Promise((resolve, reject) => {
853
+ const t = setTimeout(() => reject(new Error("timeout")), ms);
854
+ p.then(
855
+ (v) => {
856
+ clearTimeout(t);
857
+ resolve(v);
858
+ },
859
+ (e) => {
860
+ clearTimeout(t);
861
+ reject(e);
862
+ }
863
+ );
864
+ });
865
+ }
866
+
867
+ async function createLandmarkerWithRetry(
868
+ opts: LivenessOptions,
869
+ maxAttempts: number
870
+ ): Promise<FaceLandmarker> {
871
+ const modelUrl = opts.modelUrl ?? DEFAULT_MODEL_URL;
872
+ const wasmUrl = opts.wasmUrl ?? DEFAULT_WASM_URL;
873
+ let lastError: unknown;
874
+
875
+ for (let attempt = 1; attempt <= maxAttempts; attempt++) {
876
+ try {
877
+ const landmarker = await withTimeout(
878
+ loadLandmarkerOnce(modelUrl, wasmUrl),
879
+ LOAD_ATTEMPT_TIMEOUT_MS
880
+ );
881
+ return landmarker;
882
+ } catch (err) {
883
+ lastError = err;
884
+ if (!isRetriableCdnError(err)) throw err;
885
+
886
+ if (attempt === 1) {
887
+ const online = await checkConnectivity();
888
+ if (!online) {
889
+ if (typeof console !== "undefined" && console.debug) {
890
+ console.debug("liveness: connectivity check failed (offline)");
891
+ }
892
+ throw new LivenessError(LIVENESS_ERROR_OFFLINE, "No internet connection");
893
+ }
894
+ }
895
+
896
+ if (attempt < maxAttempts) {
897
+ if (typeof console !== "undefined" && console.debug) {
898
+ console.debug(`liveness: cdn-retry attempt ${attempt + 1}/${maxAttempts}`);
899
+ }
900
+ } else {
901
+ if (typeof console !== "undefined" && console.debug) {
902
+ console.debug("liveness: cdnNotAvailable after max retries");
903
+ }
904
+ throw new LivenessError(
905
+ LIVENESS_ERROR_CDN_NOT_AVAILABLE,
906
+ "CDN not available. Please try again later."
907
+ );
908
+ }
909
+ }
910
+ }
911
+
912
+ throw lastError;
913
+ }