@doedja/scenecut 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +158 -107
- package/bin/cli.js +91 -25
- package/dist/decoder/ffmpeg-decoder.d.ts +15 -3
- package/dist/decoder/ffmpeg-decoder.d.ts.map +1 -1
- package/dist/decoder/ffmpeg-decoder.js +135 -14
- package/dist/decoder/ffmpeg-decoder.js.map +1 -1
- package/dist/detection/detector.d.ts.map +1 -1
- package/dist/detection/detector.js +134 -17
- package/dist/detection/detector.js.map +1 -1
- package/dist/detection/temporal-smoother.d.ts +32 -0
- package/dist/detection/temporal-smoother.d.ts.map +1 -0
- package/dist/detection/temporal-smoother.js +88 -0
- package/dist/detection/temporal-smoother.js.map +1 -0
- package/dist/detection/wasm-bridge.d.ts +26 -23
- package/dist/detection/wasm-bridge.d.ts.map +1 -1
- package/dist/detection/wasm-bridge.js +107 -62
- package/dist/detection/wasm-bridge.js.map +1 -1
- package/dist/detection.wasm.js +1 -1
- package/dist/detection.wasm.wasm +0 -0
- package/dist/index.d.ts +13 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +27 -1
- package/dist/index.js.map +1 -1
- package/dist/keyframes.cjs.js +488 -94
- package/dist/keyframes.cjs.js.map +1 -1
- package/dist/keyframes.esm.js +487 -95
- package/dist/keyframes.esm.js.map +1 -1
- package/dist/types/index.d.ts +36 -1
- package/dist/types/index.d.ts.map +1 -1
- package/package.json +1 -1
package/dist/keyframes.esm.js
CHANGED
|
@@ -255,31 +255,66 @@ class RingBuffer {
|
|
|
255
255
|
this.availableBytes += chunkSize;
|
|
256
256
|
}
|
|
257
257
|
/**
|
|
258
|
-
* Read data from the ring buffer
|
|
258
|
+
* Read data from the ring buffer (allocates new buffer)
|
|
259
259
|
*/
|
|
260
260
|
read(size) {
|
|
261
261
|
if (size > this.availableBytes) {
|
|
262
262
|
throw new Error('RingBuffer underflow: not enough data available');
|
|
263
263
|
}
|
|
264
264
|
const result = Buffer.allocUnsafe(size);
|
|
265
|
+
this.readIntoBuffer(result, 0, size);
|
|
266
|
+
return result;
|
|
267
|
+
}
|
|
268
|
+
/**
|
|
269
|
+
* Read data directly into a pre-allocated Uint8Array (zero-allocation)
|
|
270
|
+
*/
|
|
271
|
+
readInto(target, offset, size) {
|
|
272
|
+
if (size > this.availableBytes) {
|
|
273
|
+
throw new Error('RingBuffer underflow: not enough data available');
|
|
274
|
+
}
|
|
265
275
|
const endSpace = this.capacity - this.readPos;
|
|
266
276
|
if (size <= endSpace) {
|
|
267
277
|
// No wrap-around needed
|
|
268
|
-
|
|
278
|
+
for (let i = 0; i < size; i++) {
|
|
279
|
+
target[offset + i] = this.buffer[this.readPos + i];
|
|
280
|
+
}
|
|
269
281
|
this.readPos += size;
|
|
270
282
|
}
|
|
271
283
|
else {
|
|
272
284
|
// Wrap-around: split read
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
285
|
+
for (let i = 0; i < endSpace; i++) {
|
|
286
|
+
target[offset + i] = this.buffer[this.readPos + i];
|
|
287
|
+
}
|
|
288
|
+
const remaining = size - endSpace;
|
|
289
|
+
for (let i = 0; i < remaining; i++) {
|
|
290
|
+
target[offset + endSpace + i] = this.buffer[i];
|
|
291
|
+
}
|
|
292
|
+
this.readPos = remaining;
|
|
276
293
|
}
|
|
277
294
|
// Wrap read position if at end
|
|
278
295
|
if (this.readPos >= this.capacity) {
|
|
279
296
|
this.readPos = 0;
|
|
280
297
|
}
|
|
281
298
|
this.availableBytes -= size;
|
|
282
|
-
|
|
299
|
+
}
|
|
300
|
+
/**
|
|
301
|
+
* Internal: read into a Node.js Buffer
|
|
302
|
+
*/
|
|
303
|
+
readIntoBuffer(result, offset, size) {
|
|
304
|
+
const endSpace = this.capacity - this.readPos;
|
|
305
|
+
if (size <= endSpace) {
|
|
306
|
+
this.buffer.copy(result, offset, this.readPos, this.readPos + size);
|
|
307
|
+
this.readPos += size;
|
|
308
|
+
}
|
|
309
|
+
else {
|
|
310
|
+
this.buffer.copy(result, offset, this.readPos, this.capacity);
|
|
311
|
+
this.buffer.copy(result, offset + endSpace, 0, size - endSpace);
|
|
312
|
+
this.readPos = size - endSpace;
|
|
313
|
+
}
|
|
314
|
+
if (this.readPos >= this.capacity) {
|
|
315
|
+
this.readPos = 0;
|
|
316
|
+
}
|
|
317
|
+
this.availableBytes -= size;
|
|
283
318
|
}
|
|
284
319
|
/**
|
|
285
320
|
* Get number of bytes available to read
|
|
@@ -308,7 +343,7 @@ class FFmpegDecoder {
|
|
|
308
343
|
this.frameBuffer = new FrameBuffer(this.options.maxBufferFrames);
|
|
309
344
|
}
|
|
310
345
|
/**
|
|
311
|
-
* Get video metadata
|
|
346
|
+
* Get video metadata (with richer codec/format info)
|
|
312
347
|
*/
|
|
313
348
|
async getMetadata() {
|
|
314
349
|
if (this.metadata) {
|
|
@@ -335,7 +370,10 @@ class FFmpegDecoder {
|
|
|
335
370
|
resolution: {
|
|
336
371
|
width: videoStream.width || 0,
|
|
337
372
|
height: videoStream.height || 0
|
|
338
|
-
}
|
|
373
|
+
},
|
|
374
|
+
codec: videoStream.codec_name || undefined,
|
|
375
|
+
pixelFormat: videoStream.pix_fmt || undefined,
|
|
376
|
+
bitrate: metadata.format.bit_rate ? parseInt(String(metadata.format.bit_rate)) : undefined
|
|
339
377
|
};
|
|
340
378
|
resolve(this.metadata);
|
|
341
379
|
});
|
|
@@ -354,16 +392,26 @@ class FFmpegDecoder {
|
|
|
354
392
|
/**
|
|
355
393
|
* Extract frames as grayscale data
|
|
356
394
|
*
|
|
395
|
+
* Uses pre-allocated alternating buffers to eliminate double allocation.
|
|
396
|
+
* Auto-sizes ring buffer based on video resolution.
|
|
397
|
+
*
|
|
357
398
|
* @param onFrame Callback for each frame
|
|
358
399
|
* @param onProgress Optional progress callback
|
|
400
|
+
* @param signal Optional AbortSignal for cancellation
|
|
359
401
|
*/
|
|
360
|
-
async extractFrames(onFrame, onProgress) {
|
|
402
|
+
async extractFrames(onFrame, onProgress, signal) {
|
|
361
403
|
const metadata = await this.getMetadata();
|
|
362
404
|
const { width, height } = metadata.resolution;
|
|
363
405
|
return new Promise((resolve, reject) => {
|
|
364
406
|
let frameNumber = 0;
|
|
365
|
-
const ringBuffer = new RingBuffer(); // 8MB ring buffer
|
|
366
407
|
const frameSize = width * height; // Grayscale: 1 byte per pixel
|
|
408
|
+
// Auto-size ring buffer based on resolution (min 4MB, fits 3 frames)
|
|
409
|
+
const ringBufferSize = Math.max(4 * 1024 * 1024, frameSize * 3);
|
|
410
|
+
const ringBuffer = new RingBuffer(ringBufferSize);
|
|
411
|
+
// Pre-allocate two alternating frame buffers (eliminates double allocation)
|
|
412
|
+
const frameBufferA = new Uint8Array(frameSize);
|
|
413
|
+
const frameBufferB = new Uint8Array(frameSize);
|
|
414
|
+
let useBufferA = true;
|
|
367
415
|
const command = ffmpeg.default(this.videoPath)
|
|
368
416
|
.outputOptions([
|
|
369
417
|
'-f', 'image2pipe',
|
|
@@ -377,20 +425,38 @@ class FFmpegDecoder {
|
|
|
377
425
|
resolve();
|
|
378
426
|
});
|
|
379
427
|
const stream = command.pipe();
|
|
428
|
+
// Listen for abort signal
|
|
429
|
+
if (signal) {
|
|
430
|
+
const onAbort = () => {
|
|
431
|
+
stream.destroy();
|
|
432
|
+
reject(new Error('Detection aborted'));
|
|
433
|
+
};
|
|
434
|
+
if (signal.aborted) {
|
|
435
|
+
stream.destroy();
|
|
436
|
+
reject(new Error('Detection aborted'));
|
|
437
|
+
return;
|
|
438
|
+
}
|
|
439
|
+
signal.addEventListener('abort', onAbort, { once: true });
|
|
440
|
+
}
|
|
380
441
|
stream.on('data', async (chunk) => {
|
|
381
|
-
// Write chunk to ring buffer
|
|
442
|
+
// Write chunk to ring buffer
|
|
382
443
|
ringBuffer.write(chunk);
|
|
383
444
|
// Process complete frames
|
|
384
445
|
while (ringBuffer.available() >= frameSize) {
|
|
385
|
-
const frameData = ringBuffer.read(frameSize);
|
|
386
446
|
// Skip frames if requested
|
|
387
447
|
if (this.options.skipFrames > 0 && frameNumber % (this.options.skipFrames + 1) !== 0) {
|
|
448
|
+
// Still need to consume the data from the ring buffer
|
|
449
|
+
ringBuffer.read(frameSize);
|
|
388
450
|
frameNumber++;
|
|
389
451
|
continue;
|
|
390
452
|
}
|
|
391
|
-
//
|
|
453
|
+
// Read directly into pre-allocated buffer (zero-allocation)
|
|
454
|
+
const targetBuffer = useBufferA ? frameBufferA : frameBufferB;
|
|
455
|
+
ringBuffer.readInto(targetBuffer, 0, frameSize);
|
|
456
|
+
useBufferA = !useBufferA;
|
|
457
|
+
// Create RawFrame (reuses the pre-allocated buffer - no copy)
|
|
392
458
|
const frame = {
|
|
393
|
-
data:
|
|
459
|
+
data: targetBuffer,
|
|
394
460
|
width,
|
|
395
461
|
height,
|
|
396
462
|
stride: width,
|
|
@@ -460,6 +526,59 @@ class FFmpegDecoder {
|
|
|
460
526
|
});
|
|
461
527
|
});
|
|
462
528
|
}
|
|
529
|
+
/**
|
|
530
|
+
* Extract multiple frame images in a single FFmpeg invocation
|
|
531
|
+
* Uses FFmpeg's select filter to avoid N+1 process spawning
|
|
532
|
+
*
|
|
533
|
+
* @param frameNumbers Array of frame numbers to extract
|
|
534
|
+
* @param options Image extraction options
|
|
535
|
+
*/
|
|
536
|
+
async extractFrameImages(frameNumbers, options) {
|
|
537
|
+
const metadata = await this.getMetadata();
|
|
538
|
+
// Ensure output directory exists
|
|
539
|
+
if (!fs.existsSync(options.outputDir)) {
|
|
540
|
+
fs.mkdirSync(options.outputDir, { recursive: true });
|
|
541
|
+
}
|
|
542
|
+
const format = options.format || 'jpg';
|
|
543
|
+
const quality = options.quality || 85;
|
|
544
|
+
const template = options.filenameTemplate || 'scene_{frame}';
|
|
545
|
+
// Build FFmpeg select filter expression
|
|
546
|
+
// select='eq(n,100)+eq(n,200)+eq(n,300)'
|
|
547
|
+
const selectExpr = frameNumbers.map(n => `eq(n\\,${n})`).join('+');
|
|
548
|
+
return new Promise((resolve, reject) => {
|
|
549
|
+
const outputPaths = [];
|
|
550
|
+
// Generate output filenames
|
|
551
|
+
for (const frameNum of frameNumbers) {
|
|
552
|
+
const timestamp = frameNum / metadata.fps;
|
|
553
|
+
const filename = template
|
|
554
|
+
.replace('{frame}', String(frameNum))
|
|
555
|
+
.replace('{timestamp}', timestamp.toFixed(3));
|
|
556
|
+
outputPaths.push(path.join(options.outputDir, `${filename}.${format}`));
|
|
557
|
+
}
|
|
558
|
+
// Use FFmpeg with select filter and output pattern
|
|
559
|
+
const outputPattern = path.join(options.outputDir, `${template.replace('{frame}', '%d').replace('{timestamp}', '%d')}.${format}`);
|
|
560
|
+
const outputOptions = [
|
|
561
|
+
'-vf', `select='${selectExpr}',setpts=N/TB`,
|
|
562
|
+
'-vsync', 'vfr'
|
|
563
|
+
];
|
|
564
|
+
if (format === 'jpg') {
|
|
565
|
+
outputOptions.push('-qscale:v', String(Math.round((100 - quality) / 3.33)));
|
|
566
|
+
}
|
|
567
|
+
if (options.width) {
|
|
568
|
+
outputOptions.push('-vf', `select='${selectExpr}',scale=${options.width}:-1,setpts=N/TB`);
|
|
569
|
+
}
|
|
570
|
+
ffmpeg.default(this.videoPath)
|
|
571
|
+
.outputOptions(outputOptions)
|
|
572
|
+
.output(outputPattern)
|
|
573
|
+
.on('error', (err) => {
|
|
574
|
+
reject(new Error(`FFmpeg frame extraction error: ${err.message}`));
|
|
575
|
+
})
|
|
576
|
+
.on('end', () => {
|
|
577
|
+
resolve(outputPaths);
|
|
578
|
+
})
|
|
579
|
+
.run();
|
|
580
|
+
});
|
|
581
|
+
}
|
|
463
582
|
/**
|
|
464
583
|
* Get the frame buffer
|
|
465
584
|
*/
|
|
@@ -482,19 +601,27 @@ class FFmpegDecoder {
|
|
|
482
601
|
* - Memory allocation and management
|
|
483
602
|
* - Calling WASM functions
|
|
484
603
|
* - Data marshalling between JS and WASM
|
|
604
|
+
* - Double-buffering to avoid redundant frame copies
|
|
485
605
|
*/
|
|
486
606
|
class WasmBridge {
|
|
487
607
|
constructor() {
|
|
488
608
|
this.module = null;
|
|
489
609
|
this.initialized = false;
|
|
490
|
-
//
|
|
491
|
-
|
|
492
|
-
this.
|
|
493
|
-
this.
|
|
494
|
-
|
|
495
|
-
this.
|
|
496
|
-
this.
|
|
610
|
+
// Double-buffered WASM pointers for frame processing
|
|
611
|
+
// Slot A and Slot B raw frame buffers
|
|
612
|
+
this.slotARawPtr = 0;
|
|
613
|
+
this.slotBRawPtr = 0;
|
|
614
|
+
// Slot A and Slot B padded frame buffers
|
|
615
|
+
this.slotAPaddedPtr = 0;
|
|
616
|
+
this.slotBPaddedPtr = 0;
|
|
617
|
+
// Which slot currently holds the "previous" frame (true = A, false = B)
|
|
618
|
+
this.prevIsSlotA = true;
|
|
619
|
+
// Whether the previous slot has valid padded data
|
|
620
|
+
this.prevSlotPadded = false;
|
|
621
|
+
this.allocatedFrameSize = 0;
|
|
622
|
+
this.allocatedPaddedSize = 0;
|
|
497
623
|
}
|
|
624
|
+
// Frame dimensions (reserved for future use in validation/resizing)
|
|
498
625
|
/**
|
|
499
626
|
* Initialize the WASM module
|
|
500
627
|
*/
|
|
@@ -527,11 +654,8 @@ class WasmBridge {
|
|
|
527
654
|
}
|
|
528
655
|
}
|
|
529
656
|
/**
|
|
530
|
-
* Pre-allocate WASM buffers for frame processing
|
|
531
|
-
*
|
|
532
|
-
*
|
|
533
|
-
* @param width Frame width
|
|
534
|
-
* @param height Frame height
|
|
657
|
+
* Pre-allocate WASM buffers for frame processing.
|
|
658
|
+
* Allocates double-buffered raw + padded slots and pre-allocates the MB array.
|
|
535
659
|
*/
|
|
536
660
|
allocateBuffers(width, height) {
|
|
537
661
|
this.ensureInitialized();
|
|
@@ -539,63 +663,104 @@ class WasmBridge {
|
|
|
539
663
|
const paddedSize = this.module._calculate_padded_size(width, height);
|
|
540
664
|
// Allocate or re-allocate raw frame buffers if size changed
|
|
541
665
|
if (frameSize !== this.allocatedFrameSize) {
|
|
542
|
-
if (this.
|
|
543
|
-
this.module._free(this.
|
|
544
|
-
if (this.
|
|
545
|
-
this.module._free(this.
|
|
546
|
-
this.
|
|
547
|
-
this.
|
|
666
|
+
if (this.slotARawPtr)
|
|
667
|
+
this.module._free(this.slotARawPtr);
|
|
668
|
+
if (this.slotBRawPtr)
|
|
669
|
+
this.module._free(this.slotBRawPtr);
|
|
670
|
+
this.slotARawPtr = this.module._malloc(frameSize);
|
|
671
|
+
this.slotBRawPtr = this.module._malloc(frameSize);
|
|
548
672
|
this.allocatedFrameSize = frameSize;
|
|
549
673
|
}
|
|
550
674
|
// Allocate or re-allocate padded frame buffers if size changed
|
|
551
675
|
if (paddedSize !== this.allocatedPaddedSize) {
|
|
552
|
-
if (this.
|
|
553
|
-
this.module._free(this.
|
|
554
|
-
if (this.
|
|
555
|
-
this.module._free(this.
|
|
556
|
-
this.
|
|
557
|
-
this.
|
|
676
|
+
if (this.slotAPaddedPtr)
|
|
677
|
+
this.module._free(this.slotAPaddedPtr);
|
|
678
|
+
if (this.slotBPaddedPtr)
|
|
679
|
+
this.module._free(this.slotBPaddedPtr);
|
|
680
|
+
this.slotAPaddedPtr = this.module._malloc(paddedSize);
|
|
681
|
+
this.slotBPaddedPtr = this.module._malloc(paddedSize);
|
|
558
682
|
this.allocatedPaddedSize = paddedSize;
|
|
559
683
|
}
|
|
684
|
+
// Reset double-buffer state
|
|
685
|
+
this.prevIsSlotA = true;
|
|
686
|
+
this.prevSlotPadded = false;
|
|
687
|
+
// Pre-allocate macroblock array in WASM
|
|
688
|
+
const mbResult = this.module._allocate_mb_array(width, height);
|
|
689
|
+
if (mbResult === 0) {
|
|
690
|
+
throw new Error('Failed to pre-allocate macroblock array in WASM');
|
|
691
|
+
}
|
|
560
692
|
}
|
|
561
693
|
/**
|
|
562
|
-
* Detect scene change between two frames
|
|
694
|
+
* Detect scene change between two frames using double-buffering.
|
|
563
695
|
*
|
|
564
|
-
*
|
|
565
|
-
*
|
|
696
|
+
* On first call, both frames are copied and padded.
|
|
697
|
+
* On subsequent calls, only the new current frame is copied and padded;
|
|
698
|
+
* the previous frame is already in WASM memory from the last call.
|
|
566
699
|
*
|
|
567
700
|
* @param prevFrame Previous frame
|
|
568
701
|
* @param curFrame Current frame
|
|
569
702
|
* @param intraCount Number of consecutive non-scene-change frames
|
|
570
|
-
* @param fcode Motion search range parameter
|
|
571
|
-
* @
|
|
703
|
+
* @param fcode Motion search range parameter
|
|
704
|
+
* @param intraThresh Primary intra threshold
|
|
705
|
+
* @param intraThresh2 Secondary intra threshold (sSAD comparison)
|
|
706
|
+
* @returns Scene change result with confidence score
|
|
572
707
|
*/
|
|
573
|
-
detectSceneChange(prevFrame, curFrame, intraCount, fcode = 4) {
|
|
708
|
+
detectSceneChange(prevFrame, curFrame, intraCount, fcode = 4, intraThresh = 2000, intraThresh2 = 90) {
|
|
574
709
|
this.ensureInitialized();
|
|
575
710
|
// Validate inputs
|
|
576
711
|
if (prevFrame.width !== curFrame.width || prevFrame.height !== curFrame.height) {
|
|
577
712
|
throw new Error('Frame dimensions must match');
|
|
578
713
|
}
|
|
579
|
-
// Ensure buffers are allocated
|
|
580
|
-
if (!this.
|
|
714
|
+
// Ensure buffers are allocated
|
|
715
|
+
if (!this.slotARawPtr || this.allocatedFrameSize !== prevFrame.data.length) {
|
|
581
716
|
this.allocateBuffers(prevFrame.width, prevFrame.height);
|
|
582
717
|
}
|
|
583
|
-
//
|
|
584
|
-
this.
|
|
585
|
-
this.
|
|
586
|
-
|
|
587
|
-
this.
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
718
|
+
// Determine which slot is "prev" and which is "cur"
|
|
719
|
+
const prevRawPtr = this.prevIsSlotA ? this.slotARawPtr : this.slotBRawPtr;
|
|
720
|
+
const prevPaddedPtr = this.prevIsSlotA ? this.slotAPaddedPtr : this.slotBPaddedPtr;
|
|
721
|
+
const curRawPtr = this.prevIsSlotA ? this.slotBRawPtr : this.slotARawPtr;
|
|
722
|
+
const curPaddedPtr = this.prevIsSlotA ? this.slotBPaddedPtr : this.slotAPaddedPtr;
|
|
723
|
+
// Copy and pad previous frame only if not already valid in WASM
|
|
724
|
+
if (!this.prevSlotPadded) {
|
|
725
|
+
this.module.HEAPU8.set(prevFrame.data, prevRawPtr);
|
|
726
|
+
this.module._pad_frame(prevRawPtr, prevPaddedPtr, prevFrame.width, prevFrame.height);
|
|
727
|
+
}
|
|
728
|
+
// Always copy and pad the new current frame
|
|
729
|
+
this.module.HEAPU8.set(curFrame.data, curRawPtr);
|
|
730
|
+
this.module._pad_frame(curRawPtr, curPaddedPtr, curFrame.width, curFrame.height);
|
|
731
|
+
// Run motion estimation with parameterized thresholds
|
|
732
|
+
const rawScore = this.module._MEanalysis_js(prevPaddedPtr, curPaddedPtr, prevFrame.width, prevFrame.height, intraCount, fcode, intraThresh, intraThresh2);
|
|
733
|
+
// Check for WASM error
|
|
734
|
+
if (rawScore === -1) {
|
|
735
|
+
throw new Error('WASM memory allocation failed during scene detection. ' +
|
|
736
|
+
`Frame size: ${prevFrame.width}x${prevFrame.height}. ` +
|
|
737
|
+
'The video resolution may be too high for available WASM memory.');
|
|
738
|
+
}
|
|
739
|
+
// Swap roles: current slot becomes previous for next call
|
|
740
|
+
this.prevIsSlotA = !this.prevIsSlotA;
|
|
741
|
+
this.prevSlotPadded = true;
|
|
742
|
+
// Determine scene change and confidence
|
|
743
|
+
const isSceneChange = rawScore >= intraThresh2;
|
|
744
|
+
// Normalize confidence: 0 when at threshold, 1 at 2x threshold
|
|
745
|
+
// For non-scene-changes, confidence represents "how close" (0 = very far from threshold)
|
|
746
|
+
let confidence;
|
|
747
|
+
if (isSceneChange) {
|
|
748
|
+
confidence = Math.min(1.0, rawScore / (intraThresh2 * 2));
|
|
749
|
+
}
|
|
750
|
+
else {
|
|
751
|
+
confidence = intraThresh2 > 0 ? Math.min(1.0, rawScore / intraThresh2) : 0;
|
|
752
|
+
}
|
|
753
|
+
return { isSceneChange, confidence };
|
|
754
|
+
}
|
|
755
|
+
/**
|
|
756
|
+
* Reset double-buffer state (e.g., after a seek or when starting fresh)
|
|
757
|
+
*/
|
|
758
|
+
resetBufferState() {
|
|
759
|
+
this.prevIsSlotA = true;
|
|
760
|
+
this.prevSlotPadded = false;
|
|
592
761
|
}
|
|
593
762
|
/**
|
|
594
763
|
* Calculate required buffer size for a padded frame
|
|
595
|
-
*
|
|
596
|
-
* @param width Original frame width
|
|
597
|
-
* @param height Original frame height
|
|
598
|
-
* @returns Required buffer size in bytes
|
|
599
764
|
*/
|
|
600
765
|
calculatePaddedSize(width, height) {
|
|
601
766
|
this.ensureInitialized();
|
|
@@ -603,10 +768,6 @@ class WasmBridge {
|
|
|
603
768
|
}
|
|
604
769
|
/**
|
|
605
770
|
* Get macroblock parameters for a given frame size
|
|
606
|
-
*
|
|
607
|
-
* @param width Frame width
|
|
608
|
-
* @param height Frame height
|
|
609
|
-
* @returns Macroblock parameters
|
|
610
771
|
*/
|
|
611
772
|
getMBParam(width, height) {
|
|
612
773
|
const mb_width = Math.ceil(width / 16);
|
|
@@ -632,28 +793,119 @@ class WasmBridge {
|
|
|
632
793
|
* Clean up resources
|
|
633
794
|
*/
|
|
634
795
|
destroy() {
|
|
635
|
-
// Free pre-allocated WASM buffers
|
|
636
796
|
if (this.module) {
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
this.
|
|
797
|
+
// Free pre-allocated macroblock array
|
|
798
|
+
this.module._free_mb_array();
|
|
799
|
+
// Free double-buffered WASM frame buffers
|
|
800
|
+
if (this.slotARawPtr)
|
|
801
|
+
this.module._free(this.slotARawPtr);
|
|
802
|
+
if (this.slotBRawPtr)
|
|
803
|
+
this.module._free(this.slotBRawPtr);
|
|
804
|
+
if (this.slotAPaddedPtr)
|
|
805
|
+
this.module._free(this.slotAPaddedPtr);
|
|
806
|
+
if (this.slotBPaddedPtr)
|
|
807
|
+
this.module._free(this.slotBPaddedPtr);
|
|
808
|
+
}
|
|
809
|
+
this.slotARawPtr = 0;
|
|
810
|
+
this.slotBRawPtr = 0;
|
|
811
|
+
this.slotAPaddedPtr = 0;
|
|
812
|
+
this.slotBPaddedPtr = 0;
|
|
650
813
|
this.allocatedFrameSize = 0;
|
|
651
814
|
this.allocatedPaddedSize = 0;
|
|
815
|
+
this.prevSlotPadded = false;
|
|
652
816
|
this.module = null;
|
|
653
817
|
this.initialized = false;
|
|
654
818
|
}
|
|
655
819
|
}
|
|
656
820
|
|
|
821
|
+
/**
 * Temporal Smoother - Sliding window filter to reduce false positives
 *
 * Three rules:
 * 1. Minimum gap: Suppress detections within minConsecutive frames of the last confirmed scene change
 * 2. Flash suppression: Isolated single-frame detections with low confidence are suppressed
 * 3. Cluster merging: Consecutive triggered frames (common in dissolves) keep only highest-confidence one
 *
 * Detections are never confirmed on the frame they occur: they are buffered
 * in a pending cluster and confirmed once two non-detection frames in a row
 * have been seen. Because of that delay, a confirmed result carries the
 * `frameNumber` of the detection it refers to, which is earlier than the
 * frame being processed when it is returned.
 */
class TemporalSmoother {
    /**
     * @param config Smoothing configuration: `windowSize` (maximum number of
     *               confirmed detections kept in `recentDetections`, default 5)
     *               and `minConsecutive` (minimum frame gap to the last
     *               confirmed scene change, default 2)
     */
    constructor(config) {
        const cfg = config || {};
        // Sliding window of recent detections
        this.recentDetections = [];
        // Last confirmed scene change frame
        this.lastConfirmedFrame = 0;
        // Buffer for cluster detection
        this.pendingCluster = [];
        // Non-detection frames seen since the last raw detection
        this.nonDetectionCount = 0;
        // Flash suppression: minimum confidence for isolated detections
        this.flashConfidenceThreshold = 0.4;
        // Fall back to defaults so a missing value cannot leave the window
        // unbounded or disable the gap rule through NaN comparisons.
        this.windowSize = Number.isFinite(cfg.windowSize) && cfg.windowSize >= 0 ? cfg.windowSize : 5;
        this.minConsecutive = Number.isFinite(cfg.minConsecutive) ? cfg.minConsecutive : 2;
    }
    /**
     * Process a frame's detection result through temporal smoothing
     *
     * @param frameNumber Number of the frame being processed
     * @param rawIsSceneChange Unsmoothed detection flag for this frame
     * @param rawConfidence Unsmoothed confidence for this frame
     * @returns `{ isSceneChange: false, confidence: 0 }` when nothing is
     *          confirmed on this call, otherwise
     *          `{ isSceneChange: true, confidence, frameNumber }` describing
     *          the confirmed (earlier) detection
     */
    process(frameNumber, rawIsSceneChange, rawConfidence) {
        // If no detection, track gap and possibly flush pending cluster
        if (!rawIsSceneChange) {
            this.nonDetectionCount++;
            // If we had a pending cluster and enough non-detections have passed,
            // emit the best detection from the cluster
            if (this.pendingCluster.length > 0 && this.nonDetectionCount >= 2) {
                const best = this.flushCluster();
                if (best) {
                    return best;
                }
            }
            return { isSceneChange: false, confidence: 0 };
        }
        // We have a detection
        this.nonDetectionCount = 0;
        // Rule 1: Minimum gap enforcement
        if (frameNumber - this.lastConfirmedFrame < this.minConsecutive) {
            // Too close to last confirmed scene change.
            // If a cluster is pending and this detection beats it, it replaces
            // the cluster, but nothing is emitted yet.
            if (this.pendingCluster.length > 0) {
                const best = this.pendingCluster.reduce((a, b) => a.confidence > b.confidence ? a : b);
                if (rawConfidence > best.confidence) {
                    this.pendingCluster = [{ frameNumber, confidence: rawConfidence }];
                }
            }
            return { isSceneChange: false, confidence: 0 };
        }
        // Rule 3: Cluster merging - add to pending cluster
        this.pendingCluster.push({ frameNumber, confidence: rawConfidence });
        // Don't emit immediately; wait to see if more consecutive detections follow
        return { isSceneChange: false, confidence: 0 };
    }
    /**
     * Flush the pending cluster, emitting the highest-confidence detection.
     * Can also be called at end of stream to drain a still-pending cluster.
     *
     * @returns `{ isSceneChange: true, confidence, frameNumber }` for the
     *          confirmed detection, or null when the cluster is empty or was
     *          suppressed as a flash
     */
    flushCluster() {
        if (this.pendingCluster.length === 0) {
            return null;
        }
        // Find the detection with highest confidence
        const best = this.pendingCluster.reduce((a, b) => a.confidence > b.confidence ? a : b);
        // Rule 2: Flash suppression - isolated single-frame detections with low confidence
        if (this.pendingCluster.length === 1 && best.confidence < this.flashConfidenceThreshold) {
            this.pendingCluster = [];
            return null;
        }
        // Confirm this detection
        this.lastConfirmedFrame = best.frameNumber;
        this.recentDetections.push(best);
        // Keep sliding window bounded
        while (this.recentDetections.length > this.windowSize) {
            this.recentDetections.shift();
        }
        this.pendingCluster = [];
        return {
            isSceneChange: true,
            confidence: best.confidence,
            // The frame the scene change actually occurred on
            frameNumber: best.frameNumber
        };
    }
}
|
|
908
|
+
|
|
657
909
|
/**
|
|
658
910
|
* Frame Processor - Utilities for frame preprocessing
|
|
659
911
|
*/
|
|
@@ -773,16 +1025,17 @@ class SceneDetector {
|
|
|
773
1025
|
constructor(options = {}) {
|
|
774
1026
|
// Set default options
|
|
775
1027
|
this.options = {
|
|
776
|
-
sensitivity: options.sensitivity || '
|
|
1028
|
+
sensitivity: options.sensitivity || 'low',
|
|
777
1029
|
customThresholds: options.customThresholds || { intraThresh: 2000, intraThresh2: 90 },
|
|
778
1030
|
searchRange: options.searchRange || 'medium',
|
|
779
|
-
workers: options.workers || 1,
|
|
1031
|
+
workers: options.workers || 1,
|
|
780
1032
|
progressive: options.progressive || { enabled: false, initialStep: 1, refinementSteps: [] },
|
|
781
1033
|
temporalSmoothing: options.temporalSmoothing || { enabled: false, windowSize: 5, minConsecutive: 2 },
|
|
782
1034
|
frameExtraction: options.frameExtraction || { pixelFormat: 'gray', maxBufferFrames: 2 },
|
|
783
1035
|
onProgress: options.onProgress || (() => { }),
|
|
784
1036
|
onScene: options.onScene || (() => { }),
|
|
785
|
-
format: options.format || 'json'
|
|
1037
|
+
format: options.format || 'json',
|
|
1038
|
+
signal: options.signal || undefined
|
|
786
1039
|
};
|
|
787
1040
|
this.wasmBridge = new WasmBridge();
|
|
788
1041
|
// Initialize detection state
|
|
@@ -809,65 +1062,180 @@ class SceneDetector {
|
|
|
809
1062
|
const metadata = await decoder.getMetadata();
|
|
810
1063
|
// Calculate fcode from search range
|
|
811
1064
|
this.state.fcode = calculateFcode(this.options.searchRange, metadata.resolution.width, metadata.resolution.height);
|
|
812
|
-
//
|
|
1065
|
+
// Calculate thresholds from sensitivity
|
|
1066
|
+
let thresholds;
|
|
1067
|
+
if (this.options.sensitivity === 'custom') {
|
|
1068
|
+
thresholds = this.options.customThresholds;
|
|
1069
|
+
}
|
|
1070
|
+
else {
|
|
1071
|
+
thresholds = calculateThresholds(this.options.sensitivity);
|
|
1072
|
+
}
|
|
1073
|
+
// Pre-allocate WASM buffers
|
|
813
1074
|
this.wasmBridge.allocateBuffers(metadata.resolution.width, metadata.resolution.height);
|
|
1075
|
+
// Initialize temporal smoother if enabled
|
|
1076
|
+
let temporalSmoother = null;
|
|
1077
|
+
if (this.options.temporalSmoothing.enabled) {
|
|
1078
|
+
temporalSmoother = new TemporalSmoother(this.options.temporalSmoothing);
|
|
1079
|
+
}
|
|
814
1080
|
// Initialize scene list (frame 0 is always a scene change)
|
|
815
1081
|
const scenes = [
|
|
816
1082
|
{
|
|
817
1083
|
frameNumber: 0,
|
|
818
1084
|
timestamp: 0,
|
|
819
|
-
timecode: '00:00:00.000'
|
|
1085
|
+
timecode: '00:00:00.000',
|
|
1086
|
+
confidence: 1.0
|
|
820
1087
|
}
|
|
821
1088
|
];
|
|
822
1089
|
// Processing statistics
|
|
823
1090
|
const startTime = Date.now();
|
|
824
1091
|
let processedFrames = 0;
|
|
1092
|
+
let firstFrameValidated = false;
|
|
1093
|
+
// Rolling FPS window for accurate speed metrics (3-second window)
|
|
1094
|
+
const fpsWindow = [];
|
|
1095
|
+
// Fade/dissolve detection state
|
|
1096
|
+
let keyframeData = null;
|
|
1097
|
+
const driftThresholdFactor = 0.6; // Re-run detection at 60% of base thresholds
|
|
1098
|
+
// Quick-reject sampling interval
|
|
1099
|
+
const quickRejectStep = 64;
|
|
1100
|
+
const quickRejectThreshold = 5;
|
|
1101
|
+
// AbortSignal check
|
|
1102
|
+
const signal = this.options.signal;
|
|
825
1103
|
// Process frames
|
|
826
1104
|
await decoder.extractFrames(async (frame) => {
|
|
827
|
-
|
|
1105
|
+
// Check abort signal
|
|
1106
|
+
if (signal && signal.aborted) {
|
|
1107
|
+
throw new Error('Detection aborted');
|
|
1108
|
+
}
|
|
1109
|
+
// Validate only the first frame (dimensions never change within a video)
|
|
1110
|
+
if (!firstFrameValidated) {
|
|
1111
|
+
validateFrame(frame);
|
|
1112
|
+
firstFrameValidated = true;
|
|
1113
|
+
}
|
|
828
1114
|
// Update current frame
|
|
829
1115
|
this.state.curFrame = frame;
|
|
830
1116
|
// Need at least 2 frames to detect scene change
|
|
831
1117
|
if (this.state.prevFrame) {
|
|
832
|
-
|
|
1118
|
+
let isSceneChange = false;
|
|
1119
|
+
let confidence = 0;
|
|
1120
|
+
// Quick-reject: sampled MAD between prev and cur frame
|
|
1121
|
+
const prevData = this.state.prevFrame.data;
|
|
1122
|
+
const curData = this.state.curFrame.data;
|
|
1123
|
+
let sampledDiff = 0;
|
|
1124
|
+
let sampleCount = 0;
|
|
1125
|
+
for (let i = 0; i < curData.length; i += quickRejectStep) {
|
|
1126
|
+
sampledDiff += Math.abs(curData[i] - prevData[i]);
|
|
1127
|
+
sampleCount++;
|
|
1128
|
+
}
|
|
1129
|
+
const avgDiff = sampledDiff / sampleCount;
|
|
1130
|
+
if (avgDiff >= quickRejectThreshold) {
|
|
1131
|
+
// Frame differs enough, run full WASM detection
|
|
1132
|
+
const result = this.wasmBridge.detectSceneChange(this.state.prevFrame, this.state.curFrame, this.state.intraCount, this.state.fcode, thresholds.intraThresh, thresholds.intraThresh2);
|
|
1133
|
+
isSceneChange = result.isSceneChange;
|
|
1134
|
+
confidence = result.confidence;
|
|
1135
|
+
// Fade/dissolve detection: if not detected as scene change,
|
|
1136
|
+
// check drift from last keyframe
|
|
1137
|
+
if (!isSceneChange && keyframeData) {
|
|
1138
|
+
let driftSum = 0;
|
|
1139
|
+
let driftCount = 0;
|
|
1140
|
+
// Sample every 4th pixel for speed
|
|
1141
|
+
for (let i = 0; i < curData.length; i += 4) {
|
|
1142
|
+
driftSum += Math.abs(curData[i] - keyframeData[i]);
|
|
1143
|
+
driftCount++;
|
|
1144
|
+
}
|
|
1145
|
+
const driftAvg = driftSum / driftCount;
|
|
1146
|
+
// If cumulative drift is high but per-frame SAD didn't trigger,
|
|
1147
|
+
// re-run with lowered thresholds
|
|
1148
|
+
if (driftAvg > 30) {
|
|
1149
|
+
const fadeResult = this.wasmBridge.detectSceneChange(this.state.prevFrame, this.state.curFrame, this.state.intraCount, this.state.fcode, Math.round(thresholds.intraThresh * driftThresholdFactor), Math.round(thresholds.intraThresh2 * driftThresholdFactor));
|
|
1150
|
+
if (fadeResult.isSceneChange) {
|
|
1151
|
+
isSceneChange = true;
|
|
1152
|
+
confidence = fadeResult.confidence;
|
|
1153
|
+
}
|
|
1154
|
+
}
|
|
1155
|
+
}
|
|
1156
|
+
}
|
|
1157
|
+
// else: quick-reject - frames are nearly identical, skip WASM call
|
|
1158
|
+
// Apply temporal smoothing if enabled
|
|
1159
|
+
if (temporalSmoother) {
|
|
1160
|
+
const smoothed = temporalSmoother.process(frame.frameNumber, isSceneChange, confidence);
|
|
1161
|
+
isSceneChange = smoothed.isSceneChange;
|
|
1162
|
+
confidence = smoothed.confidence;
|
|
1163
|
+
}
|
|
833
1164
|
if (isSceneChange) {
|
|
834
1165
|
const scene = {
|
|
835
1166
|
frameNumber: frame.frameNumber,
|
|
836
1167
|
timestamp: frame.pts,
|
|
837
|
-
timecode: formatTimecode(frame.pts)
|
|
1168
|
+
timecode: formatTimecode(frame.pts),
|
|
1169
|
+
confidence
|
|
838
1170
|
};
|
|
839
1171
|
scenes.push(scene);
|
|
840
1172
|
// Call scene callback
|
|
841
1173
|
this.options.onScene(scene);
|
|
842
1174
|
// Reset intraCount
|
|
843
1175
|
this.state.intraCount = 1;
|
|
1176
|
+
// Update keyframe for drift detection
|
|
1177
|
+
keyframeData = new Uint8Array(curData);
|
|
844
1178
|
}
|
|
845
1179
|
else {
|
|
846
|
-
// Increment intraCount
|
|
847
1180
|
this.state.intraCount++;
|
|
848
1181
|
}
|
|
849
1182
|
}
|
|
1183
|
+
else {
|
|
1184
|
+
// First frame is the initial keyframe for drift detection
|
|
1185
|
+
keyframeData = new Uint8Array(frame.data);
|
|
1186
|
+
}
|
|
850
1187
|
// Move current frame to previous
|
|
851
1188
|
this.state.prevFrame = this.state.curFrame;
|
|
852
1189
|
processedFrames++;
|
|
853
1190
|
}, (current, total) => {
|
|
854
|
-
//
|
|
1191
|
+
// Enhanced progress with rolling FPS window
|
|
1192
|
+
const now = Date.now();
|
|
1193
|
+
const elapsed = (now - startTime) / 1000;
|
|
1194
|
+
// Add to rolling window
|
|
1195
|
+
fpsWindow.push({ time: now, frame: current });
|
|
1196
|
+
// Remove samples older than 3 seconds
|
|
1197
|
+
while (fpsWindow.length > 1 && (now - fpsWindow[0].time) > 3000) {
|
|
1198
|
+
fpsWindow.shift();
|
|
1199
|
+
}
|
|
1200
|
+
// Calculate instantaneous FPS from rolling window
|
|
1201
|
+
let currentFps = 0;
|
|
1202
|
+
if (fpsWindow.length >= 2) {
|
|
1203
|
+
const oldest = fpsWindow[0];
|
|
1204
|
+
const newest = fpsWindow[fpsWindow.length - 1];
|
|
1205
|
+
const dt = (newest.time - oldest.time) / 1000;
|
|
1206
|
+
if (dt > 0) {
|
|
1207
|
+
currentFps = (newest.frame - oldest.frame) / dt;
|
|
1208
|
+
}
|
|
1209
|
+
}
|
|
1210
|
+
// Calculate ETA from instantaneous FPS
|
|
1211
|
+
const remaining = currentFps > 0 ? (total - current) / currentFps : undefined;
|
|
855
1212
|
const progress = {
|
|
856
1213
|
currentFrame: current,
|
|
857
1214
|
totalFrames: total,
|
|
858
|
-
percent: Math.round((current / total) * 100)
|
|
1215
|
+
percent: Math.round((current / total) * 100),
|
|
1216
|
+
eta: remaining,
|
|
1217
|
+
fps: currentFps,
|
|
1218
|
+
elapsed,
|
|
1219
|
+
scenesDetected: scenes.length
|
|
859
1220
|
};
|
|
860
|
-
// Calculate ETA
|
|
861
|
-
const elapsed = (Date.now() - startTime) / 1000;
|
|
862
|
-
const fps = current / elapsed;
|
|
863
|
-
const remaining = (total - current) / fps;
|
|
864
|
-
progress.eta = remaining;
|
|
865
1221
|
this.options.onProgress(progress);
|
|
866
1222
|
});
|
|
867
1223
|
// Calculate statistics
|
|
868
1224
|
const endTime = Date.now();
|
|
869
1225
|
const processingTime = (endTime - startTime) / 1000;
|
|
870
1226
|
const framesPerSecond = processedFrames / processingTime;
|
|
1227
|
+
// Post-process: compute scene durations
|
|
1228
|
+
for (let i = 0; i < scenes.length; i++) {
|
|
1229
|
+
if (i < scenes.length - 1) {
|
|
1230
|
+
scenes[i].duration = scenes[i + 1].timestamp - scenes[i].timestamp;
|
|
1231
|
+
scenes[i].frameCount = scenes[i + 1].frameNumber - scenes[i].frameNumber;
|
|
1232
|
+
}
|
|
1233
|
+
else {
|
|
1234
|
+
// Last scene: duration until end of video
|
|
1235
|
+
scenes[i].duration = metadata.duration - scenes[i].timestamp;
|
|
1236
|
+
scenes[i].frameCount = metadata.totalFrames - scenes[i].frameNumber;
|
|
1237
|
+
}
|
|
1238
|
+
}
|
|
871
1239
|
// Clean up
|
|
872
1240
|
decoder.destroy();
|
|
873
1241
|
return {
|
|
@@ -910,7 +1278,7 @@ class SceneDetector {
|
|
|
910
1278
|
* console.log(`Found ${results.scenes.length} scenes`);
|
|
911
1279
|
*
|
|
912
1280
|
* results.scenes.forEach(scene => {
|
|
913
|
-
* console.log(`Scene at ${scene.timecode}`);
|
|
1281
|
+
* console.log(`Scene at ${scene.timecode} (confidence: ${scene.confidence})`);
|
|
914
1282
|
* });
|
|
915
1283
|
* ```
|
|
916
1284
|
*/
|
|
@@ -924,6 +1292,30 @@ async function detectSceneChanges(videoPath, options) {
|
|
|
924
1292
|
detector.destroy();
|
|
925
1293
|
}
|
|
926
1294
|
}
|
|
1295
|
+
/**
 * Detect scene changes in a video and, when image options are supplied,
 * extract an image for the first frame of every detected scene.
 *
 * @param videoPath Path to video file
 * @param options Detection options
 * @param imageOptions Image extraction options; when omitted, detection
 *   runs but no images are extracted
 * @returns Detection results (images are written to disk)
 */
async function extractSceneImages(videoPath, options, imageOptions) {
    const detector = new SceneDetector(options);
    try {
        const results = await detector.detect(videoPath);
        if (imageOptions) {
            const decoder = new FFmpegDecoder(videoPath);
            try {
                const frameNumbers = results.scenes.map(s => s.frameNumber);
                await decoder.extractFrameImages(frameNumbers, imageOptions);
            }
            finally {
                // Release the decoder even when extraction rejects, mirroring
                // how the detector is released below.
                decoder.destroy();
            }
        }
        return results;
    }
    finally {
        detector.destroy();
    }
}
|
|
927
1319
|
/**
|
|
928
1320
|
* Version information
|
|
929
1321
|
*/
|
|
@@ -944,5 +1336,5 @@ const info = {
|
|
|
944
1336
|
}
|
|
945
1337
|
};
|
|
946
1338
|
|
|
947
|
-
export { BufferPool, FFmpegDecoder, FrameBuffer, SceneDetector, WasmBridge, calculateFcode, calculateFrameMemory, calculateMBParam, calculateThresholds, detectSceneChanges, estimateProcessingTime, formatTimecode, info, validateFrame, validateFrameDimensions, version };
|
|
1339
|
+
export { BufferPool, FFmpegDecoder, FrameBuffer, SceneDetector, TemporalSmoother, WasmBridge, calculateFcode, calculateFrameMemory, calculateMBParam, calculateThresholds, detectSceneChanges, estimateProcessingTime, extractSceneImages, formatTimecode, info, validateFrame, validateFrameDimensions, version };
|
|
948
1340
|
//# sourceMappingURL=keyframes.esm.js.map
|