npm - audio-video-sync - Versions diffs - 0.1.0 - Mend

audio-video-sync 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/LICENSE ADDED Viewed

@@ -0,0 +1,8 @@
+The MIT License (MIT)
+Copyright © 2026 EuanTop
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

package/README.md ADDED Viewed

@@ -0,0 +1,250 @@
+# audio-video-sync
+[![npm version](https://img.shields.io/badge/version-0.1.0-blue.svg)](https://www.npmjs.com/package/audio-video-sync)
+[![license](https://img.shields.io/badge/license-MIT-green.svg)](./LICENSE)
+[![TypeScript](https://img.shields.io/badge/TypeScript-5.3-blue.svg)](https://www.typescriptlang.org/)
+**English** | [中文版](./README.zh-CN.md)
+---
+🙆‍♀️ A timeline-alignment tool for multi-camera recordings, **a must-have for research!**
+📱 Aligns via audio, sidestepping both the inconsistent ways Apple, Android, and Xiaomi devices process video source data and the inaccurate system clocks of the recording devices themselves.
+✨ Multi-camera video synchronization using audio cross-correlation. Automatically align multiple video timelines by analyzing audio waveforms with millisecond precision.
+![eg_img](<cover.png>)
+## Features
+- 🎯 **High Precision** - Millisecond-level sync accuracy (±2ms)
+- ⚡ **High Performance** - FFT-accelerated cross-correlation algorithm
+- 🌐 **Browser Native** - Based on FFmpeg.wasm, no server required
+- 📦 **Zero Config** - Works out of the box with automatic audio extraction
+- 🔧 **Flexible** - Supports custom FFmpeg instances and parameters
+- 📝 **TypeScript** - Full type definitions included
+## How it Works
+Multi-camera recordings may have inaccurate creation_time metadata, but the ambient sound is consistent across all recordings. By performing cross-correlation analysis on audio waveforms, we can precisely calculate the time offset between videos.
+```
+Video A audio: ──────█████████████──────────
+Video B audio: ────────────█████████████────
+                          ↑
+                      offset Δt
+```
+<!-- ## Requirements (uncertain)
+- **Node.js**: >= 16.0.0
+- **Browser**: Must support SharedArrayBuffer (Chrome 92+, Firefox 79+, Safari 15.2+)
+- **HTTPS**: Required for SharedArrayBuffer in production -->
+## Installation
+```bash
+npm install audio-video-sync @ffmpeg/ffmpeg @ffmpeg/util
+```
+## Development
+### Clone and Setup
+```bash
+git clone https://github.com/EuanTop/audio-video-sync.git
+cd audio-video-sync
+npm install
+```
+### Build from Source
+```bash
+# Build the package
+npm run build
+```
+### Test the Build
+```bash
+# Open test.html in browser to test the built package
+# npx serve -p 3333
+open test.html
+```
+### Scripts
+- `npm run build` - Build the package for distribution
+- `npm run test` - Run tests (placeholder)
+- `npm run prepublishOnly` - Automatically builds before publishing
+## Quick Start
+### Basic Usage
+```javascript
+import { syncVideos } from 'audio-video-sync';
+const result = await syncVideos([
+  { file: video1File, id: 'cam1' },
+  { file: video2File, id: 'cam2' },
+  { file: video3File, id: 'cam3' },
+  { file: video4File, id: 'cam4' }
+], {
+  referenceIndex: 0,  // Use first video as reference
+  sampleRate: 16000,  // Sample rate
+  maxDuration: 60     // Only analyze first 60 seconds
+});
+console.log(result);
+// {
+//   referenceId: 'cam1',
+//   results: [
+//     { id: 'cam1', offsetSeconds: 0, confidence: 1 },
+//     { id: 'cam2', offsetSeconds: 0.523, confidence: 0.89 },
+//     { id: 'cam3', offsetSeconds: -0.127, confidence: 0.92 },
+//     { id: 'cam4', offsetSeconds: 1.234, confidence: 0.85 }
+//   ],
+//   success: true
+// }
+```
+### Using Existing FFmpeg Instance
+```javascript
+import { FFmpeg } from '@ffmpeg/ffmpeg';
+import { AudioVideoSync } from 'audio-video-sync';
+const ffmpeg = new FFmpeg();
+await ffmpeg.load();
+const sync = new AudioVideoSync(ffmpeg);
+const result = await sync.syncVideos(videos);
+```
+### Calculate Offset Between Two Videos
+```javascript
+import { createSync } from 'audio-video-sync';
+const sync = createSync();
+const { offsetSeconds, confidence } = await sync.calculateOffset(
+  referenceVideoFile,
+  targetVideoFile
+);
+console.log(`Target video offset: ${offsetSeconds} seconds`);
+console.log(`Confidence: ${(confidence * 100).toFixed(1)}%`);
+```
+### With Progress Callback
+```javascript
+const result = await syncVideos(videos, {
+  onProgress: (stage, progress) => {
+    if (stage === 'extracting') {
+      console.log(`Extracting audio: ${(progress * 100).toFixed(0)}%`);
+    } else if (stage === 'correlating') {
+      console.log(`Computing correlation: ${(progress * 100).toFixed(0)}%`);
+    }
+  }
+});
+```
+## API
+### syncVideos(videos, options)
+Synchronize multiple video files.
+**Parameters:**
+- `videos`: `VideoInput[]` - Array of video inputs
+  - `file`: `File | Blob` - Video file
+  - `id`: `string` (optional) - Video identifier
+  - `originalStartTime`: `Date` (optional) - Original start time
+- `options`: `SyncOptions` (optional)
+  - `referenceIndex`: `number` - Reference video index, default 0
+  - `sampleRate`: `number` - Sample rate, default 16000
+  - `maxDuration`: `number` - Max analysis duration (seconds), default 60
+  - `minConfidence`: `number` - Min confidence threshold, default 0.3
+  - `onProgress`: `(stage, progress) => void` - Progress callback
+**Returns:** `Promise<MultiSyncResult>`
+### AudioVideoSync
+Synchronizer class with FFmpeg instance reuse support.
+```javascript
+const sync = new AudioVideoSync(ffmpeg); // the ffmpeg argument is optional
+await sync.load();
+const result = await sync.syncVideos(videos, options);
+const offset = await sync.calculateOffset(refVideo, targetVideo);
+```
+### Low-level API
+```javascript
+import {
+  extractAudio,       // Extract audio from video
+  crossCorrelate,     // Compute cross-correlation
+  findPeakOffset,     // Find peak offset
+  calculateConfidence // Calculate confidence score
+} from 'audio-video-sync';
+```
+## Types
+```typescript
+interface VideoInput {
+  file: File | Blob;
+  id?: string;
+  originalStartTime?: Date;
+}
+interface SyncResult {
+  id: string;
+  offsetSeconds: number;
+  offsetSamples: number;
+  confidence: number;
+  correctedStartTime: Date | null;
+}
+interface MultiSyncResult {
+  referenceId: string;
+  results: SyncResult[];
+  sampleRate: number;
+  success: boolean;
+  error?: string;
+}
+```
+## Accuracy Comparison
+| Method | Accuracy |
+|--------|----------|
+| creation_time | ±seconds |
+| File timestamp | ±100ms |
+| Audio cross-correlation | **±2ms** |
+## Notes
+1. **Audio Quality**: Ensure videos have clear ambient sound; silent videos cannot be synced
+2. **Sample Rate**: 16000 Hz is sufficient for sync; higher rates increase computation
+3. **Analysis Duration**: Usually 30-60 seconds is enough; no need to process entire video
+4. **Memory Usage**: For long videos, limit `maxDuration` to control memory usage
+5. **Browser Compatibility**: Requires SharedArrayBuffer support
+## Tech Stack
+- FFmpeg.wasm - Video decoding and audio extraction
+- FFT (Fast Fourier Transform) - Frequency domain cross-correlation
+- TypeScript - Type safety
+## Contributing
+Issues and Pull Requests are welcome!
+## License
+[MIT](LICENSE)

package/README.zh-CN.md ADDED Viewed

@@ -0,0 +1,278 @@
+# audio-video-sync
+[![npm version](https://img.shields.io/badge/npm-v0.1.0-blue.svg)](https://www.npmjs.com/package/audio-video-sync)
+[![license](https://img.shields.io/badge/license-MIT-green.svg)](./LICENSE)
+[![TypeScript](https://img.shields.io/badge/TypeScript-Ready-blue.svg)](https://www.typescriptlang.org/)
+[English](./README.md) | **中文版**
+---
+🙆‍♀️  多机位录影对齐时间轴工具，**科研常备！**
+📱 通过音频对齐，绕开视频源数据在不同系统（苹果、安卓和小米）不一致的额外处理，以及录像设备系统时间本身就不正确的问题。
+🌟 基于音频互相关的多机位视频同步库。使用 FFT 加速的互相关算法，通过分析音频波形自动对齐多个视频的时间轴，精度可达毫秒级。
+![eg_img](<cover.png>)
+## 特性
+- 🎯 **高精度** - 毫秒级同步精度（±2ms）
+- ⚡ **高性能** - FFT 加速的互相关算法
+- 🌐 **浏览器原生** - 基于 FFmpeg.wasm，无需服务端
+- 📦 **零配置** - 开箱即用，自动处理音频提取
+- 🔧 **灵活** - 支持自定义 FFmpeg 实例和参数
+- 📝 **TypeScript** - 完整的类型定义
+## 原理
+多机位录制的视频虽然 creation_time 可能不准确，但录制的环境声音是一致的。通过对音频波形进行互相关分析，可以精确计算出各视频之间的时间偏移。
+```
+视频A音频: ──────█████████████──────────
+视频B音频: ────────────█████████████────
+                      ↑
+                  偏移量 Δt
+```
+<!-- ## 环境要求 (不确定)
+- **Node.js**: >= 16.0.0
+- **浏览器**: 需要支持 SharedArrayBuffer（Chrome 92+、Firefox 79+、Safari 15.2+）
+- **HTTPS**: 生产环境需要 HTTPS 才能使用 SharedArrayBuffer -->
+## 安装
+```bash
+npm install audio-video-sync @ffmpeg/ffmpeg @ffmpeg/util
+```
+## 开发
+### 克隆和设置
+```bash
+git clone https://github.com/EuanTop/audio-video-sync.git
+cd audio-video-sync
+npm install
+```
+### 从源码构建
+```bash
+# 构建包
+npm run build
+# 这会生成:
+# - dist/index.js (CommonJS 格式)
+# - dist/index.esm.js (ES Module 格式)
+# - dist/index.d.ts (TypeScript 类型定义)
+```
+### 测试构建结果
+```bash
+# 在浏览器中打开 test.html 来测试构建的包
+open test.html
+```
+### 脚本命令
+- `npm run build` - 构建用于发布的包
+- `npm run test` - 运行测试（占位符）
+- `npm run prepublishOnly` - 发布前自动构建
+## 快速开始
+### 基本用法
+```javascript
+import { syncVideos } from 'audio-video-sync';
+// 同步多个视频文件
+const result = await syncVideos([
+  { file: video1File, id: 'cam1' },
+  { file: video2File, id: 'cam2' },
+  { file: video3File, id: 'cam3' },
+  { file: video4File, id: 'cam4' }
+], {
+  referenceIndex: 0,  // 以第一个视频为参考
+  sampleRate: 16000,  // 采样率
+  maxDuration: 60     // 只分析前60秒
+});
+console.log(result);
+// {
+//   referenceId: 'cam1',
+//   results: [
+//     { id: 'cam1', offsetSeconds: 0, confidence: 1 },
+//     { id: 'cam2', offsetSeconds: 0.523, confidence: 0.89 },
+//     { id: 'cam3', offsetSeconds: -0.127, confidence: 0.92 },
+//     { id: 'cam4', offsetSeconds: 1.234, confidence: 0.85 }
+//   ],
+//   success: true
+// }
+```
+### 使用已有的 FFmpeg 实例
+```javascript
+import { FFmpeg } from '@ffmpeg/ffmpeg';
+import { AudioVideoSync } from 'audio-video-sync';
+// 如果你的项目已经有 FFmpeg 实例
+const ffmpeg = new FFmpeg();
+await ffmpeg.load();
+const sync = new AudioVideoSync(ffmpeg);
+const result = await sync.syncVideos(videos);
+```
+### 计算两个视频的偏移
+```javascript
+import { createSync } from 'audio-video-sync';
+const sync = createSync();
+const { offsetSeconds, confidence } = await sync.calculateOffset(
+  referenceVideoFile,
+  targetVideoFile
+);
+console.log(`目标视频相对参考视频偏移: ${offsetSeconds} 秒`);
+console.log(`置信度: ${(confidence * 100).toFixed(1)}%`);
+```
+### 带进度回调
+```javascript
+const result = await syncVideos(videos, {
+  onProgress: (stage, progress) => {
+    if (stage === 'extracting') {
+      console.log(`提取音频: ${(progress * 100).toFixed(0)}%`);
+    } else if (stage === 'correlating') {
+      console.log(`计算相关: ${(progress * 100).toFixed(0)}%`);
+    }
+  }
+});
+```
+### 与 RFID 数据对齐
+```javascript
+import { syncVideos } from 'audio-video-sync';
+// 假设 cam1 的时间与 RFID 数据是对齐的
+const videos = [
+  {
+    file: cam1File,
+    id: 'cam1',
+    originalStartTime: new Date('2024-01-11T10:00:00') // RFID 对齐的时间
+  },
+  { file: cam2File, id: 'cam2' },
+  { file: cam3File, id: 'cam3' },
+  { file: cam4File, id: 'cam4' }
+];
+const result = await syncVideos(videos, { referenceIndex: 0 });
+// 所有视频都会得到校正后的开始时间
+result.results.forEach(r => {
+  console.log(`${r.id}: 校正后开始时间 = ${r.correctedStartTime}`);
+});
+```
+## API
+### syncVideos(videos, options)
+同步多个视频文件。
+**参数:**
+- `videos`: `VideoInput[]` - 视频输入数组
+  - `file`: `File | Blob` - 视频文件
+  - `id`: `string` (可选) - 视频标识
+  - `originalStartTime`: `Date` (可选) - 原始开始时间
+- `options`: `SyncOptions` (可选)
+  - `referenceIndex`: `number` - 参考视频索引，默认 0
+  - `sampleRate`: `number` - 采样率，默认 16000
+  - `maxDuration`: `number` - 最大分析时长（秒），默认 60
+  - `minConfidence`: `number` - 最小置信度阈值，默认 0.3
+  - `onProgress`: `(stage, progress) => void` - 进度回调
+**返回:** `Promise<MultiSyncResult>`
+### AudioVideoSync
+同步器类，支持复用 FFmpeg 实例。
+```javascript
+const sync = new AudioVideoSync(ffmpeg?);
+await sync.load();
+const result = await sync.syncVideos(videos, options);
+const offset = await sync.calculateOffset(refVideo, targetVideo);
+```
+### 底层 API
+```javascript
+import {
+  extractAudio,      // 从视频提取音频
+  crossCorrelate,    // 计算互相关
+  findPeakOffset,    // 找到峰值偏移
+  calculateConfidence // 计算置信度
+} from 'audio-video-sync';
+```
+## 类型定义
+```typescript
+interface VideoInput {
+  file: File | Blob;
+  id?: string;
+  originalStartTime?: Date;
+}
+interface SyncResult {
+  id: string;
+  offsetSeconds: number;
+  offsetSamples: number;
+  confidence: number;
+  correctedStartTime: Date | null;
+}
+interface MultiSyncResult {
+  referenceId: string;
+  results: SyncResult[];
+  sampleRate: number;
+  success: boolean;
+  error?: string;
+}
+```
+## 精度对比
+| 方法 | 精度 |
+|------|------|
+| creation_time | ±秒级 |
+| 文件时间戳 | ±百毫秒 |
+| 音频互相关 | **±2ms** |
+## 注意事项
+1. **音频质量**: 确保视频有清晰的环境音，纯静音视频无法同步
+2. **采样率**: 16000 Hz 足够用于同步，更高采样率会增加计算量
+3. **分析时长**: 通常分析前 30-60 秒就足够，不需要处理整个视频
+4. **内存占用**: 长视频建议限制 `maxDuration` 以控制内存使用
+5. **浏览器兼容**: 需要支持 SharedArrayBuffer 的浏览器环境
+## 技术栈
+- FFmpeg.wasm - 视频解码和音频提取
+- FFT (Fast Fourier Transform) - 频域互相关计算
+- TypeScript - 类型安全
+## 贡献
+欢迎提交 Issue 和 Pull Request！
+## 许可证
+[MIT](LICENSE)

package/dist/audio.d.ts ADDED Viewed

@@ -0,0 +1,40 @@
+/**
+ * Audio extraction module
+ * Extracts audio PCM data from video files using FFmpeg.wasm
+ */
+import { FFmpeg } from '@ffmpeg/ffmpeg';
+export interface AudioData { // Audio data as returned by extractAudio
+    samples: Float32Array; // sample values (the PCM data)
+    sampleRate: number; // presumably Hz, as with ExtractOptions.sampleRate — confirm
+    duration: number; // presumably seconds — TODO confirm against the implementation
+    channels: number; // number of audio channels (presumably 1 when mono extraction is used — confirm)
+}
+export interface ExtractOptions {
+    /** Target sample rate; defaults to 16000 Hz (sufficient for sync and fast to compute). */
+    sampleRate?: number;
+    /** Whether to convert to mono; defaults to true. */
+    mono?: boolean;
+    /** Extract only the first N seconds for sync (saves memory and computation); defaults to 60 seconds. */
+    maxDuration?: number;
+}
+/**
+ * Extracts audio PCM data from a video file.
+ *
+ * @param ffmpeg An already-loaded FFmpeg instance
+ * @param videoFile The video file (File or Blob)
+ * @param options Extraction options
+ * @returns The audio PCM data
+ */
+export declare function extractAudio(ffmpeg: FFmpeg, videoFile: File | Blob, options?: ExtractOptions): Promise<AudioData>;
+/**
+ * Gets PCM data from an AudioBuffer (for use with the Web Audio API).
+ */
+export declare function audioBufferToFloat32(audioBuffer: AudioBuffer): Float32Array;
+/**
+ * Downsamples audio data.
+ */
+export declare function downsample(samples: Float32Array, fromRate: number, toRate: number): Float32Array;
+/**
+ * Applies simple preprocessing to audio (DC-offset removal, normalization).
+ */
+export declare function preprocessAudio(samples: Float32Array): Float32Array;

package/dist/fft.d.ts ADDED Viewed

@@ -0,0 +1,46 @@
+/**
+ * FFT (Fast Fourier Transform) implementation
+ * Used for frequency-domain analysis of audio signals and for cross-correlation
+ */
+export type Complex = [number, number]; // presumably [real, imaginary] — confirm against the implementation
+/**
+ * Pads an array to a power-of-two length.
+ */
+export declare function padToPowerOfTwo(arr: Float32Array, targetLength: number): Float32Array;
+/**
+ * Computes the next power of two.
+ */
+export declare function nextPowerOfTwo(n: number): number;
+/**
+ * Cooley-Tukey FFT algorithm.
+ */
+export declare function fft(input: Float32Array): Complex[];
+/**
+ * Inverse FFT.
+ */
+export declare function ifft(input: Complex[]): Complex[];
+/**
+ * FFT for complex-valued input.
+ */
+export declare function fftComplex(input: Complex[]): Complex[];
+/**
+ * Computes the cross-correlation of two signals.
+ * Accelerated with FFT: corr(a,b) = IFFT(FFT(a) * conj(FFT(b)))
+ *
+ * @param signalA The reference signal
+ * @param signalB The signal to be aligned
+ * @returns The cross-correlation result array
+ */
+export declare function crossCorrelate(signalA: Float32Array, signalB: Float32Array): Float32Array;
+/**
+ * Finds the position of the maximum peak in a cross-correlation result.
+ *
+ * @param correlation The cross-correlation result
+ * @param signalBLength The original length of signal B
+ * @returns The offset (a positive value means B lags behind A; a negative value means B leads)
+ */
+export declare function findPeakOffset(correlation: Float32Array, signalBLength: number): number;
+/**
+ * Computes the confidence of the cross-correlation (normalized correlation coefficient).
+ */
+export declare function calculateConfidence(signalA: Float32Array, signalB: Float32Array, correlation: Float32Array, peakIndex: number): number;

package/dist/index.d.ts ADDED Viewed

@@ -0,0 +1,9 @@
+/**
+ * audio-video-sync
+ *
+ * 基于音频互相关的多机位视频同步库
+ * Multi-camera video synchronization using audio cross-correlation
+ */
+export { AudioVideoSync, createSync, syncVideos, type VideoInput, type SyncResult, type MultiSyncResult, type SyncOptions } from './sync';
+export { extractAudio, preprocessAudio, downsample, audioBufferToFloat32, type AudioData, type ExtractOptions } from './audio';
+export { fft, ifft, crossCorrelate, findPeakOffset, calculateConfidence, nextPowerOfTwo, padToPowerOfTwo, type Complex } from './fft';