@apocaliss92/scrypted-reolink-native 0.3.15 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/intercom.ts CHANGED
@@ -1,5 +1,11 @@
1
- import type { ReolinkBaichuanApi } from "@apocaliss92/reolink-baichuan-js" with { "resolution-mode": "import" };
2
- import sdk, { FFmpegInput, MediaObject, ScryptedMimeTypes } from "@scrypted/sdk";
1
+ import type { ReolinkBaichuanApi } from "@apocaliss92/reolink-baichuan-js" with {
2
+ "resolution-mode": "import",
3
+ };
4
+ import sdk, {
5
+ FFmpegInput,
6
+ MediaObject,
7
+ ScryptedMimeTypes,
8
+ } from "@scrypted/sdk";
3
9
  import { spawn, type ChildProcessWithoutNullStreams } from "node:child_process";
4
10
  import { ReolinkCamera } from "./camera";
5
11
 
@@ -10,520 +16,534 @@ import { ReolinkCamera } from "./camera";
10
16
  const DEFAULT_MAX_BACKLOG_MS = 120;
11
17
 
12
18
  export class ReolinkBaichuanIntercom {
13
- private intercomApi: ReolinkBaichuanApi | undefined;
14
- private session: Awaited<ReturnType<ReolinkBaichuanApi["createTalkSession"]>> | undefined;
15
- private ffmpeg: ChildProcessWithoutNullStreams | undefined;
16
- private stopping: Promise<void> | undefined;
17
- private loggedCodecInfo = false;
18
-
19
- private maxBacklogMs = DEFAULT_MAX_BACKLOG_MS;
20
- private maxBacklogBytes: number | undefined;
21
-
22
- private pcmBuffer: Buffer = Buffer.alloc(0);
19
+ private session:
20
+ | Awaited<ReturnType<ReolinkBaichuanApi["createDedicatedTalkSession"]>>
21
+ | undefined;
22
+ private ffmpeg: ChildProcessWithoutNullStreams | undefined;
23
+ private stopping: Promise<void> | undefined;
24
+ private loggedCodecInfo = false;
25
+
26
+ private maxBacklogMs = DEFAULT_MAX_BACKLOG_MS;
27
+ private maxBacklogBytes: number | undefined;
28
+
29
+ private pcmBuffer: Buffer = Buffer.alloc(0);
30
+
31
+ private pumping = false;
32
+ private pumpPromise: Promise<void> | undefined;
33
+
34
+ private lastBacklogClampLogAtMs = 0;
35
+
36
+ constructor(private camera: ReolinkCamera) {}
37
+
38
+ get blocksPerPayload(): number {
39
+ return Math.max(
40
+ 1,
41
+ Math.min(
42
+ 8,
43
+ this.camera.storageSettings.values.intercomBlocksPerPayload ?? 1,
44
+ ),
45
+ );
46
+ }
47
+
48
+ private get outputGain(): number {
49
+ const configured = Number(this.camera.storageSettings.values.intercomGain);
50
+ // Keep safe bounds: too high can clip and distort.
51
+ if (Number.isFinite(configured))
52
+ return Math.max(0.1, Math.min(10, configured));
53
+ return 1.0;
54
+ }
55
+
56
+ async start(media: MediaObject): Promise<void> {
57
+ const logger = this.camera.getBaichuanLogger();
58
+
59
+ const ffmpegInput =
60
+ await sdk.mediaManager.convertMediaObjectToJSON<FFmpegInput>(
61
+ media,
62
+ ScryptedMimeTypes.FFmpegInput,
63
+ );
64
+
65
+ await this.stop();
66
+ const channel = this.camera.storageSettings.values.rtspChannel;
67
+
68
+ try {
69
+ // Get the main API - library manages dedicated sockets internally
70
+ const api = await this.camera.withBaichuanRetry(async () => {
71
+ return await this.camera.ensureBaichuanClient();
72
+ });
73
+
74
+ // Best-effort: log codec requirements exposed by the camera.
75
+ // This mirrors neolink's source of truth: TalkAbility (cmd_id=10).
76
+ if (!this.loggedCodecInfo) {
77
+ this.loggedCodecInfo = true;
78
+ try {
79
+ const ability = await api.getTalkAbility(channel);
80
+ logger.log("Intercom TalkAbility", {
81
+ channel,
82
+ duplexList: ability.duplexList,
83
+ audioStreamModeList: ability.audioStreamModeList,
84
+ audioConfigList: ability.audioConfigList,
85
+ });
86
+ } catch (e) {
87
+ logger.warn(
88
+ "Intercom: unable to fetch TalkAbility",
89
+ e?.message || String(e),
90
+ );
91
+ }
92
+ }
23
93
 
24
- private pumping = false;
25
- private pumpPromise: Promise<void> | undefined;
94
+ // For UDP/battery cameras, wake up the camera if it's sleeping before creating talk session
95
+ if (this.camera.options?.type === "battery") {
96
+ try {
97
+ const sleepStatus = api.getSleepStatus({ channel });
98
+ if (sleepStatus.state === "sleeping") {
99
+ logger.log("Camera is sleeping, waking up for intercom...");
100
+ await api.wakeUp(channel, { waitAfterWakeMs: 2000 });
101
+ // Wait a bit more to ensure camera is fully awake
102
+ await new Promise((resolve) => setTimeout(resolve, 1000));
103
+ }
104
+ } catch (e) {
105
+ logger.debug(
106
+ "Failed to check/wake camera for intercom, proceeding anyway",
107
+ e?.message || String(e),
108
+ );
109
+ }
110
+ }
111
+
112
+ // Use createDedicatedTalkSession - library manages dedicated socket internally
113
+ // with auto-teardown on idle or when stop() is called
114
+ const session = await this.camera.withBaichuanRetry(async () => {
115
+ return await api.createDedicatedTalkSession(channel, {
116
+ blocksPerPayload: this.blocksPerPayload,
117
+ idleTimeoutMs: 30000, // Auto-teardown if no audio for 30s
118
+ deviceId: this.camera.nativeId,
119
+ logger,
120
+ });
121
+ });
122
+
123
+ this.session = session;
124
+ this.pcmBuffer = Buffer.alloc(0);
125
+ this.pumping = false;
126
+ this.pumpPromise = undefined;
127
+
128
+ const { audioConfig, blockSize, fullBlockSize } = session.info;
129
+ const sampleRate = audioConfig.sampleRate;
130
+
131
+ // Configurable backlog to trade latency vs stability.
132
+ // If the pipeline (ffmpeg decode + encode + send) can't keep up,
133
+ // dropping old audio avoids accumulating multi-second latency.
134
+ const configuredBacklog = Number(
135
+ this.camera.storageSettings.values.intercomMaxBacklogMs,
136
+ );
137
+ if (Number.isFinite(configuredBacklog)) {
138
+ this.maxBacklogMs = Math.max(20, Math.min(5000, configuredBacklog));
139
+ } else {
140
+ this.maxBacklogMs = DEFAULT_MAX_BACKLOG_MS;
141
+ }
142
+
143
+ // Mirror native-api.ts: receive PCM s16le from the forwarder and encode IMA ADPCM in JS.
144
+ const samplesPerBlock = blockSize * 2 + 1;
145
+ const bytesNeeded = samplesPerBlock * 2; // Int16 PCM
146
+ this.maxBacklogBytes = Math.max(
147
+ bytesNeeded,
148
+ // bytes/sec = sampleRate * channels * 2 (s16)
149
+ Math.floor((this.maxBacklogMs / 1000) * sampleRate * 1 * 2),
150
+ );
151
+
152
+ if (!Number.isFinite(sampleRate) || sampleRate <= 0) {
153
+ await this.stop();
154
+ throw new Error(`Invalid talk sampleRate: ${sampleRate}`);
155
+ }
156
+ if (
157
+ !Number.isFinite(blockSize) ||
158
+ blockSize <= 0 ||
159
+ !Number.isFinite(fullBlockSize) ||
160
+ fullBlockSize !== blockSize + 4
161
+ ) {
162
+ await this.stop();
163
+ throw new Error(
164
+ `Invalid talk block sizes: blockSize=${blockSize} fullBlockSize=${fullBlockSize}`,
165
+ );
166
+ }
167
+
168
+ logger.log("Starting intercom (baichuan/native-api flow)", {
169
+ channel,
170
+ audioType: audioConfig.audioType,
171
+ sampleRate: audioConfig.sampleRate,
172
+ samplePrecision: audioConfig.samplePrecision,
173
+ lengthPerEncoder: audioConfig.lengthPerEncoder,
174
+ soundTrack: audioConfig.soundTrack,
175
+ blockSize,
176
+ fullBlockSize,
177
+ samplesPerBlock,
178
+ bytesNeeded,
179
+ maxBacklogMs: this.maxBacklogMs,
180
+ maxBacklogBytes: this.maxBacklogBytes,
181
+ blocksPerPayload: this.blocksPerPayload,
182
+ });
183
+
184
+ // IMPORTANT: incoming audio from Scrypted/WebRTC is typically Opus.
185
+ // We must decode to PCM before IMA ADPCM encoding, otherwise it will be noise.
186
+ const gain = this.outputGain;
187
+ const ffmpegArgs = this.buildFfmpegPcmArgs(ffmpegInput, {
188
+ sampleRate,
189
+ channels: 1,
190
+ gain,
191
+ logger,
192
+ });
193
+
194
+ logger.log("Intercom ffmpeg decode args", ffmpegArgs);
195
+
196
+ const ffmpeg = spawn("ffmpeg", ffmpegArgs, {
197
+ stdio: ["ignore", "pipe", "pipe"],
198
+ });
199
+
200
+ if (this.session !== session) {
201
+ try {
202
+ ffmpeg.kill("SIGKILL");
203
+ } catch {}
204
+ return;
205
+ }
26
206
 
27
- private lastBacklogClampLogAtMs = 0;
207
+ this.ffmpeg = ffmpeg;
28
208
 
29
- constructor(private camera: ReolinkCamera) {
209
+ ffmpeg.stdout.on("data", (chunk: Buffer) => {
210
+ if (this.session !== session) return;
211
+ if (!chunk?.length) return;
212
+ this.enqueuePcm(session, chunk, bytesNeeded, blockSize);
213
+ });
214
+
215
+ let stderrLines = 0;
216
+ ffmpeg.stderr.on("data", (d: Buffer) => {
217
+ // Avoid spamming logs.
218
+ if (stderrLines++ < 12) {
219
+ logger.warn("Intercom ffmpeg", d.toString().trim());
220
+ }
221
+ });
222
+
223
+ ffmpeg.on("exit", (code, signal) => {
224
+ logger.warn(`Intercom ffmpeg exited code=${code} signal=${signal}`);
225
+ this.stop().catch(() => {});
226
+ });
227
+
228
+ logger.log("Intercom started (ffmpeg decode -> PCM -> IMA ADPCM)");
229
+ } catch (e) {
230
+ // Ensure the dedicated session gets torn down even if start fails half-way.
231
+ await this.stop();
232
+ throw e;
30
233
  }
234
+ }
31
235
 
32
- get blocksPerPayload(): number {
33
- return Math.max(1, Math.min(8, this.camera.storageSettings.values.intercomBlocksPerPayload ?? 1));
34
- }
236
+ stop(): Promise<void> {
237
+ if (this.stopping) return this.stopping;
35
238
 
36
- private get outputGain(): number {
37
- const configured = Number(this.camera.storageSettings.values.intercomGain);
38
- // Keep safe bounds: too high can clip and distort.
39
- if (Number.isFinite(configured)) return Math.max(0.1, Math.min(10, configured));
40
- return 1.0;
41
- }
239
+ this.stopping = (async () => {
240
+ const logger = this.camera.getBaichuanLogger();
42
241
 
43
- async start(media: MediaObject): Promise<void> {
44
- const logger = this.camera.getBaichuanLogger();
242
+ const ffmpeg = this.ffmpeg;
243
+ this.ffmpeg = undefined;
45
244
 
46
- const ffmpegInput = await sdk.mediaManager.convertMediaObjectToJSON<FFmpegInput>(
47
- media,
48
- ScryptedMimeTypes.FFmpegInput,
49
- );
245
+ const session = this.session;
246
+ this.session = undefined;
50
247
 
51
- await this.stop();
52
- const channel = this.camera.storageSettings.values.rtspChannel;
248
+ this.pcmBuffer = Buffer.alloc(0);
249
+
250
+ const sleepMs = async (ms: number) =>
251
+ new Promise<void>((resolve) => setTimeout(resolve, ms));
53
252
 
253
+ if (ffmpeg && ffmpeg.exitCode == null) {
54
254
  try {
55
- // IMPORTANT: intercom must run on its own independent Baichuan session (separate socket)
56
- // to avoid interference with any other sessions (streams/events/etc).
57
- const intercomStreamKey = `intercom_${Date.now()}_${Math.random().toString(16).slice(2)}`;
58
- const intercomApi = await this.camera.withBaichuanRetry(async () => {
59
- return await this.camera.createStreamClient(intercomStreamKey);
60
- });
61
- this.intercomApi = intercomApi;
62
-
63
- // Best-effort: log codec requirements exposed by the camera.
64
- // This mirrors neolink's source of truth: TalkAbility (cmd_id=10).
65
- if (!this.loggedCodecInfo) {
66
- this.loggedCodecInfo = true;
67
- try {
68
- const ability = await intercomApi.getTalkAbility(channel);
69
- const audioConfigs = ability.audioConfigList?.map((c) => ({
70
- audioType: c.audioType,
71
- sampleRate: c.sampleRate,
72
- samplePrecision: c.samplePrecision,
73
- lengthPerEncoder: c.lengthPerEncoder,
74
- soundTrack: c.soundTrack,
75
- }));
76
- logger.log("Intercom TalkAbility", {
77
- channel,
78
- duplexList: ability.duplexList,
79
- audioStreamModeList: ability.audioStreamModeList,
80
- audioConfigList: audioConfigs,
81
- });
82
- }
83
- catch (e) {
84
- logger.warn("Intercom: unable to fetch TalkAbility", e?.message || String(e));
85
- }
255
+ ffmpeg.kill("SIGKILL");
256
+ } catch {
257
+ // ignore
86
258
  }
87
259
 
88
- const session = await this.camera.withBaichuanRetry(async () => {
89
- const api = intercomApi;
90
-
91
- // For UDP/battery cameras, wake up the camera if it's sleeping before creating talk session
92
- if (this.camera.options?.type === 'battery') {
93
- try {
94
- const sleepStatus = api.getSleepStatus({ channel });
95
- if (sleepStatus.state === 'sleeping') {
96
- logger.log('Camera is sleeping, waking up for intercom...');
97
- await api.wakeUp(channel, { waitAfterWakeMs: 2000 });
98
- // Wait a bit more to ensure camera is fully awake
99
- await new Promise(resolve => setTimeout(resolve, 1000));
100
- }
101
- } catch (e) {
102
- logger.debug('Failed to check/wake camera for intercom, proceeding anyway', e?.message || String(e));
103
- }
104
- }
105
-
106
- return await api.createTalkSession(channel, {
107
- blocksPerPayload: this.blocksPerPayload,
108
- // IMPORTANT: for dedicated intercom sessions, teardown should be owned by the socket/session.
109
- // This mirrors stream behavior (closeApiOnTeardown) but for talk: session.stop() will close.
110
- closeSocketOnStop: true,
111
- });
112
- });
113
-
114
- this.session = session;
115
- this.pcmBuffer = Buffer.alloc(0);
116
- this.pumping = false;
117
- this.pumpPromise = undefined;
118
-
119
- const { audioConfig, blockSize, fullBlockSize } = session.info;
120
- const sampleRate = audioConfig.sampleRate;
121
-
122
- // Configurable backlog to trade latency vs stability.
123
- // If the pipeline (ffmpeg decode + encode + send) can't keep up,
124
- // dropping old audio avoids accumulating multi-second latency.
125
- const configuredBacklog = Number(this.camera.storageSettings.values.intercomMaxBacklogMs);
126
- if (Number.isFinite(configuredBacklog)) {
127
- this.maxBacklogMs = Math.max(20, Math.min(5000, configuredBacklog));
128
- }
129
- else {
130
- this.maxBacklogMs = DEFAULT_MAX_BACKLOG_MS;
260
+ try {
261
+ await Promise.race([
262
+ new Promise<void>((resolve) =>
263
+ ffmpeg.once("exit", () => resolve()),
264
+ ),
265
+ sleepMs(1000),
266
+ ]);
267
+ } catch {
268
+ // ignore
131
269
  }
270
+ }
132
271
 
133
- // Mirror native-api.ts: receive PCM s16le from the forwarder and encode IMA ADPCM in JS.
134
- const samplesPerBlock = blockSize * 2 + 1;
135
- const bytesNeeded = samplesPerBlock * 2; // Int16 PCM
136
- this.maxBacklogBytes = Math.max(
137
- bytesNeeded,
138
- // bytes/sec = sampleRate * channels * 2 (s16)
139
- Math.floor((this.maxBacklogMs / 1000) * sampleRate * 1 * 2),
140
- );
272
+ try {
273
+ await Promise.race([
274
+ this.pumpPromise ?? Promise.resolve(),
275
+ sleepMs(250),
276
+ ]);
277
+ } catch {
278
+ // ignore
279
+ }
280
+ this.pumpPromise = undefined;
141
281
 
142
- if (!Number.isFinite(sampleRate) || sampleRate <= 0) {
143
- await this.stop();
144
- throw new Error(`Invalid talk sampleRate: ${sampleRate}`);
145
- }
146
- if (!Number.isFinite(blockSize) || blockSize <= 0 || !Number.isFinite(fullBlockSize) || fullBlockSize !== blockSize + 4) {
147
- await this.stop();
148
- throw new Error(`Invalid talk block sizes: blockSize=${blockSize} fullBlockSize=${fullBlockSize}`);
282
+ // session.stop() handles socket teardown - library manages dedicated socket internally
283
+ if (session) {
284
+ try {
285
+ await Promise.race([session.stop(), sleepMs(2000)]);
286
+ } catch (e) {
287
+ logger.warn("Intercom session stop error", e?.message || String(e));
149
288
  }
150
-
151
- logger.log("Starting intercom (baichuan/native-api flow)", {
152
- channel,
153
- audioType: audioConfig.audioType,
154
- sampleRate: audioConfig.sampleRate,
155
- samplePrecision: audioConfig.samplePrecision,
156
- lengthPerEncoder: audioConfig.lengthPerEncoder,
157
- soundTrack: audioConfig.soundTrack,
158
- blockSize,
159
- fullBlockSize,
160
- samplesPerBlock,
161
- bytesNeeded,
162
- maxBacklogMs: this.maxBacklogMs,
163
- maxBacklogBytes: this.maxBacklogBytes,
164
- blocksPerPayload: this.blocksPerPayload,
165
- });
166
-
167
- // IMPORTANT: incoming audio from Scrypted/WebRTC is typically Opus.
168
- // We must decode to PCM before IMA ADPCM encoding, otherwise it will be noise.
169
- const gain = this.outputGain;
170
- const ffmpegArgs = this.buildFfmpegPcmArgs(ffmpegInput, {
171
- sampleRate,
172
- channels: 1,
173
- gain,
174
- logger,
289
+ }
290
+ })().finally(() => {
291
+ this.stopping = undefined;
292
+ });
293
+
294
+ return this.stopping;
295
+ }
296
+
297
+ private clamp16(x: number): number {
298
+ if (x > 32767) return 32767;
299
+ if (x < -32768) return -32768;
300
+ return x | 0;
301
+ }
302
+
303
+ private enqueuePcm(
304
+ session: Awaited<ReturnType<ReolinkBaichuanApi["createTalkSession"]>>,
305
+ pcmChunk: Buffer,
306
+ bytesNeeded: number,
307
+ blockSize: number,
308
+ ): void {
309
+ const logger = this.camera.getBaichuanLogger();
310
+
311
+ if (this.session !== session) return;
312
+
313
+ this.pcmBuffer = this.pcmBuffer.length
314
+ ? Buffer.concat([this.pcmBuffer, pcmChunk])
315
+ : pcmChunk;
316
+
317
+ // Cap backlog to keep latency bounded (drop oldest samples).
318
+ // IMPORTANT: do this on the shared buffer (not in a promise chain),
319
+ // otherwise old PCM chunks can pile up in queued closures and bypass
320
+ // this clamp, causing multi-second latency and degraded audio.
321
+ const maxBytes = this.maxBacklogBytes ?? bytesNeeded;
322
+ if (this.pcmBuffer.length > maxBytes) {
323
+ // Align to 16-bit samples.
324
+ const keep = maxBytes - (maxBytes % 2);
325
+ const dropped = this.pcmBuffer.length - keep;
326
+ this.pcmBuffer = this.pcmBuffer.subarray(this.pcmBuffer.length - keep);
327
+
328
+ const now = Date.now();
329
+ if (now - this.lastBacklogClampLogAtMs > 2000) {
330
+ this.lastBacklogClampLogAtMs = now;
331
+ logger.warn("Intercom backlog clamped (dropping PCM)", {
332
+ droppedBytes: dropped,
333
+ keptBytes: keep,
334
+ maxBytes,
175
335
  });
336
+ }
337
+ }
176
338
 
177
- logger.log("Intercom ffmpeg decode args", ffmpegArgs);
178
-
179
- const ffmpeg = spawn("ffmpeg", ffmpegArgs, {
180
- stdio: ["ignore", "pipe", "pipe"],
181
- });
182
-
183
- if (this.session !== session) {
184
- try { ffmpeg.kill("SIGKILL"); } catch { }
185
- return;
186
- }
187
-
188
- this.ffmpeg = ffmpeg;
339
+ if (this.pumping) return;
189
340
 
190
- ffmpeg.stdout.on("data", (chunk: Buffer) => {
191
- if (this.session !== session) return;
192
- if (!chunk?.length) return;
193
- this.enqueuePcm(session, chunk, bytesNeeded, blockSize);
194
- });
341
+ this.pumping = true;
342
+ this.pumpPromise = (async () => {
343
+ try {
344
+ while (true) {
345
+ if (this.session !== session) return;
346
+ if (this.pcmBuffer.length < bytesNeeded) return;
195
347
 
196
- let stderrLines = 0;
197
- ffmpeg.stderr.on("data", (d: Buffer) => {
198
- // Avoid spamming logs.
199
- if (stderrLines++ < 12) {
200
- logger.warn("Intercom ffmpeg", d.toString().trim());
201
- }
202
- });
348
+ const chunk = this.pcmBuffer.subarray(0, bytesNeeded);
349
+ this.pcmBuffer = this.pcmBuffer.subarray(bytesNeeded);
203
350
 
204
- ffmpeg.on("exit", (code, signal) => {
205
- logger.warn(`Intercom ffmpeg exited code=${code} signal=${signal}`);
206
- this.stop().catch(() => { });
207
- });
351
+ const pcmSamples = new Int16Array(
352
+ chunk.buffer,
353
+ chunk.byteOffset,
354
+ chunk.length / 2,
355
+ );
208
356
 
209
- logger.log("Intercom started (ffmpeg decode -> PCM -> IMA ADPCM)");
357
+ const adpcmChunk = this.encodeImaAdpcm(pcmSamples, blockSize);
358
+ await session.sendAudio(adpcmChunk);
210
359
  }
211
- catch (e) {
212
- // Ensure the dedicated session gets torn down even if start fails half-way.
213
- await this.stop();
214
- throw e;
360
+ } catch (e) {
361
+ logger.warn(
362
+ "Intercom PCM->ADPCM pipeline error",
363
+ e?.message || String(e),
364
+ );
365
+ } finally {
366
+ this.pumping = false;
367
+ }
368
+ })();
369
+ }
370
+
371
+ private buildFfmpegPcmArgs(
372
+ ffmpegInput: FFmpegInput,
373
+ options: {
374
+ sampleRate: number;
375
+ channels: number;
376
+ gain?: number;
377
+ logger?: any;
378
+ },
379
+ ): string[] {
380
+ const inputArgs = ffmpegInput.inputArguments ?? [];
381
+
382
+ // FFmpegInput may already contain one or more "-i" entries.
383
+ // For intercom decode, we only need a single input and only the first audio stream.
384
+ const sanitizedArgs: string[] = [];
385
+ let chosenInput: string | undefined;
386
+
387
+ for (let i = 0; i < inputArgs.length; i++) {
388
+ const arg = inputArgs[i];
389
+ if (arg === "-i") {
390
+ const maybeUrl = inputArgs[i + 1];
391
+ if (typeof maybeUrl === "string") {
392
+ if (!chosenInput) {
393
+ chosenInput = maybeUrl;
394
+ }
395
+ // Skip all inputs after the first.
396
+ i++;
397
+ continue;
215
398
  }
216
- }
217
-
218
- stop(): Promise<void> {
219
- if (this.stopping) return this.stopping;
220
-
221
- this.stopping = (async () => {
222
- const logger = this.camera.getBaichuanLogger();
223
-
224
- const ffmpeg = this.ffmpeg;
225
- this.ffmpeg = undefined;
226
-
227
- const session = this.session;
228
- this.session = undefined;
229
-
230
- const intercomApi = this.intercomApi;
231
- this.intercomApi = undefined;
232
-
233
- this.pcmBuffer = Buffer.alloc(0);
234
-
235
- const sleepMs = async (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms));
236
-
237
- if (ffmpeg && ffmpeg.exitCode == null) {
238
- try {
239
- ffmpeg.kill("SIGKILL");
240
- }
241
- catch {
242
- // ignore
243
- }
244
-
245
- try {
246
- await Promise.race([
247
- new Promise<void>((resolve) => ffmpeg.once("exit", () => resolve())),
248
- sleepMs(1000),
249
- ]);
250
- }
251
- catch {
252
- // ignore
253
- }
254
- }
255
-
256
- try {
257
- await Promise.race([this.pumpPromise ?? Promise.resolve(), sleepMs(250)]);
258
- }
259
- catch {
260
- // ignore
261
- }
262
- this.pumpPromise = undefined;
263
-
264
- if (session) {
265
- try {
266
- await Promise.race([session.stop(), sleepMs(2000)]);
267
- }
268
- catch (e) {
269
- logger.warn("Intercom session stop error", e?.message || String(e));
270
- }
271
- }
272
-
273
- // Socket teardown is handled by session.stop() (closeSocketOnStop).
274
- // Fallback cleanup: if we never created a session but we did create a dedicated client,
275
- // ensure it doesn't leak.
276
- if (!session && intercomApi) {
277
- try {
278
- await Promise.race([intercomApi.close(), sleepMs(2000)]);
279
- }
280
- catch (e) {
281
- logger.warn("Intercom client close error", e?.message || String(e));
282
- }
283
- }
284
- })().finally(() => {
285
- this.stopping = undefined;
286
- });
399
+ }
287
400
 
288
- return this.stopping;
401
+ sanitizedArgs.push(arg);
289
402
  }
290
403
 
291
- private clamp16(x: number): number {
292
- if (x > 32767) return 32767;
293
- if (x < -32768) return -32768;
294
- return x | 0;
404
+ const url = chosenInput ?? ffmpegInput.url;
405
+ if (!url) {
406
+ throw new Error("FFmpegInput missing url/input");
295
407
  }
296
408
 
297
- private enqueuePcm(
298
- session: Awaited<ReturnType<ReolinkBaichuanApi["createTalkSession"]>>,
299
- pcmChunk: Buffer,
300
- bytesNeeded: number,
301
- blockSize: number,
302
- ): void {
303
- const logger = this.camera.getBaichuanLogger();
304
-
305
- if (this.session !== session) return;
306
-
307
- this.pcmBuffer = this.pcmBuffer.length
308
- ? Buffer.concat([this.pcmBuffer, pcmChunk])
309
- : pcmChunk;
310
-
311
- // Cap backlog to keep latency bounded (drop oldest samples).
312
- // IMPORTANT: do this on the shared buffer (not in a promise chain),
313
- // otherwise old PCM chunks can pile up in queued closures and bypass
314
- // this clamp, causing multi-second latency and degraded audio.
315
- const maxBytes = this.maxBacklogBytes ?? bytesNeeded;
316
- if (this.pcmBuffer.length > maxBytes) {
317
- // Align to 16-bit samples.
318
- const keep = maxBytes - (maxBytes % 2);
319
- const dropped = this.pcmBuffer.length - keep;
320
- this.pcmBuffer = this.pcmBuffer.subarray(this.pcmBuffer.length - keep);
321
-
322
- const now = Date.now();
323
- if (now - this.lastBacklogClampLogAtMs > 2000) {
324
- this.lastBacklogClampLogAtMs = now;
325
- logger.warn("Intercom backlog clamped (dropping PCM)", {
326
- droppedBytes: dropped,
327
- keptBytes: keep,
328
- maxBytes,
329
- });
330
- }
409
+ const gain = options.gain ?? 1.0;
410
+ const hasExistingAudioFilter =
411
+ sanitizedArgs.includes("-af") ||
412
+ sanitizedArgs.includes("-filter:a") ||
413
+ sanitizedArgs.includes("-filter_complex");
414
+ const gainArgs =
415
+ gain !== 1.0
416
+ ? hasExistingAudioFilter
417
+ ? (options.logger?.warn?.(
418
+ "Intercom gain skipped: FFmpegInput already contains audio filters",
419
+ ) ?? undefined,
420
+ [])
421
+ : ["-filter:a", `volume=${gain}`]
422
+ : [];
423
+
424
+ return [
425
+ ...sanitizedArgs,
426
+ "-i",
427
+ url,
428
+ // Ensure we only decode the first input's audio stream.
429
+ "-map",
430
+ "0:a:0?",
431
+
432
+ // Low-latency decode settings.
433
+ "-fflags",
434
+ "nobuffer",
435
+ "-flags",
436
+ "low_delay",
437
+ "-flush_packets",
438
+ "1",
439
+
440
+ "-vn",
441
+ "-sn",
442
+ "-dn",
443
+ ...gainArgs,
444
+ "-acodec",
445
+ "pcm_s16le",
446
+ "-ar",
447
+ options.sampleRate.toString(),
448
+ "-ac",
449
+ options.channels.toString(),
450
+ "-f",
451
+ "s16le",
452
+ "pipe:1",
453
+ ];
454
+ }
455
+
456
+ private encodeImaAdpcm(pcm: Int16Array, blockSizeBytes: number): Buffer {
457
+ const samplesPerBlock = blockSizeBytes * 2 + 1;
458
+ const totalBlocks = Math.ceil(pcm.length / samplesPerBlock);
459
+ const outBlocks: Buffer[] = [];
460
+
461
+ const imaIndexTable = Int8Array.from([
462
+ -1, -1, -1, -1, 2, 4, 6, 8, -1, -1, -1, -1, 2, 4, 6, 8,
463
+ ]);
464
+
465
+ const imaStepTable = Int16Array.from([
466
+ 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 19, 21, 23, 25, 28, 31, 34, 37, 41,
467
+ 45, 50, 55, 60, 66, 73, 80, 88, 97, 107, 118, 130, 143, 157, 173, 190,
468
+ 209, 230, 253, 279, 307, 337, 371, 408, 449, 494, 544, 598, 658, 724, 796,
469
+ 876, 963, 1060, 1166, 1282, 1411, 1552, 1707, 1878, 2066, 2272, 2499,
470
+ 2749, 3024, 3327, 3660, 4026, 4428, 4871, 5358, 5894, 6484, 7132, 7845,
471
+ 8630, 9493, 10442, 11487, 12635, 13899, 15289, 16818, 18500, 20350, 22385,
472
+ 24623, 27086, 29794, 32767,
473
+ ]);
474
+
475
+ let sampleIndex = 0;
476
+
477
+ for (let b = 0; b < totalBlocks; b++) {
478
+ const block = Buffer.alloc(4 + blockSizeBytes);
479
+
480
+ // Block header
481
+ const first = pcm[sampleIndex] ?? 0;
482
+ let predictor = first;
483
+ let index = 0;
484
+
485
+ block.writeInt16LE(predictor, 0);
486
+ block.writeUInt8(index, 2);
487
+ block.writeUInt8(0, 3);
488
+
489
+ sampleIndex++;
490
+
491
+ // Encode samples into nibbles
492
+ const codes = new Uint8Array(blockSizeBytes * 2);
493
+ for (let i = 0; i < codes.length; i++) {
494
+ const sample = pcm[sampleIndex] ?? predictor;
495
+ sampleIndex++;
496
+
497
+ let diff = sample - predictor;
498
+ let sign = 0;
499
+ if (diff < 0) {
500
+ sign = 8;
501
+ diff = -diff;
331
502
  }
332
503
 
333
- if (this.pumping) return;
334
-
335
- this.pumping = true;
336
- this.pumpPromise = (async () => {
337
- try {
338
- while (true) {
339
- if (this.session !== session) return;
340
- if (this.pcmBuffer.length < bytesNeeded) return;
341
-
342
- const chunk = this.pcmBuffer.subarray(0, bytesNeeded);
343
- this.pcmBuffer = this.pcmBuffer.subarray(bytesNeeded);
344
-
345
- const pcmSamples = new Int16Array(
346
- chunk.buffer,
347
- chunk.byteOffset,
348
- chunk.length / 2,
349
- );
350
-
351
- const adpcmChunk = this.encodeImaAdpcm(pcmSamples, blockSize);
352
- await session.sendAudio(adpcmChunk);
353
- }
354
- }
355
- catch (e) {
356
- logger.warn("Intercom PCM->ADPCM pipeline error", e?.message || String(e));
357
- }
358
- finally {
359
- this.pumping = false;
360
- }
361
- })();
362
- }
504
+ let step = imaStepTable[index] ?? 7;
505
+ let delta = 0;
506
+ let vpdiff = step >> 3;
363
507
 
364
- private buildFfmpegPcmArgs(
365
- ffmpegInput: FFmpegInput,
366
- options: {
367
- sampleRate: number;
368
- channels: number;
369
- gain?: number;
370
- logger?: any;
371
- },
372
- ): string[] {
373
- const inputArgs = ffmpegInput.inputArguments ?? [];
374
-
375
- // FFmpegInput may already contain one or more "-i" entries.
376
- // For intercom decode, we only need a single input and only the first audio stream.
377
- const sanitizedArgs: string[] = [];
378
- let chosenInput: string | undefined;
379
-
380
- for (let i = 0; i < inputArgs.length; i++) {
381
- const arg = inputArgs[i];
382
- if (arg === "-i") {
383
- const maybeUrl = inputArgs[i + 1];
384
- if (typeof maybeUrl === "string") {
385
- if (!chosenInput) {
386
- chosenInput = maybeUrl;
387
- }
388
- // Skip all inputs after the first.
389
- i++;
390
- continue;
391
- }
392
- }
393
-
394
- sanitizedArgs.push(arg);
508
+ if (diff >= step) {
509
+ delta |= 4;
510
+ diff -= step;
511
+ vpdiff += step;
395
512
  }
396
-
397
- const url = chosenInput ?? ffmpegInput.url;
398
- if (!url) {
399
- throw new Error("FFmpegInput missing url/input");
513
+ step >>= 1;
514
+ if (diff >= step) {
515
+ delta |= 2;
516
+ diff -= step;
517
+ vpdiff += step;
518
+ }
519
+ step >>= 1;
520
+ if (diff >= step) {
521
+ delta |= 1;
522
+ vpdiff += step;
400
523
  }
401
524
 
402
- const gain = options.gain ?? 1.0;
403
- const hasExistingAudioFilter = sanitizedArgs.includes("-af") || sanitizedArgs.includes("-filter:a") || sanitizedArgs.includes("-filter_complex");
404
- const gainArgs = (gain !== 1.0)
405
- ? (
406
- hasExistingAudioFilter
407
- ? (options.logger?.warn?.("Intercom gain skipped: FFmpegInput already contains audio filters") ?? undefined, [])
408
- : ["-filter:a", `volume=${gain}`]
409
- )
410
- : [];
411
-
412
- return [
413
- ...sanitizedArgs,
414
- "-i", url,
415
- // Ensure we only decode the first input's audio stream.
416
- "-map", "0:a:0?",
417
-
418
- // Low-latency decode settings.
419
- "-fflags", "nobuffer",
420
- "-flags", "low_delay",
421
- "-flush_packets", "1",
422
-
423
- "-vn", "-sn", "-dn",
424
- ...gainArgs,
425
- "-acodec", "pcm_s16le",
426
- "-ar", options.sampleRate.toString(),
427
- "-ac", options.channels.toString(),
428
- "-f", "s16le",
429
- "pipe:1",
430
- ];
431
- }
525
+ if (sign) predictor -= vpdiff;
526
+ else predictor += vpdiff;
432
527
 
433
- private encodeImaAdpcm(pcm: Int16Array, blockSizeBytes: number): Buffer {
434
- const samplesPerBlock = blockSizeBytes * 2 + 1;
435
- const totalBlocks = Math.ceil(pcm.length / samplesPerBlock);
436
- const outBlocks: Buffer[] = [];
528
+ predictor = this.clamp16(predictor);
437
529
 
438
- const imaIndexTable = Int8Array.from([
439
- -1, -1, -1, -1, 2, 4, 6, 8,
440
- -1, -1, -1, -1, 2, 4, 6, 8,
441
- ]);
530
+ index += imaIndexTable[delta] ?? 0;
531
+ if (index < 0) index = 0;
532
+ if (index > 88) index = 88;
442
533
 
443
- const imaStepTable = Int16Array.from([
444
- 7, 8, 9, 10, 11, 12, 13, 14, 16, 17,
445
- 19, 21, 23, 25, 28, 31, 34, 37, 41, 45,
446
- 50, 55, 60, 66, 73, 80, 88, 97, 107, 118,
447
- 130, 143, 157, 173, 190, 209, 230, 253, 279, 307,
448
- 337, 371, 408, 449, 494, 544, 598, 658, 724, 796,
449
- 876, 963, 1060, 1166, 1282, 1411, 1552, 1707, 1878, 2066,
450
- 2272, 2499, 2749, 3024, 3327, 3660, 4026, 4428, 4871, 5358,
451
- 5894, 6484, 7132, 7845, 8630, 9493, 10442, 11487, 12635, 13899,
452
- 15289, 16818, 18500, 20350, 22385, 24623, 27086, 29794, 32767,
453
- ]);
534
+ codes[i] = (delta | sign) & 0x0f;
535
+ }
454
536
 
455
- let sampleIndex = 0;
456
-
457
- for (let b = 0; b < totalBlocks; b++) {
458
- const block = Buffer.alloc(4 + blockSizeBytes);
459
-
460
- // Block header
461
- const first = pcm[sampleIndex] ?? 0;
462
- let predictor = first;
463
- let index = 0;
464
-
465
- block.writeInt16LE(predictor, 0);
466
- block.writeUInt8(index, 2);
467
- block.writeUInt8(0, 3);
468
-
469
- sampleIndex++;
470
-
471
- // Encode samples into nibbles
472
- const codes = new Uint8Array(blockSizeBytes * 2);
473
- for (let i = 0; i < codes.length; i++) {
474
- const sample = pcm[sampleIndex] ?? predictor;
475
- sampleIndex++;
476
-
477
- let diff = sample - predictor;
478
- let sign = 0;
479
- if (diff < 0) {
480
- sign = 8;
481
- diff = -diff;
482
- }
483
-
484
- let step = imaStepTable[index] ?? 7;
485
- let delta = 0;
486
- let vpdiff = step >> 3;
487
-
488
- if (diff >= step) {
489
- delta |= 4;
490
- diff -= step;
491
- vpdiff += step;
492
- }
493
- step >>= 1;
494
- if (diff >= step) {
495
- delta |= 2;
496
- diff -= step;
497
- vpdiff += step;
498
- }
499
- step >>= 1;
500
- if (diff >= step) {
501
- delta |= 1;
502
- vpdiff += step;
503
- }
504
-
505
- if (sign) predictor -= vpdiff;
506
- else predictor += vpdiff;
507
-
508
- predictor = this.clamp16(predictor);
509
-
510
- index += imaIndexTable[delta] ?? 0;
511
- if (index < 0) index = 0;
512
- if (index > 88) index = 88;
513
-
514
- codes[i] = (delta | sign) & 0x0f;
515
- }
516
-
517
- // Pack nibble: low nibble first, then high nibble
518
- for (let i = 0; i < blockSizeBytes; i++) {
519
- const lo = codes[i * 2] ?? 0;
520
- const hi = codes[i * 2 + 1] ?? 0;
521
- block[4 + i] = (lo & 0x0f) | ((hi & 0x0f) << 4);
522
- }
523
-
524
- outBlocks.push(block);
525
- }
537
+ // Pack nibble: low nibble first, then high nibble
538
+ for (let i = 0; i < blockSizeBytes; i++) {
539
+ const lo = codes[i * 2] ?? 0;
540
+ const hi = codes[i * 2 + 1] ?? 0;
541
+ block[4 + i] = (lo & 0x0f) | ((hi & 0x0f) << 4);
542
+ }
526
543
 
527
- return Buffer.concat(outBlocks);
544
+ outBlocks.push(block);
528
545
  }
546
+
547
+ return Buffer.concat(outBlocks);
548
+ }
529
549
  }