dspx 1.3.3 → 1.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "dspx",
3
- "version": "1.3.3",
3
+ "version": "1.3.5",
4
4
  "description": "High-performance DSP library with native C++ acceleration and Redis state persistence",
5
5
  "main": "./dist/index.js",
6
6
  "types": "./dist/index.d.ts",
Binary file
@@ -51,6 +51,30 @@ namespace dsp
51
51
  #include <cstdlib>
52
52
  #include "utils/Toon.h"
53
53
 
54
// Vector instruction-set selection used by the timestamp interpolation code.
// The first matching entry wins, so at most one of the three HAS_* macros is 1;
// when none matches, all three are 0 and callers take their scalar path.
//   AVX2 (8 floats) > SSE/SSE2 (4 floats) > NEON (4 floats) > scalar
#if defined(__AVX2__)
#  include <immintrin.h>
#  define HAS_AVX2 1
#  define HAS_SSE 0
#  define HAS_NEON 0
#elif defined(__SSE__) || defined(__SSE2__) || \
    (defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)))
#  include <xmmintrin.h> // SSE
#  include <emmintrin.h> // SSE2
#  define HAS_AVX2 0
#  define HAS_SSE 1
#  define HAS_NEON 0
#elif defined(__ARM_NEON) || defined(__ARM_NEON__)
#  include <arm_neon.h>
#  define HAS_AVX2 0
#  define HAS_SSE 0
#  define HAS_NEON 1
#else
#  define HAS_AVX2 0
#  define HAS_SSE 0
#  define HAS_NEON 0
#endif
77
+
54
78
  namespace dsp
55
79
  {
56
80
 
@@ -84,9 +108,13 @@ namespace dsp
84
108
  DspPipeline::DspPipeline(const Napi::CallbackInfo &info)
85
109
  : Napi::ObjectWrap<DspPipeline>(info)
86
110
  {
111
+ // std::cout << "[DEBUG] DspPipeline::Constructor - this=" << this
112
+ // << ", creating pipeline" << std::endl;
87
113
  // Initialize the lock
88
114
  m_isBusy = std::make_shared<std::atomic<bool>>(false);
115
+ // std::cout << "[DEBUG] DspPipeline::Constructor - m_isBusy=" << m_isBusy.get() << std::endl;
89
116
  InitializeStageFactories();
117
+ // std::cout << "[DEBUG] DspPipeline::Constructor - complete, this=" << this << std::endl;
90
118
  }
91
119
 
92
120
  /**
@@ -1146,22 +1174,26 @@ namespace dsp
1146
1174
  Napi::Value DspPipeline::AddStage(const Napi::CallbackInfo &info)
1147
1175
  {
1148
1176
  Napi::Env env = info.Env();
1177
+ // std::cout << "[DEBUG] DspPipeline::AddStage - this=" << this << std::endl;
1149
1178
 
1150
1179
  // Check if pipeline is disposed
1151
1180
  if (m_disposed)
1152
1181
  {
1182
+ // std::cout << "[DEBUG] AddStage - pipeline disposed, this=" << this << std::endl;
1153
1183
  Napi::Error::New(env, "Pipeline is disposed").ThrowAsJavaScriptException();
1154
1184
  return env.Undefined();
1155
1185
  }
1156
1186
 
1157
1187
  if (*m_isBusy)
1158
1188
  {
1189
+ // std::cout << "[DEBUG] AddStage - pipeline busy, this=" << this << std::endl;
1159
1190
  Napi::Error::New(env, "Cannot add stage while processing").ThrowAsJavaScriptException();
1160
1191
  return env.Undefined();
1161
1192
  }
1162
1193
 
1163
1194
  // 1. Get arguments from TypeScript
1164
1195
  std::string stageName = info[0].As<Napi::String>();
1196
+ // std::cout << "[DEBUG] AddStage - stageName=" << stageName << ", this=" << this << std::endl;
1165
1197
  Napi::Object params = info[1].As<Napi::Object>();
1166
1198
 
1167
1199
  // 2. Look up the stage factory in the map
@@ -1202,10 +1234,12 @@ namespace dsp
1202
1234
  Napi::Value DspPipeline::AddFilterStage(const Napi::CallbackInfo &info)
1203
1235
  {
1204
1236
  Napi::Env env = info.Env();
1237
+ // std::cout << "[DEBUG] DspPipeline::AddFilterStage - this=" << this << std::endl;
1205
1238
 
1206
1239
  // Check if pipeline is disposed
1207
1240
  if (m_disposed)
1208
1241
  {
1242
+ // std::cout << "[DEBUG] AddFilterStage - pipeline disposed, this=" << this << std::endl;
1209
1243
  Napi::Error::New(env, "Pipeline is disposed").ThrowAsJavaScriptException();
1210
1244
  return env.Undefined();
1211
1245
  }
@@ -1249,6 +1283,569 @@ namespace dsp
1249
1283
  return env.Undefined();
1250
1284
  }
1251
1285
 
1286
/**
 * Computes the timestamp for a single fractional position in the source.
 *
 * Source sample s is read at timestamps[s * stride].
 *  - position at or past the end: last timestamp plus
 *    (position - lastIndex) * timeScale
 *  - position inside the final sample: that sample's timestamp
 *  - otherwise: linear interpolation between the two neighbouring samples
 *
 * @param timestamps     Source timestamps (must not be null)
 * @param prevNumSamples Number of source samples (must be > 0)
 * @param stride         Element distance between consecutive source samples
 * @param inputTime      Fractional source position; expected finite and >= 0
 * @param timeScale      Factor applied to the overshoot when extrapolating
 * @return The interpolated or extrapolated timestamp
 */
inline float interpolateTimestampAt(
    const float *timestamps,
    size_t prevNumSamples,
    size_t stride,
    double inputTime,
    double timeScale)
{
    const size_t lastIdx = prevNumSamples - 1;

    // Written as !(a < b) so a NaN position also lands here instead of
    // reaching the float -> integer cast below.
    if (!(inputTime < static_cast<double>(prevNumSamples)))
    {
        return timestamps[lastIdx * stride] +
               static_cast<float>((inputTime - lastIdx) * timeScale);
    }

    const size_t inputIdx = static_cast<size_t>(inputTime);
    if (inputIdx >= lastIdx)
    {
        // No right-hand neighbour to interpolate towards.
        return timestamps[lastIdx * stride];
    }

    const double frac = inputTime - inputIdx;
    const float t0 = timestamps[inputIdx * stride];
    const float t1 = timestamps[(inputIdx + 1) * stride];
    return t0 + static_cast<float>(frac) * (t1 - t0);
}

/**
 * Timestamp interpolation for resizing stages.
 *
 * For every output sample i the source position i * timeScale is evaluated
 * and the resulting timestamp is written to all outputChannels slots of that
 * output frame.
 *
 * The function keeps its historical name for callers, but the position math
 * is done in double precision in one plain loop. float32 cannot represent
 * sample positions above 2^24 exactly, so a float-based path would give
 * results that depend on the build target and on whether a sample fell into
 * a vector batch or the tail.
 *
 * Degenerate input is handled without touching memory out of bounds:
 *  - nothing is written when numOutputSamples == 0 or outputChannels <= 0
 *  - output is grown if it holds fewer than numOutputSamples * outputChannels
 *    elements
 *  - with no usable source (null pointer, zero samples, non-positive channel
 *    count) every produced timestamp is 0.0f
 *  - a negative or NaN source position is clamped to 0
 *
 * @param timestamps       Source timestamps; sample s is read at
 *                         timestamps[s * prevChannels]
 * @param prevNumSamples   Number of samples in source
 * @param prevChannels     Number of channels in source (read stride)
 * @param numOutputSamples Number of samples to generate
 * @param outputChannels   Number of channels in output (write stride)
 * @param timeScale        Time scaling factor from stage
 * @param output           Output timestamp vector
 */
inline void interpolateTimestampsSIMD(
    const float *timestamps,
    size_t prevNumSamples,
    int prevChannels,
    size_t numOutputSamples,
    int outputChannels,
    double timeScale,
    std::vector<float> &output)
{
    if (numOutputSamples == 0 || outputChannels <= 0)
    {
        return;
    }

    const size_t outStride = static_cast<size_t>(outputChannels);
    const size_t required = numOutputSamples * outStride;
    if (output.size() < required)
    {
        output.resize(required);
    }

    const bool haveSource =
        timestamps != nullptr && prevNumSamples > 0 && prevChannels > 0;
    const size_t inStride = haveSource ? static_cast<size_t>(prevChannels) : 0;

    for (size_t i = 0; i < numOutputSamples; ++i)
    {
        float timestamp = 0.0f;
        if (haveSource)
        {
            double inputTime = static_cast<double>(i) * timeScale;
            if (!(inputTime > 0.0))
            {
                // Negative or NaN: converting it to an index would be undefined.
                inputTime = 0.0;
            }
            timestamp = interpolateTimestampAt(
                timestamps, prevNumSamples, inStride, inputTime, timeScale);
        }

        // Replicate the timestamp across all channels of this output frame.
        float *frame = output.data() + i * outStride;
        for (size_t ch = 0; ch < outStride; ++ch)
        {
            frame[ch] = timestamp;
        }
    }
}
1848
+
1252
1849
  /**
1253
1850
  * AsyncWorker for processing DSP pipeline in background thread
1254
1851
  */
@@ -1278,34 +1875,46 @@ namespace dsp
1278
1875
  m_timestampRef(std::move(timestampRef)),
1279
1876
  m_busyLock(busyLock)
1280
1877
  {
1878
+ // std::cout << "[DEBUG] ProcessWorker::ProcessWorker - this=" << this << std::endl;
1879
+ m_stageCount = m_stages.size();
1880
+ m_stageTypes.reserve(m_stageCount);
1881
+ for (const auto &stage : m_stages)
1882
+ {
1883
+ m_stageTypes.push_back(stage->getType());
1884
+ }
1281
1885
  }
1282
1886
 
1283
1887
  protected:
1284
1888
  // This runs on a worker thread (not blocking the event loop)
1285
1889
  void Execute() override
1286
1890
  {
1287
- // Local storage for generated timestamps (RAII - automatically freed when function exits)
1891
+ // std::cout << "[DEBUG] ProcessWorker::Execute - START, this=" << this
1892
+ // << ", data=" << m_data << ", numSamples=" << m_numSamples
1893
+ // << ", channels=" << m_channels << std::endl;
1894
+ // std::cout << "[WORKER-" << std::this_thread::get_id() << "] Execute START (stages="
1895
+ // << m_stages.size() << ")" << std::endl;
1896
+
1897
+ // CRITICAL FIX: Use a unique_ptr for timestamp ownership
1288
1898
  std::vector<float> generatedTimestamps;
1899
+ std::unique_ptr<std::vector<float>> allocatedTimestamps;
1289
1900
 
1290
1901
  try
1291
1902
  {
1292
- // 1. Generate Timestamps if missing (Optimization)
1903
+ // 1. Generate Timestamps if missing
1293
1904
  if (m_timestamps == nullptr)
1294
1905
  {
1295
- generatedTimestamps.resize(m_numSamples);
1906
+ // std::cout << "[DEBUG] Execute - generating timestamps, sampleRate=" << m_sampleRate << std::endl;
1296
1907
 
1297
- // Calculate time step (dt) in milliseconds
1298
- // If sampleRate is 0 or invalid, default to 1.0 (treating indices as time)
1908
+ generatedTimestamps.resize(m_numSamples);
1299
1909
  double dt = (m_sampleRate > 0.0) ? (1000.0 / m_sampleRate) : 1.0;
1300
1910
 
1301
- // Fill timestamps linearly: t[i] = i * dt
1302
1911
  for (size_t i = 0; i < m_numSamples; ++i)
1303
1912
  {
1304
1913
  generatedTimestamps[i] = static_cast<float>(i * dt);
1305
1914
  }
1306
1915
 
1307
- // Point the main processing pointer to our locally generated data
1308
1916
  m_timestamps = generatedTimestamps.data();
1917
+ // std::cout << "[DEBUG] Execute - timestamps generated, addr=" << m_timestamps << std::endl;
1309
1918
  }
1310
1919
 
1311
1920
  // 2. Process the buffer through all stages
@@ -1315,79 +1924,105 @@ namespace dsp
1315
1924
  bool usingTempBuffer = false;
1316
1925
 
1317
1926
  const bool debugStageDumps = std::getenv("DSPX_DEBUG_STAGE_DUMPS") != nullptr;
1318
- for (const auto &stage : m_stages)
1927
+
1928
+ // std::cout << "[DEBUG] Execute - processing through " << m_stages.size() << " stages" << std::endl;
1929
+ for (size_t stageIdx = 0; stageIdx < m_stages.size(); ++stageIdx)
1319
1930
  {
1931
+ const auto &stage = m_stages[stageIdx];
1932
+
1933
+ // std::cout << "[DEBUG] Execute - stage " << stageIdx << ", type="
1934
+ // << stage->getType() << ", addr=" << stage.get()
1935
+ // << ", isResizing=" << stage->isResizing() << std::endl;
1936
+
1320
1937
  if (stage->isResizing())
1321
1938
  {
1322
- // Resizing logic (same as before)
1939
+ // Calculate output size
1323
1940
  size_t outputSize = stage->calculateOutputSize(currentSize);
1324
1941
  float *outputBuffer = new float[outputSize];
1325
1942
 
1943
+ // std::cout << "[DEBUG] Execute - allocated output buffer, size=" << outputSize
1944
+ // << ", addr=" << outputBuffer << std::endl;
1945
+
1946
+ // CRITICAL: Save the PREVIOUS size before processResizing updates currentSize
1947
+ size_t prevSize = currentSize;
1948
+
1326
1949
  size_t actualOutputSize = 0;
1327
1950
  stage->processResizing(currentBuffer, currentSize,
1328
1951
  outputBuffer, actualOutputSize,
1329
1952
  m_channels, m_timestamps);
1330
1953
 
1331
- if (usingTempBuffer)
1332
- delete[] currentBuffer;
1954
+ // std::cout << "[DEBUG] Execute - stage " << stageIdx << " resized: "
1955
+ // << prevSize << " -> " << actualOutputSize // Use prevSize!
1956
+ // << ", buffer=" << outputBuffer << std::endl;
1957
+
1958
+ // Free previous temp buffer if we owned it
1959
+ if (usingTempBuffer && tempBuffer != nullptr)
1960
+ {
1961
+ // std::cout << "[DEBUG] Execute - freeing previous temp buffer=" << tempBuffer << std::endl;
1962
+ delete[] tempBuffer;
1963
+ }
1964
+
1965
+ // Update buffer tracking
1966
+ tempBuffer = outputBuffer;
1333
1967
  currentBuffer = outputBuffer;
1334
1968
  currentSize = actualOutputSize;
1335
1969
  usingTempBuffer = true;
1336
1970
 
1971
+ // Save previous channel count BEFORE updating
1972
+ int prevChannels = m_channels;
1973
+
1974
+ // Update channel count if stage changed it
1337
1975
  int outputChannels = stage->getOutputChannels();
1338
1976
  if (outputChannels > 0)
1977
+ {
1978
+ // std::cout << "[DEBUG] Execute - channels changed: " << m_channels
1979
+ // << " -> " << outputChannels << std::endl;
1339
1980
  m_channels = outputChannels;
1981
+ }
1340
1982
 
1341
- // Re-interpolate timestamps if needed (same as before)
1983
+ // Re-interpolate timestamps if needed
1342
1984
  if (m_timestamps != nullptr)
1343
1985
  {
1986
+ // std::cout << "[DEBUG] Execute - reinterpolating timestamps" << std::endl;
1987
+
1344
1988
  double timeScale = stage->getTimeScaleFactor();
1345
1989
  size_t numOutputSamples = actualOutputSize / m_channels;
1346
- float *newTimestamps = new float[actualOutputSize];
1347
1990
 
1348
- for (size_t i = 0; i < numOutputSamples; ++i)
1349
- {
1350
- double inputTime = i * timeScale;
1351
- size_t inputIdx = static_cast<size_t>(inputTime);
1352
- double frac = inputTime - inputIdx;
1353
- float timestamp;
1354
-
1355
- if (inputIdx >= (currentSize / m_channels))
1356
- {
1357
- size_t lastIdx = (currentSize / m_channels) - 1;
1358
- timestamp = m_timestamps[lastIdx * m_channels] +
1359
- static_cast<float>((inputTime - lastIdx) * timeScale);
1360
- }
1361
- else if (inputIdx + 1 >= (currentSize / m_channels))
1362
- {
1363
- timestamp = m_timestamps[inputIdx * m_channels];
1364
- }
1365
- else
1366
- {
1367
- float t0 = m_timestamps[inputIdx * m_channels];
1368
- float t1 = m_timestamps[(inputIdx + 1) * m_channels];
1369
- timestamp = t0 + static_cast<float>(frac) * (t1 - t0);
1370
- }
1371
-
1372
- for (int ch = 0; ch < m_channels; ++ch)
1373
- {
1374
- newTimestamps[i * m_channels + ch] = timestamp;
1375
- }
1376
- }
1377
- m_timestamps = newTimestamps;
1378
- m_timestampBuffer.reset(newTimestamps);
1991
+ // CRITICAL FIX: Use prevSize and prevChannels!
1992
+ size_t prevNumSamples = prevSize / prevChannels;
1993
+
1994
+ // Create new timestamp vector
1995
+ auto newTimestamps = std::make_unique<std::vector<float>>(actualOutputSize);
1996
+
1997
+ // Use SIMD-optimized interpolation
1998
+ interpolateTimestampsSIMD(
1999
+ m_timestamps,
2000
+ prevNumSamples,
2001
+ prevChannels,
2002
+ numOutputSamples,
2003
+ m_channels,
2004
+ timeScale,
2005
+ *newTimestamps);
2006
+
2007
+ // CRITICAL FIX: Transfer ownership safely
2008
+ allocatedTimestamps = std::move(newTimestamps);
2009
+ m_timestamps = allocatedTimestamps->data();
2010
+
2011
+ // std::cout << "[DEBUG] Execute - timestamps reinterpolated (SIMD), new addr="
2012
+ // << m_timestamps << std::endl;
1379
2013
  }
1380
2014
  }
1381
2015
  else
1382
2016
  {
1383
2017
  // In-place processing
2018
+ // std::cout << "[DEBUG] Execute - stage " << stageIdx << " in-place processing" << std::endl;
1384
2019
  stage->process(currentBuffer, currentSize, m_channels, m_timestamps);
1385
2020
 
1386
2021
  if (debugStageDumps)
1387
2022
  {
1388
2023
  const char *stype = stage->getType();
1389
2024
  size_t toShow = std::min<size_t>(8, currentSize);
1390
- std::cout << "[DUMP] after '" << stype << "':";
2025
+ // std::cout << "[DUMP] after '" << stype << "':";
1391
2026
  for (size_t i = 0; i < toShow; ++i)
1392
2027
  {
1393
2028
  std::cout << (i == 0 ? ' ' : ',') << currentBuffer[i];
@@ -1400,16 +2035,22 @@ namespace dsp
1400
2035
  m_finalBuffer = currentBuffer;
1401
2036
  m_finalSize = currentSize;
1402
2037
  m_ownsBuffer = usingTempBuffer;
2038
+
2039
+ // std::cout << "[DEBUG] Execute - COMPLETE, finalBuffer=" << m_finalBuffer
2040
+ // << ", finalSize=" << m_finalSize << ", ownsBuffer=" << m_ownsBuffer << std::endl;
1403
2041
  }
1404
2042
  catch (const std::exception &e)
1405
2043
  {
2044
+ // std::cout << "[DEBUG] Execute - EXCEPTION: " << e.what() << ", this=" << this << std::endl;
2045
+ // std::cout << "[WORKER-" << std::this_thread::get_id() << "] EXCEPTION: " << e.what() << std::endl;
1406
2046
  SetError(e.what());
1407
2047
  }
1408
- }
2048
+ } // This runs on the main thread after Execute() completes
1409
2049
 
1410
- // This runs on the main thread after Execute() completes
1411
2050
  void OnOK() override
1412
2051
  {
2052
+ // std::cout << "[DEBUG] ProcessWorker::OnOK - START, this=" << this
2053
+ // << ", finalBuffer=" << (void *)m_finalBuffer << ", finalSize=" << m_finalSize << std::endl;
1413
2054
  *m_busyLock = false; // unlock the pipeline
1414
2055
 
1415
2056
  Napi::Env env = Env();
@@ -1423,22 +2064,29 @@ namespace dsp
1423
2064
  // Clean up temporary buffer if we allocated one
1424
2065
  if (m_ownsBuffer)
1425
2066
  {
2067
+ // std::cout << "[DEBUG] OnOK - deleting temp buffer=" << (void *)m_finalBuffer << std::endl;
1426
2068
  delete[] m_finalBuffer;
1427
2069
  }
1428
2070
 
2071
+ // std::cout << "[DEBUG] OnOK - COMPLETE, resolving promise, this=" << this << std::endl;
1429
2072
  // Resolve the promise with the processed buffer
1430
2073
  m_deferred.Resolve(outputArray);
1431
2074
  }
1432
2075
 
1433
2076
  void OnError(const Napi::Error &error) override
1434
2077
  {
2078
+ // std::cout << "[DEBUG] ProcessWorker::OnError - this=" << this
2079
+ // << ", error=" << error.Message() << std::endl;
1435
2080
  m_deferred.Reject(error.Value());
1436
2081
  *m_busyLock = false; // unlock the pipeline
2082
+ // std::cout << "[DEBUG] OnError - COMPLETE, this=" << this << std::endl;
1437
2083
  }
1438
2084
 
1439
2085
  private:
1440
2086
  Napi::Promise::Deferred m_deferred;
1441
2087
  std::vector<std::unique_ptr<IDspStage>> &m_stages;
2088
+ size_t m_stageCount;
2089
+ std::vector<std::string> m_stageTypes;
1442
2090
  float *m_data;
1443
2091
  float *m_timestamps;
1444
2092
  double m_sampleRate;
@@ -1469,16 +2117,19 @@ namespace dsp
1469
2117
  Napi::Value DspPipeline::ProcessAsync(const Napi::CallbackInfo &info)
1470
2118
  {
1471
2119
  Napi::Env env = info.Env();
2120
+ // std::cout << "[DEBUG] DspPipeline::ProcessAsync - this=" << this << std::endl;
1472
2121
 
1473
2122
  // Check if pipeline is disposed
1474
2123
  if (m_disposed)
1475
2124
  {
2125
+ // std::cout << "[DEBUG] ProcessAsync - pipeline disposed, this=" << this << std::endl;
1476
2126
  Napi::Error::New(env, "Pipeline is disposed").ThrowAsJavaScriptException();
1477
2127
  return env.Undefined();
1478
2128
  }
1479
2129
 
1480
2130
  if (*m_isBusy)
1481
2131
  {
2132
+ // std::cout << "[DEBUG] ProcessAsync - pipeline busy, this=" << this << std::endl;
1482
2133
  Napi::Error::New(env, "Pipeline is busy: Cannot call process() while another operation is running.").ThrowAsJavaScriptException();
1483
2134
  return env.Undefined();
1484
2135
  }
@@ -1547,8 +2198,13 @@ namespace dsp
1547
2198
  }
1548
2199
 
1549
2200
  *m_isBusy = true; // lock the pipeline
2201
+ // std::cout << "[DEBUG] ProcessAsync - creating worker, data=" << (void *)data
2202
+ // << ", numSamples=" << numSamples << ", channels=" << channels
2203
+ // << ", this=" << this << std::endl;
1550
2204
 
1551
2205
  ProcessWorker *worker = new ProcessWorker(env, std::move(deferred), m_stages, data, timestamps, sampleRate, numSamples, channels, std::move(bufferRef), std::move(timestampRef), m_isBusy);
2206
+ // std::cout << "[DEBUG] ProcessAsync - queuing worker=" << (void *)worker
2207
+ // << ", this=" << this << std::endl;
1552
2208
  worker->Queue();
1553
2209
 
1554
2210
  return promise;
@@ -1566,16 +2222,19 @@ namespace dsp
1566
2222
  Napi::Value DspPipeline::ProcessSync(const Napi::CallbackInfo &info)
1567
2223
  {
1568
2224
  Napi::Env env = info.Env();
2225
+ // std::cout << "[DEBUG] DspPipeline::ProcessSync - this=" << this << std::endl;
1569
2226
 
1570
2227
  // Check if pipeline is disposed
1571
2228
  if (m_disposed)
1572
2229
  {
2230
+ // std::cout << "[DEBUG] ProcessSync - pipeline disposed, this=" << this << std::endl;
1573
2231
  Napi::Error::New(env, "Pipeline is disposed").ThrowAsJavaScriptException();
1574
2232
  return env.Undefined();
1575
2233
  }
1576
2234
 
1577
2235
  if (*m_isBusy)
1578
2236
  {
2237
+ // std::cout << "[DEBUG] ProcessSync - pipeline busy, this=" << this << std::endl;
1579
2238
  Napi::Error::New(env, "Pipeline is busy: Cannot call processSync() while an async operation is running.").ThrowAsJavaScriptException();
1580
2239
  return env.Undefined();
1581
2240
  }
@@ -1702,10 +2361,13 @@ namespace dsp
1702
2361
  Napi::Value DspPipeline::SaveState(const Napi::CallbackInfo &info)
1703
2362
  {
1704
2363
  Napi::Env env = info.Env();
2364
+ // std::cout << "[DEBUG] DspPipeline::SaveState - this=" << this
2365
+ // << ", stages=" << m_stages.size() << std::endl;
1705
2366
 
1706
2367
  // Check if pipeline is disposed
1707
2368
  if (m_disposed)
1708
2369
  {
2370
+ // std::cout << "[DEBUG] SaveState - pipeline disposed, this=" << this << std::endl;
1709
2371
  Napi::Error::New(env, "Pipeline is disposed").ThrowAsJavaScriptException();
1710
2372
  return env.Undefined();
1711
2373
  }
@@ -1799,10 +2461,12 @@ namespace dsp
1799
2461
  Napi::Value DspPipeline::LoadState(const Napi::CallbackInfo &info)
1800
2462
  {
1801
2463
  Napi::Env env = info.Env();
1802
-
2464
+ // std::cout << "[DEBUG] DspPipeline::LoadState - this=" << this
2465
+ // << ", current stages=" << m_stages.size() << std::endl;
1803
2466
  // Check if pipeline is disposed
1804
2467
  if (m_disposed)
1805
2468
  {
2469
+ // std::cout << "[DEBUG] LoadState - pipeline disposed, this=" << this << std::endl;
1806
2470
  Napi::Error::New(env, "Pipeline is disposed").ThrowAsJavaScriptException();
1807
2471
  return env.Undefined();
1808
2472
  }
@@ -2046,21 +2710,27 @@ namespace dsp
2046
2710
  Napi::Value DspPipeline::ClearState(const Napi::CallbackInfo &info)
2047
2711
  {
2048
2712
  Napi::Env env = info.Env();
2713
+ // std::cout << "[DEBUG] DspPipeline::ClearState - this=" << this
2714
+ // << ", stages=" << m_stages.size() << std::endl;
2049
2715
 
2050
2716
  // Check if pipeline is disposed
2051
2717
  if (m_disposed)
2052
2718
  {
2719
+ // std::cout << "[DEBUG] ClearState - pipeline disposed, this=" << this << std::endl;
2053
2720
  Napi::Error::New(env, "Pipeline is disposed").ThrowAsJavaScriptException();
2054
2721
  return env.Undefined();
2055
2722
  }
2056
2723
 
2057
2724
  // Reset all stages
2058
- for (auto &stage : m_stages)
2725
+ for (size_t i = 0; i < m_stages.size(); ++i)
2059
2726
  {
2060
- stage->reset();
2727
+ // std::cout << "[DEBUG] ClearState - resetting stage " << i
2728
+ // << ", addr=" << m_stages[i].get() << std::endl;
2729
+ m_stages[i]->reset();
2061
2730
  }
2062
2731
 
2063
- std::cout << "Pipeline state cleared (" << m_stages.size() << " stages reset)" << std::endl;
2732
+ // std::cout << "[DEBUG] Pipeline state cleared (" << m_stages.size()
2733
+ // << " stages reset), this=" << this << std::endl;
2064
2734
 
2065
2735
  return env.Undefined();
2066
2736
  }
@@ -2156,21 +2826,27 @@ namespace dsp
2156
2826
  Napi::Value DspPipeline::Dispose(const Napi::CallbackInfo &info)
2157
2827
  {
2158
2828
  Napi::Env env = info.Env();
2829
+ // std::cout << "[DEBUG] DspPipeline::Dispose - this=" << this
2830
+ // << ", stages=" << m_stages.size() << ", disposed=" << m_disposed << std::endl;
2159
2831
 
2160
2832
  // Already disposed - silently succeed (idempotent behavior)
2161
2833
  if (m_disposed)
2162
2834
  {
2835
+ // std::cout << "[DEBUG] Dispose - already disposed, this=" << this << std::endl;
2163
2836
  return env.Undefined();
2164
2837
  }
2165
2838
 
2166
2839
  // Cannot dispose while processing is in progress
2167
2840
  if (*m_isBusy)
2168
2841
  {
2842
+ // std::cout << "[DEBUG] Dispose - pipeline busy, cannot dispose, this=" << this << std::endl;
2169
2843
  Napi::Error::New(env, "Cannot dispose pipeline: process() is still running.")
2170
2844
  .ThrowAsJavaScriptException();
2171
2845
  return env.Undefined();
2172
2846
  }
2173
2847
 
2848
+ // std::cout << "[DEBUG] Dispose - clearing " << m_stages.size()
2849
+ // << " stages, this=" << this << std::endl;
2174
2850
  // Clear all stages - triggers RAII cleanup of all stage resources
2175
2851
  // This will:
2176
2852
  // - Free all stage internal buffers
@@ -2179,12 +2855,14 @@ namespace dsp
2179
2855
  // - Free all detachable buffers
2180
2856
  // - Free timestamp and resize buffers
2181
2857
  m_stages.clear();
2858
+ // std::cout << "[DEBUG] Dispose - stages cleared, this=" << this << std::endl;
2182
2859
 
2183
2860
  // Reset busy flag (defensive programming)
2184
2861
  *m_isBusy = false;
2185
2862
 
2186
2863
  // Mark as disposed to prevent further operations
2187
2864
  m_disposed = true;
2865
+ // std::cout << "[DEBUG] Dispose - complete, this=" << this << std::endl;
2188
2866
 
2189
2867
  return env.Undefined();
2190
2868
  }