dspx 1.3.3 → 1.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/prebuilds/win32-x64/dspx.node +0 -0
- package/src/native/DspPipeline.cc +729 -51
package/package.json
CHANGED
|
Binary file
|
|
@@ -51,6 +51,30 @@ namespace dsp
|
|
|
51
51
|
#include <cstdlib>
|
|
52
52
|
#include "utils/Toon.h"
|
|
53
53
|
|
|
54
|
+
// SIMD feature detection for timestamp interpolation.
//
// Defines HAS_AVX2, HAS_SSE and HAS_NEON as 0 or 1. At most one of them is 1;
// when all three are 0 the scalar code path is selected.
// Priority: AVX2 (8-wide) > SSE2 (4-wide) > NEON (4-wide) > Scalar
//
// The HAS_SSE branch includes <emmintrin.h>, i.e. it needs SSE2 (not just
// SSE1), so it is only enabled when the target is known to provide SSE2:
//   - GCC/Clang: __SSE2__
//   - MSVC: every x64 target, or x86 built with /arch:SSE2 or higher
//     (_M_IX86_FP >= 2)
#if defined(__AVX2__)
#include <immintrin.h>
#define HAS_AVX2 1
#define HAS_SSE 0
#define HAS_NEON 0
#elif defined(__SSE2__) || (defined(_MSC_VER) && (defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)))
#include <emmintrin.h> // SSE2
#include <xmmintrin.h> // SSE
#define HAS_AVX2 0
#define HAS_SSE 1
#define HAS_NEON 0
#elif defined(__ARM_NEON) || defined(__ARM_NEON__)
#include <arm_neon.h>
#define HAS_AVX2 0
#define HAS_SSE 0
#define HAS_NEON 1
#else
#define HAS_AVX2 0
#define HAS_SSE 0
#define HAS_NEON 0
#endif
|
|
77
|
+
|
|
54
78
|
namespace dsp
|
|
55
79
|
{
|
|
56
80
|
|
|
@@ -84,9 +108,13 @@ namespace dsp
|
|
|
84
108
|
/**
 * Constructs the native pipeline wrapper.
 *
 * Allocates the shared busy flag in the cleared state and then registers
 * the stage factories via InitializeStageFactories().
 */
DspPipeline::DspPipeline(const Napi::CallbackInfo &info)
    : Napi::ObjectWrap<DspPipeline>(info)
{
    // Busy flag starts out false; AddStage refuses to run while it reads true.
    m_isBusy = std::make_shared<std::atomic<bool>>(false);

    // Must run before any stage is added by name.
    InitializeStageFactories();
}
|
|
91
119
|
|
|
92
120
|
/**
|
|
@@ -1146,22 +1174,26 @@ namespace dsp
|
|
|
1146
1174
|
Napi::Value DspPipeline::AddStage(const Napi::CallbackInfo &info)
|
|
1147
1175
|
{
|
|
1148
1176
|
Napi::Env env = info.Env();
|
|
1177
|
+
// std::cout << "[DEBUG] DspPipeline::AddStage - this=" << this << std::endl;
|
|
1149
1178
|
|
|
1150
1179
|
// Check if pipeline is disposed
|
|
1151
1180
|
if (m_disposed)
|
|
1152
1181
|
{
|
|
1182
|
+
// std::cout << "[DEBUG] AddStage - pipeline disposed, this=" << this << std::endl;
|
|
1153
1183
|
Napi::Error::New(env, "Pipeline is disposed").ThrowAsJavaScriptException();
|
|
1154
1184
|
return env.Undefined();
|
|
1155
1185
|
}
|
|
1156
1186
|
|
|
1157
1187
|
if (*m_isBusy)
|
|
1158
1188
|
{
|
|
1189
|
+
// std::cout << "[DEBUG] AddStage - pipeline busy, this=" << this << std::endl;
|
|
1159
1190
|
Napi::Error::New(env, "Cannot add stage while processing").ThrowAsJavaScriptException();
|
|
1160
1191
|
return env.Undefined();
|
|
1161
1192
|
}
|
|
1162
1193
|
|
|
1163
1194
|
// 1. Get arguments from TypeScript
|
|
1164
1195
|
std::string stageName = info[0].As<Napi::String>();
|
|
1196
|
+
// std::cout << "[DEBUG] AddStage - stageName=" << stageName << ", this=" << this << std::endl;
|
|
1165
1197
|
Napi::Object params = info[1].As<Napi::Object>();
|
|
1166
1198
|
|
|
1167
1199
|
// 2. Look up the stage factory in the map
|
|
@@ -1202,10 +1234,12 @@ namespace dsp
|
|
|
1202
1234
|
Napi::Value DspPipeline::AddFilterStage(const Napi::CallbackInfo &info)
|
|
1203
1235
|
{
|
|
1204
1236
|
Napi::Env env = info.Env();
|
|
1237
|
+
// std::cout << "[DEBUG] DspPipeline::AddFilterStage - this=" << this << std::endl;
|
|
1205
1238
|
|
|
1206
1239
|
// Check if pipeline is disposed
|
|
1207
1240
|
if (m_disposed)
|
|
1208
1241
|
{
|
|
1242
|
+
// std::cout << "[DEBUG] AddFilterStage - pipeline disposed, this=" << this << std::endl;
|
|
1209
1243
|
Napi::Error::New(env, "Pipeline is disposed").ThrowAsJavaScriptException();
|
|
1210
1244
|
return env.Undefined();
|
|
1211
1245
|
}
|
|
@@ -1249,6 +1283,569 @@ namespace dsp
|
|
|
1249
1283
|
return env.Undefined();
|
|
1250
1284
|
}
|
|
1251
1285
|
|
|
1286
|
+
/**
 * Timestamp interpolation for resizing stages.
 *
 * For every output sample i the source position is i * timeScale. The
 * timestamp is linearly interpolated between the two neighbouring source
 * samples, copied from the last source sample when the position falls on it,
 * or extrapolated past the end of the source. The result is replicated
 * across all output channels.
 *
 * All position arithmetic is done in double precision on every platform.
 * Earlier revisions computed the bulk of the samples with 8/4-wide float
 * intrinsics and only the tail in double; that made results depend on the
 * sample's position in the buffer and lost index precision above 2^24
 * samples, while the gather and channel replication were scalar anyway.
 * The name is kept so existing callers continue to work.
 *
 * @param timestamps       Source timestamps, interleaved: the value used for
 *                         source sample s is timestamps[s * prevChannels]
 * @param prevNumSamples   Number of samples in source
 * @param prevChannels     Number of channels in source (stride of timestamps)
 * @param numOutputSamples Number of samples to generate
 * @param outputChannels   Number of channels in output
 * @param timeScale        Time scaling factor from stage
 * @param output           Output timestamp vector; grown if it holds fewer
 *                         than numOutputSamples * outputChannels elements
 *
 * Nothing is written when there is no source data (null pointer, zero
 * samples, non-positive prevChannels) or nothing to produce.
 */
inline void interpolateTimestampsSIMD(
    const float *timestamps,
    size_t prevNumSamples,
    int prevChannels,
    size_t numOutputSamples,
    int outputChannels,
    double timeScale,
    std::vector<float> &output)
{
    // Without at least one source sample there is nothing to interpolate
    // from; bailing out also avoids the (prevNumSamples - 1) wrap-around.
    if (timestamps == nullptr || prevNumSamples == 0 || prevChannels <= 0 ||
        numOutputSamples == 0 || outputChannels <= 0)
    {
        return;
    }

    const size_t inStride = static_cast<size_t>(prevChannels);
    const size_t outStride = static_cast<size_t>(outputChannels);

    // Never write past the end of the caller's vector.
    const size_t required = numOutputSamples * outStride;
    if (output.size() < required)
    {
        output.resize(required);
    }

    const size_t lastIdx = prevNumSamples - 1;
    const float lastTimestamp = timestamps[lastIdx * inStride];
    const double sampleLimit = static_cast<double>(prevNumSamples);

    for (size_t i = 0; i < numOutputSamples; ++i)
    {
        const double inputTime = static_cast<double>(i) * timeScale;
        float timestamp;

        if (!(inputTime >= 0.0 && inputTime < sampleLimit))
        {
            // Position is beyond the source (or negative / NaN, which cannot
            // be converted to an index): extrapolate from the last sample.
            // NOTE(review): the offset is multiplied by timeScale a second
            // time here; kept as in the original - confirm intended units.
            timestamp = lastTimestamp +
                        static_cast<float>((inputTime - static_cast<double>(lastIdx)) * timeScale);
        }
        else
        {
            // Safe: 0 <= inputTime < prevNumSamples.
            const size_t inputIdx = static_cast<size_t>(inputTime);

            if (inputIdx >= lastIdx)
            {
                // On the last source sample: no right-hand neighbour.
                timestamp = timestamps[inputIdx * inStride];
            }
            else
            {
                const double frac = inputTime - static_cast<double>(inputIdx);
                const float t0 = timestamps[inputIdx * inStride];
                const float t1 = timestamps[(inputIdx + 1) * inStride];
                timestamp = t0 + static_cast<float>(frac) * (t1 - t0);
            }
        }

        // Replicate timestamp across all output channels
        float *dst = output.data() + i * outStride;
        for (size_t ch = 0; ch < outStride; ++ch)
        {
            dst[ch] = timestamp;
        }
    }
}
|
|
1848
|
+
|
|
1252
1849
|
/**
|
|
1253
1850
|
* AsyncWorker for processing DSP pipeline in background thread
|
|
1254
1851
|
*/
|
|
@@ -1278,34 +1875,46 @@ namespace dsp
|
|
|
1278
1875
|
m_timestampRef(std::move(timestampRef)),
|
|
1279
1876
|
m_busyLock(busyLock)
|
|
1280
1877
|
{
|
|
1878
|
+
// std::cout << "[DEBUG] ProcessWorker::ProcessWorker - this=" << this << std::endl;
|
|
1879
|
+
m_stageCount = m_stages.size();
|
|
1880
|
+
m_stageTypes.reserve(m_stageCount);
|
|
1881
|
+
for (const auto &stage : m_stages)
|
|
1882
|
+
{
|
|
1883
|
+
m_stageTypes.push_back(stage->getType());
|
|
1884
|
+
}
|
|
1281
1885
|
}
|
|
1282
1886
|
|
|
1283
1887
|
protected:
|
|
1284
1888
|
// This runs on a worker thread (not blocking the event loop)
|
|
1285
1889
|
void Execute() override
|
|
1286
1890
|
{
|
|
1287
|
-
//
|
|
1891
|
+
// std::cout << "[DEBUG] ProcessWorker::Execute - START, this=" << this
|
|
1892
|
+
// << ", data=" << m_data << ", numSamples=" << m_numSamples
|
|
1893
|
+
// << ", channels=" << m_channels << std::endl;
|
|
1894
|
+
// std::cout << "[WORKER-" << std::this_thread::get_id() << "] Execute START (stages="
|
|
1895
|
+
// << m_stages.size() << ")" << std::endl;
|
|
1896
|
+
|
|
1897
|
+
// CRITICAL FIX: Use a unique_ptr for timestamp ownership
|
|
1288
1898
|
std::vector<float> generatedTimestamps;
|
|
1899
|
+
std::unique_ptr<std::vector<float>> allocatedTimestamps;
|
|
1289
1900
|
|
|
1290
1901
|
try
|
|
1291
1902
|
{
|
|
1292
|
-
// 1. Generate Timestamps if missing
|
|
1903
|
+
// 1. Generate Timestamps if missing
|
|
1293
1904
|
if (m_timestamps == nullptr)
|
|
1294
1905
|
{
|
|
1295
|
-
|
|
1906
|
+
// std::cout << "[DEBUG] Execute - generating timestamps, sampleRate=" << m_sampleRate << std::endl;
|
|
1296
1907
|
|
|
1297
|
-
|
|
1298
|
-
// If sampleRate is 0 or invalid, default to 1.0 (treating indices as time)
|
|
1908
|
+
generatedTimestamps.resize(m_numSamples);
|
|
1299
1909
|
double dt = (m_sampleRate > 0.0) ? (1000.0 / m_sampleRate) : 1.0;
|
|
1300
1910
|
|
|
1301
|
-
// Fill timestamps linearly: t[i] = i * dt
|
|
1302
1911
|
for (size_t i = 0; i < m_numSamples; ++i)
|
|
1303
1912
|
{
|
|
1304
1913
|
generatedTimestamps[i] = static_cast<float>(i * dt);
|
|
1305
1914
|
}
|
|
1306
1915
|
|
|
1307
|
-
// Point the main processing pointer to our locally generated data
|
|
1308
1916
|
m_timestamps = generatedTimestamps.data();
|
|
1917
|
+
// std::cout << "[DEBUG] Execute - timestamps generated, addr=" << m_timestamps << std::endl;
|
|
1309
1918
|
}
|
|
1310
1919
|
|
|
1311
1920
|
// 2. Process the buffer through all stages
|
|
@@ -1315,79 +1924,105 @@ namespace dsp
|
|
|
1315
1924
|
bool usingTempBuffer = false;
|
|
1316
1925
|
|
|
1317
1926
|
const bool debugStageDumps = std::getenv("DSPX_DEBUG_STAGE_DUMPS") != nullptr;
|
|
1318
|
-
|
|
1927
|
+
|
|
1928
|
+
// std::cout << "[DEBUG] Execute - processing through " << m_stages.size() << " stages" << std::endl;
|
|
1929
|
+
for (size_t stageIdx = 0; stageIdx < m_stages.size(); ++stageIdx)
|
|
1319
1930
|
{
|
|
1931
|
+
const auto &stage = m_stages[stageIdx];
|
|
1932
|
+
|
|
1933
|
+
// std::cout << "[DEBUG] Execute - stage " << stageIdx << ", type="
|
|
1934
|
+
// << stage->getType() << ", addr=" << stage.get()
|
|
1935
|
+
// << ", isResizing=" << stage->isResizing() << std::endl;
|
|
1936
|
+
|
|
1320
1937
|
if (stage->isResizing())
|
|
1321
1938
|
{
|
|
1322
|
-
//
|
|
1939
|
+
// Calculate output size
|
|
1323
1940
|
size_t outputSize = stage->calculateOutputSize(currentSize);
|
|
1324
1941
|
float *outputBuffer = new float[outputSize];
|
|
1325
1942
|
|
|
1943
|
+
// std::cout << "[DEBUG] Execute - allocated output buffer, size=" << outputSize
|
|
1944
|
+
// << ", addr=" << outputBuffer << std::endl;
|
|
1945
|
+
|
|
1946
|
+
// CRITICAL: Save the PREVIOUS size before processResizing updates currentSize
|
|
1947
|
+
size_t prevSize = currentSize;
|
|
1948
|
+
|
|
1326
1949
|
size_t actualOutputSize = 0;
|
|
1327
1950
|
stage->processResizing(currentBuffer, currentSize,
|
|
1328
1951
|
outputBuffer, actualOutputSize,
|
|
1329
1952
|
m_channels, m_timestamps);
|
|
1330
1953
|
|
|
1331
|
-
|
|
1332
|
-
|
|
1954
|
+
// std::cout << "[DEBUG] Execute - stage " << stageIdx << " resized: "
|
|
1955
|
+
// << prevSize << " -> " << actualOutputSize // Use prevSize!
|
|
1956
|
+
// << ", buffer=" << outputBuffer << std::endl;
|
|
1957
|
+
|
|
1958
|
+
// Free previous temp buffer if we owned it
|
|
1959
|
+
if (usingTempBuffer && tempBuffer != nullptr)
|
|
1960
|
+
{
|
|
1961
|
+
// std::cout << "[DEBUG] Execute - freeing previous temp buffer=" << tempBuffer << std::endl;
|
|
1962
|
+
delete[] tempBuffer;
|
|
1963
|
+
}
|
|
1964
|
+
|
|
1965
|
+
// Update buffer tracking
|
|
1966
|
+
tempBuffer = outputBuffer;
|
|
1333
1967
|
currentBuffer = outputBuffer;
|
|
1334
1968
|
currentSize = actualOutputSize;
|
|
1335
1969
|
usingTempBuffer = true;
|
|
1336
1970
|
|
|
1971
|
+
// Save previous channel count BEFORE updating
|
|
1972
|
+
int prevChannels = m_channels;
|
|
1973
|
+
|
|
1974
|
+
// Update channel count if stage changed it
|
|
1337
1975
|
int outputChannels = stage->getOutputChannels();
|
|
1338
1976
|
if (outputChannels > 0)
|
|
1977
|
+
{
|
|
1978
|
+
// std::cout << "[DEBUG] Execute - channels changed: " << m_channels
|
|
1979
|
+
// << " -> " << outputChannels << std::endl;
|
|
1339
1980
|
m_channels = outputChannels;
|
|
1981
|
+
}
|
|
1340
1982
|
|
|
1341
|
-
// Re-interpolate timestamps if needed
|
|
1983
|
+
// Re-interpolate timestamps if needed
|
|
1342
1984
|
if (m_timestamps != nullptr)
|
|
1343
1985
|
{
|
|
1986
|
+
// std::cout << "[DEBUG] Execute - reinterpolating timestamps" << std::endl;
|
|
1987
|
+
|
|
1344
1988
|
double timeScale = stage->getTimeScaleFactor();
|
|
1345
1989
|
size_t numOutputSamples = actualOutputSize / m_channels;
|
|
1346
|
-
float *newTimestamps = new float[actualOutputSize];
|
|
1347
1990
|
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
|
|
1365
|
-
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
}
|
|
1371
|
-
|
|
1372
|
-
for (int ch = 0; ch < m_channels; ++ch)
|
|
1373
|
-
{
|
|
1374
|
-
newTimestamps[i * m_channels + ch] = timestamp;
|
|
1375
|
-
}
|
|
1376
|
-
}
|
|
1377
|
-
m_timestamps = newTimestamps;
|
|
1378
|
-
m_timestampBuffer.reset(newTimestamps);
|
|
1991
|
+
// CRITICAL FIX: Use prevSize and prevChannels!
|
|
1992
|
+
size_t prevNumSamples = prevSize / prevChannels;
|
|
1993
|
+
|
|
1994
|
+
// Create new timestamp vector
|
|
1995
|
+
auto newTimestamps = std::make_unique<std::vector<float>>(actualOutputSize);
|
|
1996
|
+
|
|
1997
|
+
// Use SIMD-optimized interpolation
|
|
1998
|
+
interpolateTimestampsSIMD(
|
|
1999
|
+
m_timestamps,
|
|
2000
|
+
prevNumSamples,
|
|
2001
|
+
prevChannels,
|
|
2002
|
+
numOutputSamples,
|
|
2003
|
+
m_channels,
|
|
2004
|
+
timeScale,
|
|
2005
|
+
*newTimestamps);
|
|
2006
|
+
|
|
2007
|
+
// CRITICAL FIX: Transfer ownership safely
|
|
2008
|
+
allocatedTimestamps = std::move(newTimestamps);
|
|
2009
|
+
m_timestamps = allocatedTimestamps->data();
|
|
2010
|
+
|
|
2011
|
+
// std::cout << "[DEBUG] Execute - timestamps reinterpolated (SIMD), new addr="
|
|
2012
|
+
// << m_timestamps << std::endl;
|
|
1379
2013
|
}
|
|
1380
2014
|
}
|
|
1381
2015
|
else
|
|
1382
2016
|
{
|
|
1383
2017
|
// In-place processing
|
|
2018
|
+
// std::cout << "[DEBUG] Execute - stage " << stageIdx << " in-place processing" << std::endl;
|
|
1384
2019
|
stage->process(currentBuffer, currentSize, m_channels, m_timestamps);
|
|
1385
2020
|
|
|
1386
2021
|
if (debugStageDumps)
|
|
1387
2022
|
{
|
|
1388
2023
|
const char *stype = stage->getType();
|
|
1389
2024
|
size_t toShow = std::min<size_t>(8, currentSize);
|
|
1390
|
-
std::cout << "[DUMP] after '" << stype << "':";
|
|
2025
|
+
// std::cout << "[DUMP] after '" << stype << "':";
|
|
1391
2026
|
for (size_t i = 0; i < toShow; ++i)
|
|
1392
2027
|
{
|
|
1393
2028
|
std::cout << (i == 0 ? ' ' : ',') << currentBuffer[i];
|
|
@@ -1400,16 +2035,22 @@ namespace dsp
|
|
|
1400
2035
|
m_finalBuffer = currentBuffer;
|
|
1401
2036
|
m_finalSize = currentSize;
|
|
1402
2037
|
m_ownsBuffer = usingTempBuffer;
|
|
2038
|
+
|
|
2039
|
+
// std::cout << "[DEBUG] Execute - COMPLETE, finalBuffer=" << m_finalBuffer
|
|
2040
|
+
// << ", finalSize=" << m_finalSize << ", ownsBuffer=" << m_ownsBuffer << std::endl;
|
|
1403
2041
|
}
|
|
1404
2042
|
catch (const std::exception &e)
|
|
1405
2043
|
{
|
|
2044
|
+
// std::cout << "[DEBUG] Execute - EXCEPTION: " << e.what() << ", this=" << this << std::endl;
|
|
2045
|
+
// std::cout << "[WORKER-" << std::this_thread::get_id() << "] EXCEPTION: " << e.what() << std::endl;
|
|
1406
2046
|
SetError(e.what());
|
|
1407
2047
|
}
|
|
1408
|
-
}
|
|
2048
|
+
} // This runs on the main thread after Execute() completes
|
|
1409
2049
|
|
|
1410
|
-
// This runs on the main thread after Execute() completes
|
|
1411
2050
|
void OnOK() override
|
|
1412
2051
|
{
|
|
2052
|
+
// std::cout << "[DEBUG] ProcessWorker::OnOK - START, this=" << this
|
|
2053
|
+
// << ", finalBuffer=" << (void *)m_finalBuffer << ", finalSize=" << m_finalSize << std::endl;
|
|
1413
2054
|
*m_busyLock = false; // unlock the pipeline
|
|
1414
2055
|
|
|
1415
2056
|
Napi::Env env = Env();
|
|
@@ -1423,22 +2064,29 @@ namespace dsp
|
|
|
1423
2064
|
// Clean up temporary buffer if we allocated one
|
|
1424
2065
|
if (m_ownsBuffer)
|
|
1425
2066
|
{
|
|
2067
|
+
// std::cout << "[DEBUG] OnOK - deleting temp buffer=" << (void *)m_finalBuffer << std::endl;
|
|
1426
2068
|
delete[] m_finalBuffer;
|
|
1427
2069
|
}
|
|
1428
2070
|
|
|
2071
|
+
// std::cout << "[DEBUG] OnOK - COMPLETE, resolving promise, this=" << this << std::endl;
|
|
1429
2072
|
// Resolve the promise with the processed buffer
|
|
1430
2073
|
m_deferred.Resolve(outputArray);
|
|
1431
2074
|
}
|
|
1432
2075
|
|
|
1433
2076
|
// Failure callback of the worker: hands the error to the pending promise
// and then clears the pipeline's busy flag so new work can be accepted.
//
// @param error  Error whose JavaScript value becomes the rejection reason.
void OnError(const Napi::Error &error) override
{
    // Reject before unlocking; the statement order is intentionally kept.
    const auto rejectionReason = error.Value();
    m_deferred.Reject(rejectionReason);

    *m_busyLock = false; // unlock the pipeline
}
|
|
1438
2084
|
|
|
1439
2085
|
private:
|
|
1440
2086
|
Napi::Promise::Deferred m_deferred;
|
|
1441
2087
|
std::vector<std::unique_ptr<IDspStage>> &m_stages;
|
|
2088
|
+
size_t m_stageCount;
|
|
2089
|
+
std::vector<std::string> m_stageTypes;
|
|
1442
2090
|
float *m_data;
|
|
1443
2091
|
float *m_timestamps;
|
|
1444
2092
|
double m_sampleRate;
|
|
@@ -1469,16 +2117,19 @@ namespace dsp
|
|
|
1469
2117
|
Napi::Value DspPipeline::ProcessAsync(const Napi::CallbackInfo &info)
|
|
1470
2118
|
{
|
|
1471
2119
|
Napi::Env env = info.Env();
|
|
2120
|
+
// std::cout << "[DEBUG] DspPipeline::ProcessAsync - this=" << this << std::endl;
|
|
1472
2121
|
|
|
1473
2122
|
// Check if pipeline is disposed
|
|
1474
2123
|
if (m_disposed)
|
|
1475
2124
|
{
|
|
2125
|
+
// std::cout << "[DEBUG] ProcessAsync - pipeline disposed, this=" << this << std::endl;
|
|
1476
2126
|
Napi::Error::New(env, "Pipeline is disposed").ThrowAsJavaScriptException();
|
|
1477
2127
|
return env.Undefined();
|
|
1478
2128
|
}
|
|
1479
2129
|
|
|
1480
2130
|
if (*m_isBusy)
|
|
1481
2131
|
{
|
|
2132
|
+
// std::cout << "[DEBUG] ProcessAsync - pipeline busy, this=" << this << std::endl;
|
|
1482
2133
|
Napi::Error::New(env, "Pipeline is busy: Cannot call process() while another operation is running.").ThrowAsJavaScriptException();
|
|
1483
2134
|
return env.Undefined();
|
|
1484
2135
|
}
|
|
@@ -1547,8 +2198,13 @@ namespace dsp
|
|
|
1547
2198
|
}
|
|
1548
2199
|
|
|
1549
2200
|
*m_isBusy = true; // lock the pipeline
|
|
2201
|
+
// std::cout << "[DEBUG] ProcessAsync - creating worker, data=" << (void *)data
|
|
2202
|
+
// << ", numSamples=" << numSamples << ", channels=" << channels
|
|
2203
|
+
// << ", this=" << this << std::endl;
|
|
1550
2204
|
|
|
1551
2205
|
ProcessWorker *worker = new ProcessWorker(env, std::move(deferred), m_stages, data, timestamps, sampleRate, numSamples, channels, std::move(bufferRef), std::move(timestampRef), m_isBusy);
|
|
2206
|
+
// std::cout << "[DEBUG] ProcessAsync - queuing worker=" << (void *)worker
|
|
2207
|
+
// << ", this=" << this << std::endl;
|
|
1552
2208
|
worker->Queue();
|
|
1553
2209
|
|
|
1554
2210
|
return promise;
|
|
@@ -1566,16 +2222,19 @@ namespace dsp
|
|
|
1566
2222
|
Napi::Value DspPipeline::ProcessSync(const Napi::CallbackInfo &info)
|
|
1567
2223
|
{
|
|
1568
2224
|
Napi::Env env = info.Env();
|
|
2225
|
+
// std::cout << "[DEBUG] DspPipeline::ProcessSync - this=" << this << std::endl;
|
|
1569
2226
|
|
|
1570
2227
|
// Check if pipeline is disposed
|
|
1571
2228
|
if (m_disposed)
|
|
1572
2229
|
{
|
|
2230
|
+
// std::cout << "[DEBUG] ProcessSync - pipeline disposed, this=" << this << std::endl;
|
|
1573
2231
|
Napi::Error::New(env, "Pipeline is disposed").ThrowAsJavaScriptException();
|
|
1574
2232
|
return env.Undefined();
|
|
1575
2233
|
}
|
|
1576
2234
|
|
|
1577
2235
|
if (*m_isBusy)
|
|
1578
2236
|
{
|
|
2237
|
+
// std::cout << "[DEBUG] ProcessSync - pipeline busy, this=" << this << std::endl;
|
|
1579
2238
|
Napi::Error::New(env, "Pipeline is busy: Cannot call processSync() while an async operation is running.").ThrowAsJavaScriptException();
|
|
1580
2239
|
return env.Undefined();
|
|
1581
2240
|
}
|
|
@@ -1702,10 +2361,13 @@ namespace dsp
|
|
|
1702
2361
|
Napi::Value DspPipeline::SaveState(const Napi::CallbackInfo &info)
|
|
1703
2362
|
{
|
|
1704
2363
|
Napi::Env env = info.Env();
|
|
2364
|
+
// std::cout << "[DEBUG] DspPipeline::SaveState - this=" << this
|
|
2365
|
+
// << ", stages=" << m_stages.size() << std::endl;
|
|
1705
2366
|
|
|
1706
2367
|
// Check if pipeline is disposed
|
|
1707
2368
|
if (m_disposed)
|
|
1708
2369
|
{
|
|
2370
|
+
// std::cout << "[DEBUG] SaveState - pipeline disposed, this=" << this << std::endl;
|
|
1709
2371
|
Napi::Error::New(env, "Pipeline is disposed").ThrowAsJavaScriptException();
|
|
1710
2372
|
return env.Undefined();
|
|
1711
2373
|
}
|
|
@@ -1799,10 +2461,12 @@ namespace dsp
|
|
|
1799
2461
|
Napi::Value DspPipeline::LoadState(const Napi::CallbackInfo &info)
|
|
1800
2462
|
{
|
|
1801
2463
|
Napi::Env env = info.Env();
|
|
1802
|
-
|
|
2464
|
+
// std::cout << "[DEBUG] DspPipeline::LoadState - this=" << this
|
|
2465
|
+
// << ", current stages=" << m_stages.size() << std::endl;
|
|
1803
2466
|
// Check if pipeline is disposed
|
|
1804
2467
|
if (m_disposed)
|
|
1805
2468
|
{
|
|
2469
|
+
// std::cout << "[DEBUG] LoadState - pipeline disposed, this=" << this << std::endl;
|
|
1806
2470
|
Napi::Error::New(env, "Pipeline is disposed").ThrowAsJavaScriptException();
|
|
1807
2471
|
return env.Undefined();
|
|
1808
2472
|
}
|
|
@@ -2046,21 +2710,27 @@ namespace dsp
|
|
|
2046
2710
|
// Resets every stage of the pipeline by calling reset() on each one, in order.
//
// Always returns JavaScript `undefined`. If the pipeline has already been
// disposed, a JavaScript Error ("Pipeline is disposed") is thrown instead
// and no stage is touched.
//
// NOTE(review): unlike Dispose(), this method does not look at *m_isBusy
// before touching the stages — confirm callers never invoke it while an
// async process() is still running.
Napi::Value DspPipeline::ClearState(const Napi::CallbackInfo &info)
{
    Napi::Env env = info.Env();

    if (!m_disposed)
    {
        // Live pipeline: reset each stage in pipeline order.
        for (const auto &stage : m_stages)
        {
            stage->reset();
        }
    }
    else
    {
        // Disposed pipeline: report the misuse to JavaScript.
        Napi::Error::New(env, "Pipeline is disposed").ThrowAsJavaScriptException();
    }

    return env.Undefined();
}
|
|
@@ -2156,21 +2826,27 @@ namespace dsp
|
|
|
2156
2826
|
Napi::Value DspPipeline::Dispose(const Napi::CallbackInfo &info)
|
|
2157
2827
|
{
|
|
2158
2828
|
Napi::Env env = info.Env();
|
|
2829
|
+
// std::cout << "[DEBUG] DspPipeline::Dispose - this=" << this
|
|
2830
|
+
// << ", stages=" << m_stages.size() << ", disposed=" << m_disposed << std::endl;
|
|
2159
2831
|
|
|
2160
2832
|
// Already disposed - silently succeed (idempotent behavior)
|
|
2161
2833
|
if (m_disposed)
|
|
2162
2834
|
{
|
|
2835
|
+
// std::cout << "[DEBUG] Dispose - already disposed, this=" << this << std::endl;
|
|
2163
2836
|
return env.Undefined();
|
|
2164
2837
|
}
|
|
2165
2838
|
|
|
2166
2839
|
// Cannot dispose while processing is in progress
|
|
2167
2840
|
if (*m_isBusy)
|
|
2168
2841
|
{
|
|
2842
|
+
// std::cout << "[DEBUG] Dispose - pipeline busy, cannot dispose, this=" << this << std::endl;
|
|
2169
2843
|
Napi::Error::New(env, "Cannot dispose pipeline: process() is still running.")
|
|
2170
2844
|
.ThrowAsJavaScriptException();
|
|
2171
2845
|
return env.Undefined();
|
|
2172
2846
|
}
|
|
2173
2847
|
|
|
2848
|
+
// std::cout << "[DEBUG] Dispose - clearing " << m_stages.size()
|
|
2849
|
+
// << " stages, this=" << this << std::endl;
|
|
2174
2850
|
// Clear all stages - triggers RAII cleanup of all stage resources
|
|
2175
2851
|
// This will:
|
|
2176
2852
|
// - Free all stage internal buffers
|
|
@@ -2179,12 +2855,14 @@ namespace dsp
|
|
|
2179
2855
|
// - Free all detachable buffers
|
|
2180
2856
|
// - Free timestamp and resize buffers
|
|
2181
2857
|
m_stages.clear();
|
|
2858
|
+
// std::cout << "[DEBUG] Dispose - stages cleared, this=" << this << std::endl;
|
|
2182
2859
|
|
|
2183
2860
|
// Reset busy flag (defensive programming)
|
|
2184
2861
|
*m_isBusy = false;
|
|
2185
2862
|
|
|
2186
2863
|
// Mark as disposed to prevent further operations
|
|
2187
2864
|
m_disposed = true;
|
|
2865
|
+
// std::cout << "[DEBUG] Dispose - complete, this=" << this << std::endl;
|
|
2188
2866
|
|
|
2189
2867
|
return env.Undefined();
|
|
2190
2868
|
}
|