native-recorder-nodejs 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,274 @@
1
+ #import "AVFEngine.h"
2
+ #import "SCKAudioCapture.h"
3
+ #import <AVFoundation/AVFoundation.h>
4
+ #import <CoreMedia/CoreMedia.h>
5
+ #import <ScreenCaptureKit/ScreenCaptureKit.h>
6
+
7
// Bridges AVCaptureAudioDataOutput sample-buffer callbacks into the C++
// AudioEngine callback types. Instances are owned by AVFEngine::Impl and
// receive buffers on the Impl's serial capture queue.
//
// NOTE(review): the callback properties are `assign` because they are C++
// std::function values, not Objective-C objects; the synthesized setter
// performs a plain C++ copy-assignment.
@interface AVFRecorderDelegate : NSObject <AVCaptureAudioDataOutputSampleBufferDelegate>
@property (nonatomic, assign) AudioEngine::DataCallback dataCallback;
@property (nonatomic, assign) AudioEngine::ErrorCallback errorCallback;
@end

@implementation AVFRecorderDelegate
// AVCaptureAudioDataOutputSampleBufferDelegate: invoked for every captured
// audio sample buffer. Forwards the buffer's raw PCM bytes to dataCallback.
- (void)captureOutput:(AVCaptureOutput *)output didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer fromConnection:(AVCaptureConnection *)connection {
    // No consumer registered: nothing to do.
    if (!self.dataCallback) return;

    CMBlockBufferRef blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer);
    if (!blockBuffer) return;

    size_t lengthAtOffset, totalLength;
    char *dataPointer;
    // Request a pointer to the buffer's bytes; returns a non-zero status
    // (and the buffer is skipped) if the data is not contiguous at offset 0.
    OSStatus status = CMBlockBufferGetDataPointer(blockBuffer, 0, &lengthAtOffset, &totalLength, &dataPointer);

    if (status == kCMBlockBufferNoErr) {
        self.dataCallback((const uint8_t*)dataPointer, totalLength);
    }
}
@end
28
+
29
// Private implementation state for AVFEngine (pimpl idiom).
// Holds either an AVFoundation capture session (microphone input) or a
// ScreenCaptureKit capture (system audio) — at most one is active at a time.
struct AVFEngine::Impl {
    AVCaptureSession *session;      // non-nil only while a microphone capture is configured
    AVFRecorderDelegate *delegate;  // receives sample buffers from `session`
    SCKAudioCapture *sckCapture;    // system-audio capture helper (always allocated)
    dispatch_queue_t queue;         // serial queue for sample-buffer delivery

    Impl() {
        session = nil;
        delegate = nil;
        sckCapture = [[SCKAudioCapture alloc] init];
        queue = nil;
    }

    ~Impl() {
        Stop();
    }

    // Tears down whichever capture path is running. Idempotent; safe to call
    // when nothing is running. Under ARC, nil-ing the references releases
    // the session, delegate and queue.
    void Stop() {
        if (session) {
            if ([session isRunning]) {
                [session stopRunning];
            }
            session = nil;
        }
        if (sckCapture) {
            [sckCapture stop];
        }
        delegate = nil;
        queue = nil;
    }
};
60
+
61
// Allocates the pimpl; no capture starts until Start() is called.
AVFEngine::AVFEngine() : impl(std::make_unique<Impl>()) {}

// Defaulted: Impl's destructor stops any active capture.
AVFEngine::~AVFEngine() = default;
64
+
65
// Starts capturing audio from the device identified by `deviceId`.
//
// deviceType selects the capture path:
//   - AudioEngine::DEVICE_TYPE_OUTPUT: system audio via ScreenCaptureKit
//     (deviceId must be AudioEngine::SYSTEM_AUDIO_DEVICE_ID; macOS 12.3+).
//   - anything else: a microphone via AVFoundation, with deviceId being the
//     AVCaptureDevice uniqueID.
//
// dataCb receives interleaved 48kHz / 16-bit / stereo PCM chunks on a
// dedicated serial queue; errorCb (if non-null) is invoked with a
// human-readable message on any failure. Any capture already in progress is
// stopped first.
void AVFEngine::Start(const std::string &deviceType, const std::string &deviceId,
                      DataCallback dataCb, ErrorCallback errorCb) {
    impl->Stop();

    // Determine if this is output (system audio) or input (microphone).
    bool isOutputDevice = (deviceType == AudioEngine::DEVICE_TYPE_OUTPUT);

    if (isOutputDevice) {
        // Output device: use ScreenCaptureKit for system audio.
        // On macOS we only support system-wide capture (deviceId == "system").
        if (deviceId != AudioEngine::SYSTEM_AUDIO_DEVICE_ID) {
            if (errorCb) errorCb("macOS only supports system-wide audio capture for output devices. Use deviceId='system'.");
            return;
        }

        if (@available(macOS 12.3, *)) {
            [impl->sckCapture startWithCallback:dataCb errorCallback:errorCb];
        } else {
            if (errorCb) errorCb("System audio recording requires macOS 12.3 or later.");
        }
        return;
    }

    // Input device: use AVFoundation for microphone capture.
    impl->session = [[AVCaptureSession alloc] init];
    impl->delegate = [[AVFRecorderDelegate alloc] init];
    impl->delegate.dataCallback = dataCb;
    impl->delegate.errorCallback = errorCb;
    impl->queue = dispatch_queue_create("com.native-recorder.audio", DISPATCH_QUEUE_SERIAL);

    // Look the device up by its AVCaptureDevice uniqueID.
    AVCaptureDevice *device = [AVCaptureDevice deviceWithUniqueID:[NSString stringWithUTF8String:deviceId.c_str()]];

    if (!device) {
        impl->Stop();  // release the half-configured session state
        if (errorCb) errorCb("Device not found: " + deviceId);
        return;
    }

    NSError *error = nil;
    AVCaptureDeviceInput *input = [AVCaptureDeviceInput deviceInputWithDevice:device error:&error];
    if (!input) {
        // BUG FIX: the previous code did
        // std::string([error.localizedDescription UTF8String]) unconditionally.
        // If the factory fails without populating `error`, UTF8String is NULL
        // and constructing std::string from NULL is undefined behavior. Per
        // Cocoa convention, failure is signalled by the nil return value and
        // `error` is read only when actually set.
        std::string reason = (error && error.localizedDescription)
            ? std::string(error.localizedDescription.UTF8String)
            : std::string("unknown error");
        impl->Stop();
        if (errorCb) errorCb("Could not create device input: " + reason);
        return;
    }

    if ([impl->session canAddInput:input]) {
        [impl->session addInput:input];
    } else {
        impl->Stop();
        if (errorCb) errorCb("Cannot add input to session");
        return;
    }

    AVCaptureAudioDataOutput *output = [[AVCaptureAudioDataOutput alloc] init];

    // Configure output settings for 48kHz 16-bit stereo interleaved PCM;
    // AVFoundation converts from the device's native format for us.
    NSDictionary *settings = @{
        AVFormatIDKey: @(kAudioFormatLinearPCM),
        AVSampleRateKey: @48000.0,
        AVNumberOfChannelsKey: @2,
        AVLinearPCMBitDepthKey: @16,
        AVLinearPCMIsFloatKey: @NO,
        AVLinearPCMIsBigEndianKey: @NO,
        AVLinearPCMIsNonInterleaved: @NO
    };
    [output setAudioSettings:settings];

    // Sample buffers are delivered to the delegate on our serial queue.
    [output setSampleBufferDelegate:impl->delegate queue:impl->queue];

    if ([impl->session canAddOutput:output]) {
        [impl->session addOutput:output];
    } else {
        impl->Stop();
        if (errorCb) errorCb("Cannot add output to session");
        return;
    }

    [impl->session startRunning];
}
142
+
143
// Stops any active capture (microphone or system audio). Idempotent.
void AVFEngine::Stop() {
    impl->Stop();
}
146
+
147
// Enumerates capturable audio devices: every microphone visible to
// AVFoundation (DEVICE_TYPE_INPUT), plus one synthetic "System Audio"
// output device on macOS 12.3+.
std::vector<AudioDevice> AVFEngine::GetDevices() {
    std::vector<AudioDevice> devices;

    // Get input devices (microphones).
    // NOTE(review): AVCaptureDeviceTypeMicrophone and AVCaptureDeviceTypeExternal
    // were introduced with the macOS 14 SDK; if this must build/run against
    // older SDKs, the BuiltInMicrophone/ExternalUnknown constants are needed —
    // confirm the project's deployment target.
    AVCaptureDeviceDiscoverySession *discoverySession = [AVCaptureDeviceDiscoverySession
        discoverySessionWithDeviceTypes:@[AVCaptureDeviceTypeMicrophone, AVCaptureDeviceTypeExternal]
                              mediaType:AVMediaTypeAudio
                               position:AVCaptureDevicePositionUnspecified];

    NSArray<AVCaptureDevice *> *avDevices = discoverySession.devices;
    // Used only to flag which discovered device is the system default input.
    AVCaptureDevice *defaultDevice = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeAudio];

    for (AVCaptureDevice *device in avDevices) {
        AudioDevice d;
        d.id = [device.uniqueID UTF8String];
        d.name = [device.localizedName UTF8String];
        d.type = AudioEngine::DEVICE_TYPE_INPUT;
        d.isDefault = (defaultDevice && [device.uniqueID isEqualToString:defaultDevice.uniqueID]);
        devices.push_back(d);
    }

    // Add the synthetic system audio output device (only one on macOS).
    if (@available(macOS 12.3, *)) {
        AudioDevice systemDevice;
        systemDevice.id = AudioEngine::SYSTEM_AUDIO_DEVICE_ID;
        systemDevice.name = "System Audio";
        systemDevice.type = AudioEngine::DEVICE_TYPE_OUTPUT;
        systemDevice.isDefault = true; // Only one output device on macOS
        devices.push_back(systemDevice);
    }

    return devices;
}
180
+
181
// Reports the audio format that Start() will deliver for `deviceId`.
// Delivery is always 48kHz stereo 16-bit PCM; `rawBitDepth` reflects the
// source's native sample width. Returns an all-zero AudioFormat when the
// device is unknown or its stream description is unavailable.
AudioFormat AVFEngine::GetDeviceFormat(const std::string &deviceId) {
    AudioFormat result = {0, 0, 0, 0};

    // System audio: ScreenCaptureKit produces 32-bit float, which we convert
    // down to 16-bit PCM before delivery.
    if (deviceId == AudioEngine::SYSTEM_AUDIO_DEVICE_ID) {
        result.sampleRate = 48000;
        result.channels = 2;
        result.bitDepth = 16;
        result.rawBitDepth = 32;
        return result;
    }

    NSString *uniqueId = [NSString stringWithUTF8String:deviceId.c_str()];
    AVCaptureDevice *captureDevice = [AVCaptureDevice deviceWithUniqueID:uniqueId];
    if (!captureDevice) {
        // Unknown device: report the zeroed format.
        return result;
    }

    // Inspect the device's currently active format for its native bit depth.
    CMFormatDescriptionRef description = captureDevice.activeFormat.formatDescription;
    const AudioStreamBasicDescription *streamDescription =
        CMAudioFormatDescriptionGetStreamBasicDescription(description);

    if (streamDescription) {
        result.rawBitDepth = (int)streamDescription->mBitsPerChannel;
        // Output format is fixed by the settings applied in Start(), not by
        // the device's native format.
        result.sampleRate = 48000;
        result.channels = 2;
        result.bitDepth = 16;
    }

    return result;
}
209
+
210
// Returns the current permission state for both capture paths without
// prompting the user. `mic` reflects AVFoundation microphone authorization;
// `system` reflects whether ScreenCaptureKit shareable content is reachable,
// used here as a proxy for the Screen Recording permission.
//
// Blocks the calling thread for up to 3 seconds waiting on the async
// shareable-content query.
PermissionStatus AVFEngine::CheckPermission() {
    PermissionStatus status;

    // Check microphone permission.
    AVAuthorizationStatus micStatus = [AVCaptureDevice authorizationStatusForMediaType:AVMediaTypeAudio];
    status.mic = (micStatus == AVAuthorizationStatusAuthorized);

    // Check screen capture permission (for system audio).
    // ScreenCaptureKit doesn't have a direct permission check API,
    // but we can check if we can get shareable content.
    if (@available(macOS 12.3, *)) {
        __block BOOL hasScreenPermission = NO;
        dispatch_semaphore_t semaphore = dispatch_semaphore_create(0);

        [SCShareableContent getShareableContentWithCompletionHandler:^(SCShareableContent * _Nullable shareableContent, NSError * _Nullable error) {
            // If we can get shareable content without error, we have permission.
            // If error is nil and we get valid content, permission is granted.
            hasScreenPermission = (error == nil && shareableContent != nil);
            dispatch_semaphore_signal(semaphore);
        }];

        // Wait for the async reply with a 3s timeout; on timeout we fall
        // through with hasScreenPermission still NO.
        // NOTE(review): if the timeout fires, the completion block may still
        // write hasScreenPermission concurrently with the read below — the
        // return value of dispatch_semaphore_wait is not checked; confirm
        // this race is acceptable.
        dispatch_semaphore_wait(semaphore, dispatch_time(DISPATCH_TIME_NOW, 3 * NSEC_PER_SEC));
        status.system = hasScreenPermission;
    } else {
        // System audio not supported on older macOS.
        status.system = NO;
    }

    return status;
}
241
+
242
// Synchronously requests the given permission, blocking the calling thread
// until the user responds or a 30-second timeout elapses (a timeout is
// reported as denied). Returns true only when the permission was granted.
//
// NOTE(review): this blocks on a semaphore signalled from a framework
// completion handler — it must not be called on a thread those handlers
// need (e.g. the main queue of a Cocoa app) or it can deadlock; confirm the
// Node.js call site always runs it off the main thread.
bool AVFEngine::RequestPermission(PermissionType type) {
    __block BOOL granted = NO;
    dispatch_semaphore_t semaphore = dispatch_semaphore_create(0);

    if (type == PermissionType::Mic) {
        // Request microphone permission (triggers the system prompt if the
        // user has not yet been asked).
        [AVCaptureDevice requestAccessForMediaType:AVMediaTypeAudio completionHandler:^(BOOL allowed) {
            granted = allowed;
            dispatch_semaphore_signal(semaphore);
        }];

        dispatch_semaphore_wait(semaphore, dispatch_time(DISPATCH_TIME_NOW, 30 * NSEC_PER_SEC));
        return granted;
    }
    else if (type == PermissionType::System) {
        // Request screen capture permission for system audio.
        if (@available(macOS 12.3, *)) {
            // Attempting to get shareable content will trigger the permission prompt
            // if not already granted.
            [SCShareableContent getShareableContentWithCompletionHandler:^(SCShareableContent * _Nullable shareableContent, NSError * _Nullable error) {
                granted = (error == nil && shareableContent != nil);
                dispatch_semaphore_signal(semaphore);
            }];

            dispatch_semaphore_wait(semaphore, dispatch_time(DISPATCH_TIME_NOW, 30 * NSEC_PER_SEC));
            return granted;
        } else {
            // ScreenCaptureKit unavailable: permission cannot be granted.
            return false;
        }
    }

    // Unknown permission type.
    return false;
}
@@ -0,0 +1,13 @@
1
+ #import <Foundation/Foundation.h>
2
+ #import <ScreenCaptureKit/ScreenCaptureKit.h>
3
+ #include <functional>
4
+ #include <string>
5
+
6
/// Invoked with each converted audio chunk: (interleaved PCM bytes, byte count).
typedef std::function<void(const uint8_t *, size_t)> SCKDataCallback;
/// Invoked with a human-readable error message when capture fails.
typedef std::function<void(std::string)> SCKErrorCallback;

/// Captures system-wide audio via ScreenCaptureKit (macOS 12.3+) and delivers
/// it through the data callback.
@interface SCKAudioCapture : NSObject
/// Starts capture asynchronously; failures are reported via errorCb rather
/// than a return value.
- (void)startWithCallback:(SCKDataCallback)dataCb
            errorCallback:(SCKErrorCallback)errorCb;
/// Stops capture. Safe to call when not capturing.
- (void)stop;
@end
@@ -0,0 +1,213 @@
1
+ #import "SCKAudioCapture.h"
2
+ #import <CoreMedia/CoreMedia.h>
3
+ #include <vector>
4
+
5
// Private state (class extension). The callback properties are C++
// std::function values, hence `assign` (plain C++ copy-assignment) rather
// than an Objective-C ownership qualifier.
@interface SCKAudioCapture () <SCStreamOutput, SCStreamDelegate>
@property (nonatomic, strong) SCStream *stream;          // active capture stream, nil when stopped
@property (nonatomic, assign) SCKDataCallback dataCallback;
@property (nonatomic, assign) SCKErrorCallback errorCallback;
@property (nonatomic, strong) dispatch_queue_t captureQueue;  // serial queue for stream output + setup
@end
11
+
12
@implementation SCKAudioCapture

// Sets up the serial queue that receives both SCStream sample buffers and
// our own deferred setup work.
- (instancetype)init {
    self = [super init];
    if (self) {
        // Create a dedicated serial queue for audio capture callbacks.
        // This is crucial because Node.js doesn't run the Cocoa main run loop.
        _captureQueue = dispatch_queue_create("com.native-recorder.sck-audio", DISPATCH_QUEUE_SERIAL);
    }
    return self;
}

// Ensure the stream is torn down when the capture object goes away.
- (void)dealloc {
    [self stop];
}

// Starts system-audio capture asynchronously: shareable-content discovery,
// stream construction and startup all run on captureQueue. Every failure is
// reported through errorCb; there is no synchronous result.
- (void)startWithCallback:(SCKDataCallback)dataCb errorCallback:(SCKErrorCallback)errorCb {
    self.dataCallback = dataCb;
    self.errorCallback = errorCb;

    if (@available(macOS 12.3, *)) {
        [SCShareableContent getShareableContentExcludingDesktopWindows:YES
                                                   onScreenWindowsOnly:NO
                                                     completionHandler:^(SCShareableContent *content, NSError *error) {
            dispatch_async(self.captureQueue, ^{
                if (error) {
                    if (self.errorCallback) self.errorCallback("Failed to get shareable content: " + std::string(error.localizedDescription.UTF8String));
                    return;
                }

                // SCStream capture must be anchored to a display even though
                // we only consume the audio.
                SCDisplay *display = content.displays.firstObject;
                if (!display) {
                    if (self.errorCallback) self.errorCallback("No display found");
                    return;
                }

                SCContentFilter *filter = [[SCContentFilter alloc] initWithDisplay:display excludingWindows:@[]];

                SCStreamConfiguration *config = [[SCStreamConfiguration alloc] init];
                config.capturesAudio = YES;
                config.sampleRate = 48000;
                config.channelCount = 2;
                config.excludesCurrentProcessAudio = NO;

                // Minimize video overhead since we only need audio.
                config.width = 2;
                config.height = 2;
                config.minimumFrameInterval = CMTimeMake(1, 1); // 1 fps for video
                config.showsCursor = NO;

                self.stream = [[SCStream alloc] initWithFilter:filter configuration:config delegate:self];

                NSError *addError = nil;
                // Register self as the audio output; sample buffers arrive on
                // captureQueue via -stream:didOutputSampleBuffer:ofType:.
                [self.stream addStreamOutput:self type:SCStreamOutputTypeAudio sampleHandlerQueue:self.captureQueue error:&addError];
                if (addError) {
                    if (self.errorCallback) self.errorCallback("Failed to add stream output: " + std::string(addError.localizedDescription.UTF8String));
                    return;
                }

                [self.stream startCaptureWithCompletionHandler:^(NSError *startError) {
                    if (startError) {
                        if (self.errorCallback) self.errorCallback("Failed to start capture: " + std::string(startError.localizedDescription.UTF8String));
                    }
                }];
            });
        }];
    } else {
        if (self.errorCallback) self.errorCallback("ScreenCaptureKit is only available on macOS 12.3+");
    }
}

// Stops the stream if one is active. Idempotent; the completion handler is
// nil because no follow-up work is needed after shutdown.
- (void)stop {
    if (@available(macOS 12.3, *)) {
        if (self.stream) {
            [self.stream stopCaptureWithCompletionHandler:nil];
            self.stream = nil;
        }
    }
}

// SCStreamOutput: called on captureQueue for each sample buffer. Converts
// 32-bit float audio (interleaved or planar) to interleaved 16-bit PCM and
// hands it to dataCallback. Buffers in any other sample format are silently
// dropped.
- (void)stream:(SCStream *)stream didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer ofType:(SCStreamOutputType)type {
    if (type != SCStreamOutputTypeAudio || !self.dataCallback) return;

    if (@available(macOS 12.3, *)) {
        CMFormatDescriptionRef formatDesc = CMSampleBufferGetFormatDescription(sampleBuffer);
        const AudioStreamBasicDescription *asbd = CMAudioFormatDescriptionGetStreamBasicDescription(formatDesc);

        if (!asbd) return;

        // Check if audio is non-interleaved (planar).
        bool isNonInterleaved = (asbd->mFormatFlags & kAudioFormatFlagIsNonInterleaved) != 0;
        bool isFloat = (asbd->mFormatFlags & kAudioFormatFlagIsFloat) != 0;
        int channels = asbd->mChannelsPerFrame;

        if (isNonInterleaved) {
            // Non-interleaved audio: each channel is in a separate buffer.
            // We need to use CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer.
            CMBlockBufferRef blockBuffer = NULL;

            // First, get the required buffer list size.
            // NOTE(review): this sizing call also receives &blockBuffer; if it
            // retains a block buffer, that reference is overwritten by the
            // second call below and would leak — verify against the CoreMedia
            // documentation. The returned status of this first call is also
            // never checked.
            size_t bufferListSizeNeeded = 0;
            OSStatus status = CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer(
                sampleBuffer,
                &bufferListSizeNeeded,
                NULL,
                0,
                NULL,
                NULL,
                0,
                &blockBuffer
            );

            if (bufferListSizeNeeded == 0) {
                // Fallback: estimate size based on channel count
                bufferListSizeNeeded = sizeof(AudioBufferList) + (channels - 1) * sizeof(AudioBuffer);
            }

            AudioBufferList *audioBufferList = (AudioBufferList *)malloc(bufferListSizeNeeded);
            // Second call actually fills the buffer list and retains the
            // backing block buffer (released below).
            status = CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer(
                sampleBuffer,
                NULL,
                audioBufferList,
                bufferListSizeNeeded,
                NULL,
                NULL,
                kCMSampleBufferFlag_AudioBufferList_Assure16ByteAlignment,
                &blockBuffer
            );

            if (status != noErr || !audioBufferList) {
                if (audioBufferList) free(audioBufferList);
                if (blockBuffer) CFRelease(blockBuffer);
                return;
            }

            // Get the number of frames.
            CMItemCount numFrames = CMSampleBufferGetNumSamples(sampleBuffer);

            if (isFloat && asbd->mBitsPerChannel == 32) {
                // Interleave channels and convert float to int16.
                std::vector<int16_t> outputBuffer(numFrames * channels);

                for (CMItemCount frame = 0; frame < numFrames; frame++) {
                    for (int ch = 0; ch < channels && ch < (int)audioBufferList->mNumberBuffers; ch++) {
                        const float *channelData = (const float *)audioBufferList->mBuffers[ch].mData;
                        float sample = channelData[frame];
                        // Clamp to [-1.0, 1.0]
                        if (sample > 1.0f) sample = 1.0f;
                        if (sample < -1.0f) sample = -1.0f;
                        // Convert to 16-bit and interleave
                        outputBuffer[frame * channels + ch] = (int16_t)(sample * 32767.0f);
                    }
                }

                self.dataCallback((const uint8_t*)outputBuffer.data(), outputBuffer.size() * sizeof(int16_t));
            }

            free(audioBufferList);
            if (blockBuffer) CFRelease(blockBuffer);
        } else {
            // Interleaved audio: original code path.
            CMBlockBufferRef blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer);
            if (!blockBuffer) return;

            size_t totalLength = 0;
            char *dataPointer = NULL;
            OSStatus status = CMBlockBufferGetDataPointer(blockBuffer, 0, NULL, &totalLength, &dataPointer);

            if (status != kCMBlockBufferNoErr || !dataPointer) return;

            // ScreenCaptureKit outputs 32-bit float audio.
            // Convert to 16-bit signed integer PCM for consistency with other sources.
            if (isFloat && asbd->mBitsPerChannel == 32) {
                size_t numSamples = totalLength / sizeof(float);
                std::vector<int16_t> outputBuffer(numSamples);

                const float *floatData = (const float *)dataPointer;
                for (size_t i = 0; i < numSamples; i++) {
                    float sample = floatData[i];
                    // Clamp to [-1.0, 1.0]
                    if (sample > 1.0f) sample = 1.0f;
                    if (sample < -1.0f) sample = -1.0f;
                    // Convert to 16-bit
                    outputBuffer[i] = (int16_t)(sample * 32767.0f);
                }

                self.dataCallback((const uint8_t*)outputBuffer.data(), numSamples * sizeof(int16_t));
            }
        }
    }
}

// SCStreamDelegate: the stream stopped on its own (e.g. permission revoked);
// surface the failure to the consumer.
- (void)stream:(SCStream *)stream didStopWithError:(NSError *)error {
    if (@available(macOS 12.3, *)) {
        if (error && self.errorCallback) {
            self.errorCallback("Stream stopped with error: " + std::string(error.localizedDescription.UTF8String));
        }
    }
}

@end
@@ -0,0 +1,9 @@
1
+ #include "AudioController.h"
2
+ #include <napi.h>
3
+
4
+
5
// N-API module entry point: all class/function registration is delegated to
// AudioController::Init.
Napi::Object Init(Napi::Env env, Napi::Object exports) {
    return AudioController::Init(env, exports);
}

// Registers `Init` as this addon's initializer under the module name
// native_audio_sdk.
NODE_API_MODULE(native_audio_sdk, Init)