electron-native-screenshare 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,267 @@
1
+ /**
2
+ * macOS Audio Capture via ScreenCaptureKit (macOS 13 Ventura+)
3
+ *
4
+ * Process-level audio isolation using SCStream:
5
+ * - Include mode: SCContentFilter with includingApplications → only target app audio
6
+ * - Exclude mode: SCContentFilter with excludingApplications → all audio except target
7
+ *
8
+ * Audio is delivered as raw PCM float32, stereo, 48kHz — matching the Windows WASAPI output.
9
+ */
10
+
11
+ #import <ScreenCaptureKit/ScreenCaptureKit.h>
12
+ #import <CoreMedia/CoreMedia.h>
13
+ #import <AudioToolbox/AudioToolbox.h>
14
+ #import <Foundation/Foundation.h>
15
+ #include "coreaudio_capture.h"
16
+ #include <dispatch/dispatch.h>
17
+ #include <iostream>
18
+ #include <thread>
19
+
20
+ // --- SCStream delegate that forwards audio samples to the C++ callback ---
21
+
22
/**
 * Receives sample buffers from an SCStream and forwards the raw audio bytes,
 * together with the format read from each buffer, to a C++ callback.
 *
 * The object is used as the stream's audio output (SCStreamOutput) and also
 * implements the SCStreamDelegate stop notification.
 */
@interface AudioStreamDelegate : NSObject <SCStreamOutput, SCStreamDelegate>
/// Callback invoked with (bytes, byteCount, format) for every audio buffer.
@property (nonatomic, assign) CoreAudioCapture::DataCallback dataCallback;
/// Non-owning pointer to the owner's "capturing" flag; buffers are dropped while it is false or null.
@property (nonatomic, assign) std::atomic<bool>* isCapturingRef;
@end

@implementation AudioStreamDelegate

/**
 * SCStreamOutput callback. Ignores non-audio buffers, then hands the buffer's
 * bytes and its stream description to `dataCallback`.
 *
 * NOTE(review): the bytes are forwarded exactly as laid out in the block buffer.
 * If the stream description carries kAudioFormatFlagIsNonInterleaved the data is
 * one plane per channel rather than interleaved frames, and AudioMetadata has no
 * field to express that — confirm what consumers expect.
 */
- (void)stream:(SCStream *)stream didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer ofType:(SCStreamOutputType)type {
    if (type != SCStreamOutputTypeAudio) return;
    if (!self.isCapturingRef || !self.isCapturingRef->load()) return;

    // Read the property once: the getter returns a copy of the std::function.
    CoreAudioCapture::DataCallback callback = self.dataCallback;
    if (!callback) return;

    CMBlockBufferRef blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer);
    if (!blockBuffer) return;

    size_t contiguousLength = 0;
    size_t totalLength = 0;
    char* dataPointer = NULL;
    OSStatus status = CMBlockBufferGetDataPointer(blockBuffer, 0, &contiguousLength, &totalLength, &dataPointer);
    if (status != kCMBlockBufferNoErr || !dataPointer || totalLength == 0) return;

    // The returned pointer is only valid for `contiguousLength` bytes. When the
    // block buffer is fragmented, flatten it into a temporary copy instead of
    // reading past the first fragment.
    const uint8_t* bytes = (const uint8_t*)dataPointer;
    NSMutableData* flattened = nil;
    if (contiguousLength < totalLength) {
        flattened = [NSMutableData dataWithLength:totalLength];
        if (!flattened) return;
        if (CMBlockBufferCopyDataBytes(blockBuffer, 0, totalLength, flattened.mutableBytes) != kCMBlockBufferNoErr) return;
        bytes = (const uint8_t*)flattened.bytes;
    }

    // Extract format from the sample buffer; both lookups may yield NULL.
    CMFormatDescriptionRef formatDesc = CMSampleBufferGetFormatDescription(sampleBuffer);
    if (!formatDesc) return;
    const AudioStreamBasicDescription* asbd = CMAudioFormatDescriptionGetStreamBasicDescription(formatDesc);
    if (!asbd) return;

    CoreAudioCapture::AudioMetadata meta;
    meta.sampleRate = (uint32_t)asbd->mSampleRate;
    meta.channels = (uint16_t)asbd->mChannelsPerFrame;
    meta.bitsPerSample = (uint16_t)asbd->mBitsPerChannel;
    meta.isFloat = (asbd->mFormatFlags & kAudioFormatFlagIsFloat) != 0;

    callback(bytes, totalLength, meta);
}

/**
 * SCStreamDelegate callback. Logs the error, if any, to stderr.
 */
- (void)stream:(SCStream *)stream didStopWithError:(NSError *)error {
    if (error) {
        // UTF8String can be NULL; streaming a NULL char* into std::cerr is undefined.
        const char* description = error.localizedDescription.UTF8String;
        std::cerr << "[electron-native-screenshare] SCStream stopped with error: "
                  << (description ? description : "(no description)") << std::endl;
    }
}

@end
63
+
64
+ // --- Pimpl implementation ---
65
+
66
/// Private state of CoreAudioCapture, kept out of the public header (pimpl).
struct CoreAudioCapture::Impl {
    /// Stream created by Initialize(); reset to nil by Stop().
    SCStream* stream = nil;
    /// Stream output object created by Start().
    AudioStreamDelegate* delegate = nil;
    /// Serial queue on which audio sample buffers are delivered.
    dispatch_queue_t captureQueue = nil;
    /// Process id handed to Initialize().
    uint32_t targetPid{0};
    /// true: capture only the target process; false: capture everything except it.
    bool includeMode{false};
};
73
+
74
/// Allocates the private state and the serial queue used for audio delivery.
CoreAudioCapture::CoreAudioCapture() : pImpl(new Impl()) {
    dispatch_queue_t audioQueue =
        dispatch_queue_create("com.electron-native-screenshare.audio", DISPATCH_QUEUE_SERIAL);
    pImpl->captureQueue = audioQueue;
}
77
+
78
/// Stops any running capture, drops the Objective-C references and frees the private state.
CoreAudioCapture::~CoreAudioCapture() {
    Stop();
    if (!pImpl) return;

    // Assigning nil to an already-nil reference is a no-op, so no guards are needed.
    pImpl->delegate = nil;
    pImpl->captureQueue = nil;

    delete pImpl;
    pImpl = nullptr;
}
91
+
92
/**
 * Looks up the target application by PID and builds an SCStream configured for
 * audio capture, storing it in the private state for a later Start().
 *
 * Blocks the calling thread until ScreenCaptureKit's asynchronous content
 * enumeration has completed.
 *
 * @param processId     PID of the application to include or exclude.
 * @param isIncludeMode true to capture only the target application's audio,
 *                      false to capture everything except it.
 * @param outError      Receives a description when the return value is non-zero.
 * @return 0 on success; -1 if shareable content could not be obtained; -2 if no
 *         running application has the given PID; -3 if no display is available;
 *         -4 if the stream object could not be created.
 */
int CoreAudioCapture::Initialize(uint32_t processId, bool isIncludeMode, std::string& outError) {
    pImpl->targetPid = processId;
    pImpl->includeMode = isIncludeMode;

    __block int result = 0;
    __block std::string blockError;

    // ScreenCaptureKit requires async content enumeration — bridge to sync with semaphore
    dispatch_semaphore_t sema = dispatch_semaphore_create(0);

    [SCShareableContent getShareableContentExcludingDesktopWindows:NO
                                               onScreenWindowsOnly:NO
                                                 completionHandler:^(SCShareableContent* _Nullable content, NSError* _Nullable error) {
        if (error || !content) {
            // UTF8String may be NULL; never construct std::string from a NULL pointer.
            const char* description = error ? error.localizedDescription.UTF8String : NULL;
            blockError = description ? std::string(description)
                                     : "Failed to get shareable content";
            result = -1;
            dispatch_semaphore_signal(sema);
            return;
        }

        // Find the target application by PID
        SCRunningApplication* targetApp = nil;
        for (SCRunningApplication* app in content.applications) {
            if (app.processID == (pid_t)processId) {
                targetApp = app;
                break;
            }
        }

        if (!targetApp) {
            blockError = "Target process not found in running applications (PID: "
                         + std::to_string(processId) + ")";
            result = -2;
            dispatch_semaphore_signal(sema);
            return;
        }

        // A content filter is always anchored to a display, even for audio-only capture.
        SCDisplay* primaryDisplay = content.displays.firstObject;
        if (!primaryDisplay) {
            blockError = "No display found for content filter";
            result = -3;
            dispatch_semaphore_signal(sema);
            return;
        }

        SCContentFilter* filter = nil;
        if (isIncludeMode) {
            // Include mode: capture ONLY the target app's audio
            filter = [[SCContentFilter alloc] initWithDisplay:primaryDisplay
                                        includingApplications:@[targetApp]
                                             exceptingWindows:@[]];
        } else {
            // Exclude mode: capture everything EXCEPT the target app
            filter = [[SCContentFilter alloc] initWithDisplay:primaryDisplay
                                        excludingApplications:@[targetApp]
                                             exceptingWindows:@[]];
        }

        // Configure for audio capture (48kHz stereo float32, matching Windows WASAPI output)
        SCStreamConfiguration* config = [[SCStreamConfiguration alloc] init];
        config.capturesAudio = YES;
        config.excludesCurrentProcessAudio = NO;  // handled by the filter
        config.sampleRate = 48000;
        config.channelCount = 2;

        // Minimize video overhead — we only need audio
        config.width = 2;
        config.height = 2;
        config.minimumFrameInterval = CMTimeMake(1, 1);  // 1 fps minimum

        // Create the stream. The stream delegate is nil here; the audio output
        // object is attached later in Start().
        pImpl->stream = [[SCStream alloc] initWithFilter:filter
                                           configuration:config
                                                delegate:nil];

        // Report a nil stream instead of returning success with nothing to start.
        if (!pImpl->stream) {
            blockError = "Failed to create SCStream";
            result = -4;
        }

        dispatch_semaphore_signal(sema);
    }];

    dispatch_semaphore_wait(sema, DISPATCH_TIME_FOREVER);

    if (result != 0) {
        outError = blockError;
    }
    return result;
}
179
+
180
/**
 * Attaches an audio output to the stream created by Initialize() and starts
 * capturing. Blocks until ScreenCaptureKit reports whether the start succeeded.
 *
 * Does nothing when a capture is already running or no stream exists. On any
 * failure the error is logged to stderr, the output is detached again and the
 * capturing flag is cleared, so the call can be retried.
 *
 * @param callback Invoked on the capture queue for every audio buffer.
 */
void CoreAudioCapture::Start(DataCallback callback) {
    if (isCapturing.load() || !pImpl->stream) return;

    onData = callback;
    isCapturing.store(true);

    // Set up the delegate
    AudioStreamDelegate* output = [[AudioStreamDelegate alloc] init];
    output.dataCallback = onData;
    output.isCapturingRef = &isCapturing;
    pImpl->delegate = output;

    // Cocoa convention: the BOOL return signals failure; the NSError is only
    // meaningful when the call returned NO.
    NSError* addOutputError = nil;
    BOOL added = [pImpl->stream addStreamOutput:output
                                           type:SCStreamOutputTypeAudio
                             sampleHandlerQueue:pImpl->captureQueue
                                          error:&addOutputError];

    if (!added) {
        const char* reason = addOutputError.localizedDescription.UTF8String;
        std::cerr << "[electron-native-screenshare] Failed to add stream output: "
                  << (reason ? reason : "(no description)") << std::endl;
        pImpl->delegate = nil;
        isCapturing.store(false);
        return;
    }

    dispatch_semaphore_t startSema = dispatch_semaphore_create(0);
    __block bool startSuccess = false;

    [pImpl->stream startCaptureWithCompletionHandler:^(NSError* _Nullable error) {
        if (error) {
            const char* reason = error.localizedDescription.UTF8String;
            std::cerr << "[electron-native-screenshare] SCStream start failed: "
                      << (reason ? reason : "(no description)") << std::endl;
        } else {
            startSuccess = true;
        }
        dispatch_semaphore_signal(startSema);
    }];

    dispatch_semaphore_wait(startSema, DISPATCH_TIME_FOREVER);

    if (!startSuccess) {
        // Detach the output so a later retry does not register a second one.
        [pImpl->stream removeStreamOutput:output type:SCStreamOutputTypeAudio error:nil];
        pImpl->delegate = nil;
        isCapturing.store(false);
    }
}
223
+
224
/**
 * Stops a running capture. Waits up to three seconds for ScreenCaptureKit to
 * confirm the stop, then drops the stream and the output object. A no-op when
 * no capture is running.
 */
void CoreAudioCapture::Stop() {
    const bool wasCapturing = isCapturing.load();
    if (!wasCapturing) return;
    isCapturing.store(false);

    if (pImpl->stream) {
        dispatch_semaphore_t stopped = dispatch_semaphore_create(0);
        [pImpl->stream stopCaptureWithCompletionHandler:^(NSError* _Nullable error) {
            dispatch_semaphore_signal(stopped);
        }];

        // Bounded wait: teardown proceeds even if the completion handler never fires.
        const dispatch_time_t deadline = dispatch_time(DISPATCH_TIME_NOW, 3 * NSEC_PER_SEC);
        dispatch_semaphore_wait(stopped, deadline);

        pImpl->stream = nil;
    }

    pImpl->delegate = nil;
}
238
+
239
+ // --- getPidFromWindowId using CGWindowListCopyWindowInfo ---
240
+
241
/**
 * Returns the PID of the process that owns the given window.
 *
 * @param windowId Window number as used by CGWindowListCopyWindowInfo.
 * @return The owning process id, or 0 when the window (or its owner entry)
 *         cannot be found.
 */
uint32_t getPidFromWindowId(uint32_t windowId) {
    CFArrayRef windowList = CGWindowListCopyWindowInfo(
        kCGWindowListOptionIncludingWindow, (CGWindowID)windowId);

    if (!windowList) return 0;

    uint32_t pid = 0;
    const CFIndex count = CFArrayGetCount(windowList);
    for (CFIndex i = 0; i < count; i++) {
        CFDictionaryRef windowInfo = (CFDictionaryRef)CFArrayGetValueAtIndex(windowList, i);
        if (!windowInfo) continue;

        // CFDictionaryGetValue returns NULL for a missing key, and passing NULL
        // to CFNumberGetValue crashes — skip entries without a window number.
        CFNumberRef windowNumber = (CFNumberRef)CFDictionaryGetValue(windowInfo, kCGWindowNumber);
        if (!windowNumber) continue;

        // Read as 64-bit so ids above INT_MAX are compared without truncation.
        long long wid = -1;
        if (!CFNumberGetValue(windowNumber, kCFNumberLongLongType, &wid)) continue;
        if (wid < 0 || (unsigned long long)wid != windowId) continue;

        CFNumberRef ownerPid = (CFNumberRef)CFDictionaryGetValue(windowInfo, kCGWindowOwnerPID);
        int p = 0;
        if (ownerPid && CFNumberGetValue(ownerPid, kCFNumberIntType, &p) && p > 0) {
            pid = (uint32_t)p;
        }
        break;
    }

    CFRelease(windowList);
    return pid;
}
@@ -0,0 +1,136 @@
1
+ #undef _WIN32_WINNT
2
+ #define _WIN32_WINNT 0x0A00
3
+ #undef NTDDI_VERSION
4
+ #define NTDDI_VERSION 0x0A00000A
5
+
6
+ #include <napi.h>
7
+ #include "wasapi_capture.h"
8
+
9
+ WasapiCapture capture; // Single module-wide capture engine driven by StartCapture/StopCapture.
10
+
11
+ Napi::ThreadSafeFunction tsfn; // Bridge used to call the JS callback; created in StartCapture, released in StopCapture.
12
+
13
+ Napi::Value StartCapture(const Napi::CallbackInfo& info) {
14
+ Napi::Env env = info.Env();
15
+
16
+ DWORD processId = 0;
17
+ if (info.Length() > 0 && info[0].IsNumber()) {
18
+ processId = info[0].As<Napi::Number>().Uint32Value();
19
+ }
20
+
21
+ bool isIncludeMode = false;
22
+ if (info.Length() > 1 && info[1].IsBoolean()) {
23
+ isIncludeMode = info[1].As<Napi::Boolean>().Value();
24
+ }
25
+
26
+ if (info.Length() < 3 || !info[2].IsFunction()) {
27
+ Napi::TypeError::New(env, "Callback function expected as third argument").ThrowAsJavaScriptException();
28
+ return env.Null();
29
+ }
30
+
31
+ std::string errorMsg;
32
+ HRESULT hr = capture.Initialize(processId, isIncludeMode, errorMsg);
33
+ if (FAILED(hr) || !errorMsg.empty()) {
34
+ char buf[256];
35
+ snprintf(buf, sizeof(buf), "WASAPI Init Failed: %s (HRESULT: 0x%08lX)", errorMsg.c_str(), hr);
36
+ Napi::TypeError::New(env, buf).ThrowAsJavaScriptException();
37
+ return env.Null();
38
+ }
39
+
40
+ tsfn = Napi::ThreadSafeFunction::New(
41
+ env,
42
+ info[2].As<Napi::Function>(),
43
+ "WASAPICaptureCallback",
44
+ 0,
45
+ 1
46
+ );
47
+
48
+ auto callback = [](const uint8_t* data, size_t length, WasapiCapture::AudioMetadata metadata) {
49
+ if (!tsfn) return;
50
+
51
+ struct Payload {
52
+ std::vector<uint8_t> buffer;
53
+ WasapiCapture::AudioMetadata meta;
54
+ };
55
+ auto* payload = new Payload{ std::vector<uint8_t>(data, data + length), metadata };
56
+
57
+ auto napiCallback = [](Napi::Env env, Napi::Function jsCallback, Payload* p) {
58
+ Napi::Object metaObj = Napi::Object::New(env);
59
+ metaObj.Set("sampleRate", p->meta.sampleRate);
60
+ metaObj.Set("channels", p->meta.channels);
61
+ metaObj.Set("bitsPerSample", p->meta.bitsPerSample);
62
+ metaObj.Set("isFloat", p->meta.isFloat);
63
+
64
+ Napi::Buffer<uint8_t> buffer = Napi::Buffer<uint8_t>::Copy(env, p->buffer.data(), p->buffer.size());
65
+ jsCallback.Call({ buffer, metaObj });
66
+ delete p;
67
+ };
68
+
69
+ tsfn.NonBlockingCall(payload, napiCallback);
70
+ };
71
+
72
+ capture.Start(callback);
73
+ return Napi::Boolean::New(env, true);
74
+ }
75
+
76
+ Napi::Value StopCapture(const Napi::CallbackInfo& info) {
77
+ Napi::Env env = info.Env();
78
+ capture.Stop();
79
+ if (tsfn) {
80
+ tsfn.Release();
81
+ tsfn = nullptr;
82
+ }
83
+ return Napi::Boolean::New(env, true);
84
+ }
85
+
86
+ struct EnumUWPData { // In/out state shared between GetPidFromHwnd and EnumChildProc.
87
+ DWORD pid; // Overwritten with the owning process id of the matching child window.
88
+ bool found; // Set to true once a "Windows.UI.Core.CoreWindow" child has been seen.
89
+ };
90
+
91
+ BOOL CALLBACK EnumChildProc(HWND hwnd, LPARAM lParam) {
92
+ char className[256];
93
+ if (GetClassNameA(hwnd, className, sizeof(className))) {
94
+ if (strcmp(className, "Windows.UI.Core.CoreWindow") == 0) {
95
+ EnumUWPData* data = (EnumUWPData*)lParam;
96
+ GetWindowThreadProcessId(hwnd, &data->pid);
97
+ data->found = true;
98
+ return FALSE; // Stop enumeration
99
+ }
100
+ }
101
+ return TRUE;
102
+ }
103
+
104
+ Napi::Value GetPidFromHwnd(const Napi::CallbackInfo& info) {
105
+ Napi::Env env = info.Env();
106
+ if (info.Length() < 1 || !info[0].IsNumber()) {
107
+ Napi::TypeError::New(env, "Number expected").ThrowAsJavaScriptException();
108
+ return env.Null();
109
+ }
110
+ HWND hwnd = (HWND)(uintptr_t)info[0].As<Napi::Number>().Uint32Value();
111
+ DWORD pid = 0;
112
+ GetWindowThreadProcessId(hwnd, &pid);
113
+
114
+ // Optimization & Fix: If the window is a UWP ApplicationFrameWindow, the actual media process is a child window.
115
+ char className[256];
116
+ if (GetClassNameA(hwnd, className, sizeof(className))) {
117
+ if (strcmp(className, "ApplicationFrameWindow") == 0) {
118
+ EnumUWPData data = { pid, false };
119
+ EnumChildWindows(hwnd, EnumChildProc, (LPARAM)&data);
120
+ if (data.found) {
121
+ pid = data.pid;
122
+ }
123
+ }
124
+ }
125
+
126
+ return Napi::Number::New(env, pid);
127
+ }
128
+
129
+ Napi::Object Init(Napi::Env env, Napi::Object exports) {
130
+ exports.Set(Napi::String::New(env, "startCapture"), Napi::Function::New(env, StartCapture));
131
+ exports.Set(Napi::String::New(env, "stopCapture"), Napi::Function::New(env, StopCapture));
132
+ exports.Set(Napi::String::New(env, "getPidFromHwnd"), Napi::Function::New(env, GetPidFromHwnd));
133
+ return exports;
134
+ }
135
+
136
+ NODE_API_MODULE(topluyo_capture, Init)
@@ -0,0 +1,223 @@
1
+ #include "wasapi_capture.h"
2
+ #include <iostream>
3
+
4
+ #pragma comment(lib, "Mmdevapi.lib")
5
+
6
+ WasapiCapture::WasapiCapture() {}
7
+
8
+ WasapiCapture::~WasapiCapture() {
9
+ Stop();
10
+ if (pCaptureClient) pCaptureClient->Release();
11
+ if (pAudioClient) pAudioClient->Release();
12
+ if (pDevice) pDevice->Release();
13
+ }
14
+
15
+ #include <objidl.h>
16
+
17
+ #ifndef VIRTUAL_AUDIO_DEVICE_PROCESS_LOOPBACK
18
+ #define VIRTUAL_AUDIO_DEVICE_PROCESS_LOOPBACK L"VAD\\Process_Loopback"
19
+ #endif
20
+
21
+ // Define IAgileObject IID manually in case it's missing in some SDKs
22
+ static const IID IID_IAgileObject_Manual = { 0x94ea2b94, 0xe9cc, 0x49e0, { 0xc0, 0xff, 0xee, 0x64, 0xca, 0x8f, 0x5b, 0x90 } };
23
+
24
+ class AudioInterfaceCompletionHandler : public IActivateAudioInterfaceCompletionHandler, public IAgileObject {
25
+ LONG m_cRef;
26
+ HANDLE m_hEvent;
27
+ IAudioClient** m_ppAudioClient;
28
+ IUnknown* m_pUnkFTM;
29
+ public:
30
+ AudioInterfaceCompletionHandler(HANDLE hEvent, IAudioClient** ppAudioClient)
31
+ : m_cRef(1), m_hEvent(hEvent), m_ppAudioClient(ppAudioClient), m_pUnkFTM(nullptr) {
32
+ IUnknown* pUnkThis = static_cast<IActivateAudioInterfaceCompletionHandler*>(this);
33
+ CoCreateFreeThreadedMarshaler(pUnkThis, &m_pUnkFTM);
34
+ }
35
+
36
+ ~AudioInterfaceCompletionHandler() {
37
+ if (m_pUnkFTM) {
38
+ m_pUnkFTM->Release();
39
+ }
40
+ }
41
+
42
+ ULONG STDMETHODCALLTYPE AddRef() { return InterlockedIncrement(&m_cRef); }
43
+ ULONG STDMETHODCALLTYPE Release() {
44
+ ULONG ulRef = InterlockedDecrement(&m_cRef);
45
+ if (0 == ulRef) { delete this; }
46
+ return ulRef;
47
+ }
48
+ HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void** ppvInterface) {
49
+ if (riid == __uuidof(IUnknown) || riid == __uuidof(IActivateAudioInterfaceCompletionHandler)) {
50
+ *ppvInterface = static_cast<IActivateAudioInterfaceCompletionHandler*>(this);
51
+ AddRef();
52
+ return S_OK;
53
+ }
54
+ if (riid == IID_IAgileObject_Manual || riid == __uuidof(IAgileObject)) {
55
+ *ppvInterface = static_cast<IAgileObject*>(this);
56
+ AddRef();
57
+ return S_OK;
58
+ }
59
+ if (riid == __uuidof(IMarshal) && m_pUnkFTM != nullptr) {
60
+ return m_pUnkFTM->QueryInterface(riid, ppvInterface);
61
+ }
62
+ *ppvInterface = NULL;
63
+ return E_NOINTERFACE;
64
+ }
65
+ HRESULT STDMETHODCALLTYPE ActivateCompleted(IActivateAudioInterfaceAsyncOperation* operation) {
66
+ HRESULT hrActivateResult = S_OK;
67
+ IUnknown* punkAudioInterface = NULL;
68
+ HRESULT hr = operation->GetActivateResult(&hrActivateResult, &punkAudioInterface);
69
+ if (FAILED(hr) || FAILED(hrActivateResult) || punkAudioInterface == NULL) {
70
+ printf("[NativeCapture] ActivateCompleted FAILED! hr=0x%08lX, hrActivateResult=0x%08lX, punk=%p\n", hr, hrActivateResult, punkAudioInterface);
71
+ }
72
+ if (SUCCEEDED(hr) && SUCCEEDED(hrActivateResult) && punkAudioInterface != NULL) {
73
+ punkAudioInterface->QueryInterface(__uuidof(IAudioClient), (void**)m_ppAudioClient);
74
+ punkAudioInterface->Release();
75
+ }
76
+ SetEvent(m_hEvent);
77
+ return S_OK;
78
+ }
79
+ };
80
+
81
+ #include <thread>
82
+
83
+ HRESULT WasapiCapture::Initialize(DWORD processId, bool isIncludeMode, std::string& outError) {
84
+ HRESULT finalHr = S_OK;
85
+
86
+ // Electron's main thread is an STA. ActivateAudioInterfaceAsync strictly requires an MTA thread.
87
+ // If called on an STA, it throws E_ILLEGAL_METHOD_CALL. We bypass this by spawning an MTA worker.
88
+ std::thread initThread([&]() {
89
+ HRESULT hr = CoInitializeEx(NULL, COINIT_MULTITHREADED);
90
+
91
+ AUDIOCLIENT_ACTIVATION_PARAMS activationParams = {};
92
+ activationParams.ActivationType = AUDIOCLIENT_ACTIVATION_TYPE_PROCESS_LOOPBACK;
93
+ activationParams.ProcessLoopbackParams.TargetProcessId = processId;
94
+ activationParams.ProcessLoopbackParams.ProcessLoopbackMode = isIncludeMode ? PROCESS_LOOPBACK_MODE_INCLUDE_TARGET_PROCESS_TREE : PROCESS_LOOPBACK_MODE_EXCLUDE_TARGET_PROCESS_TREE;
95
+
96
+ PROPVARIANT activateParams;
97
+ PropVariantInit(&activateParams);
98
+ activateParams.vt = VT_BLOB;
99
+ activateParams.blob.cbSize = sizeof(activationParams);
100
+ activateParams.blob.pBlobData = (BYTE*)&activationParams;
101
+
102
+ HANDLE hEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
103
+ AudioInterfaceCompletionHandler* pHandler = new AudioInterfaceCompletionHandler(hEvent, &pAudioClient);
104
+
105
+ IActivateAudioInterfaceAsyncOperation* asyncOp = nullptr;
106
+ hr = ActivateAudioInterfaceAsync(VIRTUAL_AUDIO_DEVICE_PROCESS_LOOPBACK, __uuidof(IAudioClient), &activateParams, pHandler, &asyncOp);
107
+
108
+ if (FAILED(hr)) {
109
+ outError = "ActivateAudioInterfaceAsync failed synchronously";
110
+ finalHr = hr;
111
+ } else {
112
+ WaitForSingleObject(hEvent, INFINITE);
113
+ if (asyncOp) asyncOp->Release();
114
+ }
115
+
116
+ CloseHandle(hEvent);
117
+ pHandler->Release();
118
+
119
+ if (SUCCEEDED(finalHr)) {
120
+ if (!pAudioClient) {
121
+ outError = "pAudioClient is NULL after Wait";
122
+ finalHr = E_FAIL;
123
+ } else {
124
+ WAVEFORMATEXTENSIBLE wfx = {};
125
+ wfx.Format.wFormatTag = WAVE_FORMAT_EXTENSIBLE;
126
+ wfx.Format.nChannels = 2;
127
+ wfx.Format.nSamplesPerSec = 48000;
128
+ wfx.Format.wBitsPerSample = 32;
129
+ wfx.Format.nBlockAlign = (wfx.Format.nChannels * wfx.Format.wBitsPerSample) / 8;
130
+ wfx.Format.nAvgBytesPerSec = wfx.Format.nSamplesPerSec * wfx.Format.nBlockAlign;
131
+ wfx.Format.cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX);
132
+ wfx.Samples.wValidBitsPerSample = 32;
133
+ wfx.dwChannelMask = 3; // SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT
134
+
135
+ static const GUID SUBTYPE_IEEE_FLOAT_GUID = { 0x00000003, 0x0000, 0x0010, { 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71 } };
136
+ wfx.SubFormat = SUBTYPE_IEEE_FLOAT_GUID;
137
+
138
+ // Process loopback requires AUDCLNT_STREAMFLAGS_LOOPBACK and AUDCLNT_STREAMFLAGS_EVENTCALLBACK
139
+ // We use AUTOCONVERTPCM to let Windows resample if needed
140
+ // Note: AUDCLNT_STREAMFLAGS_SRC_DEFAULT_QUALITY causes AUDCLNT_E_INVALID_STREAM_FLAG (0x88890021)
141
+ DWORD streamFlags = AUDCLNT_STREAMFLAGS_LOOPBACK | AUDCLNT_STREAMFLAGS_EVENTCALLBACK | AUDCLNT_STREAMFLAGS_AUTOCONVERTPCM;
142
+
143
+ hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED,
144
+ streamFlags,
145
+ 0, 0, (WAVEFORMATEX*)&wfx, NULL);
146
+ if (FAILED(hr)) {
147
+ outError = "IAudioClient::Initialize failed";
148
+ finalHr = hr;
149
+ } else {
150
+ hr = pAudioClient->GetService(__uuidof(IAudioCaptureClient), (void**)&pCaptureClient);
151
+ if (FAILED(hr)) {
152
+ outError = "GetService(IAudioCaptureClient) failed";
153
+ finalHr = hr;
154
+ }
155
+ }
156
+ }
157
+ }
158
+ CoUninitialize();
159
+ });
160
+
161
+ initThread.join();
162
+ return finalHr;
163
+ }
164
+
165
+ void WasapiCapture::Start(DataCallback callback) {
166
+ if (isCapturing || !pAudioClient) return;
167
+ onData = callback;
168
+ isCapturing = true;
169
+ hCaptureThread = CreateThread(NULL, 0, CaptureThreadProc, this, 0, NULL);
170
+ }
171
+
172
+ void WasapiCapture::Stop() {
173
+ isCapturing = false;
174
+ if (hCaptureThread) {
175
+ WaitForSingleObject(hCaptureThread, INFINITE);
176
+ CloseHandle(hCaptureThread);
177
+ hCaptureThread = nullptr;
178
+ }
179
+ }
180
+
181
+ DWORD WINAPI WasapiCapture::CaptureThreadProc(LPVOID pContext) {
182
+ WasapiCapture* pThis = static_cast<WasapiCapture*>(pContext);
183
+ pThis->CaptureLoop();
184
+ return 0;
185
+ }
186
+
187
+ void WasapiCapture::CaptureLoop() {
188
+ HRESULT hr = CoInitializeEx(NULL, COINIT_MULTITHREADED);
189
+ HANDLE hEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
190
+ pAudioClient->SetEventHandle(hEvent);
191
+ pAudioClient->Start();
192
+
193
+ while (isCapturing) {
194
+ DWORD waitResult = WaitForSingleObject(hEvent, 100);
195
+ if (waitResult == WAIT_OBJECT_0) {
196
+ UINT32 packetLength = 0;
197
+ pCaptureClient->GetNextPacketSize(&packetLength);
198
+ while (packetLength != 0) {
199
+ BYTE* pData;
200
+ UINT32 numFramesAvailable;
201
+ DWORD flags;
202
+ pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &flags, NULL, NULL);
203
+
204
+ // Here we push pData to the callback
205
+ if (onData && pData) {
206
+ size_t bytesPerFrame = 8; // 2 channels * 4 bytes
207
+
208
+ AudioMetadata meta;
209
+ meta.sampleRate = 48000;
210
+ meta.channels = 2;
211
+ meta.bitsPerSample = 32;
212
+ meta.isFloat = true;
213
+ onData(pData, numFramesAvailable * bytesPerFrame, meta);
214
+ }
215
+ pCaptureClient->ReleaseBuffer(numFramesAvailable);
216
+ pCaptureClient->GetNextPacketSize(&packetLength);
217
+ }
218
+ }
219
+ }
220
+ pAudioClient->Stop();
221
+ CloseHandle(hEvent);
222
+ CoUninitialize();
223
+ }
@@ -0,0 +1,45 @@
1
+ #pragma once
2
+
3
+ #undef _WIN32_WINNT
4
+ #define _WIN32_WINNT 0x0A00
5
+ #undef NTDDI_VERSION
6
+ #define NTDDI_VERSION 0x0A00000A
7
+
8
#include <windows.h>
#include <mmdeviceapi.h>
#include <Audioclient.h>
#include <audioclientactivationparams.h>

#include <atomic>
#include <cstdint>
#include <functional>
#include <string>
#include <thread>
#include <vector>
17
+
18
+ class WasapiCapture {
19
+ public:
20
+ struct AudioMetadata {
21
+ uint32_t sampleRate;
22
+ uint16_t channels;
23
+ uint16_t bitsPerSample;
24
+ bool isFloat;
25
+ };
26
+ using DataCallback = std::function<void(const uint8_t* data, size_t length, AudioMetadata metadata)>;
27
+
28
+ WasapiCapture();
29
+ ~WasapiCapture();
30
+
31
+ HRESULT Initialize(DWORD processId, bool isIncludeMode, std::string& outError);
32
+ void Start(DataCallback callback);
33
+ void Stop();
34
+
35
+ private:
36
+ IMMDevice* pDevice = nullptr;
37
+ IAudioClient* pAudioClient = nullptr;
38
+ IAudioCaptureClient* pCaptureClient = nullptr;
39
+ HANDLE hCaptureThread = nullptr;
40
+ bool isCapturing = false;
41
+ DataCallback onData;
42
+
43
+ static DWORD WINAPI CaptureThreadProc(LPVOID pContext);
44
+ void CaptureLoop();
45
+ };