@matbee/remotemedia-native 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # @remotemedia/native
1
+ # @matbee/remotemedia-native
2
2
 
3
3
  Native Node.js bindings for RemoteMedia zero-copy IPC with iceoryx2 shared memory.
4
4
 
@@ -13,14 +13,14 @@ This package provides high-performance Node.js bindings for the RemoteMedia pipe
13
13
  ## Installation
14
14
 
15
15
  ```bash
16
- npm install @remotemedia/native
16
+ npm install @matbee/remotemedia-native
17
17
  ```
18
18
 
19
19
  ## Requirements
20
20
 
21
21
  - Node.js >= 18
22
22
  - Linux (x64, arm64) or macOS (x64, arm64)
23
- - For WebRTC features: see `@remotemedia/native-webrtc`
23
+ - For WebRTC features: see `@matbee/remotemedia-native-webrtc`
24
24
 
25
25
  ## Quick Start
26
26
 
@@ -29,7 +29,7 @@ import {
29
29
  createSession,
30
30
  NapiRuntimeData,
31
31
  isNativeLoaded,
32
- } from '@remotemedia/native';
32
+ } from '@matbee/remotemedia-native';
33
33
 
34
34
  // Check if native bindings loaded successfully
35
35
  if (!isNativeLoaded()) {
@@ -163,7 +163,7 @@ subscriber.close();
163
163
  ## Pipeline Execution
164
164
 
165
165
  ```typescript
166
- import { executePipeline, createStreamSession } from '@remotemedia/native';
166
+ import { executePipeline, createStreamSession } from '@matbee/remotemedia-native';
167
167
 
168
168
  // One-shot pipeline execution
169
169
  const manifest = {
package/index.js CHANGED
@@ -1,4 +1,4 @@
1
- // @remotemedia/native - Node.js bindings for RemoteMedia zero-copy IPC
1
+ // @matbee/remotemedia-native - Node.js bindings for RemoteMedia zero-copy IPC
2
2
  //
3
3
  // This module provides zero-copy IPC between Node.js, Python, and Rust
4
4
  // via iceoryx2 shared memory.
@@ -72,7 +72,7 @@ function loadNativeBinding() {
72
72
 
73
73
  // Try requiring without path (for globally installed)
74
74
  try {
75
- return require(`@remotemedia/native-${triple}`);
75
+ return require(`@matbee/remotemedia-native-${triple}`);
76
76
  } catch (e) {
77
77
  // Ignore
78
78
  }
package/node-schemas.json CHANGED
@@ -1,74 +1,114 @@
1
1
  [
2
2
  {
3
- "nodeType": "CalculatorNode",
4
- "description": "Performs arithmetic operations on JSON input",
5
- "category": "utility",
3
+ "nodeType": "AudioResample",
4
+ "description": "Resamples audio to target sample rate",
5
+ "category": "audio",
6
6
  "accepts": [
7
- "json"
7
+ "audio"
8
8
  ],
9
9
  "produces": [
10
- "json"
10
+ "audio"
11
11
  ],
12
12
  "parameters": [
13
13
  {
14
- "name": "precision",
14
+ "name": "target_sample_rate",
15
15
  "paramType": "integer",
16
- "description": "Decimal precision for results",
17
- "defaultValue": "10",
18
- "required": false
16
+ "description": "Target sample rate in Hz",
17
+ "defaultValue": "16000",
18
+ "required": false,
19
+ "minimum": 8000,
20
+ "maximum": 48000
19
21
  }
20
22
  ],
21
- "configSchema": "{\"properties\":{\"precision\":{\"default\":10,\"description\":\"Decimal precision for results\",\"type\":\"integer\"}},\"type\":\"object\"}",
23
+ "configSchema": "{\"properties\":{\"target_sample_rate\":{\"default\":16000,\"description\":\"Target sample rate in Hz\",\"maximum\":48000,\"minimum\":8000,\"type\":\"integer\"}},\"type\":\"object\"}",
22
24
  "isPython": false,
23
25
  "streaming": true,
24
- "multiOutput": false
26
+ "multiOutput": false,
27
+ "capabilities": {
28
+ "parallelizable": true,
29
+ "batchAware": false,
30
+ "supportsControl": false,
31
+ "latencyClass": 0
32
+ }
25
33
  },
26
34
  {
27
- "nodeType": "WhisperNode",
28
- "description": "Speech-to-text transcription using Whisper",
29
- "category": "ml",
35
+ "nodeType": "SileroVAD",
36
+ "description": "Voice Activity Detection using Silero VAD model",
37
+ "category": "audio",
30
38
  "accepts": [
31
39
  "audio"
32
40
  ],
33
41
  "produces": [
34
- "text",
35
- "json"
42
+ "audio",
43
+ "controlmessage"
36
44
  ],
37
45
  "parameters": [
38
46
  {
39
- "name": "language",
40
- "paramType": "string",
41
- "description": "Language code (null for auto-detect)",
47
+ "name": "min_silence_duration_ms",
48
+ "paramType": "integer",
49
+ "description": "Minimum silence duration in ms",
50
+ "defaultValue": "100",
42
51
  "required": false
43
52
  },
44
53
  {
45
- "name": "model",
46
- "paramType": "string",
47
- "description": "Whisper model size",
48
- "defaultValue": "\"base\"",
49
- "required": false,
50
- "enumValues": "[\"tiny\",\"base\",\"small\",\"medium\",\"large\",\"large-v3\"]"
54
+ "name": "min_speech_duration_ms",
55
+ "paramType": "integer",
56
+ "description": "Minimum speech duration in ms",
57
+ "defaultValue": "250",
58
+ "required": false
51
59
  },
52
60
  {
53
- "name": "task",
54
- "paramType": "string",
55
- "description": "Task type",
56
- "defaultValue": "\"transcribe\"",
61
+ "name": "threshold",
62
+ "paramType": "number",
63
+ "description": "Speech probability threshold",
64
+ "defaultValue": "0.5",
57
65
  "required": false,
58
- "enumValues": "[\"transcribe\",\"translate\"]"
66
+ "minimum": 0,
67
+ "maximum": 1
59
68
  }
60
69
  ],
61
- "configSchema": "{\"properties\":{\"language\":{\"description\":\"Language code (null for auto-detect)\",\"type\":\"string\"},\"model\":{\"default\":\"base\",\"description\":\"Whisper model size\",\"enum\":[\"tiny\",\"base\",\"small\",\"medium\",\"large\",\"large-v3\"],\"type\":\"string\"},\"task\":{\"default\":\"transcribe\",\"description\":\"Task type\",\"enum\":[\"transcribe\",\"translate\"],\"type\":\"string\"}},\"type\":\"object\"}",
62
- "isPython": true,
70
+ "configSchema": "{\"properties\":{\"min_silence_duration_ms\":{\"default\":100,\"description\":\"Minimum silence duration in ms\",\"type\":\"integer\"},\"min_speech_duration_ms\":{\"default\":250,\"description\":\"Minimum speech duration in ms\",\"type\":\"integer\"},\"threshold\":{\"default\":0.5,\"description\":\"Speech probability threshold\",\"maximum\":1.0,\"minimum\":0.0,\"type\":\"number\"}},\"type\":\"object\"}",
71
+ "isPython": false,
63
72
  "streaming": true,
64
73
  "multiOutput": false,
65
74
  "capabilities": {
66
- "parallelizable": false,
67
- "batchAware": true,
68
- "supportsControl": false,
69
- "latencyClass": 4
75
+ "parallelizable": true,
76
+ "batchAware": false,
77
+ "supportsControl": true,
78
+ "latencyClass": 1
70
79
  }
71
80
  },
81
+ {
82
+ "nodeType": "TextCollector",
83
+ "description": "Collects text chunks into complete utterances",
84
+ "category": "text",
85
+ "accepts": [
86
+ "text"
87
+ ],
88
+ "produces": [
89
+ "text"
90
+ ],
91
+ "parameters": [
92
+ {
93
+ "name": "delimiter",
94
+ "paramType": "string",
95
+ "description": "Delimiter to split on",
96
+ "defaultValue": "\"\"",
97
+ "required": false
98
+ },
99
+ {
100
+ "name": "flush_on_silence",
101
+ "paramType": "boolean",
102
+ "description": "Flush buffer when silence detected",
103
+ "defaultValue": "true",
104
+ "required": false
105
+ }
106
+ ],
107
+ "configSchema": "{\"properties\":{\"delimiter\":{\"default\":\"\",\"description\":\"Delimiter to split on\",\"type\":\"string\"},\"flush_on_silence\":{\"default\":true,\"description\":\"Flush buffer when silence detected\",\"type\":\"boolean\"}},\"type\":\"object\"}",
108
+ "isPython": false,
109
+ "streaming": true,
110
+ "multiOutput": false
111
+ },
72
112
  {
73
113
  "nodeType": "AudioChunker",
74
114
  "description": "Splits audio into fixed-size chunks",
@@ -93,6 +133,37 @@
93
133
  "streaming": true,
94
134
  "multiOutput": true
95
135
  },
136
+ {
137
+ "nodeType": "VideoFlip",
138
+ "description": "Flips video frames horizontally or vertically",
139
+ "category": "video",
140
+ "accepts": [
141
+ "video"
142
+ ],
143
+ "produces": [
144
+ "video"
145
+ ],
146
+ "parameters": [
147
+ {
148
+ "name": "horizontal",
149
+ "paramType": "boolean",
150
+ "description": "Flip horizontally",
151
+ "defaultValue": "true",
152
+ "required": false
153
+ },
154
+ {
155
+ "name": "vertical",
156
+ "paramType": "boolean",
157
+ "description": "Flip vertically",
158
+ "defaultValue": "false",
159
+ "required": false
160
+ }
161
+ ],
162
+ "configSchema": "{\"properties\":{\"horizontal\":{\"default\":true,\"description\":\"Flip horizontally\",\"type\":\"boolean\"},\"vertical\":{\"default\":false,\"description\":\"Flip vertically\",\"type\":\"boolean\"}},\"type\":\"object\"}",
163
+ "isPython": false,
164
+ "streaming": true,
165
+ "multiOutput": false
166
+ },
96
167
  {
97
168
  "nodeType": "PassThrough",
98
169
  "description": "Passes input through unchanged",
@@ -123,32 +194,25 @@
123
194
  "multiOutput": false
124
195
  },
125
196
  {
126
- "nodeType": "TextCollector",
127
- "description": "Collects text chunks into complete utterances",
128
- "category": "text",
197
+ "nodeType": "CalculatorNode",
198
+ "description": "Performs arithmetic operations on JSON input",
199
+ "category": "utility",
129
200
  "accepts": [
130
- "text"
201
+ "json"
131
202
  ],
132
203
  "produces": [
133
- "text"
204
+ "json"
134
205
  ],
135
206
  "parameters": [
136
207
  {
137
- "name": "delimiter",
138
- "paramType": "string",
139
- "description": "Delimiter to split on",
140
- "defaultValue": "\"\"",
141
- "required": false
142
- },
143
- {
144
- "name": "flush_on_silence",
145
- "paramType": "boolean",
146
- "description": "Flush buffer when silence detected",
147
- "defaultValue": "true",
208
+ "name": "precision",
209
+ "paramType": "integer",
210
+ "description": "Decimal precision for results",
211
+ "defaultValue": "10",
148
212
  "required": false
149
213
  }
150
214
  ],
151
- "configSchema": "{\"properties\":{\"delimiter\":{\"default\":\"\",\"description\":\"Delimiter to split on\",\"type\":\"string\"},\"flush_on_silence\":{\"default\":true,\"description\":\"Flush buffer when silence detected\",\"type\":\"boolean\"}},\"type\":\"object\"}",
215
+ "configSchema": "{\"properties\":{\"precision\":{\"default\":10,\"description\":\"Decimal precision for results\",\"type\":\"integer\"}},\"type\":\"object\"}",
152
216
  "isPython": false,
153
217
  "streaming": true,
154
218
  "multiOutput": false
@@ -183,82 +247,49 @@
183
247
  "multiOutput": false
184
248
  },
185
249
  {
186
- "nodeType": "SileroVAD",
187
- "description": "Voice Activity Detection using Silero VAD model",
188
- "category": "audio",
250
+ "nodeType": "WhisperNode",
251
+ "description": "Speech-to-text transcription using Whisper",
252
+ "category": "ml",
189
253
  "accepts": [
190
254
  "audio"
191
255
  ],
192
256
  "produces": [
193
- "audio",
194
- "controlmessage"
257
+ "text",
258
+ "json"
195
259
  ],
196
260
  "parameters": [
197
261
  {
198
- "name": "min_silence_duration_ms",
199
- "paramType": "integer",
200
- "description": "Minimum silence duration in ms",
201
- "defaultValue": "100",
202
- "required": false
203
- },
204
- {
205
- "name": "min_speech_duration_ms",
206
- "paramType": "integer",
207
- "description": "Minimum speech duration in ms",
208
- "defaultValue": "250",
262
+ "name": "language",
263
+ "paramType": "string",
264
+ "description": "Language code (null for auto-detect)",
209
265
  "required": false
210
266
  },
211
267
  {
212
- "name": "threshold",
213
- "paramType": "number",
214
- "description": "Speech probability threshold",
215
- "defaultValue": "0.5",
268
+ "name": "model",
269
+ "paramType": "string",
270
+ "description": "Whisper model size",
271
+ "defaultValue": "\"base\"",
216
272
  "required": false,
217
- "minimum": 0,
218
- "maximum": 1
219
- }
220
- ],
221
- "configSchema": "{\"properties\":{\"min_silence_duration_ms\":{\"default\":100,\"description\":\"Minimum silence duration in ms\",\"type\":\"integer\"},\"min_speech_duration_ms\":{\"default\":250,\"description\":\"Minimum speech duration in ms\",\"type\":\"integer\"},\"threshold\":{\"default\":0.5,\"description\":\"Speech probability threshold\",\"maximum\":1.0,\"minimum\":0.0,\"type\":\"number\"}},\"type\":\"object\"}",
222
- "isPython": false,
223
- "streaming": true,
224
- "multiOutput": false,
225
- "capabilities": {
226
- "parallelizable": true,
227
- "batchAware": false,
228
- "supportsControl": true,
229
- "latencyClass": 1
230
- }
231
- },
232
- {
233
- "nodeType": "AudioResample",
234
- "description": "Resamples audio to target sample rate",
235
- "category": "audio",
236
- "accepts": [
237
- "audio"
238
- ],
239
- "produces": [
240
- "audio"
241
- ],
242
- "parameters": [
273
+ "enumValues": "[\"tiny\",\"base\",\"small\",\"medium\",\"large\",\"large-v3\"]"
274
+ },
243
275
  {
244
- "name": "target_sample_rate",
245
- "paramType": "integer",
246
- "description": "Target sample rate in Hz",
247
- "defaultValue": "16000",
276
+ "name": "task",
277
+ "paramType": "string",
278
+ "description": "Task type",
279
+ "defaultValue": "\"transcribe\"",
248
280
  "required": false,
249
- "minimum": 8000,
250
- "maximum": 48000
281
+ "enumValues": "[\"transcribe\",\"translate\"]"
251
282
  }
252
283
  ],
253
- "configSchema": "{\"properties\":{\"target_sample_rate\":{\"default\":16000,\"description\":\"Target sample rate in Hz\",\"maximum\":48000,\"minimum\":8000,\"type\":\"integer\"}},\"type\":\"object\"}",
254
- "isPython": false,
284
+ "configSchema": "{\"properties\":{\"language\":{\"description\":\"Language code (null for auto-detect)\",\"type\":\"string\"},\"model\":{\"default\":\"base\",\"description\":\"Whisper model size\",\"enum\":[\"tiny\",\"base\",\"small\",\"medium\",\"large\",\"large-v3\"],\"type\":\"string\"},\"task\":{\"default\":\"transcribe\",\"description\":\"Task type\",\"enum\":[\"transcribe\",\"translate\"],\"type\":\"string\"}},\"type\":\"object\"}",
285
+ "isPython": true,
255
286
  "streaming": true,
256
287
  "multiOutput": false,
257
288
  "capabilities": {
258
- "parallelizable": true,
259
- "batchAware": false,
289
+ "parallelizable": false,
290
+ "batchAware": true,
260
291
  "supportsControl": false,
261
- "latencyClass": 0
292
+ "latencyClass": 4
262
293
  }
263
294
  },
264
295
  {
@@ -308,36 +339,5 @@
308
339
  "supportsControl": false,
309
340
  "latencyClass": 3
310
341
  }
311
- },
312
- {
313
- "nodeType": "VideoFlip",
314
- "description": "Flips video frames horizontally or vertically",
315
- "category": "video",
316
- "accepts": [
317
- "video"
318
- ],
319
- "produces": [
320
- "video"
321
- ],
322
- "parameters": [
323
- {
324
- "name": "horizontal",
325
- "paramType": "boolean",
326
- "description": "Flip horizontally",
327
- "defaultValue": "true",
328
- "required": false
329
- },
330
- {
331
- "name": "vertical",
332
- "paramType": "boolean",
333
- "description": "Flip vertically",
334
- "defaultValue": "false",
335
- "required": false
336
- }
337
- ],
338
- "configSchema": "{\"properties\":{\"horizontal\":{\"default\":true,\"description\":\"Flip horizontally\",\"type\":\"boolean\"},\"vertical\":{\"default\":false,\"description\":\"Flip vertically\",\"type\":\"boolean\"}},\"type\":\"object\"}",
339
- "isPython": false,
340
- "streaming": true,
341
- "multiOutput": false
342
342
  }
343
343
  ]
package/node-schemas.ts CHANGED
@@ -52,26 +52,20 @@ export type RuntimeData =
52
52
  | { type: 'numpy'; data: NumpyArray }
53
53
  | { type: 'control'; data: ControlMessage };
54
54
 
55
- /** Performs arithmetic operations on JSON input - Configuration */
56
- export interface CalculatorNodeConfig {
57
- /** Decimal precision for results */
58
- precision?: number;
59
- }
60
-
61
- /** Speech-to-text transcription using Whisper - Configuration */
62
- export interface WhisperNodeConfig {
63
- /** Language code (null for auto-detect) */
64
- language?: string;
65
- /** Whisper model size */
66
- model?: 'tiny' | 'base' | 'small' | 'medium' | 'large' | 'large-v3';
67
- /** Task type */
68
- task?: 'transcribe' | 'translate';
55
+ /** Resamples audio to target sample rate - Configuration */
56
+ export interface AudioResampleConfig {
57
+ /** Target sample rate in Hz */
58
+ target_sample_rate?: number;
69
59
  }
70
60
 
71
- /** Splits audio into fixed-size chunks - Configuration */
72
- export interface AudioChunkerConfig {
73
- /** Chunk duration in milliseconds */
74
- chunk_size_ms?: number;
61
+ /** Voice Activity Detection using Silero VAD model - Configuration */
62
+ export interface SileroVADConfig {
63
+ /** Minimum silence duration in ms */
64
+ min_silence_duration_ms?: number;
65
+ /** Minimum speech duration in ms */
66
+ min_speech_duration_ms?: number;
67
+ /** Speech probability threshold */
68
+ threshold?: number;
75
69
  }
76
70
 
77
71
  /** Collects text chunks into complete utterances - Configuration */
@@ -82,20 +76,34 @@ export interface TextCollectorConfig {
82
76
  flush_on_silence?: boolean;
83
77
  }
84
78
 
85
- /** Voice Activity Detection using Silero VAD model - Configuration */
86
- export interface SileroVADConfig {
87
- /** Minimum silence duration in ms */
88
- min_silence_duration_ms?: number;
89
- /** Minimum speech duration in ms */
90
- min_speech_duration_ms?: number;
91
- /** Speech probability threshold */
92
- threshold?: number;
79
+ /** Splits audio into fixed-size chunks - Configuration */
80
+ export interface AudioChunkerConfig {
81
+ /** Chunk duration in milliseconds */
82
+ chunk_size_ms?: number;
93
83
  }
94
84
 
95
- /** Resamples audio to target sample rate - Configuration */
96
- export interface AudioResampleConfig {
97
- /** Target sample rate in Hz */
98
- target_sample_rate?: number;
85
+ /** Flips video frames horizontally or vertically - Configuration */
86
+ export interface VideoFlipConfig {
87
+ /** Flip horizontally */
88
+ horizontal?: boolean;
89
+ /** Flip vertically */
90
+ vertical?: boolean;
91
+ }
92
+
93
+ /** Performs arithmetic operations on JSON input - Configuration */
94
+ export interface CalculatorNodeConfig {
95
+ /** Decimal precision for results */
96
+ precision?: number;
97
+ }
98
+
99
+ /** Speech-to-text transcription using Whisper - Configuration */
100
+ export interface WhisperNodeConfig {
101
+ /** Language code (null for auto-detect) */
102
+ language?: string;
103
+ /** Whisper model size */
104
+ model?: 'tiny' | 'base' | 'small' | 'medium' | 'large' | 'large-v3';
105
+ /** Task type */
106
+ task?: 'transcribe' | 'translate';
99
107
  }
100
108
 
101
109
  /** Text-to-speech synthesis using Kokoro TTS - Configuration */
@@ -108,14 +116,6 @@ export interface KokoroTTSNodeConfig {
108
116
  voice?: 'af_bella' | 'af_nicole' | 'af_sarah' | 'af_sky' | 'am_adam' | 'am_michael' | 'bf_emma' | 'bf_isabella' | 'bm_george' | 'bm_lewis';
109
117
  }
110
118
 
111
- /** Flips video frames horizontally or vertically - Configuration */
112
- export interface VideoFlipConfig {
113
- /** Flip horizontally */
114
- horizontal?: boolean;
115
- /** Flip vertically */
116
- vertical?: boolean;
117
- }
118
-
119
119
  /** Node metadata from registry */
120
120
  export interface NodeMetadata {
121
121
  nodeType: string;
@@ -130,29 +130,29 @@ export interface NodeMetadata {
130
130
 
131
131
  /** All registered node types */
132
132
  export type NodeType =
133
- | 'CalculatorNode'
134
- | 'WhisperNode'
133
+ | 'AudioResample'
134
+ | 'SileroVAD'
135
+ | 'TextCollector'
135
136
  | 'AudioChunker'
137
+ | 'VideoFlip'
136
138
  | 'PassThrough'
137
- | 'TextCollector'
139
+ | 'CalculatorNode'
138
140
  | 'Echo'
139
- | 'SileroVAD'
140
- | 'AudioResample'
141
- | 'KokoroTTSNode'
142
- | 'VideoFlip';
141
+ | 'WhisperNode'
142
+ | 'KokoroTTSNode';
143
143
 
144
144
  /** Node type to config type mapping */
145
145
  export interface NodeConfigMap {
146
- 'CalculatorNode': CalculatorNodeConfig;
147
- 'WhisperNode': WhisperNodeConfig;
146
+ 'AudioResample': AudioResampleConfig;
147
+ 'SileroVAD': SileroVADConfig;
148
+ 'TextCollector': TextCollectorConfig;
148
149
  'AudioChunker': AudioChunkerConfig;
150
+ 'VideoFlip': VideoFlipConfig;
149
151
  'PassThrough': Record<string, unknown>;
150
- 'TextCollector': TextCollectorConfig;
152
+ 'CalculatorNode': CalculatorNodeConfig;
151
153
  'Echo': Record<string, unknown>;
152
- 'SileroVAD': SileroVADConfig;
153
- 'AudioResample': AudioResampleConfig;
154
+ 'WhisperNode': WhisperNodeConfig;
154
155
  'KokoroTTSNode': KokoroTTSNodeConfig;
155
- 'VideoFlip': VideoFlipConfig;
156
156
  }
157
157
 
158
158
  /** Pipeline node with typed config */
@@ -178,31 +178,45 @@ export interface PipelineManifest {
178
178
  /** All node schemas (for runtime introspection) */
179
179
  export const nodeSchemas: NodeMetadata[] = [
180
180
  {
181
- "nodeType": "CalculatorNode",
182
- "description": "Performs arithmetic operations on JSON input",
183
- "category": "utility",
181
+ "nodeType": "AudioResample",
182
+ "description": "Resamples audio to target sample rate",
183
+ "category": "audio",
184
184
  "accepts": [
185
- "json"
185
+ "audio"
186
186
  ],
187
187
  "produces": [
188
- "json"
188
+ "audio"
189
189
  ],
190
190
  "isPython": false,
191
191
  "streaming": true,
192
192
  "multiOutput": false
193
193
  },
194
194
  {
195
- "nodeType": "WhisperNode",
196
- "description": "Speech-to-text transcription using Whisper",
197
- "category": "ml",
195
+ "nodeType": "SileroVAD",
196
+ "description": "Voice Activity Detection using Silero VAD model",
197
+ "category": "audio",
198
198
  "accepts": [
199
199
  "audio"
200
200
  ],
201
201
  "produces": [
202
- "text",
203
- "json"
202
+ "audio",
203
+ "controlmessage"
204
204
  ],
205
- "isPython": true,
205
+ "isPython": false,
206
+ "streaming": true,
207
+ "multiOutput": false
208
+ },
209
+ {
210
+ "nodeType": "TextCollector",
211
+ "description": "Collects text chunks into complete utterances",
212
+ "category": "text",
213
+ "accepts": [
214
+ "text"
215
+ ],
216
+ "produces": [
217
+ "text"
218
+ ],
219
+ "isPython": false,
206
220
  "streaming": true,
207
221
  "multiOutput": false
208
222
  },
@@ -220,6 +234,20 @@ export const nodeSchemas: NodeMetadata[] = [
220
234
  "streaming": true,
221
235
  "multiOutput": true
222
236
  },
237
+ {
238
+ "nodeType": "VideoFlip",
239
+ "description": "Flips video frames horizontally or vertically",
240
+ "category": "video",
241
+ "accepts": [
242
+ "video"
243
+ ],
244
+ "produces": [
245
+ "video"
246
+ ],
247
+ "isPython": false,
248
+ "streaming": true,
249
+ "multiOutput": false
250
+ },
223
251
  {
224
252
  "nodeType": "PassThrough",
225
253
  "description": "Passes input through unchanged",
@@ -249,14 +277,14 @@ export const nodeSchemas: NodeMetadata[] = [
249
277
  "multiOutput": false
250
278
  },
251
279
  {
252
- "nodeType": "TextCollector",
253
- "description": "Collects text chunks into complete utterances",
254
- "category": "text",
280
+ "nodeType": "CalculatorNode",
281
+ "description": "Performs arithmetic operations on JSON input",
282
+ "category": "utility",
255
283
  "accepts": [
256
- "text"
284
+ "json"
257
285
  ],
258
286
  "produces": [
259
- "text"
287
+ "json"
260
288
  ],
261
289
  "isPython": false,
262
290
  "streaming": true,
@@ -291,31 +319,17 @@ export const nodeSchemas: NodeMetadata[] = [
291
319
  "multiOutput": false
292
320
  },
293
321
  {
294
- "nodeType": "SileroVAD",
295
- "description": "Voice Activity Detection using Silero VAD model",
296
- "category": "audio",
297
- "accepts": [
298
- "audio"
299
- ],
300
- "produces": [
301
- "audio",
302
- "controlmessage"
303
- ],
304
- "isPython": false,
305
- "streaming": true,
306
- "multiOutput": false
307
- },
308
- {
309
- "nodeType": "AudioResample",
310
- "description": "Resamples audio to target sample rate",
311
- "category": "audio",
322
+ "nodeType": "WhisperNode",
323
+ "description": "Speech-to-text transcription using Whisper",
324
+ "category": "ml",
312
325
  "accepts": [
313
326
  "audio"
314
327
  ],
315
328
  "produces": [
316
- "audio"
329
+ "text",
330
+ "json"
317
331
  ],
318
- "isPython": false,
332
+ "isPython": true,
319
333
  "streaming": true,
320
334
  "multiOutput": false
321
335
  },
@@ -332,20 +346,6 @@ export const nodeSchemas: NodeMetadata[] = [
332
346
  "isPython": true,
333
347
  "streaming": true,
334
348
  "multiOutput": true
335
- },
336
- {
337
- "nodeType": "VideoFlip",
338
- "description": "Flips video frames horizontally or vertically",
339
- "category": "video",
340
- "accepts": [
341
- "video"
342
- ],
343
- "produces": [
344
- "video"
345
- ],
346
- "isPython": false,
347
- "streaming": true,
348
- "multiOutput": false
349
349
  }
350
350
  ];
351
351
 
@@ -398,40 +398,59 @@ export interface PipelineConnection {
398
398
  }
399
399
 
400
400
  /**
401
- * Performs arithmetic operations on JSON input
401
+ * Resamples audio to target sample rate
402
402
  *
403
403
  * @example
404
404
  * ```typescript
405
- * const node = new CalculatorNode('my-calculatornode', { });
405
+ * const node = new AudioResample('my-audioresample', { });
406
406
  * pipeline.addNode(node);
407
407
  * ```
408
408
  */
409
- export class CalculatorNode extends NodeBuilder<'CalculatorNode', CalculatorNodeConfig> {
410
- static readonly nodeType = 'CalculatorNode' as const;
411
- static readonly accepts: RuntimeDataType[] = ['json'];
412
- static readonly produces: RuntimeDataType[] = ['json'];
409
+ export class AudioResample extends NodeBuilder<'AudioResample', AudioResampleConfig> {
410
+ static readonly nodeType = 'AudioResample' as const;
411
+ static readonly accepts: RuntimeDataType[] = ['audio'];
412
+ static readonly produces: RuntimeDataType[] = ['audio'];
413
413
 
414
- constructor(id: string, config?: CalculatorNodeConfig) {
415
- super(id, 'CalculatorNode', config);
414
+ constructor(id: string, config?: AudioResampleConfig) {
415
+ super(id, 'AudioResample', config);
416
416
  }
417
417
  }
418
418
 
419
419
  /**
420
- * Speech-to-text transcription using Whisper
420
+ * Voice Activity Detection using Silero VAD model
421
421
  *
422
422
  * @example
423
423
  * ```typescript
424
- * const node = new WhisperNode('my-whispernode', { });
424
+ * const node = new SileroVAD('my-silerovad', { });
425
425
  * pipeline.addNode(node);
426
426
  * ```
427
427
  */
428
- export class WhisperNode extends NodeBuilder<'WhisperNode', WhisperNodeConfig> {
429
- static readonly nodeType = 'WhisperNode' as const;
428
+ export class SileroVAD extends NodeBuilder<'SileroVAD', SileroVADConfig> {
429
+ static readonly nodeType = 'SileroVAD' as const;
430
430
  static readonly accepts: RuntimeDataType[] = ['audio'];
431
- static readonly produces: RuntimeDataType[] = ['text', 'json'];
431
+ static readonly produces: RuntimeDataType[] = ['audio', 'controlmessage'];
432
432
 
433
- constructor(id: string, config?: WhisperNodeConfig) {
434
- super(id, 'WhisperNode', config);
433
+ constructor(id: string, config?: SileroVADConfig) {
434
+ super(id, 'SileroVAD', config);
435
+ }
436
+ }
437
+
438
+ /**
439
+ * Collects text chunks into complete utterances
440
+ *
441
+ * @example
442
+ * ```typescript
443
+ * const node = new TextCollector('my-textcollector', { });
444
+ * pipeline.addNode(node);
445
+ * ```
446
+ */
447
+ export class TextCollector extends NodeBuilder<'TextCollector', TextCollectorConfig> {
448
+ static readonly nodeType = 'TextCollector' as const;
449
+ static readonly accepts: RuntimeDataType[] = ['text'];
450
+ static readonly produces: RuntimeDataType[] = ['text'];
451
+
452
+ constructor(id: string, config?: TextCollectorConfig) {
453
+ super(id, 'TextCollector', config);
435
454
  }
436
455
  }
437
456
 
@@ -454,6 +473,25 @@ export class AudioChunker extends NodeBuilder<'AudioChunker', AudioChunkerConfig
454
473
  }
455
474
  }
456
475
 
476
+ /**
477
+ * Flips video frames horizontally or vertically
478
+ *
479
+ * @example
480
+ * ```typescript
481
+ * const node = new VideoFlip('my-videoflip', { });
482
+ * pipeline.addNode(node);
483
+ * ```
484
+ */
485
+ export class VideoFlip extends NodeBuilder<'VideoFlip', VideoFlipConfig> {
486
+ static readonly nodeType = 'VideoFlip' as const;
487
+ static readonly accepts: RuntimeDataType[] = ['video'];
488
+ static readonly produces: RuntimeDataType[] = ['video'];
489
+
490
+ constructor(id: string, config?: VideoFlipConfig) {
491
+ super(id, 'VideoFlip', config);
492
+ }
493
+ }
494
+
457
495
  /**
458
496
  * Passes input through unchanged
459
497
  *
@@ -474,21 +512,21 @@ export class PassThrough extends NodeBuilder<'PassThrough', Record<string, unkno
474
512
  }
475
513
 
476
514
  /**
477
- * Collects text chunks into complete utterances
515
+ * Performs arithmetic operations on JSON input
478
516
  *
479
517
  * @example
480
518
  * ```typescript
481
- * const node = new TextCollector('my-textcollector', { });
519
+ * const node = new CalculatorNode('my-calculatornode', { });
482
520
  * pipeline.addNode(node);
483
521
  * ```
484
522
  */
485
- export class TextCollector extends NodeBuilder<'TextCollector', TextCollectorConfig> {
486
- static readonly nodeType = 'TextCollector' as const;
487
- static readonly accepts: RuntimeDataType[] = ['text'];
488
- static readonly produces: RuntimeDataType[] = ['text'];
523
+ export class CalculatorNode extends NodeBuilder<'CalculatorNode', CalculatorNodeConfig> {
524
+ static readonly nodeType = 'CalculatorNode' as const;
525
+ static readonly accepts: RuntimeDataType[] = ['json'];
526
+ static readonly produces: RuntimeDataType[] = ['json'];
489
527
 
490
- constructor(id: string, config?: TextCollectorConfig) {
491
- super(id, 'TextCollector', config);
528
+ constructor(id: string, config?: CalculatorNodeConfig) {
529
+ super(id, 'CalculatorNode', config);
492
530
  }
493
531
  }
494
532
 
@@ -512,40 +550,21 @@ export class Echo extends NodeBuilder<'Echo', Record<string, unknown>> {
512
550
  }
513
551
 
514
552
  /**
515
- * Voice Activity Detection using Silero VAD model
516
- *
517
- * @example
518
- * ```typescript
519
- * const node = new SileroVAD('my-silerovad', { });
520
- * pipeline.addNode(node);
521
- * ```
522
- */
523
- export class SileroVAD extends NodeBuilder<'SileroVAD', SileroVADConfig> {
524
- static readonly nodeType = 'SileroVAD' as const;
525
- static readonly accepts: RuntimeDataType[] = ['audio'];
526
- static readonly produces: RuntimeDataType[] = ['audio', 'controlmessage'];
527
-
528
- constructor(id: string, config?: SileroVADConfig) {
529
- super(id, 'SileroVAD', config);
530
- }
531
- }
532
-
533
- /**
534
- * Resamples audio to target sample rate
553
+ * Speech-to-text transcription using Whisper
535
554
  *
536
555
  * @example
537
556
  * ```typescript
538
- * const node = new AudioResample('my-audioresample', { });
557
+ * const node = new WhisperNode('my-whispernode', { });
539
558
  * pipeline.addNode(node);
540
559
  * ```
541
560
  */
542
- export class AudioResample extends NodeBuilder<'AudioResample', AudioResampleConfig> {
543
- static readonly nodeType = 'AudioResample' as const;
561
+ export class WhisperNode extends NodeBuilder<'WhisperNode', WhisperNodeConfig> {
562
+ static readonly nodeType = 'WhisperNode' as const;
544
563
  static readonly accepts: RuntimeDataType[] = ['audio'];
545
- static readonly produces: RuntimeDataType[] = ['audio'];
564
+ static readonly produces: RuntimeDataType[] = ['text', 'json'];
546
565
 
547
- constructor(id: string, config?: AudioResampleConfig) {
548
- super(id, 'AudioResample', config);
566
+ constructor(id: string, config?: WhisperNodeConfig) {
567
+ super(id, 'WhisperNode', config);
549
568
  }
550
569
  }
551
570
 
@@ -568,37 +587,18 @@ export class KokoroTTSNode extends NodeBuilder<'KokoroTTSNode', KokoroTTSNodeCon
568
587
  }
569
588
  }
570
589
 
571
- /**
572
- * Flips video frames horizontally or vertically
573
- *
574
- * @example
575
- * ```typescript
576
- * const node = new VideoFlip('my-videoflip', { });
577
- * pipeline.addNode(node);
578
- * ```
579
- */
580
- export class VideoFlip extends NodeBuilder<'VideoFlip', VideoFlipConfig> {
581
- static readonly nodeType = 'VideoFlip' as const;
582
- static readonly accepts: RuntimeDataType[] = ['video'];
583
- static readonly produces: RuntimeDataType[] = ['video'];
584
-
585
- constructor(id: string, config?: VideoFlipConfig) {
586
- super(id, 'VideoFlip', config);
587
- }
588
- }
589
-
590
590
  /** Namespace containing all node builder classes */
591
591
  export const Nodes = {
592
- CalculatorNode,
593
- WhisperNode,
592
+ AudioResample,
593
+ SileroVAD,
594
+ TextCollector,
594
595
  AudioChunker,
596
+ VideoFlip,
595
597
  PassThrough,
596
- TextCollector,
598
+ CalculatorNode,
597
599
  Echo,
598
- SileroVAD,
599
- AudioResample,
600
- KokoroTTSNode,
601
- VideoFlip
600
+ WhisperNode,
601
+ KokoroTTSNode
602
602
  } as const;
603
603
 
604
604
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@matbee/remotemedia-native",
3
- "version": "0.1.0",
3
+ "version": "0.1.1",
4
4
  "description": "Native Node.js bindings for RemoteMedia zero-copy IPC with iceoryx2 shared memory",
5
5
  "main": "index.js",
6
6
  "types": "index.d.ts",
@@ -59,12 +59,16 @@
59
59
  "scripts": {
60
60
  "build": "../scripts/build-npm.sh --release",
61
61
  "build:debug": "../scripts/build-npm.sh --debug",
62
- "build:cargo": "cd ../../.. && cargo build --release --features napi --no-default-features -p remotemedia-ffi && cp target/release/libremotemedia_ffi.so target/release/remotemedia_native.node || cp target/release/libremotemedia_ffi.dylib target/release/remotemedia_native.node 2>/dev/null || true",
63
- "build:cargo:debug": "cd ../../.. && rm -f target/release/remotemedia_native.node && cargo build --features napi --no-default-features -p remotemedia-ffi && cp target/debug/libremotemedia_ffi.so target/debug/remotemedia_native.node || cp target/debug/libremotemedia_ffi.dylib target/debug/remotemedia_native.node 2>/dev/null || true",
64
- "build:napi": "napi build --platform --release",
65
- "build:napi:debug": "napi build --platform",
66
- "build:all": "../scripts/build-npm.sh --release --all",
67
62
  "build:webrtc": "../scripts/build-npm.sh --release --webrtc",
63
+ "build:webrtc:debug": "../scripts/build-npm.sh --debug --webrtc",
64
+ "build:all": "../scripts/build-npm.sh --release --all",
65
+ "build:all:webrtc": "../scripts/build-npm.sh --release --all --webrtc",
66
+ "build:cargo": "cd ../../.. && cargo build --release -p remotemedia-ffi --features napi --no-default-features && npm run copy:native",
67
+ "build:cargo:debug": "cd ../../.. && cargo build -p remotemedia-ffi --features napi --no-default-features && npm run copy:native:debug",
68
+ "build:cargo:webrtc": "cd ../../.. && cargo build --release -p remotemedia-ffi --features napi,webrtc --no-default-features && npm run copy:native",
69
+ "build:cargo:webrtc:debug": "cd ../../.. && cargo build -p remotemedia-ffi --features napi,webrtc --no-default-features && npm run copy:native:debug",
70
+ "copy:native": "node -e \"const fs=require('fs'),p=require('path'),t=process.platform==='darwin'?'dylib':'so',s=p.join(__dirname,'..','..','..','target','release',`libremotemedia_ffi.${t}`),d=p.join(__dirname,`remotemedia-native.${process.arch==='arm64'?'aarch64':'x86_64'}-${process.platform==='darwin'?'apple-darwin':'unknown-linux-gnu'}.node`);fs.existsSync(s)&&fs.copyFileSync(s,d)&&console.log('Copied to',p.basename(d))\"",
71
+ "copy:native:debug": "node -e \"const fs=require('fs'),p=require('path'),t=process.platform==='darwin'?'dylib':'so',s=p.join(__dirname,'..','..','..','target','debug',`libremotemedia_ffi.${t}`),d=p.join(__dirname,`remotemedia-native.${process.arch==='arm64'?'aarch64':'x86_64'}-${process.platform==='darwin'?'apple-darwin':'unknown-linux-gnu'}.node`);fs.existsSync(s)&&fs.copyFileSync(s,d)&&console.log('Copied to',p.basename(d))\"",
68
72
  "generate-types": "node scripts/generate-types.js",
69
73
  "postbuild": "npm run generate-types || true",
70
74
  "prepublishOnly": "test -n \"$(ls *.node 2>/dev/null)\" || (echo 'Error: No .node binaries found. Run npm run build first.' && exit 1)",
@@ -76,6 +80,7 @@
76
80
  "test:session": "jest session.test.ts",
77
81
  "test:webrtc": "jest webrtc.test.ts",
78
82
  "test:webrtc-pipeline": "jest webrtc-pipeline.test.ts",
83
+ "test:e2e": "node __tests__/e2e/run-e2e-server.js",
79
84
  "benchmark": "node benchmark-zero-copy.js",
80
85
  "clean": "rm -rf *.node node_modules"
81
86
  },