@matbee/remotemedia-native 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,689 @@
1
+ // Auto-generated by RemoteMedia SDK - DO NOT EDIT
2
+ // Run `npm run generate-types` to regenerate
3
+
4
/**
 * String tags naming the kinds of runtime data (the generator notes that these
 * match a Rust enum).
 *
 * Both `'control'` and `'controlmessage'` are members: the `RuntimeData` union
 * in this module tags control payloads as `'control'`, while the
 * `accepts`/`produces` lists in `nodeSchemas` use `'controlmessage'`.
 */
export type RuntimeDataType =
  | 'audio'
  | 'video'
  | 'json'
  | 'text'
  | 'binary'
  | 'tensor'
  | 'numpy'
  | 'control'
  | 'controlmessage';
6
+
7
/** Payload of an `'audio'` runtime data item. */
export interface AudioData {
  /** Audio samples as 32-bit floats. */
  samples: Float32Array;
  /** Sample rate of `samples` (presumably in Hz — confirm against the runtime). */
  sampleRate: number;
  /** Number of audio channels. */
  channels: number;
  /** Optional identifier of the stream the samples belong to. */
  streamId?: string;
}
13
+
14
/** Payload of a `'video'` runtime data item: a single frame plus its description. */
export interface VideoData {
  /** Raw bytes of the frame. */
  pixelData: Uint8Array;
  /** Frame width. */
  width: number;
  /** Frame height. */
  height: number;
  /** Pixel layout of `pixelData`. */
  format: 'yuv420p' | 'rgb24' | 'rgba32' | 'gray8';
  /** Optional codec tag for the frame bytes. */
  codec?: 'raw' | 'h264' | 'vp8' | 'vp9' | 'av1';
  /** Optional frame number. */
  frameNumber?: number;
  /** Optional keyframe flag. */
  isKeyframe?: boolean;
}
23
+
24
/** Payload of a `'tensor'` runtime data item. */
export interface TensorData {
  /** Tensor contents as raw bytes. */
  data: Uint8Array;
  /** Size of each dimension. */
  shape: number[];
  /** Element type tag for the bytes in `data`. */
  dtype: 'f32' | 'f16' | 'i32' | 'i8' | 'u8';
}
29
+
30
/** Payload of a `'numpy'` runtime data item. */
export interface NumpyArray {
  /** Array contents as raw bytes. */
  data: Uint8Array;
  /** Size of each dimension. */
  shape: number[];
  /** Element type as a free-form string (not restricted to a fixed set here). */
  dtype: string;
  /** Per-dimension strides (presumably byte strides as in NumPy — confirm). */
  strides: number[];
}
36
+
37
/** Payload of a `'control'` runtime data item. */
export interface ControlMessage {
  /** Kind of control signal. */
  type: 'start' | 'stop' | 'cancel' | 'flush' | 'config_update' | 'custom';
  /** Optional timestamp (unit not specified in this module). */
  timestamp?: number;
  /** Optional identifier of the segment the message refers to. */
  segmentId?: string;
  /** Optional free-form key/value data attached to the message. */
  metadata?: Record<string, unknown>;
  /** Optional start/end pair describing a range to cancel. */
  cancelRange?: { start: number; end: number };
}
44
+
45
/**
 * A runtime data item: a discriminated union keyed on `type`, where `data`
 * holds the payload that goes with that tag.
 *
 * Checking `type` narrows `data` to the matching payload type.
 */
export type RuntimeData =
  | { type: 'audio'; data: AudioData }
  | { type: 'video'; data: VideoData }
  | { type: 'json'; data: Record<string, unknown> }
  | { type: 'text'; data: string }
  | { type: 'binary'; data: Uint8Array }
  | { type: 'tensor'; data: TensorData }
  | { type: 'numpy'; data: NumpyArray }
  | { type: 'control'; data: ControlMessage };
54
+
55
/** Configuration for `CalculatorNode` (performs arithmetic operations on JSON input). */
export interface CalculatorNodeConfig {
  /** Decimal precision applied to results. */
  precision?: number;
}
60
+
61
/** Configuration for `WhisperNode` (speech-to-text transcription using Whisper). */
export interface WhisperNodeConfig {
  /**
   * Language code, or `null` for auto-detect.
   *
   * `null` is part of the type so that the documented auto-detect value can be
   * written explicitly under `strictNullChecks`.
   */
  language?: string | null;
  /** Whisper model size. */
  model?: 'tiny' | 'base' | 'small' | 'medium' | 'large' | 'large-v3';
  /** Task type. */
  task?: 'transcribe' | 'translate';
}
70
+
71
/** Configuration for `AudioChunker` (splits audio into fixed-size chunks). */
export interface AudioChunkerConfig {
  /** Duration of each chunk, in milliseconds. */
  chunk_size_ms?: number;
}
76
+
77
/** Configuration for `TextCollector` (collects text chunks into complete utterances). */
export interface TextCollectorConfig {
  /** Delimiter to split on. */
  delimiter?: string;
  /** Whether the buffer is flushed when silence is detected. */
  flush_on_silence?: boolean;
}
84
+
85
/** Configuration for `SileroVAD` (voice activity detection using the Silero VAD model). */
export interface SileroVADConfig {
  /** Minimum silence duration, in milliseconds. */
  min_silence_duration_ms?: number;
  /** Minimum speech duration, in milliseconds. */
  min_speech_duration_ms?: number;
  /** Speech probability threshold. */
  threshold?: number;
}
94
+
95
/** Configuration for `AudioResample` (resamples audio to a target sample rate). */
export interface AudioResampleConfig {
  /** Sample rate to resample to, in Hz. */
  target_sample_rate?: number;
}
100
+
101
/** Configuration for `KokoroTTSNode` (text-to-speech synthesis using Kokoro TTS). */
export interface KokoroTTSNodeConfig {
  /** Language code. */
  language?:
    | 'en-us'
    | 'en-gb'
    | 'es'
    | 'fr'
    | 'de'
    | 'it'
    | 'ja'
    | 'ko'
    | 'pt-br'
    | 'zh';
  /** Speech speed multiplier. */
  speed?: number;
  /** Voice ID to use. */
  voice?:
    | 'af_bella'
    | 'af_nicole'
    | 'af_sarah'
    | 'af_sky'
    | 'am_adam'
    | 'am_michael'
    | 'bf_emma'
    | 'bf_isabella'
    | 'bm_george'
    | 'bm_lewis';
}
110
+
111
/** Configuration for `VideoFlip` (flips video frames horizontally or vertically). */
export interface VideoFlipConfig {
  /** Flip frames horizontally. */
  horizontal?: boolean;
  /** Flip frames vertically. */
  vertical?: boolean;
}
118
+
119
/** Registry metadata describing one node type. */
export interface NodeMetadata {
  /** Name of the node type. */
  nodeType: string;
  /** Optional human-readable summary of what the node does. */
  description?: string;
  /** Optional grouping label. */
  category?: string;
  /** Runtime data types the node takes as input. */
  accepts: RuntimeDataType[];
  /** Runtime data types the node can emit. */
  produces: RuntimeDataType[];
  /** Flag marking the node as a Python node. */
  isPython: boolean;
  /** Flag marking the node as streaming. */
  streaming: boolean;
  /** Flag marking the node as multi-output. */
  multiOutput: boolean;
}
130
+
131
/**
 * Names of all registered node types.
 *
 * Each member is also a key of `NodeConfigMap` and a `nodeType` value in
 * `nodeSchemas`.
 */
export type NodeType =
  | 'CalculatorNode'
  | 'WhisperNode'
  | 'AudioChunker'
  | 'PassThrough'
  | 'TextCollector'
  | 'Echo'
  | 'SileroVAD'
  | 'AudioResample'
  | 'KokoroTTSNode'
  | 'VideoFlip';
143
+
144
/**
 * Maps each node type name to the config interface used for that node.
 *
 * `PassThrough` and `Echo` have no dedicated config interface and map to a
 * generic string-keyed record.
 */
export interface NodeConfigMap {
  CalculatorNode: CalculatorNodeConfig;
  WhisperNode: WhisperNodeConfig;
  AudioChunker: AudioChunkerConfig;
  PassThrough: Record<string, unknown>;
  TextCollector: TextCollectorConfig;
  Echo: Record<string, unknown>;
  SileroVAD: SileroVADConfig;
  AudioResample: AudioResampleConfig;
  KokoroTTSNode: KokoroTTSNodeConfig;
  VideoFlip: VideoFlipConfig;
}
157
+
158
/**
 * One node entry of a pipeline manifest, with `config` typed from `nodeType`.
 *
 * @typeParam T - Node type literal; selects the config type via `NodeConfigMap`.
 */
export interface PipelineNode<T extends NodeType = NodeType> {
  /** Identifier of the node; connections refer to nodes by this value. */
  id: string;
  /** Which registered node type this entry instantiates. */
  nodeType: T;
  /**
   * Optional node configuration: the `NodeConfigMap` entry for `T`, falling
   * back to a generic record if `T` is not a key of the map.
   */
  config?: T extends keyof NodeConfigMap ? NodeConfigMap[T] : Record<string, unknown>;
}
164
+
165
/** Complete description of a pipeline: its version, metadata, nodes, and wiring. */
export interface PipelineManifest {
  /** Manifest version string. */
  version: string;
  /** Optional metadata; `name` and `description` are typed, other keys are free-form. */
  metadata?: {
    name?: string;
    description?: string;
    [key: string]: unknown;
  };
  /** Node entries that make up the pipeline. */
  nodes: PipelineNode[];
  /** Connections between nodes, each naming a source and a destination node id. */
  connections: Array<{
    /** Id of the node the connection starts from. */
    source: string;
    /** Optional port name on the source node. */
    sourcePort?: string;
    /** Id of the node the connection ends at. */
    destination: string;
    /** Optional port name on the destination node. */
    destinationPort?: string;
  }>;
}
177
+
178
/**
 * Metadata for every registered node type, exposed as a value for runtime
 * introspection.
 *
 * One entry per member of `NodeType`, in the same order. Every `accepts` and
 * `produces` list is its own array instance.
 */
export const nodeSchemas: NodeMetadata[] = [
  {
    nodeType: 'CalculatorNode',
    description: 'Performs arithmetic operations on JSON input',
    category: 'utility',
    accepts: ['json'],
    produces: ['json'],
    isPython: false,
    streaming: true,
    multiOutput: false,
  },
  {
    nodeType: 'WhisperNode',
    description: 'Speech-to-text transcription using Whisper',
    category: 'ml',
    accepts: ['audio'],
    produces: ['text', 'json'],
    isPython: true,
    streaming: true,
    multiOutput: false,
  },
  {
    nodeType: 'AudioChunker',
    description: 'Splits audio into fixed-size chunks',
    category: 'audio',
    accepts: ['audio'],
    produces: ['audio'],
    isPython: false,
    streaming: true,
    multiOutput: true,
  },
  {
    nodeType: 'PassThrough',
    description: 'Passes input through unchanged',
    category: 'utility',
    accepts: ['audio', 'video', 'json', 'text', 'binary', 'tensor', 'numpy', 'controlmessage'],
    produces: ['audio', 'video', 'json', 'text', 'binary', 'tensor', 'numpy', 'controlmessage'],
    isPython: false,
    streaming: true,
    multiOutput: false,
  },
  {
    nodeType: 'TextCollector',
    description: 'Collects text chunks into complete utterances',
    category: 'text',
    accepts: ['text'],
    produces: ['text'],
    isPython: false,
    streaming: true,
    multiOutput: false,
  },
  {
    nodeType: 'Echo',
    description: 'Passes input through unchanged (for testing)',
    category: 'utility',
    accepts: ['audio', 'video', 'json', 'text', 'binary', 'tensor', 'numpy', 'controlmessage'],
    produces: ['audio', 'video', 'json', 'text', 'binary', 'tensor', 'numpy', 'controlmessage'],
    isPython: false,
    streaming: true,
    multiOutput: false,
  },
  {
    nodeType: 'SileroVAD',
    description: 'Voice Activity Detection using Silero VAD model',
    category: 'audio',
    accepts: ['audio'],
    produces: ['audio', 'controlmessage'],
    isPython: false,
    streaming: true,
    multiOutput: false,
  },
  {
    nodeType: 'AudioResample',
    description: 'Resamples audio to target sample rate',
    category: 'audio',
    accepts: ['audio'],
    produces: ['audio'],
    isPython: false,
    streaming: true,
    multiOutput: false,
  },
  {
    nodeType: 'KokoroTTSNode',
    description: 'Text-to-speech synthesis using Kokoro TTS',
    category: 'ml',
    accepts: ['text'],
    produces: ['audio'],
    isPython: true,
    streaming: true,
    multiOutput: true,
  },
  {
    nodeType: 'VideoFlip',
    description: 'Flips video frames horizontally or vertically',
    category: 'video',
    accepts: ['video'],
    produces: ['video'],
    isPython: false,
    streaming: true,
    multiOutput: false,
  },
];
351
+
352
+ // =============================================================================
353
+ // Node Builder Classes
354
+ // =============================================================================
355
+
356
/**
 * Common base for the typed node builders in this module.
 *
 * A builder holds a node's id, its node type and an optional config object. It
 * can convert itself into a `PipelineNode` or describe a connection from
 * itself to another node.
 *
 * @typeParam T - Node type literal carried by the builder.
 * @typeParam C - Shape of the config object held by the builder.
 */
export abstract class NodeBuilder<T extends NodeType = NodeType, C = unknown> {
  /**
   * @param id - Identifier of the node; used as the `source` of connections
   *   created by {@link NodeBuilder.connectTo}.
   * @param nodeType - Node type name stored on the builder.
   * @param config - Optional configuration object; stored as given (not copied).
   */
  constructor(
    readonly id: string,
    readonly nodeType: T,
    readonly config?: C,
  ) {}

  /**
   * Convert this builder into the `PipelineNode` shape used by manifests.
   *
   * @returns An object with this builder's `id`, `nodeType` and `config`
   *   (the same config reference; `undefined` when none was given).
   */
  toPipelineNode(): PipelineNode<T> {
    // C is not linked to T at the type level, so the stored config is asserted
    // to the config type that PipelineNode<T> declares.
    const config = this.config as T extends keyof NodeConfigMap ? NodeConfigMap[T] : Record<string, unknown>;
    return { id: this.id, nodeType: this.nodeType, config };
  }

  /**
   * Describe a connection from this node to another node.
   *
   * @param target - Destination builder, or the destination node id as a string.
   * @param sourcePort - Optional port name on this node.
   * @param destinationPort - Optional port name on the destination node.
   * @returns A connection whose `source` is this builder's id. Nothing is
   *   registered anywhere; the caller decides what to do with the result.
   */
  connectTo(target: NodeBuilder<NodeType, unknown> | string, sourcePort?: string, destinationPort?: string): PipelineConnection {
    const destination = typeof target === 'string' ? target : target.id;
    return { source: this.id, sourcePort, destination, destinationPort };
  }
}
391
+
392
/** A directed link from one pipeline node to another, identified by node ids. */
export interface PipelineConnection {
  /** Id of the node the connection starts from. */
  source: string;
  /** Optional port name on the source node. */
  sourcePort?: string;
  /** Id of the node the connection ends at. */
  destination: string;
  /** Optional port name on the destination node. */
  destinationPort?: string;
}
399
+
400
/**
 * Builder for `CalculatorNode` pipeline nodes (performs arithmetic operations
 * on JSON input).
 *
 * @example
 * ```typescript
 * const node = new CalculatorNode('my-calculatornode', { precision: 2 });
 * const manifest = new PipelineBuilder().add(node).build();
 * ```
 */
export class CalculatorNode extends NodeBuilder<'CalculatorNode', CalculatorNodeConfig> {
  /** Node type name; also handed to the base constructor. */
  static readonly nodeType = 'CalculatorNode' as const;
  /** Runtime data types this node accepts. */
  static readonly accepts: RuntimeDataType[] = ['json'];
  /** Runtime data types this node produces. */
  static readonly produces: RuntimeDataType[] = ['json'];

  /**
   * @param id - Identifier of the node within a pipeline.
   * @param config - Optional calculator configuration.
   */
  constructor(id: string, config?: CalculatorNodeConfig) {
    super(id, CalculatorNode.nodeType, config);
  }
}
418
+
419
/**
 * Builder for `WhisperNode` pipeline nodes (speech-to-text transcription using
 * Whisper).
 *
 * @example
 * ```typescript
 * const node = new WhisperNode('my-whispernode', { model: 'base' });
 * const manifest = new PipelineBuilder().add(node).build();
 * ```
 */
export class WhisperNode extends NodeBuilder<'WhisperNode', WhisperNodeConfig> {
  /** Node type name; also handed to the base constructor. */
  static readonly nodeType = 'WhisperNode' as const;
  /** Runtime data types this node accepts. */
  static readonly accepts: RuntimeDataType[] = ['audio'];
  /** Runtime data types this node produces. */
  static readonly produces: RuntimeDataType[] = ['text', 'json'];

  /**
   * @param id - Identifier of the node within a pipeline.
   * @param config - Optional Whisper configuration.
   */
  constructor(id: string, config?: WhisperNodeConfig) {
    super(id, WhisperNode.nodeType, config);
  }
}
437
+
438
/**
 * Builder for `AudioChunker` pipeline nodes (splits audio into fixed-size
 * chunks).
 *
 * @example
 * ```typescript
 * const node = new AudioChunker('my-audiochunker', { chunk_size_ms: 20 });
 * const manifest = new PipelineBuilder().add(node).build();
 * ```
 */
export class AudioChunker extends NodeBuilder<'AudioChunker', AudioChunkerConfig> {
  /** Node type name; also handed to the base constructor. */
  static readonly nodeType = 'AudioChunker' as const;
  /** Runtime data types this node accepts. */
  static readonly accepts: RuntimeDataType[] = ['audio'];
  /** Runtime data types this node produces. */
  static readonly produces: RuntimeDataType[] = ['audio'];

  /**
   * @param id - Identifier of the node within a pipeline.
   * @param config - Optional chunker configuration.
   */
  constructor(id: string, config?: AudioChunkerConfig) {
    super(id, AudioChunker.nodeType, config);
  }
}
456
+
457
/**
 * Builder for `PassThrough` pipeline nodes (passes input through unchanged).
 *
 * @example
 * ```typescript
 * const node = new PassThrough('my-passthrough');
 * const manifest = new PipelineBuilder().add(node).build();
 * ```
 */
export class PassThrough extends NodeBuilder<'PassThrough', Record<string, unknown>> {
  /** Node type name; also handed to the base constructor. */
  static readonly nodeType = 'PassThrough' as const;
  /** Runtime data types this node accepts. */
  static readonly accepts: RuntimeDataType[] = [
    'audio',
    'video',
    'json',
    'text',
    'binary',
    'tensor',
    'numpy',
    'controlmessage',
  ];
  /** Runtime data types this node produces (same tags as `accepts`, separate array). */
  static readonly produces: RuntimeDataType[] = [
    'audio',
    'video',
    'json',
    'text',
    'binary',
    'tensor',
    'numpy',
    'controlmessage',
  ];

  /**
   * @param id - Identifier of the node within a pipeline.
   * @param config - Optional free-form configuration record.
   */
  constructor(id: string, config?: Record<string, unknown>) {
    super(id, PassThrough.nodeType, config);
  }
}
475
+
476
/**
 * Builder for `TextCollector` pipeline nodes (collects text chunks into
 * complete utterances).
 *
 * @example
 * ```typescript
 * const node = new TextCollector('my-textcollector', { flush_on_silence: true });
 * const manifest = new PipelineBuilder().add(node).build();
 * ```
 */
export class TextCollector extends NodeBuilder<'TextCollector', TextCollectorConfig> {
  /** Node type name; also handed to the base constructor. */
  static readonly nodeType = 'TextCollector' as const;
  /** Runtime data types this node accepts. */
  static readonly accepts: RuntimeDataType[] = ['text'];
  /** Runtime data types this node produces. */
  static readonly produces: RuntimeDataType[] = ['text'];

  /**
   * @param id - Identifier of the node within a pipeline.
   * @param config - Optional collector configuration.
   */
  constructor(id: string, config?: TextCollectorConfig) {
    super(id, TextCollector.nodeType, config);
  }
}
494
+
495
/**
 * Builder for `Echo` pipeline nodes (passes input through unchanged; described
 * by the registry as being for testing).
 *
 * @example
 * ```typescript
 * const node = new Echo('my-echo');
 * const manifest = new PipelineBuilder().add(node).build();
 * ```
 */
export class Echo extends NodeBuilder<'Echo', Record<string, unknown>> {
  /** Node type name; also handed to the base constructor. */
  static readonly nodeType = 'Echo' as const;
  /** Runtime data types this node accepts. */
  static readonly accepts: RuntimeDataType[] = [
    'audio',
    'video',
    'json',
    'text',
    'binary',
    'tensor',
    'numpy',
    'controlmessage',
  ];
  /** Runtime data types this node produces (same tags as `accepts`, separate array). */
  static readonly produces: RuntimeDataType[] = [
    'audio',
    'video',
    'json',
    'text',
    'binary',
    'tensor',
    'numpy',
    'controlmessage',
  ];

  /**
   * @param id - Identifier of the node within a pipeline.
   * @param config - Optional free-form configuration record.
   */
  constructor(id: string, config?: Record<string, unknown>) {
    super(id, Echo.nodeType, config);
  }
}
513
+
514
/**
 * Builder for `SileroVAD` pipeline nodes (voice activity detection using the
 * Silero VAD model).
 *
 * @example
 * ```typescript
 * const node = new SileroVAD('my-silerovad', { threshold: 0.6 });
 * const manifest = new PipelineBuilder().add(node).build();
 * ```
 */
export class SileroVAD extends NodeBuilder<'SileroVAD', SileroVADConfig> {
  /** Node type name; also handed to the base constructor. */
  static readonly nodeType = 'SileroVAD' as const;
  /** Runtime data types this node accepts. */
  static readonly accepts: RuntimeDataType[] = ['audio'];
  /** Runtime data types this node produces. */
  static readonly produces: RuntimeDataType[] = ['audio', 'controlmessage'];

  /**
   * @param id - Identifier of the node within a pipeline.
   * @param config - Optional VAD configuration.
   */
  constructor(id: string, config?: SileroVADConfig) {
    super(id, SileroVAD.nodeType, config);
  }
}
532
+
533
/**
 * Builder for `AudioResample` pipeline nodes (resamples audio to a target
 * sample rate).
 *
 * @example
 * ```typescript
 * const node = new AudioResample('my-audioresample', { target_sample_rate: 16000 });
 * const manifest = new PipelineBuilder().add(node).build();
 * ```
 */
export class AudioResample extends NodeBuilder<'AudioResample', AudioResampleConfig> {
  /** Node type name; also handed to the base constructor. */
  static readonly nodeType = 'AudioResample' as const;
  /** Runtime data types this node accepts. */
  static readonly accepts: RuntimeDataType[] = ['audio'];
  /** Runtime data types this node produces. */
  static readonly produces: RuntimeDataType[] = ['audio'];

  /**
   * @param id - Identifier of the node within a pipeline.
   * @param config - Optional resampler configuration.
   */
  constructor(id: string, config?: AudioResampleConfig) {
    super(id, AudioResample.nodeType, config);
  }
}
551
+
552
/**
 * Builder for `KokoroTTSNode` pipeline nodes (text-to-speech synthesis using
 * Kokoro TTS).
 *
 * @example
 * ```typescript
 * const node = new KokoroTTSNode('my-kokorottsnode', { voice: 'af_bella' });
 * const manifest = new PipelineBuilder().add(node).build();
 * ```
 */
export class KokoroTTSNode extends NodeBuilder<'KokoroTTSNode', KokoroTTSNodeConfig> {
  /** Node type name; also handed to the base constructor. */
  static readonly nodeType = 'KokoroTTSNode' as const;
  /** Runtime data types this node accepts. */
  static readonly accepts: RuntimeDataType[] = ['text'];
  /** Runtime data types this node produces. */
  static readonly produces: RuntimeDataType[] = ['audio'];

  /**
   * @param id - Identifier of the node within a pipeline.
   * @param config - Optional text-to-speech configuration.
   */
  constructor(id: string, config?: KokoroTTSNodeConfig) {
    super(id, KokoroTTSNode.nodeType, config);
  }
}
570
+
571
/**
 * Builder for `VideoFlip` pipeline nodes (flips video frames horizontally or
 * vertically).
 *
 * @example
 * ```typescript
 * const node = new VideoFlip('my-videoflip', { horizontal: true });
 * const manifest = new PipelineBuilder().add(node).build();
 * ```
 */
export class VideoFlip extends NodeBuilder<'VideoFlip', VideoFlipConfig> {
  /** Node type name; also handed to the base constructor. */
  static readonly nodeType = 'VideoFlip' as const;
  /** Runtime data types this node accepts. */
  static readonly accepts: RuntimeDataType[] = ['video'];
  /** Runtime data types this node produces. */
  static readonly produces: RuntimeDataType[] = ['video'];

  /**
   * @param id - Identifier of the node within a pipeline.
   * @param config - Optional flip configuration.
   */
  constructor(id: string, config?: VideoFlipConfig) {
    super(id, VideoFlip.nodeType, config);
  }
}
589
+
590
/**
 * Lookup object exposing every node builder class under its own name, so a
 * builder can be reached as `Nodes.<ClassName>`.
 */
export const Nodes = {
  CalculatorNode: CalculatorNode,
  WhisperNode: WhisperNode,
  AudioChunker: AudioChunker,
  PassThrough: PassThrough,
  TextCollector: TextCollector,
  Echo: Echo,
  SileroVAD: SileroVAD,
  AudioResample: AudioResample,
  KokoroTTSNode: KokoroTTSNode,
  VideoFlip: VideoFlip,
} as const;
603
+
604
/**
 * Fluent builder that accumulates nodes, connections and metadata and turns
 * them into a `PipelineManifest`.
 *
 * Every mutating method returns `this`, so calls can be chained. The builder
 * performs no validation: node ids are not checked for uniqueness and
 * connections are not checked against the nodes that were added.
 *
 * @example
 * ```typescript
 * const manifest = new PipelineBuilder('1.0')
 *   .name('My Pipeline')
 *   .add(new SileroVAD('vad', { threshold: 0.6 }))
 *   .add(new WhisperNode('whisper', { model: 'base' }))
 *   .connect('vad', 'whisper')
 *   .build();
 * ```
 */
export class PipelineBuilder {
  private version: string;
  private metadata: { name?: string; description?: string; [key: string]: unknown } = {};
  private nodes: PipelineNode[] = [];
  private connections: PipelineConnection[] = [];

  /**
   * @param version - Manifest version string; defaults to `'1.0'`.
   */
  constructor(version: string = '1.0') {
    this.version = version;
  }

  /**
   * Set the pipeline name (stored as `metadata.name`).
   *
   * @param name - Name to record.
   * @returns This builder, for chaining.
   */
  name(name: string): this {
    this.metadata.name = name;
    return this;
  }

  /**
   * Set the pipeline description (stored as `metadata.description`).
   *
   * @param description - Description to record.
   * @returns This builder, for chaining.
   */
  description(description: string): this {
    this.metadata.description = description;
    return this;
  }

  /**
   * Set an arbitrary metadata entry, overwriting any existing value for `key`.
   *
   * @param key - Metadata key.
   * @param value - Value to store under `key`.
   * @returns This builder, for chaining.
   */
  meta(key: string, value: unknown): this {
    this.metadata[key] = value;
    return this;
  }

  /**
   * Append a node, converting the builder via its `toPipelineNode()` method.
   *
   * @param node - Node builder to add.
   * @returns This builder, for chaining.
   */
  add(node: NodeBuilder<NodeType, unknown>): this {
    this.nodes.push(node.toPipelineNode());
    return this;
  }

  /**
   * Append an already-built node definition as given (the object is not copied).
   *
   * @param node - Node definition to add.
   * @returns This builder, for chaining.
   */
  addRaw<T extends NodeType>(node: PipelineNode<T>): this {
    this.nodes.push(node);
    return this;
  }

  /**
   * Record a connection between two nodes.
   *
   * @param source - Source node builder, or its id as a string.
   * @param destination - Destination node builder, or its id as a string.
   * @param sourcePort - Optional port name on the source node.
   * @param destinationPort - Optional port name on the destination node.
   * @returns This builder, for chaining.
   */
  connect(
    source: NodeBuilder<NodeType, unknown> | string,
    destination: NodeBuilder<NodeType, unknown> | string,
    sourcePort?: string,
    destinationPort?: string
  ): this {
    const sourceId = typeof source === 'string' ? source : source.id;
    const destId = typeof destination === 'string' ? destination : destination.id;
    this.connections.push({
      source: sourceId,
      sourcePort,
      destination: destId,
      destinationPort,
    });
    return this;
  }

  /**
   * Build the pipeline manifest from the current builder state.
   *
   * The returned `nodes`, `connections` and `metadata` containers are shallow
   * copies, so further calls on this builder do not alter a manifest that was
   * already returned. The node and connection objects inside them are shared.
   *
   * @returns The manifest; `metadata` is `undefined` when no metadata was set.
   */
  build(): PipelineManifest {
    const hasMetadata = Object.keys(this.metadata).length > 0;
    return {
      version: this.version,
      metadata: hasMetadata ? { ...this.metadata } : undefined,
      nodes: [...this.nodes],
      connections: [...this.connections],
    };
  }

  /**
   * Serialize the built manifest as JSON with two-space indentation.
   *
   * @returns The JSON text of `build()`.
   */
  toJson(): string {
    return JSON.stringify(this.build(), null, 2);
  }
}