@astropods/messaging 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -45,6 +45,9 @@ export interface AgentResponse {
45
45
  threadMetadata?: ThreadMetadata;
46
46
  error?: ErrorResponse;
47
47
  contextRequest?: ThreadHistoryRequest;
48
+ transcript?: Transcript;
49
+ audioConfig?: AudioStreamConfig;
50
+ audioChunk?: AudioChunk;
48
51
  }
49
52
  export interface StatusUpdate {
50
53
  status: 'THINKING' | 'SEARCHING' | 'GENERATING' | 'PROCESSING' | 'ANALYZING' | 'CUSTOM';
@@ -77,6 +80,11 @@ export interface ErrorResponse {
77
80
  details?: string;
78
81
  retryable?: boolean;
79
82
  }
83
+ export interface Transcript {
84
+ text: string;
85
+ messageId?: string;
86
+ language?: string;
87
+ }
80
88
  export interface ThreadHistoryRequest {
81
89
  conversationId: string;
82
90
  maxMessages?: number;
@@ -129,11 +137,68 @@ export interface AgentConfig {
129
137
  systemPrompt: string;
130
138
  tools: AgentToolConfig[];
131
139
  }
140
+ /**
141
+ * Supported audio encoding formats. Matches the AudioEncoding protobuf enum.
142
+ *
143
+ * Common sources:
144
+ * - LINEAR16: Universal PCM baseline (any platform)
145
+ * - MULAW: Twilio / telephony (G.711 mu-law, 8kHz)
146
+ * - WEBM_OPUS: Browser MediaRecorder default
147
+ * - AAC: iOS native recording
148
+ */
149
+ export type AudioEncoding = 'LINEAR16' | 'MULAW' | 'OPUS' | 'MP3' | 'WEBM_OPUS' | 'OGG_OPUS' | 'FLAC' | 'AAC';
150
+ /**
151
+ * Configuration sent at the start of an audio segment to describe the format.
152
+ * Maps to the AudioStreamConfig protobuf message.
153
+ */
154
+ export interface AudioStreamConfig {
155
+ encoding: AudioEncoding;
156
+ sampleRate: number;
157
+ channels: number;
158
+ language?: string;
159
+ conversationId: string;
160
+ source?: string;
161
+ }
162
+ /**
163
+ * A chunk of raw audio bytes. Maps to the AudioChunk protobuf message.
164
+ *
165
+ * Chunks arrive sequentially during a segment. When done=true, the segment
166
+ * is complete and the agent should run STT on the accumulated audio.
167
+ */
168
+ export interface AudioChunk {
169
+ data: Buffer | Uint8Array;
170
+ sequence?: number;
171
+ done?: boolean;
172
+ }
173
+ /**
174
+ * Maps an AudioEncoding to the filetype string expected by Mastra's voice.listen().
175
+ *
176
+ * Usage:
177
+ * const filetype = audioEncodingToFiletype(config.encoding);
178
+ * const transcript = await agent.voice.listen(audioStream, { filetype });
179
+ */
180
+ export declare function audioEncodingToFiletype(encoding: AudioEncoding): string;
132
181
  export interface ConversationRequest {
133
182
  message?: Message;
134
183
  feedback?: any;
135
184
  agentConfig?: AgentConfig;
136
185
  agentResponse?: AgentResponse;
186
+ audioConfig?: AudioStreamConfig;
187
+ audio?: AudioChunk;
188
+ }
189
+ export interface ReconnectOptions {
190
+ /** Maximum number of reconnect attempts. Default: Infinity */
191
+ maxRetries?: number;
192
+ /** Initial delay before first retry in ms. Default: 500 */
193
+ initialDelayMs?: number;
194
+ /** Maximum delay between retries in ms. Default: 30_000 */
195
+ maxDelayMs?: number;
196
+ /** Apply full jitter to backoff delay. Default: true */
197
+ jitter?: boolean;
198
+ /** Maximum number of writes to queue during reconnect. Default: 1000 */
199
+ maxBufferSize?: number;
200
+ /** gRPC status codes that trigger a reconnect attempt. Default: UNAVAILABLE, DEADLINE_EXCEEDED, INTERNAL, RESOURCE_EXHAUSTED */
201
+ retryableStatusCodes?: number[];
137
202
  }
138
203
  /**
139
204
  * MessagingClient provides a TypeScript interface to the Astro Messaging gRPC service
@@ -149,9 +214,14 @@ export declare class MessagingClient extends EventEmitter {
149
214
  */
150
215
  connect(): Promise<void>;
151
216
  /**
152
- * Create a bidirectional conversation stream
217
+ * Connect with automatic retry on failure (exponential backoff).
218
+ * Emits 'reconnecting' before each retry and 'reconnected' on success after failures.
153
219
  */
154
- createConversationStream(): ConversationStream;
220
+ connectWithRetry(options?: ReconnectOptions): Promise<void>;
221
+ /**
222
+ * Create a bidirectional conversation stream with optional reconnect support
223
+ */
224
+ createConversationStream(options?: ReconnectOptions): ConversationStream;
155
225
  /**
156
226
  * Process a single message (server-side streaming)
157
227
  */
@@ -176,11 +246,31 @@ export declare class MessagingClient extends EventEmitter {
176
246
  close(): void;
177
247
  }
178
248
  /**
179
- * ConversationStream wraps a bidirectional gRPC stream
249
+ * ConversationStream wraps a bidirectional gRPC stream with automatic reconnection.
250
+ *
251
+ * Events:
252
+ * - 'response' — AgentResponse received from server
253
+ * - 'reconnecting' — { attempt, reason, delayMs } — before each retry delay
254
+ * - 'reconnected' — { attempt } — after a successful stream recreation
255
+ * - 'error' — non-retryable error OR max retries exceeded
256
+ * - 'end' — only on intentional end(), not on unexpected stream drop
180
257
  */
181
258
  export declare class ConversationStream extends EventEmitter {
259
+ private streamFactory;
182
260
  private stream;
183
- constructor(stream: any);
261
+ private writeBuffer;
262
+ private reconnecting;
263
+ private closed;
264
+ private retryCount;
265
+ private readonly opts;
266
+ constructor(streamFactory: () => any, options?: ReconnectOptions);
267
+ private attachHandlers;
268
+ private isRetryable;
269
+ private calculateDelay;
270
+ private scheduleReconnect;
271
+ private doReconnect;
272
+ private flushBuffer;
273
+ private write;
184
274
  /**
185
275
  * Send a message through the stream
186
276
  */
@@ -206,7 +296,62 @@ export declare class ConversationStream extends EventEmitter {
206
296
  */
207
297
  sendStatusUpdate(conversationId: string, status: StatusUpdate): void;
208
298
  /**
209
- * End the stream
299
+ * Send a transcript of the user's audio input back to the platform.
300
+ *
301
+ * After the agent runs STT on the audio, it calls this to send the transcribed
302
+ * text back to the platform (web adapter). The platform uses it to replace the
303
+ * "[audio]" placeholder message with the actual spoken text in the chat UI.
304
+ *
305
+ * @param conversationId - The conversation this transcript belongs to
306
+ * @param text - The transcribed text from STT
307
+ * @param messageId - Optional: the original "[audio]" message ID to update
308
+ * @param language - Optional: BCP-47 language detected by STT (e.g. "en-US")
309
+ */
310
+ sendTranscript(conversationId: string, text: string, messageId?: string, language?: string): void;
311
+ /**
312
+ * Send an audio stream config through the bidi stream.
313
+ * Must be called before sendAudioChunk() so the receiver knows the encoding.
314
+ */
315
+ sendAudioConfig(config: AudioStreamConfig): void;
316
+ /**
317
+ * Send a raw audio chunk through the bidi stream.
318
+ * The chunk's sequence number should increase monotonically.
319
+ */
320
+ sendAudioChunk(chunk: AudioChunk): void;
321
+ /**
322
+ * Signal end of the current audio segment by sending an empty chunk with done=true.
323
+ * The receiver should process all accumulated audio (e.g. run STT).
324
+ * After this, more audio can follow — either new config or more chunks.
325
+ */
326
+ endAudio(): void;
327
+ /**
328
+ * Converts incoming audioChunk events into a Web Streams API ReadableStream.
329
+ *
330
+ * This is the primary integration point with Mastra's voice system. The agent
331
+ * listens for the 'audioConfig' event to know the format, then calls this
332
+ * method to get a stream it can pass directly to voice.listen():
333
+ *
334
+ * ```typescript
335
+ * conversation.on('audioConfig', async (config) => {
336
+ * const audioStream = conversation.audioAsReadable();
337
+ * const filetype = audioEncodingToFiletype(config.encoding);
338
+ * const transcript = await agent.voice.listen(audioStream, { filetype });
339
+ * // ... process transcript
340
+ * });
341
+ * ```
342
+ *
343
+ * The ReadableStream:
344
+ * - Yields Uint8Array chunks as audioChunk events arrive
345
+ * - Closes when an AudioChunk with done=true arrives (end of segment; any data carried on that final chunk is not enqueued)
346
+ * - Closes when the ConversationStream emits 'end' (intentional end() call)
347
+ * - Errors when the ConversationStream emits 'error'
348
+ * - Properly cleans up all event listeners on close, error, or cancel
349
+ *
350
+ * @returns A ReadableStream<Uint8Array> suitable for Mastra voice.listen()
351
+ */
352
+ audioAsReadable(): ReadableStream<Uint8Array>;
353
+ /**
354
+ * End the stream intentionally. Emits 'end' and prevents any further reconnects.
210
355
  */
211
356
  end(): void;
212
357
  }
@@ -34,10 +34,43 @@ var __importStar = (this && this.__importStar) || (function () {
34
34
  })();
35
35
  Object.defineProperty(exports, "__esModule", { value: true });
36
36
  exports.Helpers = exports.MessageStream = exports.ConversationStream = exports.MessagingClient = void 0;
37
+ exports.audioEncodingToFiletype = audioEncodingToFiletype;
37
38
  const grpc = __importStar(require("@grpc/grpc-js"));
38
39
  const protoLoader = __importStar(require("@grpc/proto-loader"));
39
40
  const path_1 = require("path");
40
41
  const events_1 = require("events");
42
+ /**
43
+ * Maps an AudioEncoding to the filetype string expected by Mastra's voice.listen().
44
+ *
45
+ * Usage:
46
+ * const filetype = audioEncodingToFiletype(config.encoding);
47
+ * const transcript = await agent.voice.listen(audioStream, { filetype });
48
+ */
49
+ function audioEncodingToFiletype(encoding) {
50
+ const map = {
51
+ LINEAR16: 'wav',
52
+ MULAW: 'wav',
53
+ OPUS: 'opus',
54
+ MP3: 'mp3',
55
+ WEBM_OPUS: 'webm',
56
+ OGG_OPUS: 'ogg',
57
+ FLAC: 'flac',
58
+ AAC: 'm4a',
59
+ };
60
+ return map[encoding] ?? 'wav';
61
+ }
62
+ // gRPC status codes: DEADLINE_EXCEEDED=4, INTERNAL=13, UNAVAILABLE=14, RESOURCE_EXHAUSTED=8
63
+ const DEFAULT_RETRYABLE_STATUS_CODES = [4, 8, 13, 14];
64
+ function resolveReconnectOptions(options) {
65
+ return {
66
+ maxRetries: options.maxRetries ?? Infinity,
67
+ initialDelayMs: options.initialDelayMs ?? 500,
68
+ maxDelayMs: options.maxDelayMs ?? 30000,
69
+ jitter: options.jitter ?? true,
70
+ maxBufferSize: options.maxBufferSize ?? 1000,
71
+ retryableStatusCodes: options.retryableStatusCodes ?? DEFAULT_RETRYABLE_STATUS_CODES,
72
+ };
73
+ }
41
74
  /**
42
75
  * MessagingClient provides a TypeScript interface to the Astro Messaging gRPC service
43
76
  */
@@ -45,6 +78,7 @@ class MessagingClient extends events_1.EventEmitter {
45
78
  constructor(serverAddress) {
46
79
  super();
47
80
  this.serverAddress = serverAddress;
81
+ this.conversationStream = null;
48
82
  this.isConnected = false;
49
83
  }
50
84
  /**
@@ -67,14 +101,42 @@ class MessagingClient extends events_1.EventEmitter {
67
101
  this.emit('connected');
68
102
  }
69
103
  /**
70
- * Create a bidirectional conversation stream
104
+ * Connect with automatic retry on failure (exponential backoff).
105
+ * Emits 'reconnecting' before each retry and 'reconnected' on success after failures.
106
+ */
107
+ async connectWithRetry(options = {}) {
108
+ const opts = resolveReconnectOptions(options);
109
+ let retryCount = 0;
110
+ while (true) {
111
+ try {
112
+ await this.connect();
113
+ if (retryCount > 0) {
114
+ this.emit('reconnected', { attempt: retryCount });
115
+ }
116
+ return;
117
+ }
118
+ catch (err) {
119
+ if (retryCount >= opts.maxRetries) {
120
+ throw err;
121
+ }
122
+ const base = Math.min(opts.initialDelayMs * Math.pow(2, retryCount), opts.maxDelayMs);
123
+ const delayMs = opts.jitter ? base * (0.5 + Math.random() * 0.5) : base;
124
+ this.emit('reconnecting', { attempt: retryCount + 1, reason: err, delayMs });
125
+ await new Promise(resolve => setTimeout(resolve, delayMs));
126
+ retryCount++;
127
+ }
128
+ }
129
+ }
130
+ /**
131
+ * Create a bidirectional conversation stream with optional reconnect support
71
132
  */
72
- createConversationStream() {
133
+ createConversationStream(options) {
73
134
  if (!this.isConnected) {
74
135
  throw new Error('Client not connected. Call connect() first.');
75
136
  }
76
- this.conversationStream = this.client.ProcessConversation();
77
- return new ConversationStream(this.conversationStream);
137
+ const factory = () => this.client.ProcessConversation();
138
+ this.conversationStream = new ConversationStream(factory, options);
139
+ return this.conversationStream;
78
140
  }
79
141
  /**
80
142
  * Process a single message (server-side streaming)
@@ -169,57 +231,133 @@ class MessagingClient extends events_1.EventEmitter {
169
231
  }
170
232
  exports.MessagingClient = MessagingClient;
171
233
  /**
172
- * ConversationStream wraps a bidirectional gRPC stream
234
+ * ConversationStream wraps a bidirectional gRPC stream with automatic reconnection.
235
+ *
236
+ * Events:
237
+ * - 'response' — AgentResponse received from server
238
+ * - 'reconnecting' — { attempt, reason, delayMs } — before each retry delay
239
+ * - 'reconnected' — { attempt } — after a successful stream recreation
240
+ * - 'error' — non-retryable error OR max retries exceeded
241
+ * - 'end' — only on intentional end(), not on unexpected stream drop
173
242
  */
174
243
  class ConversationStream extends events_1.EventEmitter {
175
- constructor(stream) {
244
+ constructor(streamFactory, options = {}) {
176
245
  super();
177
- this.stream = stream;
178
- this.stream.on('data', (response) => {
246
+ this.streamFactory = streamFactory;
247
+ this.writeBuffer = [];
248
+ this.reconnecting = false;
249
+ this.closed = false;
250
+ this.retryCount = 0;
251
+ this.opts = resolveReconnectOptions(options);
252
+ this.stream = this.streamFactory();
253
+ this.attachHandlers(this.stream);
254
+ }
255
+ attachHandlers(stream) {
256
+ stream.on('data', (response) => {
257
+ this.retryCount = 0;
258
+ // Emit audio-specific events if present.
259
+ // The server sends audio data through the bidi stream as AgentResponse
260
+ // messages with audioConfig or audioChunk payloads. We emit dedicated
261
+ // events for these so the agent can handle audio separately from text,
262
+ // while still emitting the generic 'response' event for observability.
263
+ if (response.audioConfig) {
264
+ this.emit('audioConfig', response.audioConfig);
265
+ }
266
+ else if (response.audioChunk) {
267
+ this.emit('audioChunk', response.audioChunk);
268
+ }
179
269
  this.emit('response', response);
180
270
  });
181
- this.stream.on('end', () => {
182
- this.emit('end');
271
+ stream.on('error', (error) => {
272
+ if (!this.closed && this.isRetryable(error)) {
273
+ this.scheduleReconnect(error);
274
+ }
275
+ else {
276
+ this.emit('error', error);
277
+ }
183
278
  });
184
- this.stream.on('error', (error) => {
185
- this.emit('error', error);
279
+ stream.on('end', () => {
280
+ if (!this.closed) {
281
+ this.scheduleReconnect(new Error('Stream ended unexpectedly'));
282
+ }
283
+ // If closed, 'end' was already emitted by end() — do nothing
186
284
  });
187
285
  }
286
+ isRetryable(error) {
287
+ return this.opts.retryableStatusCodes.includes(error.code);
288
+ }
289
+ calculateDelay() {
290
+ const base = Math.min(this.opts.initialDelayMs * Math.pow(2, this.retryCount), this.opts.maxDelayMs);
291
+ return this.opts.jitter ? base * (0.5 + Math.random() * 0.5) : base;
292
+ }
293
+ scheduleReconnect(reason) {
294
+ if (this.reconnecting || this.closed)
295
+ return;
296
+ if (this.retryCount >= this.opts.maxRetries) {
297
+ this.emit('error', new Error(`Max reconnection attempts (${this.opts.maxRetries}) exceeded`));
298
+ return;
299
+ }
300
+ const delayMs = this.calculateDelay();
301
+ this.reconnecting = true;
302
+ this.emit('reconnecting', { attempt: this.retryCount + 1, reason, delayMs });
303
+ setTimeout(() => this.doReconnect(), delayMs);
304
+ }
305
+ doReconnect() {
306
+ if (this.closed)
307
+ return;
308
+ this.retryCount++;
309
+ try {
310
+ this.stream = this.streamFactory();
311
+ this.attachHandlers(this.stream);
312
+ this.reconnecting = false;
313
+ this.emit('reconnected', { attempt: this.retryCount });
314
+ this.flushBuffer();
315
+ }
316
+ catch (err) {
317
+ this.reconnecting = false;
318
+ this.scheduleReconnect(err);
319
+ }
320
+ }
321
+ flushBuffer() {
322
+ const toFlush = this.writeBuffer.splice(0);
323
+ for (const request of toFlush) {
324
+ this.stream.write(request);
325
+ }
326
+ }
327
+ write(request) {
328
+ if (this.reconnecting || this.closed) {
329
+ if (this.writeBuffer.length >= this.opts.maxBufferSize) {
330
+ this.writeBuffer.shift(); // drop oldest
331
+ }
332
+ this.writeBuffer.push(request);
333
+ }
334
+ else {
335
+ this.stream.write(request);
336
+ }
337
+ }
188
338
  /**
189
339
  * Send a message through the stream
190
340
  */
191
341
  sendMessage(message) {
192
- const request = {
193
- message,
194
- };
195
- this.stream.write(request);
342
+ this.write({ message });
196
343
  }
197
344
  /**
198
345
  * Send platform feedback through the stream
199
346
  */
200
347
  sendFeedback(feedback) {
201
- const request = {
202
- feedback,
203
- };
204
- this.stream.write(request);
348
+ this.write({ feedback });
205
349
  }
206
350
  /**
207
351
  * Send agent configuration through the stream
208
352
  */
209
353
  sendAgentConfig(config) {
210
- const request = {
211
- agentConfig: config,
212
- };
213
- this.stream.write(request);
354
+ this.write({ agentConfig: config });
214
355
  }
215
356
  /**
216
357
  * Send a typed AgentResponse through the stream
217
358
  */
218
359
  sendAgentResponse(response) {
219
- const request = {
220
- agentResponse: response,
221
- };
222
- this.stream.write(request);
360
+ this.write({ agentResponse: response });
223
361
  }
224
362
  /**
225
363
  * Send a content chunk (START/DELTA/END) for a conversation
@@ -240,10 +378,138 @@ class ConversationStream extends events_1.EventEmitter {
240
378
  });
241
379
  }
242
380
  /**
243
- * End the stream
381
+ * Send a transcript of the user's audio input back to the platform.
382
+ *
383
+ * After the agent runs STT on the audio, it calls this to send the transcribed
384
+ * text back to the platform (web adapter). The platform uses it to replace the
385
+ * "[audio]" placeholder message with the actual spoken text in the chat UI.
386
+ *
387
+ * @param conversationId - The conversation this transcript belongs to
388
+ * @param text - The transcribed text from STT
389
+ * @param messageId - Optional: the original "[audio]" message ID to update
390
+ * @param language - Optional: BCP-47 language detected by STT (e.g. "en-US")
391
+ */
392
+ sendTranscript(conversationId, text, messageId, language) {
393
+ this.sendAgentResponse({
394
+ conversationId,
395
+ transcript: { text, messageId, language },
396
+ });
397
+ }
398
+ // --- Audio support ---
399
+ //
400
+ // These methods handle sending audio data through the gRPC bidi stream.
401
+ // Two directions:
402
+ // - Agent → Server (sendAudioConfig/sendAudioChunk/endAudio): used when the
403
+ // agent needs to forward audio upstream (less common)
404
+ // - Server → Agent (audioConfig/audioChunk events + audioAsReadable): the main
405
+ // path where the server forwards client mic audio to the agent for STT
406
+ /**
407
+ * Send an audio stream config through the bidi stream.
408
+ * Must be called before sendAudioChunk() so the receiver knows the encoding.
409
+ */
410
+ sendAudioConfig(config) {
411
+ this.write({ audioConfig: config });
412
+ }
413
+ /**
414
+ * Send a raw audio chunk through the bidi stream.
415
+ * The chunk's sequence number should increase monotonically.
416
+ */
417
+ sendAudioChunk(chunk) {
418
+ this.write({ audio: chunk });
419
+ }
420
+ /**
421
+ * Signal end of the current audio segment by sending an empty chunk with done=true.
422
+ * The receiver should process all accumulated audio (e.g. run STT).
423
+ * After this, more audio can follow — either new config or more chunks.
424
+ */
425
+ endAudio() {
426
+ this.write({ audio: { data: Buffer.alloc(0), done: true } });
427
+ }
428
+ /**
429
+ * Converts incoming audioChunk events into a Web Streams API ReadableStream.
430
+ *
431
+ * This is the primary integration point with Mastra's voice system. The agent
432
+ * listens for the 'audioConfig' event to know the format, then calls this
433
+ * method to get a stream it can pass directly to voice.listen():
434
+ *
435
+ * ```typescript
436
+ * conversation.on('audioConfig', async (config) => {
437
+ * const audioStream = conversation.audioAsReadable();
438
+ * const filetype = audioEncodingToFiletype(config.encoding);
439
+ * const transcript = await agent.voice.listen(audioStream, { filetype });
440
+ * // ... process transcript
441
+ * });
442
+ * ```
443
+ *
444
+ * The ReadableStream:
445
+ * - Yields Uint8Array chunks as audioChunk events arrive
446
+ * - Closes when an AudioChunk with done=true arrives (end of segment; any data carried on that final chunk is not enqueued)
447
+ * - Closes when the ConversationStream emits 'end' (intentional end() call)
448
+ * - Errors when the ConversationStream emits 'error'
449
+ * - Properly cleans up all event listeners on close, error, or cancel
450
+ *
451
+ * @returns A ReadableStream<Uint8Array> suitable for Mastra voice.listen()
452
+ */
453
+ audioAsReadable() {
454
+ // Centralized cleanup to prevent listener leaks. Called on:
455
+ // - done=true chunk (normal completion)
456
+ // - stream 'end' event (intentional end() call)
457
+ // - stream 'error' event
458
+ // - ReadableStream cancel() (consumer gave up, e.g. reader.cancel())
459
+ const cleanup = () => {
460
+ this.removeListener('audioChunk', onChunk);
461
+ this.removeListener('end', onEnd);
462
+ this.removeListener('error', onError);
463
+ };
464
+ const onChunk = (chunk) => {
465
+ if (chunk.done) {
466
+ cleanup();
467
+ try {
468
+ controller.close();
469
+ }
470
+ catch { }
471
+ }
472
+ else {
473
+ controller.enqueue(new Uint8Array(chunk.data));
474
+ }
475
+ };
476
+ const onEnd = () => {
477
+ cleanup();
478
+ try {
479
+ controller.close();
480
+ }
481
+ catch { }
482
+ };
483
+ const onError = (err) => {
484
+ cleanup();
485
+ try {
486
+ controller.error(err);
487
+ }
488
+ catch { }
489
+ };
490
+ let controller;
491
+ return new ReadableStream({
492
+ start: (ctrl) => {
493
+ controller = ctrl;
494
+ this.on('audioChunk', onChunk);
495
+ this.once('end', onEnd);
496
+ this.once('error', onError);
497
+ },
498
+ cancel: () => {
499
+ // Consumer cancelled (e.g. reader.cancel()) — remove all listeners
500
+ // to prevent memory leaks
501
+ cleanup();
502
+ },
503
+ });
504
+ }
505
+ /**
506
+ * End the stream intentionally. Emits 'end' and prevents any further reconnects.
244
507
  */
245
508
  end() {
509
+ this.closed = true;
510
+ this.writeBuffer = [];
246
511
  this.stream.end();
512
+ this.emit('end');
247
513
  }
248
514
  }
249
515
  exports.ConversationStream = ConversationStream;
@@ -0,0 +1,71 @@
1
+ // Audio streaming types for the messaging system.
2
+ //
3
+ // These types enable raw audio input from any frontend (browser, phone, mobile app)
4
+ // to be streamed through the messaging server to an agent. The messaging system is
5
+ // a pass-through — it does NOT perform speech-to-text, transcoding, or voice activity
6
+ // detection. The agent handles STT via Mastra's voice provider abstraction.
7
+ //
8
+ // Data flow:
9
+ // Client (mic) → WebSocket → Server → gRPC (these types) → Agent → Mastra voice.listen()
10
+ //
11
+ // Two ways audio enters the system:
12
+ // 1. ProcessAudioStream RPC: dedicated audio-only streaming (AudioStreamRequest)
13
+ // 2. ProcessConversation RPC: audio mixed into the bidi stream (ConversationRequest.audio_config/audio)
14
+ //
15
+ // Both converge on the same types: AudioStreamConfig describes the format,
16
+ // AudioChunk carries the bytes, and done=true signals end of an utterance.
17
+
18
+ syntax = "proto3";
19
+
20
+ package astro.messaging.v1;
21
+
22
+ option go_package = "github.com/postman/astro/messaging/v1;messagingv1";
23
+
24
+ // Audio encoding format — covers browser, telephony, and mobile sources.
25
+ // The agent uses this to configure the STT provider (e.g. Whisper, Deepgram).
26
+ enum AudioEncoding {
27
+ AUDIO_ENCODING_UNSPECIFIED = 0;
28
+ LINEAR16 = 1; // PCM signed 16-bit little-endian — universal baseline, any platform
29
+ MULAW = 2; // G.711 mu-law — Twilio and traditional telephony (8kHz)
30
+ OPUS = 3; // Raw Opus frames — low-latency codec
31
+ MP3 = 4; // MP3 — batch uploads, pre-recorded audio
32
+ WEBM_OPUS = 5; // WebM container with Opus — browser MediaRecorder default
33
+ OGG_OPUS = 6; // OGG container with Opus — Firefox MediaRecorder
34
+ FLAC = 7; // FLAC lossless — high-quality uploads
35
+ AAC = 8; // AAC — iOS native recording
36
+ }
37
+
38
+ // Sent once at the start of an audio segment to tell the agent what format
39
+ // the subsequent AudioChunk bytes are in. Without this, the agent can't
40
+ // decode the raw bytes.
41
+ message AudioStreamConfig {
42
+ AudioEncoding encoding = 1; // What codec the audio bytes use
43
+ int32 sample_rate = 2; // Hz: 8000 (telephony), 16000 (speech), 48000 (browser)
44
+ int32 channels = 3; // 1 = mono (speech default), 2 = stereo
45
+ string language = 4; // BCP-47 hint for STT, e.g. "en-US" (optional)
46
+ string conversation_id = 5; // Links this audio to an existing conversation
47
+
48
+ // Source metadata — helps the agent pick the right STT config.
49
+ // Examples: "browser", "twilio", "vonage", "mobile", "upload"
50
+ string source = 6;
51
+ }
52
+
53
+ // A chunk of raw audio bytes in the encoding specified by AudioStreamConfig.
54
+ //
55
+ // Chunks arrive in order during a segment. When done=true, the segment is
56
+ // complete and the agent should run STT on all accumulated chunks.
57
+ // The data field may be empty on the final done=true chunk.
58
+ message AudioChunk {
59
+ bytes data = 1; // Raw audio bytes (pass-through, no transcoding)
60
+ int64 sequence = 2; // Monotonic sequence number for ordering
61
+ bool done = 3; // true = end of segment, process accumulated audio
62
+ }
63
+
64
+ // Wrapper for the ProcessAudioStream RPC (dedicated audio streaming).
65
+ // The first message MUST be config, all subsequent messages are audio chunks.
66
+ message AudioStreamRequest {
67
+ oneof request {
68
+ AudioStreamConfig config = 1; // First message: tells agent the audio format
69
+ AudioChunk audio = 2; // Subsequent: raw audio bytes
70
+ }
71
+ }
@@ -4,6 +4,7 @@ package astro.messaging.v1;
4
4
 
5
5
  import "google/protobuf/timestamp.proto";
6
6
  import "astro/messaging/v1/message.proto";
7
+ import "astro/messaging/v1/audio.proto";
7
8
 
8
9
  option go_package = "github.com/postman/astro/messaging/v1;messagingv1";
9
10
 
@@ -22,6 +23,9 @@ message AgentResponse {
22
23
  ThreadMetadata thread_metadata = 7; // Thread title, creation
23
24
  ErrorResponse error = 8; // Error during processing
24
25
  ThreadHistoryRequest context_request = 9; // Request cached context (optional)
26
+ Transcript transcript = 10; // Audio transcript (agent → platform)
27
+ AudioStreamConfig audio_config = 11; // Audio session config (server → agent)
28
+ AudioChunk audio_chunk = 12; // Audio data (server → agent)
25
29
  }
26
30
  }
27
31
 
@@ -144,6 +148,14 @@ message ThreadMetadata {
144
148
  bool create_new = 3; // Create new thread
145
149
  }
146
150
 
151
+ // Transcript of user audio input (agent → platform after STT)
152
+ // Used to update a placeholder message with the actual transcribed text
153
+ message Transcript {
154
+ string text = 1; // Transcribed text
155
+ string message_id = 2; // User message ID to update (optional)
156
+ string language = 3; // Detected language BCP-47 (optional)
157
+ }
158
+
147
159
  // Error response from agent
148
160
  message ErrorResponse {
149
161
  enum ErrorCode {
@@ -5,6 +5,7 @@ package astro.messaging.v1;
5
5
  import "astro/messaging/v1/message.proto";
6
6
  import "astro/messaging/v1/response.proto";
7
7
  import "astro/messaging/v1/feedback.proto";
8
+ import "astro/messaging/v1/audio.proto";
8
9
  import "astro/messaging/v1/config.proto";
9
10
  import "google/protobuf/timestamp.proto";
10
11
 
@@ -29,6 +30,11 @@ service AgentMessaging {
29
30
  rpc GetConversationMetadata(ConversationMetadataRequest)
30
31
  returns (ConversationMetadataResponse);
31
32
 
33
+ // Audio: client streams raw audio, server responds with text
34
+ // First message MUST be AudioStreamConfig, rest are AudioChunks
35
+ rpc ProcessAudioStream(stream AudioStreamRequest)
36
+ returns (stream AgentResponse);
37
+
32
38
  // Health check
33
39
  rpc HealthCheck(HealthCheckRequest)
34
40
  returns (HealthCheckResponse);
@@ -41,6 +47,8 @@ message ConversationRequest {
41
47
  PlatformFeedback feedback = 2;
42
48
  AgentConfig agent_config = 3;
43
49
  AgentResponse agent_response = 4;
50
+ AudioStreamConfig audio_config = 5; // Start audio within conversation
51
+ AudioChunk audio = 6; // Audio data within conversation
44
52
  }
45
53
  }
46
54
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@astropods/messaging",
3
3
  "license": "Apache-2.0",
4
- "version": "0.0.1",
4
+ "version": "0.0.3",
5
5
  "description": "TypeScript SDK for Astro Messaging",
6
6
  "main": "dist/index.js",
7
7
  "types": "dist/index.d.ts",
@@ -9,8 +9,8 @@
9
9
  "dist"
10
10
  ],
11
11
  "scripts": {
12
- "postinstall": "ln -sf ../../proto proto",
13
- "build": "tsc && cp -r ../../proto dist/proto",
12
+ "postinstall": "rm -rf proto && ln -sf ../../proto proto",
13
+ "build": "tsc && rm -rf dist/proto && cp -r ../../proto dist/proto",
14
14
  "watch": "tsc --watch",
15
15
  "test": "bun test",
16
16
  "test:watch": "bun test --watch"