@astropods/messaging 0.0.2 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -45,6 +45,9 @@ export interface AgentResponse {
|
|
|
45
45
|
threadMetadata?: ThreadMetadata;
|
|
46
46
|
error?: ErrorResponse;
|
|
47
47
|
contextRequest?: ThreadHistoryRequest;
|
|
48
|
+
transcript?: Transcript;
|
|
49
|
+
audioConfig?: AudioStreamConfig;
|
|
50
|
+
audioChunk?: AudioChunk;
|
|
48
51
|
}
|
|
49
52
|
export interface StatusUpdate {
|
|
50
53
|
status: 'THINKING' | 'SEARCHING' | 'GENERATING' | 'PROCESSING' | 'ANALYZING' | 'CUSTOM';
|
|
@@ -77,6 +80,11 @@ export interface ErrorResponse {
|
|
|
77
80
|
details?: string;
|
|
78
81
|
retryable?: boolean;
|
|
79
82
|
}
|
|
83
|
+
export interface Transcript {
|
|
84
|
+
text: string;
|
|
85
|
+
messageId?: string;
|
|
86
|
+
language?: string;
|
|
87
|
+
}
|
|
80
88
|
export interface ThreadHistoryRequest {
|
|
81
89
|
conversationId: string;
|
|
82
90
|
maxMessages?: number;
|
|
@@ -129,11 +137,55 @@ export interface AgentConfig {
|
|
|
129
137
|
systemPrompt: string;
|
|
130
138
|
tools: AgentToolConfig[];
|
|
131
139
|
}
|
|
140
|
+
/**
|
|
141
|
+
* Supported audio encoding formats. Matches the AudioEncoding protobuf enum.
|
|
142
|
+
*
|
|
143
|
+
* Common sources:
|
|
144
|
+
* - LINEAR16: Universal PCM baseline (any platform)
|
|
145
|
+
* - MULAW: Twilio / telephony (G.711 mu-law, 8kHz)
|
|
146
|
+
* - WEBM_OPUS: Browser MediaRecorder default
|
|
147
|
+
* - AAC: iOS native recording
|
|
148
|
+
*/
|
|
149
|
+
export type AudioEncoding = 'LINEAR16' | 'MULAW' | 'OPUS' | 'MP3' | 'WEBM_OPUS' | 'OGG_OPUS' | 'FLAC' | 'AAC';
|
|
150
|
+
/**
|
|
151
|
+
* Configuration sent at the start of an audio segment to describe the format.
|
|
152
|
+
* Maps to the AudioStreamConfig protobuf message.
|
|
153
|
+
*/
|
|
154
|
+
export interface AudioStreamConfig {
|
|
155
|
+
encoding: AudioEncoding;
|
|
156
|
+
sampleRate: number;
|
|
157
|
+
channels: number;
|
|
158
|
+
language?: string;
|
|
159
|
+
conversationId: string;
|
|
160
|
+
source?: string;
|
|
161
|
+
userId?: string;
|
|
162
|
+
}
|
|
163
|
+
/**
|
|
164
|
+
* A chunk of raw audio bytes. Maps to the AudioChunk protobuf message.
|
|
165
|
+
*
|
|
166
|
+
* Chunks arrive sequentially during a segment. When done=true, the segment
|
|
167
|
+
* is complete and the agent should run STT on the accumulated audio.
|
|
168
|
+
*/
|
|
169
|
+
export interface AudioChunk {
|
|
170
|
+
data: Buffer | Uint8Array;
|
|
171
|
+
sequence?: number;
|
|
172
|
+
done?: boolean;
|
|
173
|
+
}
|
|
174
|
+
/**
|
|
175
|
+
* Maps an AudioEncoding to the filetype string expected by Mastra's voice.listen().
|
|
176
|
+
*
|
|
177
|
+
* Usage:
|
|
178
|
+
* const filetype = audioEncodingToFiletype(config.encoding);
|
|
179
|
+
* const transcript = await agent.voice.listen(audioStream, { filetype });
|
|
180
|
+
*/
|
|
181
|
+
export declare function audioEncodingToFiletype(encoding: AudioEncoding): string;
|
|
132
182
|
export interface ConversationRequest {
|
|
133
183
|
message?: Message;
|
|
134
184
|
feedback?: any;
|
|
135
185
|
agentConfig?: AgentConfig;
|
|
136
186
|
agentResponse?: AgentResponse;
|
|
187
|
+
audioConfig?: AudioStreamConfig;
|
|
188
|
+
audio?: AudioChunk;
|
|
137
189
|
}
|
|
138
190
|
export interface ReconnectOptions {
|
|
139
191
|
/** Maximum number of reconnect attempts. Default: Infinity */
|
|
@@ -244,6 +296,61 @@ export declare class ConversationStream extends EventEmitter {
|
|
|
244
296
|
* Send a status update for a conversation
|
|
245
297
|
*/
|
|
246
298
|
sendStatusUpdate(conversationId: string, status: StatusUpdate): void;
|
|
299
|
+
/**
|
|
300
|
+
* Send a transcript of the user's audio input back to the platform.
|
|
301
|
+
*
|
|
302
|
+
* After the agent runs STT on the audio, it calls this to send the transcribed
|
|
303
|
+
* text back to the platform (web adapter). The platform uses it to replace the
|
|
304
|
+
* "[audio]" placeholder message with the actual spoken text in the chat UI.
|
|
305
|
+
*
|
|
306
|
+
* @param conversationId - The conversation this transcript belongs to
|
|
307
|
+
* @param text - The transcribed text from STT
|
|
308
|
+
* @param messageId - Optional: the original "[audio]" message ID to update
|
|
309
|
+
* @param language - Optional: BCP-47 language detected by STT (e.g. "en-US")
|
|
310
|
+
*/
|
|
311
|
+
sendTranscript(conversationId: string, text: string, messageId?: string, language?: string): void;
|
|
312
|
+
/**
|
|
313
|
+
* Send an audio stream config through the bidi stream.
|
|
314
|
+
* Must be called before sendAudioChunk() so the receiver knows the encoding.
|
|
315
|
+
*/
|
|
316
|
+
sendAudioConfig(config: AudioStreamConfig): void;
|
|
317
|
+
/**
|
|
318
|
+
* Send a raw audio chunk through the bidi stream.
|
|
319
|
+
* The chunk's sequence number should increase monotonically.
|
|
320
|
+
*/
|
|
321
|
+
sendAudioChunk(chunk: AudioChunk): void;
|
|
322
|
+
/**
|
|
323
|
+
* Signal end of the current audio segment by sending an empty chunk with done=true.
|
|
324
|
+
* The receiver should process all accumulated audio (e.g. run STT).
|
|
325
|
+
* After this, more audio can follow — either new config or more chunks.
|
|
326
|
+
*/
|
|
327
|
+
endAudio(): void;
|
|
328
|
+
/**
|
|
329
|
+
* Converts incoming audioChunk events into a Web Streams API ReadableStream.
|
|
330
|
+
*
|
|
331
|
+
* This is the primary integration point with Mastra's voice system. The agent
|
|
332
|
+
* listens for the 'audioConfig' event to know the format, then calls this
|
|
333
|
+
* method to get a stream it can pass directly to voice.listen():
|
|
334
|
+
*
|
|
335
|
+
* ```typescript
|
|
336
|
+
* conversation.on('audioConfig', async (config) => {
|
|
337
|
+
* const audioStream = conversation.audioAsReadable();
|
|
338
|
+
* const filetype = audioEncodingToFiletype(config.encoding);
|
|
339
|
+
* const transcript = await agent.voice.listen(audioStream, { filetype });
|
|
340
|
+
* // ... process transcript
|
|
341
|
+
* });
|
|
342
|
+
* ```
|
|
343
|
+
*
|
|
344
|
+
* The ReadableStream:
|
|
345
|
+
* - Yields Uint8Array chunks as audioChunk events arrive
|
|
346
|
+
* - Closes when an AudioChunk with done=true arrives (end of segment)
|
|
347
|
+
* - Closes when the ConversationStream emits 'end' (intentional close)
|
|
348
|
+
* - Errors when the ConversationStream emits 'error'
|
|
349
|
+
* - Properly cleans up all event listeners on close, error, or cancel
|
|
350
|
+
*
|
|
351
|
+
* @returns A ReadableStream<Uint8Array> suitable for Mastra voice.listen()
|
|
352
|
+
*/
|
|
353
|
+
audioAsReadable(): ReadableStream<Uint8Array>;
|
|
247
354
|
/**
|
|
248
355
|
* End the stream intentionally. Emits 'end' and prevents any further reconnects.
|
|
249
356
|
*/
|
package/dist/messaging-client.js
CHANGED
|
@@ -34,10 +34,31 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
34
34
|
})();
|
|
35
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
36
|
exports.Helpers = exports.MessageStream = exports.ConversationStream = exports.MessagingClient = void 0;
|
|
37
|
+
exports.audioEncodingToFiletype = audioEncodingToFiletype;
|
|
37
38
|
const grpc = __importStar(require("@grpc/grpc-js"));
|
|
38
39
|
const protoLoader = __importStar(require("@grpc/proto-loader"));
|
|
39
40
|
const path_1 = require("path");
|
|
40
41
|
const events_1 = require("events");
|
|
42
|
+
/**
 * Maps an AudioEncoding name to the filetype string expected by Mastra's
 * voice.listen().
 *
 * Usage:
 *   const filetype = audioEncodingToFiletype(config.encoding);
 *   const transcript = await agent.voice.listen(audioStream, { filetype });
 *
 * @param encoding - One of the AudioEncoding names ('LINEAR16', 'MULAW',
 *   'OPUS', 'MP3', 'WEBM_OPUS', 'OGG_OPUS', 'FLAC', 'AAC').
 * @returns The matching filetype string. Any value that is not one of the
 *   names above (including undefined/null) yields the fallback 'wav'.
 */
function audioEncodingToFiletype(encoding) {
    const map = {
        LINEAR16: 'wav',
        MULAW: 'wav',
        OPUS: 'opus',
        MP3: 'mp3',
        WEBM_OPUS: 'webm',
        OGG_OPUS: 'ogg',
        FLAC: 'flac',
        AAC: 'm4a',
    };
    // Use an own-property check rather than a bare lookup: a plain object
    // literal inherits members such as "constructor" and "toString" from
    // Object.prototype, so `map[encoding] ?? 'wav'` would hand back a function
    // for those inputs instead of the string fallback.
    if (Object.prototype.hasOwnProperty.call(map, encoding)) {
        return map[encoding];
    }
    return 'wav';
}
|
|
41
62
|
// gRPC status codes: DEADLINE_EXCEEDED=4, INTERNAL=13, UNAVAILABLE=14, RESOURCE_EXHAUSTED=8
|
|
42
63
|
const DEFAULT_RETRYABLE_STATUS_CODES = [4, 8, 13, 14];
|
|
43
64
|
function resolveReconnectOptions(options) {
|
|
@@ -234,6 +255,17 @@ class ConversationStream extends events_1.EventEmitter {
|
|
|
234
255
|
attachHandlers(stream) {
|
|
235
256
|
stream.on('data', (response) => {
|
|
236
257
|
this.retryCount = 0;
|
|
258
|
+
// Emit audio-specific events if present.
|
|
259
|
+
// The server sends audio data through the bidi stream as AgentResponse
|
|
260
|
+
// messages with audioConfig or audioChunk payloads. We emit dedicated
|
|
261
|
+
// events for these so the agent can handle audio separately from text,
|
|
262
|
+
// while still emitting the generic 'response' event for observability.
|
|
263
|
+
if (response.audioConfig) {
|
|
264
|
+
this.emit('audioConfig', response.audioConfig);
|
|
265
|
+
}
|
|
266
|
+
else if (response.audioChunk) {
|
|
267
|
+
this.emit('audioChunk', response.audioChunk);
|
|
268
|
+
}
|
|
237
269
|
this.emit('response', response);
|
|
238
270
|
});
|
|
239
271
|
stream.on('error', (error) => {
|
|
@@ -345,6 +377,131 @@ class ConversationStream extends events_1.EventEmitter {
|
|
|
345
377
|
status,
|
|
346
378
|
});
|
|
347
379
|
}
|
|
380
|
+
/**
|
|
381
|
+
* Send a transcript of the user's audio input back to the platform.
|
|
382
|
+
*
|
|
383
|
+
* After the agent runs STT on the audio, it calls this to send the transcribed
|
|
384
|
+
* text back to the platform (web adapter). The platform uses it to replace the
|
|
385
|
+
* "[audio]" placeholder message with the actual spoken text in the chat UI.
|
|
386
|
+
*
|
|
387
|
+
* @param conversationId - The conversation this transcript belongs to
|
|
388
|
+
* @param text - The transcribed text from STT
|
|
389
|
+
* @param messageId - Optional: the original "[audio]" message ID to update
|
|
390
|
+
* @param language - Optional: BCP-47 language detected by STT (e.g. "en-US")
|
|
391
|
+
*/
|
|
392
|
+
sendTranscript(conversationId, text, messageId, language) {
|
|
393
|
+
this.sendAgentResponse({
|
|
394
|
+
conversationId,
|
|
395
|
+
transcript: { text, messageId, language },
|
|
396
|
+
});
|
|
397
|
+
}
|
|
398
|
+
// --- Audio support ---
|
|
399
|
+
//
|
|
400
|
+
// These methods handle sending audio data through the gRPC bidi stream.
|
|
401
|
+
// Two directions:
|
|
402
|
+
// - Agent → Server (sendAudioConfig/sendAudioChunk/endAudio): used when the
|
|
403
|
+
// agent needs to forward audio upstream (less common)
|
|
404
|
+
// - Server → Agent (audioConfig/audioChunk events + audioAsReadable): the main
|
|
405
|
+
// path where the server forwards client mic audio to the agent for STT
|
|
406
|
+
/**
|
|
407
|
+
* Send an audio stream config through the bidi stream.
|
|
408
|
+
* Must be called before sendAudioChunk() so the receiver knows the encoding.
|
|
409
|
+
*/
|
|
410
|
+
sendAudioConfig(config) {
|
|
411
|
+
this.write({ audioConfig: config });
|
|
412
|
+
}
|
|
413
|
+
/**
|
|
414
|
+
* Send a raw audio chunk through the bidi stream.
|
|
415
|
+
* The chunk's sequence number should increase monotonically.
|
|
416
|
+
*/
|
|
417
|
+
sendAudioChunk(chunk) {
|
|
418
|
+
this.write({ audio: chunk });
|
|
419
|
+
}
|
|
420
|
+
/**
|
|
421
|
+
* Signal end of the current audio segment by sending an empty chunk with done=true.
|
|
422
|
+
* The receiver should process all accumulated audio (e.g. run STT).
|
|
423
|
+
* After this, more audio can follow — either new config or more chunks.
|
|
424
|
+
*/
|
|
425
|
+
endAudio() {
|
|
426
|
+
this.write({ audio: { data: Buffer.alloc(0), done: true } });
|
|
427
|
+
}
|
|
428
|
+
/**
|
|
429
|
+
* Converts incoming audioChunk events into a Web Streams API ReadableStream.
|
|
430
|
+
*
|
|
431
|
+
* This is the primary integration point with Mastra's voice system. The agent
|
|
432
|
+
* listens for the 'audioConfig' event to know the format, then calls this
|
|
433
|
+
* method to get a stream it can pass directly to voice.listen():
|
|
434
|
+
*
|
|
435
|
+
* ```typescript
|
|
436
|
+
* conversation.on('audioConfig', async (config) => {
|
|
437
|
+
* const audioStream = conversation.audioAsReadable();
|
|
438
|
+
* const filetype = audioEncodingToFiletype(config.encoding);
|
|
439
|
+
* const transcript = await agent.voice.listen(audioStream, { filetype });
|
|
440
|
+
* // ... process transcript
|
|
441
|
+
* });
|
|
442
|
+
* ```
|
|
443
|
+
*
|
|
444
|
+
* The ReadableStream:
|
|
445
|
+
* - Yields Uint8Array chunks as audioChunk events arrive
|
|
446
|
+
* - Closes when an AudioChunk with done=true arrives (end of segment)
|
|
447
|
+
* - Closes when the ConversationStream emits 'end' (intentional close)
|
|
448
|
+
* - Errors when the ConversationStream emits 'error'
|
|
449
|
+
* - Properly cleans up all event listeners on close, error, or cancel
|
|
450
|
+
*
|
|
451
|
+
* @returns A ReadableStream<Uint8Array> suitable for Mastra voice.listen()
|
|
452
|
+
*/
|
|
453
|
+
audioAsReadable() {
|
|
454
|
+
// Centralized cleanup to prevent listener leaks. Called on:
|
|
455
|
+
// - done=true chunk (normal completion)
|
|
456
|
+
// - stream 'end' event (intentional close)
|
|
457
|
+
// - stream 'error' event
|
|
458
|
+
// - ReadableStream cancel() (consumer gave up, e.g. reader.cancel())
|
|
459
|
+
const cleanup = () => {
|
|
460
|
+
this.removeListener('audioChunk', onChunk);
|
|
461
|
+
this.removeListener('end', onEnd);
|
|
462
|
+
this.removeListener('error', onError);
|
|
463
|
+
};
|
|
464
|
+
const onChunk = (chunk) => {
|
|
465
|
+
if (chunk.done) {
|
|
466
|
+
cleanup();
|
|
467
|
+
try {
|
|
468
|
+
controller.close();
|
|
469
|
+
}
|
|
470
|
+
catch { }
|
|
471
|
+
}
|
|
472
|
+
else {
|
|
473
|
+
controller.enqueue(new Uint8Array(chunk.data));
|
|
474
|
+
}
|
|
475
|
+
};
|
|
476
|
+
const onEnd = () => {
|
|
477
|
+
cleanup();
|
|
478
|
+
try {
|
|
479
|
+
controller.close();
|
|
480
|
+
}
|
|
481
|
+
catch { }
|
|
482
|
+
};
|
|
483
|
+
const onError = (err) => {
|
|
484
|
+
cleanup();
|
|
485
|
+
try {
|
|
486
|
+
controller.error(err);
|
|
487
|
+
}
|
|
488
|
+
catch { }
|
|
489
|
+
};
|
|
490
|
+
let controller;
|
|
491
|
+
return new ReadableStream({
|
|
492
|
+
start: (ctrl) => {
|
|
493
|
+
controller = ctrl;
|
|
494
|
+
this.on('audioChunk', onChunk);
|
|
495
|
+
this.once('end', onEnd);
|
|
496
|
+
this.once('error', onError);
|
|
497
|
+
},
|
|
498
|
+
cancel: () => {
|
|
499
|
+
// Consumer cancelled (e.g. reader.cancel()) — remove all listeners
|
|
500
|
+
// to prevent memory leaks
|
|
501
|
+
cleanup();
|
|
502
|
+
},
|
|
503
|
+
});
|
|
504
|
+
}
|
|
348
505
|
/**
|
|
349
506
|
* End the stream intentionally. Emits 'end' and prevents any further reconnects.
|
|
350
507
|
*/
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
// Audio streaming types for the messaging system.
|
|
2
|
+
//
|
|
3
|
+
// These types enable raw audio input from any frontend (browser, phone, mobile app)
|
|
4
|
+
// to be streamed through the messaging server to an agent. The messaging system is
|
|
5
|
+
// a pass-through — it does NOT perform speech-to-text, transcoding, or voice activity
|
|
6
|
+
// detection. The agent handles STT via Mastra's voice provider abstraction.
|
|
7
|
+
//
|
|
8
|
+
// Data flow:
|
|
9
|
+
// Client (mic) → WebSocket → Server → gRPC (these types) → Agent → Mastra voice.listen()
|
|
10
|
+
//
|
|
11
|
+
// Two ways audio enters the system:
|
|
12
|
+
// 1. ProcessAudioStream RPC: dedicated audio-only streaming (AudioStreamRequest)
|
|
13
|
+
// 2. ProcessConversation RPC: audio mixed into the bidi stream (ConversationRequest.audio_config/audio)
|
|
14
|
+
//
|
|
15
|
+
// Both converge on the same types: AudioStreamConfig describes the format,
|
|
16
|
+
// AudioChunk carries the bytes, and done=true signals end of an utterance.
|
|
17
|
+
|
|
18
|
+
syntax = "proto3";
|
|
19
|
+
|
|
20
|
+
package astro.messaging.v1;
|
|
21
|
+
|
|
22
|
+
option go_package = "github.com/postman/astro/messaging/v1;messagingv1";
|
|
23
|
+
|
|
24
|
+
// Audio encoding format — covers browser, telephony, and mobile sources.
|
|
25
|
+
// The agent uses this to configure the STT provider (e.g. Whisper, Deepgram).
|
|
26
|
+
enum AudioEncoding {
|
|
27
|
+
AUDIO_ENCODING_UNSPECIFIED = 0;
|
|
28
|
+
LINEAR16 = 1; // PCM signed 16-bit little-endian — universal baseline, any platform
|
|
29
|
+
MULAW = 2; // G.711 mu-law — Twilio and traditional telephony (8kHz)
|
|
30
|
+
OPUS = 3; // Raw Opus frames — low-latency codec
|
|
31
|
+
MP3 = 4; // MP3 — batch uploads, pre-recorded audio
|
|
32
|
+
WEBM_OPUS = 5; // WebM container with Opus — browser MediaRecorder default
|
|
33
|
+
OGG_OPUS = 6; // OGG container with Opus — Firefox MediaRecorder
|
|
34
|
+
FLAC = 7; // FLAC lossless — high-quality uploads
|
|
35
|
+
AAC = 8; // AAC — iOS native recording
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
// Sent once at the start of an audio segment to tell the agent what format
|
|
39
|
+
// the subsequent AudioChunk bytes are in. Without this, the agent can't
|
|
40
|
+
// decode the raw bytes.
|
|
41
|
+
message AudioStreamConfig {
|
|
42
|
+
AudioEncoding encoding = 1; // What codec the audio bytes use
|
|
43
|
+
int32 sample_rate = 2; // Hz: 8000 (telephony), 16000 (speech), 48000 (browser)
|
|
44
|
+
int32 channels = 3; // 1 = mono (speech default), 2 = stereo
|
|
45
|
+
string language = 4; // BCP-47 hint for STT, e.g. "en-US" (optional)
|
|
46
|
+
string conversation_id = 5; // Links this audio to an existing conversation
|
|
47
|
+
|
|
48
|
+
// Source metadata — helps the agent pick the right STT config.
|
|
49
|
+
// Examples: "browser", "twilio", "vonage", "mobile", "upload"
|
|
50
|
+
string source = 6;
|
|
51
|
+
|
|
52
|
+
// Identity of the user sending audio, so the agent knows who is speaking
|
|
53
|
+
// without relying solely on the placeholder "[audio]" message.
|
|
54
|
+
string user_id = 7;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
// A chunk of raw audio bytes in the encoding specified by AudioStreamConfig.
|
|
58
|
+
//
|
|
59
|
+
// Chunks arrive in order during a segment. When done=true, the segment is
|
|
60
|
+
// complete and the agent should run STT on all accumulated chunks.
|
|
61
|
+
// The data field may be empty on the final done=true chunk.
|
|
62
|
+
message AudioChunk {
|
|
63
|
+
bytes data = 1; // Raw audio bytes (pass-through, no transcoding)
|
|
64
|
+
int64 sequence = 2; // Monotonic sequence number for ordering
|
|
65
|
+
bool done = 3; // true = end of segment, process accumulated audio
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
// Wrapper for the ProcessAudioStream RPC (dedicated audio streaming).
|
|
69
|
+
// The first message MUST be config, all subsequent messages are audio chunks.
|
|
70
|
+
message AudioStreamRequest {
|
|
71
|
+
oneof request {
|
|
72
|
+
AudioStreamConfig config = 1; // First message: tells agent the audio format
|
|
73
|
+
AudioChunk audio = 2; // Subsequent: raw audio bytes
|
|
74
|
+
}
|
|
75
|
+
}
|
|
@@ -4,6 +4,7 @@ package astro.messaging.v1;
|
|
|
4
4
|
|
|
5
5
|
import "google/protobuf/timestamp.proto";
|
|
6
6
|
import "astro/messaging/v1/message.proto";
|
|
7
|
+
import "astro/messaging/v1/audio.proto";
|
|
7
8
|
|
|
8
9
|
option go_package = "github.com/postman/astro/messaging/v1;messagingv1";
|
|
9
10
|
|
|
@@ -22,6 +23,9 @@ message AgentResponse {
|
|
|
22
23
|
ThreadMetadata thread_metadata = 7; // Thread title, creation
|
|
23
24
|
ErrorResponse error = 8; // Error during processing
|
|
24
25
|
ThreadHistoryRequest context_request = 9; // Request cached context (optional)
|
|
26
|
+
Transcript transcript = 10; // Audio transcript (agent → platform)
|
|
27
|
+
AudioStreamConfig audio_config = 11; // Audio session config (server → agent)
|
|
28
|
+
AudioChunk audio_chunk = 12; // Audio data (server → agent)
|
|
25
29
|
}
|
|
26
30
|
}
|
|
27
31
|
|
|
@@ -144,6 +148,14 @@ message ThreadMetadata {
|
|
|
144
148
|
bool create_new = 3; // Create new thread
|
|
145
149
|
}
|
|
146
150
|
|
|
151
|
+
// Transcript of user audio input (agent → platform after STT)
|
|
152
|
+
// Used to update a placeholder message with the actual transcribed text
|
|
153
|
+
message Transcript {
|
|
154
|
+
string text = 1; // Transcribed text
|
|
155
|
+
string message_id = 2; // User message ID to update (optional)
|
|
156
|
+
string language = 3; // Detected language BCP-47 (optional)
|
|
157
|
+
}
|
|
158
|
+
|
|
147
159
|
// Error response from agent
|
|
148
160
|
message ErrorResponse {
|
|
149
161
|
enum ErrorCode {
|
|
@@ -5,6 +5,7 @@ package astro.messaging.v1;
|
|
|
5
5
|
import "astro/messaging/v1/message.proto";
|
|
6
6
|
import "astro/messaging/v1/response.proto";
|
|
7
7
|
import "astro/messaging/v1/feedback.proto";
|
|
8
|
+
import "astro/messaging/v1/audio.proto";
|
|
8
9
|
import "astro/messaging/v1/config.proto";
|
|
9
10
|
import "google/protobuf/timestamp.proto";
|
|
10
11
|
|
|
@@ -29,6 +30,11 @@ service AgentMessaging {
|
|
|
29
30
|
rpc GetConversationMetadata(ConversationMetadataRequest)
|
|
30
31
|
returns (ConversationMetadataResponse);
|
|
31
32
|
|
|
33
|
+
// Audio: client streams raw audio, server responds with text
|
|
34
|
+
// First message MUST be AudioStreamConfig, rest are AudioChunks
|
|
35
|
+
rpc ProcessAudioStream(stream AudioStreamRequest)
|
|
36
|
+
returns (stream AgentResponse);
|
|
37
|
+
|
|
32
38
|
// Health check
|
|
33
39
|
rpc HealthCheck(HealthCheckRequest)
|
|
34
40
|
returns (HealthCheckResponse);
|
|
@@ -41,6 +47,8 @@ message ConversationRequest {
|
|
|
41
47
|
PlatformFeedback feedback = 2;
|
|
42
48
|
AgentConfig agent_config = 3;
|
|
43
49
|
AgentResponse agent_response = 4;
|
|
50
|
+
AudioStreamConfig audio_config = 5; // Start audio within conversation
|
|
51
|
+
AudioChunk audio = 6; // Audio data within conversation
|
|
44
52
|
}
|
|
45
53
|
}
|
|
46
54
|
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@astropods/messaging",
|
|
3
3
|
"license": "Apache-2.0",
|
|
4
|
-
"version": "0.0.2",
|
|
4
|
+
"version": "0.0.4",
|
|
5
5
|
"description": "TypeScript SDK for Astro Messaging",
|
|
6
6
|
"main": "dist/index.js",
|
|
7
7
|
"types": "dist/index.d.ts",
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
],
|
|
11
11
|
"scripts": {
|
|
12
12
|
"postinstall": "rm -rf proto && ln -sf ../../proto proto",
|
|
13
|
-
"build": "tsc && cp -r ../../proto dist/proto",
|
|
13
|
+
"build": "tsc && rm -rf dist/proto && cp -r ../../proto dist/proto",
|
|
14
14
|
"watch": "tsc --watch",
|
|
15
15
|
"test": "bun test",
|
|
16
16
|
"test:watch": "bun test --watch"
|