@deepgram/sdk 3.12.1 → 3.13.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42):
  1. package/dist/main/DeepgramClient.d.ts.map +1 -1
  2. package/dist/main/DeepgramClient.js +1 -1
  3. package/dist/main/DeepgramClient.js.map +1 -1
  4. package/dist/main/lib/enums/AgentEvents.d.ts +5 -5
  5. package/dist/main/lib/enums/AgentEvents.d.ts.map +1 -1
  6. package/dist/main/lib/enums/AgentEvents.js +5 -5
  7. package/dist/main/lib/enums/AgentEvents.js.map +1 -1
  8. package/dist/main/lib/types/AgentLiveSchema.d.ts +98 -95
  9. package/dist/main/lib/types/AgentLiveSchema.d.ts.map +1 -1
  10. package/dist/main/lib/types/FunctionCallResponse.d.ts +6 -2
  11. package/dist/main/lib/types/FunctionCallResponse.d.ts.map +1 -1
  12. package/dist/main/lib/version.d.ts +1 -1
  13. package/dist/main/lib/version.js +1 -1
  14. package/dist/main/packages/AgentLiveClient.d.ts +1 -17
  15. package/dist/main/packages/AgentLiveClient.d.ts.map +1 -1
  16. package/dist/main/packages/AgentLiveClient.js +8 -28
  17. package/dist/main/packages/AgentLiveClient.js.map +1 -1
  18. package/dist/module/DeepgramClient.d.ts.map +1 -1
  19. package/dist/module/DeepgramClient.js +1 -1
  20. package/dist/module/DeepgramClient.js.map +1 -1
  21. package/dist/module/lib/enums/AgentEvents.d.ts +5 -5
  22. package/dist/module/lib/enums/AgentEvents.d.ts.map +1 -1
  23. package/dist/module/lib/enums/AgentEvents.js +5 -5
  24. package/dist/module/lib/enums/AgentEvents.js.map +1 -1
  25. package/dist/module/lib/types/AgentLiveSchema.d.ts +98 -95
  26. package/dist/module/lib/types/AgentLiveSchema.d.ts.map +1 -1
  27. package/dist/module/lib/types/FunctionCallResponse.d.ts +6 -2
  28. package/dist/module/lib/types/FunctionCallResponse.d.ts.map +1 -1
  29. package/dist/module/lib/version.d.ts +1 -1
  30. package/dist/module/lib/version.js +1 -1
  31. package/dist/module/packages/AgentLiveClient.d.ts +1 -17
  32. package/dist/module/packages/AgentLiveClient.d.ts.map +1 -1
  33. package/dist/module/packages/AgentLiveClient.js +8 -28
  34. package/dist/module/packages/AgentLiveClient.js.map +1 -1
  35. package/dist/umd/deepgram.js +1 -1
  36. package/package.json +1 -1
  37. package/src/DeepgramClient.ts +1 -1
  38. package/src/lib/enums/AgentEvents.ts +5 -5
  39. package/src/lib/types/AgentLiveSchema.ts +98 -105
  40. package/src/lib/types/FunctionCallResponse.ts +6 -2
  41. package/src/lib/version.ts +1 -1
  42. package/src/packages/AgentLiveClient.ts +13 -28
@@ -1,20 +1,3 @@
1
- type AudioFormat =
2
- | {
3
- encoding: "linear16";
4
- container: "wav" | "none";
5
- sampleRate: 8000 | 16000 | 24000 | 32000 | 48000;
6
- }
7
- | {
8
- encoding: "mulaw";
9
- container: "wav" | "none";
10
- sampleRate: 8000 | 16000;
11
- }
12
- | {
13
- encoding: "alaw";
14
- container: "wav" | "none";
15
- sampleRate: 8000 | 16000;
16
- };
17
-
18
1
  type AudioEncoding =
19
2
  | "linear16"
20
3
  | "flac"
@@ -76,119 +59,129 @@ type SpeakModel =
76
59
  | "aura-zeus-en"
77
60
  | string;
78
61
 
79
- interface ThinkModelFunction {
80
- name: string;
81
- description: string;
82
- url: string;
83
- headers: [
84
- {
85
- key: "authorization";
86
- value: string;
87
- }
88
- ];
89
- method: "POST";
90
- parameters: {
91
- type: string;
92
- properties: Record<
93
- string,
94
- {
95
- type: string;
96
- description: string;
97
- }
98
- >;
99
- };
100
- }
101
-
102
- type ThinkModel =
103
- | {
104
- provider: {
105
- type: "open_ai";
106
- };
107
- model: "gpt-4o-mini";
108
- instructions?: string;
109
- functions?: ThinkModelFunction[];
110
- }
111
- | {
112
- provider: {
113
- type: "anthropic";
114
- };
115
- model: "claude-3-haiku-20240307";
116
- instructions?: string;
117
- functions?: ThinkModelFunction[];
118
- }
119
- | {
120
- provider: {
121
- type: "groq";
122
- };
123
- model: "";
124
- instructions?: string;
125
- functions?: ThinkModelFunction[];
126
- }
127
- | {
128
- provider: {
129
- type: "custom";
130
- url: string;
131
- key: string;
132
- };
133
- model: string;
134
- instructions?: string;
135
- functions?: ThinkModelFunction[];
136
- };
137
-
138
62
  /**
139
63
  * @see https://developers.deepgram.com/reference/voicebot-api-phase-preview#settingsconfiguration
140
64
  */
141
65
  interface AgentLiveSchema extends Record<string, unknown> {
66
+ /**
67
+ * Set to true to enable experimental features.
68
+ * @default false
69
+ */
70
+ experimental?: boolean;
142
71
  audio: {
143
72
  input?: {
144
73
  /**
145
- * @default 1
74
+ * @default "linear16"
146
75
  */
147
- channels?: number;
148
76
  encoding: AudioEncoding;
149
77
  /**
150
- * @default false
78
+ * @default 16000
151
79
  */
152
- multichannel?: boolean;
153
- sampleRate: number;
80
+ sample_rate: number;
154
81
  };
155
82
  /**
156
83
  * @see https://developers.deepgram.com/docs/tts-media-output-settings#audio-format-combinations
157
84
  */
158
- output?: AudioFormat;
159
- };
160
- agent: {
161
- listen: {
85
+ output?: {
86
+ encoding?: string;
87
+ sample_rate?: number;
88
+ bitrate?: number;
162
89
  /**
163
- * @see https://developers.deepgram.com/docs/model
90
+ * @default "none"
164
91
  */
165
- model: ListenModel;
166
- /**
167
- * @see https://developers.deepgram.com/docs/keyterm
168
- */
169
- keyterms?: string[];
92
+ container?: string;
170
93
  };
171
- speak: {
94
+ };
95
+ agent: {
96
+ language?: {
172
97
  /**
173
- * @see https://developers.deepgram.com/docs/tts-models
98
+ * ISO 639-1 language code for agent language.
99
+ * @default "en"
174
100
  */
175
- model: SpeakModel;
101
+ type: string;
102
+ };
103
+ listen?: {
104
+ provider: {
105
+ type: "deepgram";
106
+ /**
107
+ * @see https://developers.deepgram.com/docs/model
108
+ */
109
+ model: ListenModel;
110
+ /**
111
+ * Only available for Nova 3.
112
+ * @see https://developers.deepgram.com/docs/keyterm
113
+ */
114
+ keyterms?: string[];
115
+ };
116
+ };
117
+ speak?: {
118
+ provider: {
119
+ type: "deepgram" | "eleven_labs" | "cartesia" | "open_ai" | string;
120
+ /**
121
+ * Deepgram OR OpenAI model to use.
122
+ */
123
+ model?: SpeakModel;
124
+ /**
125
+ * Eleven Labs OR Cartesia model to use.
126
+ */
127
+ model_id?: string;
128
+ /**
129
+ * Cartesia voice configuration.
130
+ */
131
+ voice?: {
132
+ mode: string;
133
+ id: string;
134
+ };
135
+ /**
136
+ * Optional Cartesia language.
137
+ */
138
+ language?: string;
139
+ /**
140
+ * Optional Eleven Labs voice.
141
+ */
142
+ language_code?: string;
143
+ };
144
+ endpoint?: {
145
+ url?: string;
146
+ headers?: Record<string, string>;
147
+ };
176
148
  };
177
149
  /**
178
150
  * @see https://developers.deepgram.com/reference/voicebot-api-phase-preview#supported-llm-providers-and-models
179
151
  */
180
- think: ThinkModel;
181
- };
182
- context?: {
183
- /**
184
- * LLM message history (e.g. to restore existing conversation if websocket disconnects)
185
- */
186
- messages: { role: "user" | "assistant"; content: string }[];
187
- /**
188
- * Whether to replay the last message, if it is an assistant message.
189
- */
190
- replay: boolean;
152
+ think?: {
153
+ provider: {
154
+ type: "deepgram" | "open_ai" | "anthropic" | "x_ai" | string;
155
+ model: string;
156
+ /**
157
+ * 0-2 for OpenAI, 0-1 for Anthropic.
158
+ */
159
+ temperature?: number;
160
+ };
161
+ /**
162
+ * Optional ONLY if LLM provider is Deepgram.
163
+ */
164
+ endpoint?: {
165
+ url?: string;
166
+ headers?: Record<string, string>;
167
+ };
168
+ functions?: {
169
+ name?: string;
170
+ description?: string;
171
+ parameters?: Record<string, unknown>;
172
+ endpoint?: {
173
+ url?: string;
174
+ method?: string;
175
+ headers?: Record<string, string>;
176
+ };
177
+ }[];
178
+ prompt?: string;
179
+ };
191
180
  };
181
+ /**
182
+ * Optional message the agent will say at the start of the connection.
183
+ */
184
+ greeting?: string;
192
185
  }
193
186
 
194
187
  export type { AgentLiveSchema, SpeakModel };
@@ -5,9 +5,13 @@ export interface FunctionCallResponse {
5
5
  /**
6
6
  * This must be the ID that was received in the request.
7
7
  */
8
- function_call_id: string;
8
+ id: string;
9
+ /**
10
+ * The name of the function being called.
11
+ */
12
+ name: string;
9
13
  /**
10
14
  * The result of the function call.
11
15
  */
12
- output: string;
16
+ content: string;
13
17
  }
@@ -1 +1 @@
1
- export const version = "3.12.1";
1
+ export const version = "3.13.0-beta.2";
@@ -12,7 +12,7 @@ import { AbstractLiveClient } from "./AbstractLiveClient";
12
12
  export class AgentLiveClient extends AbstractLiveClient {
13
13
  public namespace: string = "agent";
14
14
 
15
- constructor(options: DeepgramClientOptions, endpoint: string = "/agent") {
15
+ constructor(options: DeepgramClientOptions, endpoint: string = "/:version/agent/converse") {
16
16
  super(options);
17
17
  this.baseUrl = options.agent?.websocket?.options?.url ?? DEFAULT_AGENT_URL;
18
18
 
@@ -59,6 +59,7 @@ export class AgentLiveClient extends AbstractLiveClient {
59
59
  } catch (error) {
60
60
  this.emit(AgentEvents.Error, {
61
61
  event,
62
+ data: event.data,
62
63
  message: "Unable to parse `data` as JSON.",
63
64
  error,
64
65
  });
@@ -104,41 +105,27 @@ export class AgentLiveClient extends AbstractLiveClient {
104
105
  * To be called with your model configuration BEFORE sending
105
106
  * any audio data.
106
107
  * @param options - The SettingsConfiguration object.
107
- * @param options.audio.input.encoding - The encoding for your inbound (user) audio.
108
- * @param options.audio.input.sampleRate - The sample rate for your inbound (user) audio.
109
- * @param options.audio.output.encoding - The encoding for your outbound (agent) audio.
110
- * @param options.audio.output.sampleRate - The sample rate for your outbound (agent) audio.
111
- * @param options.audio.output.bitrate - The bitrate for your outbound (agent) audio.
112
- * @param options.audio.output.container - The container for your outbound (agent) audio.
113
- * @param options.agent.listen.model - The STT model to use for processing user audio.
114
- * @param options.agent.speak.model - The TTS model to use for generating agent audio.
115
- * @param options.agent.think.provider.type - The LLM provider to use.
116
- * @param options.agent.think.model - The LLM model to use.
117
- * @param options.agent.think.instructions - The instructions to provide to the LLM.
118
- * @param options.agent.think.functions - The functions to provide to the LLM.
119
- * @param options.context.messages - The message history to provide to the LLM (useful if a websocket connection is lost.)
120
- * @param options.context.replay - Whether to replay the last message if it was an assistant message.
121
108
  */
122
109
  public configure(options: AgentLiveSchema): void {
123
- // @ts-expect-error Not every consumer of the SDK is using TypeScript, this conditional exists to catch runtime errors for JS code where there is no compile-time checking.
124
- if (!options.agent.listen.model.startsWith("nova-3") && options.agent.listen.keyterm?.length) {
110
+ if (
111
+ !options.agent.listen?.provider.model.startsWith("nova-3") &&
112
+ options.agent.listen?.provider.keyterms?.length
113
+ ) {
125
114
  throw new DeepgramError("Keyterms are only supported with the Nova 3 models.");
126
115
  }
127
- // Converting the property names...
128
- const opts: Record<string, any> = { ...options };
129
- opts.audio.input["sample_rate"] = options.audio.input?.sampleRate;
130
- delete opts.audio.input.sampleRate;
131
- opts.audio.output["sample_rate"] = options.audio.output?.sampleRate;
132
- delete opts.audio.output.sampleRate;
133
- this.send(JSON.stringify({ type: "SettingsConfiguration", ...opts }));
116
+ const string = JSON.stringify({
117
+ type: "Settings",
118
+ ...options,
119
+ });
120
+ this.send(string);
134
121
  }
135
122
 
136
123
  /**
137
124
  * Provide new instructions to the LLM.
138
125
  * @param instructions - The instructions to provide.
139
126
  */
140
- public updateInstructions(instructions: string): void {
141
- this.send(JSON.stringify({ type: "UpdateInstructions", instructions }));
127
+ public updatePrompt(instructions: string): void {
128
+ this.send(JSON.stringify({ type: "UpdatePrompt", instructions }));
142
129
  }
143
130
 
144
131
  /**
@@ -165,8 +152,6 @@ export class AgentLiveClient extends AbstractLiveClient {
165
152
  /**
166
153
  * Respond to a function call request.
167
154
  * @param response - The response to the function call request.
168
- * @param response.function_call_id - The ID that was received in the request (these MUST match).
169
- * @param response.output - The result of the function call.
170
155
  */
171
156
  public functionCallResponse(response: FunctionCallResponse): void {
172
157
  this.send(JSON.stringify({ type: "FunctionCallResponse", ...response }));