phonic 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -7,7 +7,6 @@ Node.js library for the Phonic API.
7
7
  - [Usage](#usage)
8
8
  - [Get voices](#get-voices)
9
9
  - [Get voice by id](#get-voice-by-id)
10
- - [Text-to-speech via WebSocket](#text-to-speech-via-websocket)
11
10
  - [Speech-to-speech via WebSocket](#speech-to-speech-via-websocket)
12
11
 
13
12
  ## Installation
@@ -49,7 +48,7 @@ if (error === null) {
49
48
  }
50
49
  ```
51
50
 
52
- ### Speesh-to-speech via WebSocket
51
+ ### Speech-to-speech via WebSocket
53
52
 
54
53
  Open a WebSocket connection:
55
54
 
@@ -98,7 +97,7 @@ phonicWebSocket.onMessage((message) => {
98
97
 
99
98
  case "audio_chunk": {
100
99
  // Send the audio chunk to Twilio, for example:
101
- ws.send(
100
+ twilioWebSocket.send(
102
101
  JSON.stringify({
103
102
  event: "media",
104
103
  streamSid: "...",
@@ -133,99 +132,6 @@ phonicWebSocket.onError((event) => {
133
132
  });
134
133
  ```
135
134
 
136
- ### Text-to-speech via WebSocket
137
-
138
- Open a WebSocket connection:
139
-
140
- ```ts
141
- const { data, error } = await phonic.tts.websocket({
142
- model: "shasta",
143
- output_format: "mulaw_8000",
144
- voice_id: "meredith",
145
- });
146
-
147
- if (error !== null) {
148
- throw new Error(error.message);
149
- }
150
-
151
- // Here we know that the WebSocket connection is open.
152
- const { phonicWebSocket } = data;
153
- ```
154
-
155
- Process audio chunks that Phonic sends back to you, by sending them to Twilio, for example:
156
-
157
- ```ts
158
- phonicWebSocket.onMessage((message) => {
159
- if (message.type === "audio_chunk") {
160
- ws.send(
161
- JSON.stringify({
162
- event: "media",
163
- streamSid: "...",
164
- media: {
165
- payload: message.audio,
166
- },
167
- }),
168
- );
169
- }
170
- });
171
- ```
172
-
173
- Send text chunks to Phonic for audio generation as you receive them from LLM:
174
-
175
- ```ts
176
- const stream = await openai.chat.completions.create(...);
177
-
178
- for await (const chunk of stream) {
179
- const text = chunk.choices[0]?.delta?.content || "";
180
-
181
- if (text) {
182
- phonicWebSocket.generate({ text });
183
- }
184
- }
185
- ```
186
-
187
- Tell Phonic to finish generating audio for all text chunks you've sent:
188
-
189
- ```ts
190
- phonicWebSocket.flush();
191
- ```
192
-
193
- You can also tell Phonic to stop sending audio chunks back, e.g. if the user interrupts the conversation:
194
-
195
- ```ts
196
- phonicWebSocket.stop();
197
- ```
198
-
199
- To close the WebSocket connection:
200
-
201
- ```ts
202
- phonicWebSocket.close();
203
- ```
204
-
205
- To know when the last audio chunk has been received:
206
-
207
- ```ts
208
- phonicWebSocket.onMessage((message) => {
209
- if (message.type === "flushed") {
210
- console.log("Last audio chunk received");
211
- }
212
- });
213
- ```
214
-
215
- You can also listen for close and error events:
216
-
217
- ```ts
218
- phonicWebSocket.onClose((event) => {
219
- console.log(
220
- `Phonic WebSocket closed with code ${event.code} and reason "${event.reason}"`,
221
- );
222
- });
223
-
224
- phonicWebSocket.onError((event) => {
225
- console.log(`Error from Phonic WebSocket: ${event.message}`);
226
- });
227
- ```
228
-
229
135
  ## Publish a new version on npm
230
136
 
231
137
  1. `bunx changeset`
package/dist/index.d.mts CHANGED
@@ -18,24 +18,13 @@ type DataOrError<T> = Promise<{
18
18
  error: ErrorResponse;
19
19
  }>;
20
20
 
21
- type PhonicTTSWebSocketParams = {
22
- model?: string;
23
- output_format?: string;
24
- voice_id?: string;
25
- };
26
- type PhonicTTSWebSocketResponseMessage = {
27
- type: "config";
28
- model: string;
29
- output_format: string;
30
- voice_id: string;
21
+ type PhonicSTSWebSocketResponseMessage = {
22
+ type: "input_text";
23
+ text: string;
31
24
  } | {
32
25
  type: "audio_chunk";
33
- audio: string;
34
26
  text: string;
35
- } | {
36
- type: "flush_confirm";
37
- } | {
38
- type: "stop_confirm";
27
+ audio: string;
39
28
  } | {
40
29
  type: "error";
41
30
  error: {
@@ -43,40 +32,44 @@ type PhonicTTSWebSocketResponseMessage = {
43
32
  code?: string;
44
33
  };
45
34
  paramErrors?: {
46
- model?: string;
47
- output_format?: string;
35
+ system_prompt?: string;
36
+ welcome_message?: string;
48
37
  voice_id?: string;
49
- text?: string;
50
- speed?: string;
38
+ input_format?: string;
39
+ output_format?: string;
51
40
  };
52
41
  };
53
- type OnMessageCallback$1 = (message: PhonicTTSWebSocketResponseMessage) => void;
54
- type OnCloseCallback$1 = (event: WebSocket.CloseEvent) => void;
55
- type OnErrorCallback$1 = (event: WebSocket.ErrorEvent) => void;
42
+ type OnMessageCallback = (message: PhonicSTSWebSocketResponseMessage) => void;
43
+ type OnCloseCallback = (event: WebSocket.CloseEvent) => void;
44
+ type OnErrorCallback = (event: WebSocket.ErrorEvent) => void;
56
45
 
57
- declare class PhonicTTSWebSocket {
46
+ declare class PhonicSTSWebSocket {
58
47
  private readonly ws;
59
48
  private onMessageCallback;
60
49
  private onCloseCallback;
61
50
  private onErrorCallback;
62
51
  constructor(ws: WebSocket);
63
- onMessage(callback: OnMessageCallback$1): void;
64
- onClose(callback: OnCloseCallback$1): void;
65
- onError(callback: OnErrorCallback$1): void;
66
- generate(message: {
67
- text: string;
68
- speed?: number;
52
+ onMessage(callback: OnMessageCallback): void;
53
+ onClose(callback: OnCloseCallback): void;
54
+ onError(callback: OnErrorCallback): void;
55
+ config(message: {
56
+ system_prompt?: string;
57
+ welcome_message?: string;
58
+ voice_id?: string;
59
+ input_format?: "pcm_44100" | "mulaw_8000";
60
+ output_format?: "pcm_44100" | "mulaw_8000";
61
+ }): void;
62
+ audioChunk(message: {
63
+ audio: string;
69
64
  }): void;
70
- flush(): void;
71
- stop(): void;
72
65
  close(): void;
73
66
  }
74
67
 
75
- declare class TextToSpeech {
68
+ declare class SpeechToSpeech {
76
69
  private readonly phonic;
77
70
  constructor(phonic: Phonic);
78
- websocket(params?: PhonicTTSWebSocketParams): DataOrError<{
79
- phonicWebSocket: PhonicTTSWebSocket;
71
+ websocket(): DataOrError<{
72
+ phonicWebSocket: PhonicSTSWebSocket;
80
73
  }>;
81
74
  }
82
75
 
@@ -105,7 +98,7 @@ declare class Phonic {
105
98
  readonly baseUrl: string;
106
99
  private readonly headers;
107
100
  readonly voices: Voices;
108
- readonly tts: TextToSpeech;
101
+ readonly sts: SpeechToSpeech;
109
102
  constructor(apiKey: string, config?: PhonicConfig);
110
103
  fetchRequest<T>(path: string, options: FetchOptions): DataOrError<T>;
111
104
  get<T>(path: string): Promise<{
@@ -117,51 +110,4 @@ declare class Phonic {
117
110
  }>;
118
111
  }
119
112
 
120
- type PhonicSTSWebSocketResponseMessage = {
121
- type: "input_text";
122
- text: string;
123
- } | {
124
- type: "audio_chunk";
125
- text: string;
126
- audio: string;
127
- } | {
128
- type: "error";
129
- error: {
130
- message: string;
131
- code?: string;
132
- };
133
- paramErrors?: {
134
- system_prompt?: string;
135
- welcome_message?: string;
136
- voice_id?: string;
137
- input_format?: string;
138
- output_format?: string;
139
- };
140
- };
141
- type OnMessageCallback = (message: PhonicSTSWebSocketResponseMessage) => void;
142
- type OnCloseCallback = (event: WebSocket.CloseEvent) => void;
143
- type OnErrorCallback = (event: WebSocket.ErrorEvent) => void;
144
-
145
- declare class PhonicSTSWebSocket {
146
- private readonly ws;
147
- private onMessageCallback;
148
- private onCloseCallback;
149
- private onErrorCallback;
150
- constructor(ws: WebSocket);
151
- onMessage(callback: OnMessageCallback): void;
152
- onClose(callback: OnCloseCallback): void;
153
- onError(callback: OnErrorCallback): void;
154
- config(message: {
155
- system_prompt?: string;
156
- welcome_message?: string;
157
- voice_id?: string;
158
- input_format?: "pcm_44100" | "mulaw_8000";
159
- output_format?: "pcm_44100" | "mulaw_8000";
160
- }): void;
161
- audioChunk(message: {
162
- audio: string;
163
- }): void;
164
- close(): void;
165
- }
166
-
167
- export { Phonic, PhonicSTSWebSocket, PhonicTTSWebSocket };
113
+ export { Phonic, PhonicSTSWebSocket };
package/dist/index.d.ts CHANGED
@@ -18,24 +18,13 @@ type DataOrError<T> = Promise<{
18
18
  error: ErrorResponse;
19
19
  }>;
20
20
 
21
- type PhonicTTSWebSocketParams = {
22
- model?: string;
23
- output_format?: string;
24
- voice_id?: string;
25
- };
26
- type PhonicTTSWebSocketResponseMessage = {
27
- type: "config";
28
- model: string;
29
- output_format: string;
30
- voice_id: string;
21
+ type PhonicSTSWebSocketResponseMessage = {
22
+ type: "input_text";
23
+ text: string;
31
24
  } | {
32
25
  type: "audio_chunk";
33
- audio: string;
34
26
  text: string;
35
- } | {
36
- type: "flush_confirm";
37
- } | {
38
- type: "stop_confirm";
27
+ audio: string;
39
28
  } | {
40
29
  type: "error";
41
30
  error: {
@@ -43,40 +32,44 @@ type PhonicTTSWebSocketResponseMessage = {
43
32
  code?: string;
44
33
  };
45
34
  paramErrors?: {
46
- model?: string;
47
- output_format?: string;
35
+ system_prompt?: string;
36
+ welcome_message?: string;
48
37
  voice_id?: string;
49
- text?: string;
50
- speed?: string;
38
+ input_format?: string;
39
+ output_format?: string;
51
40
  };
52
41
  };
53
- type OnMessageCallback$1 = (message: PhonicTTSWebSocketResponseMessage) => void;
54
- type OnCloseCallback$1 = (event: WebSocket.CloseEvent) => void;
55
- type OnErrorCallback$1 = (event: WebSocket.ErrorEvent) => void;
42
+ type OnMessageCallback = (message: PhonicSTSWebSocketResponseMessage) => void;
43
+ type OnCloseCallback = (event: WebSocket.CloseEvent) => void;
44
+ type OnErrorCallback = (event: WebSocket.ErrorEvent) => void;
56
45
 
57
- declare class PhonicTTSWebSocket {
46
+ declare class PhonicSTSWebSocket {
58
47
  private readonly ws;
59
48
  private onMessageCallback;
60
49
  private onCloseCallback;
61
50
  private onErrorCallback;
62
51
  constructor(ws: WebSocket);
63
- onMessage(callback: OnMessageCallback$1): void;
64
- onClose(callback: OnCloseCallback$1): void;
65
- onError(callback: OnErrorCallback$1): void;
66
- generate(message: {
67
- text: string;
68
- speed?: number;
52
+ onMessage(callback: OnMessageCallback): void;
53
+ onClose(callback: OnCloseCallback): void;
54
+ onError(callback: OnErrorCallback): void;
55
+ config(message: {
56
+ system_prompt?: string;
57
+ welcome_message?: string;
58
+ voice_id?: string;
59
+ input_format?: "pcm_44100" | "mulaw_8000";
60
+ output_format?: "pcm_44100" | "mulaw_8000";
61
+ }): void;
62
+ audioChunk(message: {
63
+ audio: string;
69
64
  }): void;
70
- flush(): void;
71
- stop(): void;
72
65
  close(): void;
73
66
  }
74
67
 
75
- declare class TextToSpeech {
68
+ declare class SpeechToSpeech {
76
69
  private readonly phonic;
77
70
  constructor(phonic: Phonic);
78
- websocket(params?: PhonicTTSWebSocketParams): DataOrError<{
79
- phonicWebSocket: PhonicTTSWebSocket;
71
+ websocket(): DataOrError<{
72
+ phonicWebSocket: PhonicSTSWebSocket;
80
73
  }>;
81
74
  }
82
75
 
@@ -105,7 +98,7 @@ declare class Phonic {
105
98
  readonly baseUrl: string;
106
99
  private readonly headers;
107
100
  readonly voices: Voices;
108
- readonly tts: TextToSpeech;
101
+ readonly sts: SpeechToSpeech;
109
102
  constructor(apiKey: string, config?: PhonicConfig);
110
103
  fetchRequest<T>(path: string, options: FetchOptions): DataOrError<T>;
111
104
  get<T>(path: string): Promise<{
@@ -117,51 +110,4 @@ declare class Phonic {
117
110
  }>;
118
111
  }
119
112
 
120
- type PhonicSTSWebSocketResponseMessage = {
121
- type: "input_text";
122
- text: string;
123
- } | {
124
- type: "audio_chunk";
125
- text: string;
126
- audio: string;
127
- } | {
128
- type: "error";
129
- error: {
130
- message: string;
131
- code?: string;
132
- };
133
- paramErrors?: {
134
- system_prompt?: string;
135
- welcome_message?: string;
136
- voice_id?: string;
137
- input_format?: string;
138
- output_format?: string;
139
- };
140
- };
141
- type OnMessageCallback = (message: PhonicSTSWebSocketResponseMessage) => void;
142
- type OnCloseCallback = (event: WebSocket.CloseEvent) => void;
143
- type OnErrorCallback = (event: WebSocket.ErrorEvent) => void;
144
-
145
- declare class PhonicSTSWebSocket {
146
- private readonly ws;
147
- private onMessageCallback;
148
- private onCloseCallback;
149
- private onErrorCallback;
150
- constructor(ws: WebSocket);
151
- onMessage(callback: OnMessageCallback): void;
152
- onClose(callback: OnCloseCallback): void;
153
- onError(callback: OnErrorCallback): void;
154
- config(message: {
155
- system_prompt?: string;
156
- welcome_message?: string;
157
- voice_id?: string;
158
- input_format?: "pcm_44100" | "mulaw_8000";
159
- output_format?: "pcm_44100" | "mulaw_8000";
160
- }): void;
161
- audioChunk(message: {
162
- audio: string;
163
- }): void;
164
- close(): void;
165
- }
166
-
167
- export { Phonic, PhonicSTSWebSocket, PhonicTTSWebSocket };
113
+ export { Phonic, PhonicSTSWebSocket };
package/dist/index.js CHANGED
@@ -35,13 +35,13 @@ __export(index_exports, {
35
35
  module.exports = __toCommonJS(index_exports);
36
36
 
37
37
  // package.json
38
- var version = "0.5.0";
38
+ var version = "0.6.0";
39
39
 
40
- // src/tts/index.ts
40
+ // src/sts/index.ts
41
41
  var import_ws = __toESM(require("ws"));
42
42
 
43
- // src/tts/websocket.ts
44
- var PhonicTTSWebSocket = class {
43
+ // src/sts/websocket.ts
44
+ var PhonicSTSWebSocket = class {
45
45
  constructor(ws) {
46
46
  this.ws = ws;
47
47
  this.ws.onmessage = (event) => {
@@ -71,9 +71,8 @@ var PhonicTTSWebSocket = class {
71
71
  this.onMessage = this.onMessage.bind(this);
72
72
  this.onClose = this.onClose.bind(this);
73
73
  this.onError = this.onError.bind(this);
74
- this.generate = this.generate.bind(this);
75
- this.flush = this.flush.bind(this);
76
- this.stop = this.stop.bind(this);
74
+ this.config = this.config.bind(this);
75
+ this.audioChunk = this.audioChunk.bind(this);
77
76
  this.close = this.close.bind(this);
78
77
  }
79
78
  onMessageCallback = null;
@@ -88,41 +87,42 @@ var PhonicTTSWebSocket = class {
88
87
  onError(callback) {
89
88
  this.onErrorCallback = callback;
90
89
  }
91
- generate(message) {
90
+ config(message) {
92
91
  this.ws.send(
93
92
  JSON.stringify({
94
- type: "generate",
93
+ type: "config",
95
94
  ...message
96
95
  })
97
96
  );
98
97
  }
99
- flush() {
100
- this.ws.send(JSON.stringify({ type: "flush" }));
101
- }
102
- stop() {
103
- this.ws.send(JSON.stringify({ type: "stop" }));
98
+ audioChunk(message) {
99
+ this.ws.send(
100
+ JSON.stringify({
101
+ type: "audio_chunk",
102
+ ...message
103
+ })
104
+ );
104
105
  }
105
106
  close() {
106
107
  this.ws.close();
107
108
  }
108
109
  };
109
110
 
110
- // src/tts/index.ts
111
- var TextToSpeech = class {
111
+ // src/sts/index.ts
112
+ var SpeechToSpeech = class {
112
113
  constructor(phonic) {
113
114
  this.phonic = phonic;
114
115
  }
115
- async websocket(params) {
116
+ async websocket() {
116
117
  return new Promise((resolve) => {
117
118
  const wsBaseUrl = this.phonic.baseUrl.replace(/^http/, "ws");
118
- const queryString = new URLSearchParams(params).toString();
119
- const ws = new import_ws.default(`${wsBaseUrl}/v1/tts/ws?${queryString}`, {
119
+ const ws = new import_ws.default(`${wsBaseUrl}/v1/sts/ws`, {
120
120
  headers: {
121
121
  Authorization: `Bearer ${this.phonic.apiKey}`
122
122
  }
123
123
  });
124
124
  ws.onopen = () => {
125
- const phonicWebSocket = new PhonicTTSWebSocket(ws);
125
+ const phonicWebSocket = new PhonicSTSWebSocket(ws);
126
126
  resolve({ data: { phonicWebSocket }, error: null });
127
127
  };
128
128
  ws.onerror = (error) => {
@@ -182,7 +182,7 @@ var Phonic = class {
182
182
  baseUrl;
183
183
  headers;
184
184
  voices = new Voices(this);
185
- tts = new TextToSpeech(this);
185
+ sts = new SpeechToSpeech(this);
186
186
  async fetchRequest(path, options) {
187
187
  try {
188
188
  const response = await fetch(`${this.baseUrl}/v1${path}`, {
package/dist/index.mjs CHANGED
@@ -1,11 +1,11 @@
1
1
  // package.json
2
- var version = "0.5.0";
2
+ var version = "0.6.0";
3
3
 
4
- // src/tts/index.ts
4
+ // src/sts/index.ts
5
5
  import WebSocket from "ws";
6
6
 
7
- // src/tts/websocket.ts
8
- var PhonicTTSWebSocket = class {
7
+ // src/sts/websocket.ts
8
+ var PhonicSTSWebSocket = class {
9
9
  constructor(ws) {
10
10
  this.ws = ws;
11
11
  this.ws.onmessage = (event) => {
@@ -35,9 +35,8 @@ var PhonicTTSWebSocket = class {
35
35
  this.onMessage = this.onMessage.bind(this);
36
36
  this.onClose = this.onClose.bind(this);
37
37
  this.onError = this.onError.bind(this);
38
- this.generate = this.generate.bind(this);
39
- this.flush = this.flush.bind(this);
40
- this.stop = this.stop.bind(this);
38
+ this.config = this.config.bind(this);
39
+ this.audioChunk = this.audioChunk.bind(this);
41
40
  this.close = this.close.bind(this);
42
41
  }
43
42
  onMessageCallback = null;
@@ -52,41 +51,42 @@ var PhonicTTSWebSocket = class {
52
51
  onError(callback) {
53
52
  this.onErrorCallback = callback;
54
53
  }
55
- generate(message) {
54
+ config(message) {
56
55
  this.ws.send(
57
56
  JSON.stringify({
58
- type: "generate",
57
+ type: "config",
59
58
  ...message
60
59
  })
61
60
  );
62
61
  }
63
- flush() {
64
- this.ws.send(JSON.stringify({ type: "flush" }));
65
- }
66
- stop() {
67
- this.ws.send(JSON.stringify({ type: "stop" }));
62
+ audioChunk(message) {
63
+ this.ws.send(
64
+ JSON.stringify({
65
+ type: "audio_chunk",
66
+ ...message
67
+ })
68
+ );
68
69
  }
69
70
  close() {
70
71
  this.ws.close();
71
72
  }
72
73
  };
73
74
 
74
- // src/tts/index.ts
75
- var TextToSpeech = class {
75
+ // src/sts/index.ts
76
+ var SpeechToSpeech = class {
76
77
  constructor(phonic) {
77
78
  this.phonic = phonic;
78
79
  }
79
- async websocket(params) {
80
+ async websocket() {
80
81
  return new Promise((resolve) => {
81
82
  const wsBaseUrl = this.phonic.baseUrl.replace(/^http/, "ws");
82
- const queryString = new URLSearchParams(params).toString();
83
- const ws = new WebSocket(`${wsBaseUrl}/v1/tts/ws?${queryString}`, {
83
+ const ws = new WebSocket(`${wsBaseUrl}/v1/sts/ws`, {
84
84
  headers: {
85
85
  Authorization: `Bearer ${this.phonic.apiKey}`
86
86
  }
87
87
  });
88
88
  ws.onopen = () => {
89
- const phonicWebSocket = new PhonicTTSWebSocket(ws);
89
+ const phonicWebSocket = new PhonicSTSWebSocket(ws);
90
90
  resolve({ data: { phonicWebSocket }, error: null });
91
91
  };
92
92
  ws.onerror = (error) => {
@@ -146,7 +146,7 @@ var Phonic = class {
146
146
  baseUrl;
147
147
  headers;
148
148
  voices = new Voices(this);
149
- tts = new TextToSpeech(this);
149
+ sts = new SpeechToSpeech(this);
150
150
  async fetchRequest(path, options) {
151
151
  try {
152
152
  const response = await fetch(`${this.baseUrl}/v1${path}`, {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "phonic",
3
- "version": "0.5.0",
3
+ "version": "0.6.0",
4
4
  "description": "Phonic Node.js SDK",
5
5
  "scripts": {
6
6
  "build": "tsup",
@@ -51,8 +51,7 @@
51
51
  },
52
52
  "keywords": [
53
53
  "phonic",
54
- "text-to-speech",
55
- "tts",
54
+ "speech-to-speech",
56
55
  "javascript",
57
56
  "typescript",
58
57
  "ai",