phonic 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -50,48 +50,97 @@ if (error === null) {
50
50
 
51
51
  ### Text-to-speech via WebSocket
52
52
 
53
+ Open a WebSocket connection:
54
+
53
55
  ```js
54
- const { data, error } = await phonic.tts.websocket();
56
+ const { data, error } = await phonic.tts.websocket({
57
+ model: "shasta",
58
+ output_format: "mulaw_8000",
59
+ voice_id: "australian-man",
60
+ });
55
61
 
56
- if (error === null) {
57
- const { phonicWebSocket } = data;
58
- const stream = phonicWebSocket.send({
59
- script: "How can I help you today?", // 600 characters max
60
- output_format: "mulaw_8000", // or "pcm_44100"
61
- });
62
-
63
- for await (const data of stream) {
64
- if (data instanceof Buffer) {
65
- // Do something with the audio chunk,
66
- // e.g. send `data.toString("base64")` to Twilio.
67
- }
62
+ if (error !== null) {
63
+ throw new Error(error.message);
64
+ }
65
+
66
+ // Here we know that the WebSocket connection is open.
67
+ const { phonicWebSocket } = data;
68
+ ```
69
+
70
+ Process the audio chunks that Phonic sends back to you, for example by forwarding them to Twilio:
71
+
72
+ ```js
73
+ phonicWebSocket.onMessage((message) => {
74
+ if (message.type === "audio_chunk") {
75
+ ws.send(
76
+ JSON.stringify({
77
+ event: "media",
78
+ streamSid: "...",
79
+ media: {
80
+ payload: message.audio,
81
+ },
82
+ }),
83
+ );
68
84
  }
85
+ });
86
+ ```
69
87
 
70
- phonicWebSocket.close();
88
+ Send text chunks to Phonic for audio generation as you receive them from an LLM:
89
+
90
+ ```js
91
+ const stream = await openai.chat.completions.create(...);
92
+
93
+ for await (const chunk of stream) {
94
+ const text = chunk.choices[0]?.delta?.content || "";
95
+
96
+ if (text) {
97
+ phonicWebSocket.generate({ text });
98
+ }
71
99
  }
72
100
  ```
73
101
 
74
- To perform other work while receiving chunks, use:
102
+ Tell Phonic to finish generating audio for all text chunks you've sent:
103
+
104
+ ```js
105
+ phonicWebSocket.flush();
106
+ ```
107
+
108
+ You can also tell Phonic to stop sending audio chunks back, e.g. if the user interrupts the conversation:
109
+
110
+ ```js
111
+ phonicWebSocket.stop();
112
+ ```
113
+
114
+ To close the WebSocket connection:
115
+
116
+ ```js
117
+ phonicWebSocket.close();
118
+ ```
119
+
120
+ To know when the last audio chunk has been received:
75
121
 
76
122
  ```js
77
- phonicWebSocket.onMessage((data) => {
78
- if (data instanceof Buffer) {
79
- // Do something with the audio chunk,
80
- // e.g. send `data.toString("base64")` to Twilio.
123
+ phonicWebSocket.onMessage((message) => {
124
+ if (message.type === "flushed") {
125
+ console.log("Last audio chunk received");
81
126
  }
82
127
  });
128
+ ```
83
129
 
84
- phonicWebSocket.send({
85
- script: "How can I help you today?",
86
- output_format: "mulaw_8000",
87
- });
130
+ You can also listen for close and error events:
88
131
 
89
- // Perform other work here
132
+ ```js
133
+ phonicWebSocket.onClose((event) => {
134
+ console.log(
135
+ `Phonic WebSocket closed with code ${event.code} and reason "${event.reason}"`,
136
+ );
137
+ });
90
138
 
91
- await phonicWebSocket.streamEnded; // This Promise will be resolved once the last chunk is received
139
+ phonicWebSocket.onError((event) => {
140
+ console.log(`Error from Phonic WebSocket: ${event.message}`);
141
+ });
92
142
  ```
93
143
 
94
-
95
144
  ## License
96
145
 
97
146
  MIT
package/dist/index.d.mts CHANGED
@@ -18,39 +18,62 @@ type DataOrError<T> = Promise<{
18
18
  error: ErrorResponse;
19
19
  }>;
20
20
 
21
- type PhonicWebSocketMessage = {
22
- script: string;
23
- output_format: "pcm_44100" | "mulaw_8000";
21
+ type PhonicWebSocketParams = {
22
+ model?: string;
23
+ output_format?: string;
24
+ voice_id?: string;
24
25
  };
25
26
  type PhonicWebSocketResponseMessage = {
26
- type: "stream-ended";
27
- error?: {
27
+ type: "config";
28
+ model: string;
29
+ output_format: string;
30
+ voice_id: string;
31
+ } | {
32
+ type: "audio_chunk";
33
+ audio: string;
34
+ text: string;
35
+ } | {
36
+ type: "flushed";
37
+ } | {
38
+ type: "error";
39
+ error: {
28
40
  message: string;
29
41
  code?: string;
30
42
  };
31
43
  paramErrors?: {
32
- script?: string;
44
+ model?: string;
33
45
  output_format?: string;
46
+ voice_id?: string;
47
+ text?: string;
48
+ speed?: string;
34
49
  };
35
50
  };
36
- type OnMessageCallback = (data: PhonicWebSocketResponseMessage | Buffer) => void;
51
+ type OnMessageCallback = (message: PhonicWebSocketResponseMessage) => void;
52
+ type OnCloseCallback = (event: WebSocket.CloseEvent) => void;
53
+ type OnErrorCallback = (event: WebSocket.ErrorEvent) => void;
37
54
 
38
55
  declare class PhonicWebSocket {
39
56
  private readonly ws;
40
57
  private onMessageCallback;
41
- private streamEndedResolve;
42
- readonly streamEnded: Promise<void>;
43
- private streamController;
58
+ private onCloseCallback;
59
+ private onErrorCallback;
44
60
  constructor(ws: WebSocket);
45
61
  onMessage(callback: OnMessageCallback): void;
46
- send(message: PhonicWebSocketMessage): ReadableStream<PhonicWebSocketResponseMessage | Buffer>;
62
+ onClose(callback: OnCloseCallback): void;
63
+ onError(callback: OnErrorCallback): void;
64
+ generate(message: {
65
+ text: string;
66
+ speed?: number;
67
+ }): void;
68
+ flush(): void;
69
+ stop(): void;
47
70
  close(): void;
48
71
  }
49
72
 
50
73
  declare class TextToSpeech {
51
74
  private readonly phonic;
52
75
  constructor(phonic: Phonic);
53
- websocket(): DataOrError<{
76
+ websocket(params?: PhonicWebSocketParams): DataOrError<{
54
77
  phonicWebSocket: PhonicWebSocket;
55
78
  }>;
56
79
  }
package/dist/index.d.ts CHANGED
@@ -18,39 +18,62 @@ type DataOrError<T> = Promise<{
18
18
  error: ErrorResponse;
19
19
  }>;
20
20
 
21
- type PhonicWebSocketMessage = {
22
- script: string;
23
- output_format: "pcm_44100" | "mulaw_8000";
21
+ type PhonicWebSocketParams = {
22
+ model?: string;
23
+ output_format?: string;
24
+ voice_id?: string;
24
25
  };
25
26
  type PhonicWebSocketResponseMessage = {
26
- type: "stream-ended";
27
- error?: {
27
+ type: "config";
28
+ model: string;
29
+ output_format: string;
30
+ voice_id: string;
31
+ } | {
32
+ type: "audio_chunk";
33
+ audio: string;
34
+ text: string;
35
+ } | {
36
+ type: "flushed";
37
+ } | {
38
+ type: "error";
39
+ error: {
28
40
  message: string;
29
41
  code?: string;
30
42
  };
31
43
  paramErrors?: {
32
- script?: string;
44
+ model?: string;
33
45
  output_format?: string;
46
+ voice_id?: string;
47
+ text?: string;
48
+ speed?: string;
34
49
  };
35
50
  };
36
- type OnMessageCallback = (data: PhonicWebSocketResponseMessage | Buffer) => void;
51
+ type OnMessageCallback = (message: PhonicWebSocketResponseMessage) => void;
52
+ type OnCloseCallback = (event: WebSocket.CloseEvent) => void;
53
+ type OnErrorCallback = (event: WebSocket.ErrorEvent) => void;
37
54
 
38
55
  declare class PhonicWebSocket {
39
56
  private readonly ws;
40
57
  private onMessageCallback;
41
- private streamEndedResolve;
42
- readonly streamEnded: Promise<void>;
43
- private streamController;
58
+ private onCloseCallback;
59
+ private onErrorCallback;
44
60
  constructor(ws: WebSocket);
45
61
  onMessage(callback: OnMessageCallback): void;
46
- send(message: PhonicWebSocketMessage): ReadableStream<PhonicWebSocketResponseMessage | Buffer>;
62
+ onClose(callback: OnCloseCallback): void;
63
+ onError(callback: OnErrorCallback): void;
64
+ generate(message: {
65
+ text: string;
66
+ speed?: number;
67
+ }): void;
68
+ flush(): void;
69
+ stop(): void;
47
70
  close(): void;
48
71
  }
49
72
 
50
73
  declare class TextToSpeech {
51
74
  private readonly phonic;
52
75
  constructor(phonic: Phonic);
53
- websocket(): DataOrError<{
76
+ websocket(params?: PhonicWebSocketParams): DataOrError<{
54
77
  phonicWebSocket: PhonicWebSocket;
55
78
  }>;
56
79
  }
package/dist/index.js CHANGED
@@ -35,7 +35,7 @@ __export(src_exports, {
35
35
  module.exports = __toCommonJS(src_exports);
36
36
 
37
37
  // package.json
38
- var version = "0.1.3";
38
+ var version = "0.2.0";
39
39
 
40
40
  // src/tts/index.ts
41
41
  var import_ws = __toESM(require("ws"));
@@ -45,45 +45,58 @@ var PhonicWebSocket = class {
45
45
  constructor(ws) {
46
46
  this.ws = ws;
47
47
  this.ws.onmessage = (event) => {
48
- if (typeof event.data === "string") {
49
- const dataObj = JSON.parse(
50
- event.data
51
- );
52
- this.onMessageCallback?.(dataObj);
53
- if (dataObj.type === "stream-ended") {
54
- this.streamEndedResolve();
55
- this.streamController?.close();
56
- } else {
57
- this.streamController?.enqueue(dataObj);
58
- }
59
- } else if (event.data instanceof Buffer) {
60
- this.onMessageCallback?.(event.data);
61
- this.streamController?.enqueue(event.data);
48
+ if (this.onMessageCallback === null) {
49
+ return;
50
+ }
51
+ if (typeof event.data !== "string") {
52
+ throw new Error("Received non-string message");
53
+ }
54
+ const dataObj = JSON.parse(event.data);
55
+ this.onMessageCallback(dataObj);
56
+ };
57
+ this.ws.onclose = (event) => {
58
+ if (this.onCloseCallback === null) {
59
+ return;
62
60
  }
61
+ this.onCloseCallback(event);
63
62
  };
63
+ this.ws.onerror = (event) => {
64
+ if (this.onErrorCallback === null) {
65
+ return;
66
+ }
67
+ this.onErrorCallback(event);
68
+ };
69
+ this.onMessage = this.onMessage.bind(this);
70
+ this.generate = this.generate.bind(this);
71
+ this.flush = this.flush.bind(this);
72
+ this.stop = this.stop.bind(this);
73
+ this.close = this.close.bind(this);
64
74
  }
65
75
  onMessageCallback = null;
66
- streamEndedResolve = () => {
67
- };
68
- streamEnded = new Promise((resolve) => {
69
- this.streamEndedResolve = resolve;
70
- });
71
- streamController = null;
76
+ onCloseCallback = null;
77
+ onErrorCallback = null;
72
78
  onMessage(callback) {
73
79
  this.onMessageCallback = callback;
74
80
  }
75
- send(message) {
76
- const self = this;
77
- try {
78
- self.streamController?.close();
79
- } catch (error) {
80
- }
81
- this.ws.send(JSON.stringify(message));
82
- return new ReadableStream({
83
- start(controller) {
84
- self.streamController = controller;
85
- }
86
- });
81
+ onClose(callback) {
82
+ this.onCloseCallback = callback;
83
+ }
84
+ onError(callback) {
85
+ this.onErrorCallback = callback;
86
+ }
87
+ generate(message) {
88
+ this.ws.send(
89
+ JSON.stringify({
90
+ type: "generate",
91
+ ...message
92
+ })
93
+ );
94
+ }
95
+ flush() {
96
+ this.ws.send(JSON.stringify({ type: "flush" }));
97
+ }
98
+ stop() {
99
+ this.ws.send(JSON.stringify({ type: "stop" }));
87
100
  }
88
101
  close() {
89
102
  this.ws.close();
@@ -95,10 +108,11 @@ var TextToSpeech = class {
95
108
  constructor(phonic) {
96
109
  this.phonic = phonic;
97
110
  }
98
- async websocket() {
111
+ async websocket(params) {
99
112
  return new Promise((resolve) => {
100
113
  const wsBaseUrl = this.phonic.baseUrl.replace(/^http/, "ws");
101
- const ws = new import_ws.default(`${wsBaseUrl}/v1/tts/ws`, {
114
+ const queryString = new URLSearchParams(params).toString();
115
+ const ws = new import_ws.default(`${wsBaseUrl}/v1/tts/ws?${queryString}`, {
102
116
  headers: {
103
117
  Authorization: `Bearer ${this.phonic.apiKey}`
104
118
  }
package/dist/index.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  // package.json
2
- var version = "0.1.3";
2
+ var version = "0.2.0";
3
3
 
4
4
  // src/tts/index.ts
5
5
  import WebSocket from "ws";
@@ -9,45 +9,58 @@ var PhonicWebSocket = class {
9
9
  constructor(ws) {
10
10
  this.ws = ws;
11
11
  this.ws.onmessage = (event) => {
12
- if (typeof event.data === "string") {
13
- const dataObj = JSON.parse(
14
- event.data
15
- );
16
- this.onMessageCallback?.(dataObj);
17
- if (dataObj.type === "stream-ended") {
18
- this.streamEndedResolve();
19
- this.streamController?.close();
20
- } else {
21
- this.streamController?.enqueue(dataObj);
22
- }
23
- } else if (event.data instanceof Buffer) {
24
- this.onMessageCallback?.(event.data);
25
- this.streamController?.enqueue(event.data);
12
+ if (this.onMessageCallback === null) {
13
+ return;
14
+ }
15
+ if (typeof event.data !== "string") {
16
+ throw new Error("Received non-string message");
17
+ }
18
+ const dataObj = JSON.parse(event.data);
19
+ this.onMessageCallback(dataObj);
20
+ };
21
+ this.ws.onclose = (event) => {
22
+ if (this.onCloseCallback === null) {
23
+ return;
26
24
  }
25
+ this.onCloseCallback(event);
27
26
  };
27
+ this.ws.onerror = (event) => {
28
+ if (this.onErrorCallback === null) {
29
+ return;
30
+ }
31
+ this.onErrorCallback(event);
32
+ };
33
+ this.onMessage = this.onMessage.bind(this);
34
+ this.generate = this.generate.bind(this);
35
+ this.flush = this.flush.bind(this);
36
+ this.stop = this.stop.bind(this);
37
+ this.close = this.close.bind(this);
28
38
  }
29
39
  onMessageCallback = null;
30
- streamEndedResolve = () => {
31
- };
32
- streamEnded = new Promise((resolve) => {
33
- this.streamEndedResolve = resolve;
34
- });
35
- streamController = null;
40
+ onCloseCallback = null;
41
+ onErrorCallback = null;
36
42
  onMessage(callback) {
37
43
  this.onMessageCallback = callback;
38
44
  }
39
- send(message) {
40
- const self = this;
41
- try {
42
- self.streamController?.close();
43
- } catch (error) {
44
- }
45
- this.ws.send(JSON.stringify(message));
46
- return new ReadableStream({
47
- start(controller) {
48
- self.streamController = controller;
49
- }
50
- });
45
+ onClose(callback) {
46
+ this.onCloseCallback = callback;
47
+ }
48
+ onError(callback) {
49
+ this.onErrorCallback = callback;
50
+ }
51
+ generate(message) {
52
+ this.ws.send(
53
+ JSON.stringify({
54
+ type: "generate",
55
+ ...message
56
+ })
57
+ );
58
+ }
59
+ flush() {
60
+ this.ws.send(JSON.stringify({ type: "flush" }));
61
+ }
62
+ stop() {
63
+ this.ws.send(JSON.stringify({ type: "stop" }));
51
64
  }
52
65
  close() {
53
66
  this.ws.close();
@@ -59,10 +72,11 @@ var TextToSpeech = class {
59
72
  constructor(phonic) {
60
73
  this.phonic = phonic;
61
74
  }
62
- async websocket() {
75
+ async websocket(params) {
63
76
  return new Promise((resolve) => {
64
77
  const wsBaseUrl = this.phonic.baseUrl.replace(/^http/, "ws");
65
- const ws = new WebSocket(`${wsBaseUrl}/v1/tts/ws`, {
78
+ const queryString = new URLSearchParams(params).toString();
79
+ const ws = new WebSocket(`${wsBaseUrl}/v1/tts/ws?${queryString}`, {
66
80
  headers: {
67
81
  Authorization: `Bearer ${this.phonic.apiKey}`
68
82
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "phonic",
3
- "version": "0.1.3",
3
+ "version": "0.2.0",
4
4
  "description": "Phonic Node.js SDK",
5
5
  "scripts": {
6
6
  "build": "tsup",
@@ -38,11 +38,11 @@
38
38
  "devDependencies": {
39
39
  "@biomejs/biome": "1.9.4",
40
40
  "@changesets/changelog-github": "0.5.0",
41
- "@changesets/cli": "2.27.10",
41
+ "@changesets/cli": "2.27.11",
42
42
  "@types/bun": "1.1.14",
43
43
  "tsup": "8.3.5",
44
44
  "typescript": "5.7.2",
45
- "zod": "3.23.8"
45
+ "zod": "3.24.1"
46
46
  },
47
47
  "files": ["dist/**"],
48
48
  "author": {