npm - phonic - Versions diffs - 0.4.0 → 0.6.0 - Mend

phonic 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/LICENSE CHANGED Viewed

@@ -1,4 +1,4 @@
-Copyright (c) 2024 Phonic, Inc.
+Copyright (c) 2025 Phonic, Inc.
 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

package/README.md CHANGED Viewed

@@ -7,7 +7,7 @@ Node.js library for the Phonic API.
 - [Usage](#usage)
   - [Get voices](#get-voices)
   - [Get voice by id](#get-voice-by-id)
-  - [Text-to-speech via WebSocket](#text-to-speech-via-websocket)
+  - [Speech-to-speech via WebSocket](#speech-to-speech-via-websocket)
 ## Installation
@@ -19,7 +19,7 @@ npm i phonic
 Grab an API key from [Phonic settings](https://phonic.co/settings) and pass it to the Phonic constructor.
-```js
+```ts
 import { Phonic } from "phonic";
 const phonic = new Phonic("ph_...");
@@ -29,7 +29,7 @@ const phonic = new Phonic("ph_...");
 ### Get voices
-```js
+```ts
 const { data, error } = await phonic.voices.list({ model: "shasta" });
 if (error === null) {
@@ -40,7 +40,7 @@ if (error === null) {
 ### Get voice by id
-```js
+```ts
 const { data, error } = await phonic.voices.get("meredith");
 if (error === null) {
@@ -48,16 +48,12 @@ if (error === null) {
 }
 ```
-### Text-to-speech via WebSocket
+### Speech-to-speech via WebSocket
 Open a WebSocket connection:
-```js
-const { data, error } = await phonic.tts.websocket({
-  model: "shasta",
-  output_format: "mulaw_8000",
-  voice_id: "meredith",
-});
+```ts
+const { data, error } = await phonic.sts.websocket();
 if (error !== null) {
   throw new Error(error.message);
@@ -67,69 +63,64 @@ if (error !== null) {
 const { phonicWebSocket } = data;
 ```
-Process audio chunks that Phonic sends back to you, by sending them to Twilio, for example:
-```js
-phonicWebSocket.onMessage((message) => {
-  if (message.type === "audio_chunk") {
-    ws.send(
-      JSON.stringify({
-        event: "media",
-        streamSid: "...",
-        media: {
-          payload: message.audio,
-        },
-      }),
-    );
-  }
-});
-```
-Send text chunks to Phonic for audio generation as you receive them from LLM:
-```js
-const stream = await openai.chat.completions.create(...);
+Send config params for the conversation:
-for await (const chunk of stream) {
-  const text = chunk.choices[0]?.delta?.content || "";
+```ts
+phonicWebSocket.config({
+  input_format: "mulaw_8000",
-  if (text) {
-    phonicWebSocket.generate({ text });
-  }
-}
+  // Optional fields
+  system_prompt: "You are a helpful assistant.",
+  welcome_message: "Hello, how can I help you?",
+  voice_id: "meredith",
+  output_format: "mulaw_8000"
+});
 ```
-Tell Phonic to finish generating audio for all text chunks you've sent:
+Stream input (user) audio chunks:
-```js
-phonicWebSocket.flush();
+```ts
+phonicWebSocket.audioChunk({
+  audio: "...", // base64 encoded audio chunk
+});
 ```
-You can also tell Phonic to stop sending audio chunks back, e.g. if the user interrupts the conversation:
+Process messages that Phonic sends back to you:
-```js
-phonicWebSocket.stop();
+```ts
+phonicWebSocket.onMessage((message) => {
+  switch (message.type) {
+    case "input_text": {
+      console.log(`User: ${message.text}`);
+      break;
+    }
+    case "audio_chunk": {
+      // Send the audio chunk to Twilio, for example:
+      twilioWebSocket.send(
+        JSON.stringify({
+          event: "media",
+          streamSid: "...",
+          media: {
+            payload: message.audio,
+          },
+        }),
+      );
+      break;
+    }
+  }
+});
 ```
-To close the WebSocket connection:
+To end the conversation, close the WebSocket:
-```js
+```ts
 phonicWebSocket.close();
 ```
-To know when the last audio chunk has been received:
-```js
-phonicWebSocket.onMessage((message) => {
-  if (message.type === "flushed") {
-    console.log("Last audio chunk received");
-  }
-});
-```
 You can also listen for close and error events:
-```js
+```ts
 phonicWebSocket.onClose((event) => {
   console.log(
     `Phonic WebSocket closed with code ${event.code} and reason "${event.reason}"`,

package/dist/index.d.mts CHANGED Viewed

@@ -18,24 +18,13 @@ type DataOrError<T> = Promise<{
     error: ErrorResponse;
 }>;
-type PhonicWebSocketParams = {
-    model?: string;
-    output_format?: string;
-    voice_id?: string;
-};
-type PhonicWebSocketResponseMessage = {
-    type: "config";
-    model: string;
-    output_format: string;
-    voice_id: string;
+type PhonicSTSWebSocketResponseMessage = {
+    type: "input_text";
+    text: string;
 } | {
     type: "audio_chunk";
-    audio: string;
     text: string;
-} | {
-    type: "flush_confirm";
-} | {
-    type: "stop_confirm";
+    audio: string;
 } | {
     type: "error";
     error: {
@@ -43,18 +32,18 @@ type PhonicWebSocketResponseMessage = {
         code?: string;
     };
     paramErrors?: {
-        model?: string;
-        output_format?: string;
+        system_prompt?: string;
+        welcome_message?: string;
         voice_id?: string;
-        text?: string;
-        speed?: string;
+        input_format?: string;
+        output_format?: string;
     };
 };
-type OnMessageCallback = (message: PhonicWebSocketResponseMessage) => void;
+type OnMessageCallback = (message: PhonicSTSWebSocketResponseMessage) => void;
 type OnCloseCallback = (event: WebSocket.CloseEvent) => void;
 type OnErrorCallback = (event: WebSocket.ErrorEvent) => void;
-declare class PhonicWebSocket {
+declare class PhonicSTSWebSocket {
     private readonly ws;
     private onMessageCallback;
     private onCloseCallback;
@@ -63,20 +52,24 @@ declare class PhonicWebSocket {
     onMessage(callback: OnMessageCallback): void;
     onClose(callback: OnCloseCallback): void;
     onError(callback: OnErrorCallback): void;
-    generate(message: {
-        text: string;
-        speed?: number;
+    config(message: {
+        system_prompt?: string;
+        welcome_message?: string;
+        voice_id?: string;
+        input_format?: "pcm_44100" | "mulaw_8000";
+        output_format?: "pcm_44100" | "mulaw_8000";
+    }): void;
+    audioChunk(message: {
+        audio: string;
     }): void;
-    flush(): void;
-    stop(): void;
     close(): void;
 }
-declare class TextToSpeech {
+declare class SpeechToSpeech {
     private readonly phonic;
     constructor(phonic: Phonic);
-    websocket(params?: PhonicWebSocketParams): DataOrError<{
-        phonicWebSocket: PhonicWebSocket;
+    websocket(): DataOrError<{
+        phonicWebSocket: PhonicSTSWebSocket;
     }>;
 }
@@ -105,7 +98,7 @@ declare class Phonic {
     readonly baseUrl: string;
     private readonly headers;
     readonly voices: Voices;
-    readonly tts: TextToSpeech;
+    readonly sts: SpeechToSpeech;
     constructor(apiKey: string, config?: PhonicConfig);
     fetchRequest<T>(path: string, options: FetchOptions): DataOrError<T>;
     get<T>(path: string): Promise<{
@@ -117,4 +110,4 @@ declare class Phonic {
     }>;
 }
-export { Phonic, PhonicWebSocket };
+export { Phonic, PhonicSTSWebSocket };

package/dist/index.d.ts CHANGED Viewed

@@ -18,24 +18,13 @@ type DataOrError<T> = Promise<{
     error: ErrorResponse;
 }>;
-type PhonicWebSocketParams = {
-    model?: string;
-    output_format?: string;
-    voice_id?: string;
-};
-type PhonicWebSocketResponseMessage = {
-    type: "config";
-    model: string;
-    output_format: string;
-    voice_id: string;
+type PhonicSTSWebSocketResponseMessage = {
+    type: "input_text";
+    text: string;
 } | {
     type: "audio_chunk";
-    audio: string;
     text: string;
-} | {
-    type: "flush_confirm";
-} | {
-    type: "stop_confirm";
+    audio: string;
 } | {
     type: "error";
     error: {
@@ -43,18 +32,18 @@ type PhonicWebSocketResponseMessage = {
         code?: string;
     };
     paramErrors?: {
-        model?: string;
-        output_format?: string;
+        system_prompt?: string;
+        welcome_message?: string;
         voice_id?: string;
-        text?: string;
-        speed?: string;
+        input_format?: string;
+        output_format?: string;
     };
 };
-type OnMessageCallback = (message: PhonicWebSocketResponseMessage) => void;
+type OnMessageCallback = (message: PhonicSTSWebSocketResponseMessage) => void;
 type OnCloseCallback = (event: WebSocket.CloseEvent) => void;
 type OnErrorCallback = (event: WebSocket.ErrorEvent) => void;
-declare class PhonicWebSocket {
+declare class PhonicSTSWebSocket {
     private readonly ws;
     private onMessageCallback;
     private onCloseCallback;
@@ -63,20 +52,24 @@ declare class PhonicWebSocket {
     onMessage(callback: OnMessageCallback): void;
     onClose(callback: OnCloseCallback): void;
     onError(callback: OnErrorCallback): void;
-    generate(message: {
-        text: string;
-        speed?: number;
+    config(message: {
+        system_prompt?: string;
+        welcome_message?: string;
+        voice_id?: string;
+        input_format?: "pcm_44100" | "mulaw_8000";
+        output_format?: "pcm_44100" | "mulaw_8000";
+    }): void;
+    audioChunk(message: {
+        audio: string;
     }): void;
-    flush(): void;
-    stop(): void;
     close(): void;
 }
-declare class TextToSpeech {
+declare class SpeechToSpeech {
     private readonly phonic;
     constructor(phonic: Phonic);
-    websocket(params?: PhonicWebSocketParams): DataOrError<{
-        phonicWebSocket: PhonicWebSocket;
+    websocket(): DataOrError<{
+        phonicWebSocket: PhonicSTSWebSocket;
     }>;
 }
@@ -105,7 +98,7 @@ declare class Phonic {
     readonly baseUrl: string;
     private readonly headers;
     readonly voices: Voices;
-    readonly tts: TextToSpeech;
+    readonly sts: SpeechToSpeech;
     constructor(apiKey: string, config?: PhonicConfig);
     fetchRequest<T>(path: string, options: FetchOptions): DataOrError<T>;
     get<T>(path: string): Promise<{
@@ -117,4 +110,4 @@ declare class Phonic {
     }>;
 }
-export { Phonic, PhonicWebSocket };
+export { Phonic, PhonicSTSWebSocket };

package/dist/index.js CHANGED Viewed

@@ -35,13 +35,13 @@ __export(index_exports, {
 module.exports = __toCommonJS(index_exports);
 // package.json
-var version = "0.4.0";
+var version = "0.6.0";
-// src/tts/index.ts
+// src/sts/index.ts
 var import_ws = __toESM(require("ws"));
-// src/tts/websocket.ts
-var PhonicWebSocket = class {
+// src/sts/websocket.ts
+var PhonicSTSWebSocket = class {
   constructor(ws) {
     this.ws = ws;
     this.ws.onmessage = (event) => {
@@ -51,7 +51,9 @@ var PhonicWebSocket = class {
       if (typeof event.data !== "string") {
         throw new Error("Received non-string message");
       }
-      const dataObj = JSON.parse(event.data);
+      const dataObj = JSON.parse(
+        event.data
+      );
       this.onMessageCallback(dataObj);
     };
     this.ws.onclose = (event) => {
@@ -67,9 +69,10 @@ var PhonicWebSocket = class {
       this.onErrorCallback(event);
     };
     this.onMessage = this.onMessage.bind(this);
-    this.generate = this.generate.bind(this);
-    this.flush = this.flush.bind(this);
-    this.stop = this.stop.bind(this);
+    this.onClose = this.onClose.bind(this);
+    this.onError = this.onError.bind(this);
+    this.config = this.config.bind(this);
+    this.audioChunk = this.audioChunk.bind(this);
     this.close = this.close.bind(this);
   }
   onMessageCallback = null;
@@ -84,41 +87,42 @@ var PhonicWebSocket = class {
   onError(callback) {
     this.onErrorCallback = callback;
   }
-  generate(message) {
+  config(message) {
     this.ws.send(
       JSON.stringify({
-        type: "generate",
+        type: "config",
         ...message
       })
     );
   }
-  flush() {
-    this.ws.send(JSON.stringify({ type: "flush" }));
-  }
-  stop() {
-    this.ws.send(JSON.stringify({ type: "stop" }));
+  audioChunk(message) {
+    this.ws.send(
+      JSON.stringify({
+        type: "audio_chunk",
+        ...message
+      })
+    );
   }
   close() {
     this.ws.close();
   }
 };
-// src/tts/index.ts
-var TextToSpeech = class {
+// src/sts/index.ts
+var SpeechToSpeech = class {
   constructor(phonic) {
     this.phonic = phonic;
   }
-  async websocket(params) {
+  async websocket() {
     return new Promise((resolve) => {
       const wsBaseUrl = this.phonic.baseUrl.replace(/^http/, "ws");
-      const queryString = new URLSearchParams(params).toString();
-      const ws = new import_ws.default(`${wsBaseUrl}/v1/tts/ws?${queryString}`, {
+      const ws = new import_ws.default(`${wsBaseUrl}/v1/sts/ws`, {
         headers: {
           Authorization: `Bearer ${this.phonic.apiKey}`
         }
       });
       ws.onopen = () => {
-        const phonicWebSocket = new PhonicWebSocket(ws);
+        const phonicWebSocket = new PhonicSTSWebSocket(ws);
         resolve({ data: { phonicWebSocket }, error: null });
       };
       ws.onerror = (error) => {
@@ -178,7 +182,7 @@ var Phonic = class {
   baseUrl;
   headers;
   voices = new Voices(this);
-  tts = new TextToSpeech(this);
+  sts = new SpeechToSpeech(this);
   async fetchRequest(path, options) {
     try {
       const response = await fetch(`${this.baseUrl}/v1${path}`, {

package/dist/index.mjs CHANGED Viewed

@@ -1,11 +1,11 @@
 // package.json
-var version = "0.4.0";
+var version = "0.6.0";
-// src/tts/index.ts
+// src/sts/index.ts
 import WebSocket from "ws";
-// src/tts/websocket.ts
-var PhonicWebSocket = class {
+// src/sts/websocket.ts
+var PhonicSTSWebSocket = class {
   constructor(ws) {
     this.ws = ws;
     this.ws.onmessage = (event) => {
@@ -15,7 +15,9 @@ var PhonicWebSocket = class {
       if (typeof event.data !== "string") {
         throw new Error("Received non-string message");
       }
-      const dataObj = JSON.parse(event.data);
+      const dataObj = JSON.parse(
+        event.data
+      );
       this.onMessageCallback(dataObj);
     };
     this.ws.onclose = (event) => {
@@ -31,9 +33,10 @@ var PhonicWebSocket = class {
       this.onErrorCallback(event);
     };
     this.onMessage = this.onMessage.bind(this);
-    this.generate = this.generate.bind(this);
-    this.flush = this.flush.bind(this);
-    this.stop = this.stop.bind(this);
+    this.onClose = this.onClose.bind(this);
+    this.onError = this.onError.bind(this);
+    this.config = this.config.bind(this);
+    this.audioChunk = this.audioChunk.bind(this);
     this.close = this.close.bind(this);
   }
   onMessageCallback = null;
@@ -48,41 +51,42 @@ var PhonicWebSocket = class {
   onError(callback) {
     this.onErrorCallback = callback;
   }
-  generate(message) {
+  config(message) {
     this.ws.send(
       JSON.stringify({
-        type: "generate",
+        type: "config",
         ...message
       })
     );
   }
-  flush() {
-    this.ws.send(JSON.stringify({ type: "flush" }));
-  }
-  stop() {
-    this.ws.send(JSON.stringify({ type: "stop" }));
+  audioChunk(message) {
+    this.ws.send(
+      JSON.stringify({
+        type: "audio_chunk",
+        ...message
+      })
+    );
   }
   close() {
     this.ws.close();
   }
 };
-// src/tts/index.ts
-var TextToSpeech = class {
+// src/sts/index.ts
+var SpeechToSpeech = class {
   constructor(phonic) {
     this.phonic = phonic;
   }
-  async websocket(params) {
+  async websocket() {
     return new Promise((resolve) => {
       const wsBaseUrl = this.phonic.baseUrl.replace(/^http/, "ws");
-      const queryString = new URLSearchParams(params).toString();
-      const ws = new WebSocket(`${wsBaseUrl}/v1/tts/ws?${queryString}`, {
+      const ws = new WebSocket(`${wsBaseUrl}/v1/sts/ws`, {
         headers: {
           Authorization: `Bearer ${this.phonic.apiKey}`
         }
       });
       ws.onopen = () => {
-        const phonicWebSocket = new PhonicWebSocket(ws);
+        const phonicWebSocket = new PhonicSTSWebSocket(ws);
         resolve({ data: { phonicWebSocket }, error: null });
       };
       ws.onerror = (error) => {
@@ -142,7 +146,7 @@ var Phonic = class {
   baseUrl;
   headers;
   voices = new Voices(this);
-  tts = new TextToSpeech(this);
+  sts = new SpeechToSpeech(this);
   async fetchRequest(path, options) {
     try {
       const response = await fetch(`${this.baseUrl}/v1${path}`, {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "phonic",
-  "version": "0.4.0",
+  "version": "0.6.0",
   "description": "Phonic Node.js SDK",
   "scripts": {
     "build": "tsup",
@@ -33,13 +33,13 @@
     "url": "https://github.com/Phonic-Co/phonic-node/issues"
   },
   "dependencies": {
-    "ws": "8.18.0"
+    "ws": "8.18.1"
   },
   "devDependencies": {
     "@biomejs/biome": "1.9.4",
-    "@changesets/changelog-github": "0.5.0",
-    "@changesets/cli": "2.27.12",
-    "@types/bun": "1.2.2",
+    "@changesets/changelog-github": "0.5.1",
+    "@changesets/cli": "2.28.1",
+    "@types/bun": "1.2.3",
     "tsup": "8.3.6",
     "typescript": "5.7.3",
     "zod": "3.24.2"
@@ -51,8 +51,7 @@
   },
   "keywords": [
     "phonic",
-    "text-to-speech",
-    "tts",
+    "speech-to-speech",
     "javascript",
     "typescript",
     "ai",