agrasya-voice-sdk 1.0.0 → 1.0.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -30,6 +30,8 @@ interface AgrasyaEvents {
30
30
  onAudioLevel?: (level: number) => void;
31
31
  /** Emitted when the connection status changes */
32
32
  onStatusChange?: (status: "idle" | "initializing" | "connecting" | "connected" | "error") => void;
33
+ /** Emitted when the call duration updates */
34
+ onCallDurationUpdate?: (duration: number) => void;
33
35
  }
34
36
 
35
37
  /**
@@ -41,22 +43,22 @@ declare class AgrasyaVoiceSDK {
41
43
  private peerConnection;
42
44
  private dataChannel;
43
45
  private audioStream;
46
+ private socket;
44
47
  private sessionId;
45
- private status;
48
+ private ephemeralKey;
49
+ private callDuration;
50
+ private durationInterval;
46
51
  constructor(config: AgrasyaSDKConfig, events?: AgrasyaEvents);
47
52
  private log;
48
53
  private setStatus;
49
- /**
50
- * Starts a voice session.
51
- */
52
- start(): Promise<void>;
53
- /**
54
- * Stops the current voice session.
55
- */
56
- stop(): Promise<void>;
54
+ private startTimer;
55
+ private stopTimer;
57
56
  private initializeBackendSession;
58
- private connectWebRTC;
59
- private notifySessionEnd;
57
+ private setupWebRTC;
58
+ private setupSocketListeners;
59
+ startVoiceSession(language: string): Promise<void>;
60
+ stopVoiceSession(): Promise<void>;
61
+ private cleanup;
60
62
  }
61
63
 
62
64
  export { AgrasyaVoiceSDK };
package/dist/index.d.ts CHANGED
@@ -30,6 +30,8 @@ interface AgrasyaEvents {
30
30
  onAudioLevel?: (level: number) => void;
31
31
  /** Emitted when the connection status changes */
32
32
  onStatusChange?: (status: "idle" | "initializing" | "connecting" | "connected" | "error") => void;
33
+ /** Emitted when the call duration updates */
34
+ onCallDurationUpdate?: (duration: number) => void;
33
35
  }
34
36
 
35
37
  /**
@@ -41,22 +43,22 @@ declare class AgrasyaVoiceSDK {
41
43
  private peerConnection;
42
44
  private dataChannel;
43
45
  private audioStream;
46
+ private socket;
44
47
  private sessionId;
45
- private status;
48
+ private ephemeralKey;
49
+ private callDuration;
50
+ private durationInterval;
46
51
  constructor(config: AgrasyaSDKConfig, events?: AgrasyaEvents);
47
52
  private log;
48
53
  private setStatus;
49
- /**
50
- * Starts a voice session.
51
- */
52
- start(): Promise<void>;
53
- /**
54
- * Stops the current voice session.
55
- */
56
- stop(): Promise<void>;
54
+ private startTimer;
55
+ private stopTimer;
57
56
  private initializeBackendSession;
58
- private connectWebRTC;
59
- private notifySessionEnd;
57
+ private setupWebRTC;
58
+ private setupSocketListeners;
59
+ startVoiceSession(language: string): Promise<void>;
60
+ stopVoiceSession(): Promise<void>;
61
+ private cleanup;
60
62
  }
61
63
 
62
64
  export { AgrasyaVoiceSDK };
package/dist/index.js CHANGED
@@ -1,7 +1,9 @@
1
1
  "use strict";
2
+ var __create = Object.create;
2
3
  var __defProp = Object.defineProperty;
3
4
  var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
5
  var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
5
7
  var __hasOwnProp = Object.prototype.hasOwnProperty;
6
8
  var __export = (target, all) => {
7
9
  for (var name in all)
@@ -15,6 +17,14 @@ var __copyProps = (to, from, except, desc) => {
15
17
  }
16
18
  return to;
17
19
  };
20
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
+ // If the importer is in node compatibility mode or this is not an ESM
22
+ // file that has been converted to a CommonJS file using a Babel-
23
+ // compatible transform (i.e. "__esModule" has not been set), then set
24
+ // "default" to the CommonJS "module.exports" for node compatibility.
25
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
+ mod
27
+ ));
18
28
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
29
 
20
30
  // src/index.ts
@@ -23,18 +33,24 @@ __export(index_exports, {
23
33
  AgrasyaVoiceSDK: () => AgrasyaVoiceSDK
24
34
  });
25
35
  module.exports = __toCommonJS(index_exports);
36
+ var import_socket = __toESM(require("socket.io-client"));
37
+ var import_dotenv = __toESM(require("dotenv"));
38
+ import_dotenv.default.config();
26
39
  var AgrasyaVoiceSDK = class {
27
40
  config;
28
41
  events;
29
42
  peerConnection = null;
30
43
  dataChannel = null;
31
44
  audioStream = null;
45
+ socket = null;
32
46
  sessionId = null;
33
- status = "idle";
47
+ ephemeralKey = null;
48
+ callDuration = 0;
49
+ durationInterval = null;
34
50
  constructor(config, events = {}) {
35
51
  this.config = {
36
- baseUrl: "http://localhost:4001",
37
- language: "en",
52
+ baseUrl: process.env.API_BASE_URL,
53
+ language: "",
38
54
  debug: false,
39
55
  ...config
40
56
  };
@@ -46,56 +62,20 @@ var AgrasyaVoiceSDK = class {
46
62
  }
47
63
  }
48
64
  setStatus(status) {
49
- this.status = status;
50
65
  this.events.onStatusChange?.(status);
51
66
  }
52
- /**
53
- * Starts a voice session.
54
- */
55
- async start() {
56
- if (this.status !== "idle") {
57
- throw new Error("Session is already in progress or initializing");
58
- }
59
- try {
60
- this.setStatus("initializing");
61
- this.log("Initializing session...");
62
- const sessionData = await this.initializeBackendSession();
63
- this.sessionId = sessionData.sessionId;
64
- await this.connectWebRTC(sessionData.ephemeralKey);
65
- this.setStatus("connected");
66
- this.events.onStart?.();
67
- } catch (error) {
68
- this.setStatus("error");
69
- const err = error instanceof Error ? error : new Error(String(error));
70
- this.events.onError?.(err);
71
- throw err;
72
- }
67
+ startTimer() {
68
+ this.callDuration = 0;
69
+ this.durationInterval = setInterval(() => {
70
+ this.callDuration += 1;
71
+ this.events.onCallDurationUpdate?.(this.callDuration);
72
+ }, 1e3);
73
73
  }
74
- /**
75
- * Stops the current voice session.
76
- */
77
- async stop() {
78
- this.log("Stopping session...");
79
- if (this.peerConnection) {
80
- this.peerConnection.close();
81
- this.peerConnection = null;
82
- }
83
- if (this.dataChannel) {
84
- this.dataChannel.close();
85
- this.dataChannel = null;
74
+ stopTimer() {
75
+ if (this.durationInterval) {
76
+ clearInterval(this.durationInterval);
77
+ this.durationInterval = null;
86
78
  }
87
- if (this.audioStream) {
88
- this.audioStream.getTracks().forEach((track) => track.stop());
89
- this.audioStream = null;
90
- }
91
- if (this.sessionId) {
92
- this.notifySessionEnd(this.sessionId).catch(
93
- (err) => this.log("Failed to notify session end", err)
94
- );
95
- this.sessionId = null;
96
- }
97
- this.setStatus("idle");
98
- this.events.onStop?.();
99
79
  }
100
80
  async initializeBackendSession() {
101
81
  const url = `${this.config.baseUrl}/v1/agrasya/client-token?language=${this.config.language}`;
@@ -118,38 +98,37 @@ var AgrasyaVoiceSDK = class {
118
98
  }
119
99
  return result.data;
120
100
  }
121
- async connectWebRTC(ephemeralKey) {
122
- this.setStatus("connecting");
123
- this.log("Connecting to AI Provider via WebRTC...");
101
+ async setupWebRTC(ephemeralKey) {
124
102
  this.peerConnection = new RTCPeerConnection();
103
+ this.dataChannel = this.peerConnection.createDataChannel(
104
+ "oai-events"
105
+ );
106
+ if (this.dataChannel) {
107
+ this.dataChannel.onmessage = (event) => {
108
+ const data = JSON.parse(event.data);
109
+ this.events.onTranscript?.(data);
110
+ };
111
+ }
125
112
  this.peerConnection.ontrack = (event) => {
126
- this.log("Received remote audio track");
127
- const audio = new Audio();
128
- audio.srcObject = event.streams[0];
129
- audio.play().catch((err) => this.log("Failed to play audio", err));
113
+ const audioElement = document.createElement("audio");
114
+ audioElement.srcObject = event.streams[0];
115
+ audioElement.autoplay = true;
116
+ document.body.appendChild(audioElement);
130
117
  };
131
118
  this.audioStream = await navigator.mediaDevices.getUserMedia({
132
119
  audio: true
133
120
  });
134
- this.audioStream.getTracks().forEach((track) => {
135
- if (this.peerConnection && this.audioStream) {
136
- this.peerConnection.addTrack(track, this.audioStream);
137
- }
138
- });
139
- this.dataChannel = this.peerConnection.createDataChannel("oai-events");
140
- this.dataChannel.onmessage = (event) => {
141
- const data = JSON.parse(event.data);
142
- this.log("Received event:", data.type);
143
- if (data.type === "response.audio_transcript.delta") {
144
- this.events.onTranscript?.({ text: data.delta, isFinal: false });
145
- } else if (data.type === "response.audio_transcript.done") {
146
- this.events.onTranscript?.({ text: data.transcript, isFinal: true });
147
- }
148
- };
121
+ if (this.audioStream) {
122
+ const track = this.audioStream.getTracks()[0];
123
+ this.peerConnection.addTrack(
124
+ track,
125
+ this.audioStream
126
+ );
127
+ }
149
128
  const offer = await this.peerConnection.createOffer();
150
129
  await this.peerConnection.setLocalDescription(offer);
151
- const baseUrl = "https://api.openai.com/v1/realtime";
152
- const model = "gpt-4o-realtime-preview-2024-12-17";
130
+ const baseUrl = process.env.API_URL;
131
+ const model = process.env.MODEL_NAME;
153
132
  const sdpResponse = await fetch(`${baseUrl}?model=${model}`, {
154
133
  method: "POST",
155
134
  body: offer.sdp,
@@ -165,19 +144,60 @@ var AgrasyaVoiceSDK = class {
165
144
  type: "answer",
166
145
  sdp: await sdpResponse.text()
167
146
  };
168
- await this.peerConnection.setRemoteDescription(answer);
169
- this.log("WebRTC connection established");
147
+ const remoteDescription = {
148
+ type: answer.type,
149
+ sdp: answer.sdp
150
+ };
151
+ await this.peerConnection.setRemoteDescription(remoteDescription);
170
152
  }
171
- async notifySessionEnd(sessionId) {
172
- const url = `${this.config.baseUrl}/v1/agrasya/session-end/${sessionId}`;
173
- await fetch(url, {
174
- method: "PUT",
175
- headers: {
176
- "x-agrasya-client-api-key": this.config.apiKey,
177
- "Content-Type": "application/json"
178
- }
153
+ setupSocketListeners(socket) {
154
+ socket.on("disconnect", () => {
155
+ this.cleanup();
156
+ });
157
+ socket.on("session_ended", () => {
158
+ this.cleanup();
179
159
  });
180
160
  }
161
+ async startVoiceSession(language) {
162
+ try {
163
+ this.setStatus("initializing");
164
+ const sessionData = await this.initializeBackendSession();
165
+ this.sessionId = sessionData.sessionId;
166
+ this.ephemeralKey = sessionData.ephemeralKey;
167
+ await this.setupWebRTC(this.ephemeralKey);
168
+ this.socket = (0, import_socket.default)(this.config.baseUrl, { transports: ["websocket"] });
169
+ this.socket.emit("join_session", { sessionId: this.sessionId });
170
+ this.setupSocketListeners(this.socket);
171
+ this.setStatus("connected");
172
+ this.startTimer();
173
+ } catch (error) {
174
+ this.setStatus("error");
175
+ throw error;
176
+ }
177
+ }
178
+ async stopVoiceSession() {
179
+ this.cleanup();
180
+ }
181
+ cleanup() {
182
+ if (this.peerConnection) {
183
+ this.peerConnection.close();
184
+ this.peerConnection = null;
185
+ }
186
+ if (this.dataChannel) {
187
+ this.dataChannel.close();
188
+ this.dataChannel = null;
189
+ }
190
+ if (this.audioStream) {
191
+ this.audioStream.getTracks().forEach((track) => track.stop());
192
+ this.audioStream = null;
193
+ }
194
+ if (this.socket) {
195
+ this.socket.disconnect();
196
+ this.socket = null;
197
+ }
198
+ this.stopTimer();
199
+ this.setStatus("idle");
200
+ }
181
201
  };
182
202
  // Annotate the CommonJS export names for ESM import in node:
183
203
  0 && (module.exports = {
package/dist/index.mjs CHANGED
@@ -1,16 +1,22 @@
1
1
  // src/index.ts
2
+ import io from "socket.io-client";
3
+ import dotenv from "dotenv";
4
+ dotenv.config();
2
5
  var AgrasyaVoiceSDK = class {
3
6
  config;
4
7
  events;
5
8
  peerConnection = null;
6
9
  dataChannel = null;
7
10
  audioStream = null;
11
+ socket = null;
8
12
  sessionId = null;
9
- status = "idle";
13
+ ephemeralKey = null;
14
+ callDuration = 0;
15
+ durationInterval = null;
10
16
  constructor(config, events = {}) {
11
17
  this.config = {
12
- baseUrl: "http://localhost:4001",
13
- language: "en",
18
+ baseUrl: process.env.API_BASE_URL,
19
+ language: "",
14
20
  debug: false,
15
21
  ...config
16
22
  };
@@ -22,56 +28,20 @@ var AgrasyaVoiceSDK = class {
22
28
  }
23
29
  }
24
30
  setStatus(status) {
25
- this.status = status;
26
31
  this.events.onStatusChange?.(status);
27
32
  }
28
- /**
29
- * Starts a voice session.
30
- */
31
- async start() {
32
- if (this.status !== "idle") {
33
- throw new Error("Session is already in progress or initializing");
34
- }
35
- try {
36
- this.setStatus("initializing");
37
- this.log("Initializing session...");
38
- const sessionData = await this.initializeBackendSession();
39
- this.sessionId = sessionData.sessionId;
40
- await this.connectWebRTC(sessionData.ephemeralKey);
41
- this.setStatus("connected");
42
- this.events.onStart?.();
43
- } catch (error) {
44
- this.setStatus("error");
45
- const err = error instanceof Error ? error : new Error(String(error));
46
- this.events.onError?.(err);
47
- throw err;
48
- }
33
+ startTimer() {
34
+ this.callDuration = 0;
35
+ this.durationInterval = setInterval(() => {
36
+ this.callDuration += 1;
37
+ this.events.onCallDurationUpdate?.(this.callDuration);
38
+ }, 1e3);
49
39
  }
50
- /**
51
- * Stops the current voice session.
52
- */
53
- async stop() {
54
- this.log("Stopping session...");
55
- if (this.peerConnection) {
56
- this.peerConnection.close();
57
- this.peerConnection = null;
58
- }
59
- if (this.dataChannel) {
60
- this.dataChannel.close();
61
- this.dataChannel = null;
40
+ stopTimer() {
41
+ if (this.durationInterval) {
42
+ clearInterval(this.durationInterval);
43
+ this.durationInterval = null;
62
44
  }
63
- if (this.audioStream) {
64
- this.audioStream.getTracks().forEach((track) => track.stop());
65
- this.audioStream = null;
66
- }
67
- if (this.sessionId) {
68
- this.notifySessionEnd(this.sessionId).catch(
69
- (err) => this.log("Failed to notify session end", err)
70
- );
71
- this.sessionId = null;
72
- }
73
- this.setStatus("idle");
74
- this.events.onStop?.();
75
45
  }
76
46
  async initializeBackendSession() {
77
47
  const url = `${this.config.baseUrl}/v1/agrasya/client-token?language=${this.config.language}`;
@@ -94,38 +64,37 @@ var AgrasyaVoiceSDK = class {
94
64
  }
95
65
  return result.data;
96
66
  }
97
- async connectWebRTC(ephemeralKey) {
98
- this.setStatus("connecting");
99
- this.log("Connecting to AI Provider via WebRTC...");
67
+ async setupWebRTC(ephemeralKey) {
100
68
  this.peerConnection = new RTCPeerConnection();
69
+ this.dataChannel = this.peerConnection.createDataChannel(
70
+ "oai-events"
71
+ );
72
+ if (this.dataChannel) {
73
+ this.dataChannel.onmessage = (event) => {
74
+ const data = JSON.parse(event.data);
75
+ this.events.onTranscript?.(data);
76
+ };
77
+ }
101
78
  this.peerConnection.ontrack = (event) => {
102
- this.log("Received remote audio track");
103
- const audio = new Audio();
104
- audio.srcObject = event.streams[0];
105
- audio.play().catch((err) => this.log("Failed to play audio", err));
79
+ const audioElement = document.createElement("audio");
80
+ audioElement.srcObject = event.streams[0];
81
+ audioElement.autoplay = true;
82
+ document.body.appendChild(audioElement);
106
83
  };
107
84
  this.audioStream = await navigator.mediaDevices.getUserMedia({
108
85
  audio: true
109
86
  });
110
- this.audioStream.getTracks().forEach((track) => {
111
- if (this.peerConnection && this.audioStream) {
112
- this.peerConnection.addTrack(track, this.audioStream);
113
- }
114
- });
115
- this.dataChannel = this.peerConnection.createDataChannel("oai-events");
116
- this.dataChannel.onmessage = (event) => {
117
- const data = JSON.parse(event.data);
118
- this.log("Received event:", data.type);
119
- if (data.type === "response.audio_transcript.delta") {
120
- this.events.onTranscript?.({ text: data.delta, isFinal: false });
121
- } else if (data.type === "response.audio_transcript.done") {
122
- this.events.onTranscript?.({ text: data.transcript, isFinal: true });
123
- }
124
- };
87
+ if (this.audioStream) {
88
+ const track = this.audioStream.getTracks()[0];
89
+ this.peerConnection.addTrack(
90
+ track,
91
+ this.audioStream
92
+ );
93
+ }
125
94
  const offer = await this.peerConnection.createOffer();
126
95
  await this.peerConnection.setLocalDescription(offer);
127
- const baseUrl = "https://api.openai.com/v1/realtime";
128
- const model = "gpt-4o-realtime-preview-2024-12-17";
96
+ const baseUrl = process.env.API_URL;
97
+ const model = process.env.MODEL_NAME;
129
98
  const sdpResponse = await fetch(`${baseUrl}?model=${model}`, {
130
99
  method: "POST",
131
100
  body: offer.sdp,
@@ -141,19 +110,60 @@ var AgrasyaVoiceSDK = class {
141
110
  type: "answer",
142
111
  sdp: await sdpResponse.text()
143
112
  };
144
- await this.peerConnection.setRemoteDescription(answer);
145
- this.log("WebRTC connection established");
113
+ const remoteDescription = {
114
+ type: answer.type,
115
+ sdp: answer.sdp
116
+ };
117
+ await this.peerConnection.setRemoteDescription(remoteDescription);
146
118
  }
147
- async notifySessionEnd(sessionId) {
148
- const url = `${this.config.baseUrl}/v1/agrasya/session-end/${sessionId}`;
149
- await fetch(url, {
150
- method: "PUT",
151
- headers: {
152
- "x-agrasya-client-api-key": this.config.apiKey,
153
- "Content-Type": "application/json"
154
- }
119
+ setupSocketListeners(socket) {
120
+ socket.on("disconnect", () => {
121
+ this.cleanup();
122
+ });
123
+ socket.on("session_ended", () => {
124
+ this.cleanup();
155
125
  });
156
126
  }
127
+ async startVoiceSession(language) {
128
+ try {
129
+ this.setStatus("initializing");
130
+ const sessionData = await this.initializeBackendSession();
131
+ this.sessionId = sessionData.sessionId;
132
+ this.ephemeralKey = sessionData.ephemeralKey;
133
+ await this.setupWebRTC(this.ephemeralKey);
134
+ this.socket = io(this.config.baseUrl, { transports: ["websocket"] });
135
+ this.socket.emit("join_session", { sessionId: this.sessionId });
136
+ this.setupSocketListeners(this.socket);
137
+ this.setStatus("connected");
138
+ this.startTimer();
139
+ } catch (error) {
140
+ this.setStatus("error");
141
+ throw error;
142
+ }
143
+ }
144
+ async stopVoiceSession() {
145
+ this.cleanup();
146
+ }
147
+ cleanup() {
148
+ if (this.peerConnection) {
149
+ this.peerConnection.close();
150
+ this.peerConnection = null;
151
+ }
152
+ if (this.dataChannel) {
153
+ this.dataChannel.close();
154
+ this.dataChannel = null;
155
+ }
156
+ if (this.audioStream) {
157
+ this.audioStream.getTracks().forEach((track) => track.stop());
158
+ this.audioStream = null;
159
+ }
160
+ if (this.socket) {
161
+ this.socket.disconnect();
162
+ this.socket = null;
163
+ }
164
+ this.stopTimer();
165
+ this.setStatus("idle");
166
+ }
157
167
  };
158
168
  export {
159
169
  AgrasyaVoiceSDK
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agrasya-voice-sdk",
3
- "version": "1.0.0",
3
+ "version": "1.0.2",
4
4
  "main": "dist/index.js",
5
5
  "module": "dist/index.mjs",
6
6
  "types": "dist/index.d.ts",
@@ -12,5 +12,22 @@
12
12
  "dev": "tsup src/index.ts --format cjs,esm --watch --dts",
13
13
  "lint": "eslint src/**/*.ts",
14
14
  "test": "jest"
15
+ },
16
+ "devDependencies": {
17
+ "@types/jest": "^29.5.0",
18
+ "@types/webrtc": "^0.0.47",
19
+ "@typescript-eslint/eslint-plugin": "^6.0.0",
20
+ "@typescript-eslint/parser": "^6.0.0",
21
+ "eslint": "^8.50.0",
22
+ "jest": "^29.7.0",
23
+ "jest-environment-jsdom": "^30.3.0",
24
+ "ts-jest": "^29.1.1",
25
+ "tsup": "^8.0.0",
26
+ "typescript": "^5.0.0"
27
+ },
28
+ "dependencies": {
29
+ "dotenv": "^17.3.1",
30
+ "react-native-webrtc": "^124.0.7",
31
+ "socket.io-client": "^4.8.3"
15
32
  }
16
33
  }