@telnyx/voice-agent-tester 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,138 @@
1
+ import fs from 'fs';
2
+ import path from 'path';
3
+ import { fileURLToPath } from 'url';
4
+ import OpenAI from 'openai';
5
+
6
+ const __filename = fileURLToPath(import.meta.url);
7
+ const __dirname = path.dirname(__filename);
8
+
9
/**
 * Transcribes audio using the OpenAI Whisper API.
 *
 * @param {string} wavFilePath - Path to the WAV audio file
 * @returns {Promise<string>} - The transcribed text
 * @throws {Error} If OPENAI_API_KEY is unset or the transcription request fails
 */
export async function transcribeAudio(wavFilePath) {
  // Validate configuration before constructing the client: the OpenAI
  // constructor itself throws when no API key is available, which previously
  // made this explicit guard unreachable and the error message less helpful.
  if (!process.env.OPENAI_API_KEY) {
    throw new Error('OPENAI_API_KEY environment variable is required for transcription');
  }

  const openai = new OpenAI({
    apiKey: process.env.OPENAI_API_KEY
  });

  try {
    // Stream the file from disk rather than buffering it in memory.
    const audioFile = fs.createReadStream(wavFilePath);

    const transcription = await openai.audio.transcriptions.create({
      file: audioFile,
      model: "whisper-1",
    });

    return transcription.text;
  } catch (error) {
    console.error('Error transcribing audio:', error);
    // Wrap with a clearer message but keep the original error/stack via `cause`.
    throw new Error(`Transcription failed: ${error.message}`, { cause: error });
  }
}
38
+
39
/**
 * Evaluates transcribed text against evaluation criteria using an LLM with
 * structured (JSON-schema) output.
 *
 * @param {string} transcription - The transcribed text
 * @param {string} evaluationPrompt - The evaluation criteria
 * @returns {Promise<{score: number, explanation: string}>} - The evaluation score from 0 to 1 and explanation
 * @throws {Error} If OPENAI_API_KEY is unset or the evaluation request/parsing fails
 */
export async function evaluateTranscription(transcription, evaluationPrompt) {
  // Check configuration before building the client: the OpenAI constructor
  // throws on a missing key, so when this guard ran after construction it was
  // unreachable and callers got a less specific error.
  if (!process.env.OPENAI_API_KEY) {
    throw new Error('OPENAI_API_KEY environment variable is required for evaluation');
  }

  const openai = new OpenAI({
    apiKey: process.env.OPENAI_API_KEY
  });

  try {
    const response = await openai.chat.completions.create({
      model: "gpt-4o-mini",
      messages: [
        {
          role: "system",
          content: "You are an AI assistant that evaluates transcribed speech against given criteria. Provide a numerical score from 0 to 1, where 0 means the criteria is not met at all and 1 means the criteria is fully met."
        },
        {
          role: "user",
          content: `Evaluation criteria: ${evaluationPrompt}\n\nTranscribed speech: "${transcription}"\n\nPlease evaluate whether the transcribed speech meets the criteria and provide a score from 0 to 1 with a brief explanation.`
        }
      ],
      // Strict JSON-schema response format guarantees a parseable
      // {score, explanation} object (no extra properties).
      response_format: {
        type: "json_schema",
        json_schema: {
          name: "transcription_evaluation",
          schema: {
            type: "object",
            properties: {
              score: {
                type: "number",
                description: "A score from 0 to 1 indicating how well the transcription meets the criteria"
              },
              explanation: {
                type: "string",
                description: "A brief explanation of the score"
              }
            },
            required: ["score", "explanation"],
            additionalProperties: false
          },
          strict: true
        }
      },
      // Low temperature keeps scoring stable/reproducible across runs.
      temperature: 0.1
    });

    const result = JSON.parse(response.choices[0].message.content);
    return result;
  } catch (error) {
    console.error('Error evaluating transcription:', error);
    // Wrap with context but preserve the original error/stack via `cause`.
    throw new Error(`Evaluation failed: ${error.message}`, { cause: error });
  }
}
99
+
100
/**
 * Converts raw PCM audio data to a WAV (RIFF) file buffer by prepending the
 * standard 44-byte header.
 *
 * @param {Buffer} pcmBuffer - The PCM audio data
 * @param {number} sampleRate - Sample rate in Hz
 * @param {number} channels - Number of audio channels
 * @param {number} [bitsPerSample=16] - Bits per sample (defaults to 16-bit PCM)
 * @returns {Buffer} - The WAV file buffer (44-byte header + data)
 */
export function pcmToWav(pcmBuffer, sampleRate, channels, bitsPerSample = 16) {
  const bytesPerSample = bitsPerSample / 8;
  const byteRate = sampleRate * channels * bytesPerSample;
  const blockAlign = channels * bytesPerSample;
  const dataSize = pcmBuffer.length;
  // RIFF chunk size = total file length minus the 8-byte "RIFF"+size preamble.
  const fileSize = 36 + dataSize;

  const wavBuffer = Buffer.alloc(44 + dataSize);
  let offset = 0;

  // RIFF chunk descriptor
  wavBuffer.write('RIFF', offset); offset += 4;
  wavBuffer.writeUInt32LE(fileSize, offset); offset += 4;
  wavBuffer.write('WAVE', offset); offset += 4;

  // fmt sub-chunk (16 bytes, uncompressed PCM)
  wavBuffer.write('fmt ', offset); offset += 4;
  wavBuffer.writeUInt32LE(16, offset); offset += 4; // Sub-chunk size
  wavBuffer.writeUInt16LE(1, offset); offset += 2; // Audio format (1 = PCM)
  wavBuffer.writeUInt16LE(channels, offset); offset += 2;
  wavBuffer.writeUInt32LE(sampleRate, offset); offset += 4;
  wavBuffer.writeUInt32LE(byteRate, offset); offset += 4;
  wavBuffer.writeUInt16LE(blockAlign, offset); offset += 2;
  wavBuffer.writeUInt16LE(bitsPerSample, offset); offset += 2;

  // data sub-chunk
  wavBuffer.write('data', offset); offset += 4;
  wavBuffer.writeUInt32LE(dataSize, offset); offset += 4;
  pcmBuffer.copy(wavBuffer, offset);

  return wavBuffer;
}