@speechall/sdk 1.0.0 → 2.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.beads/README.md +81 -0
- package/.beads/config.yaml +62 -0
- package/.beads/issues.jsonl +46 -0
- package/.beads/metadata.json +4 -0
- package/.env.example +5 -0
- package/.fernignore +45 -0
- package/.gitattributes +3 -0
- package/.github/copilot-instructions.md +78 -0
- package/.github/workflows/auto-release-simple.yml.deprecated +106 -0
- package/.github/workflows/auto-release.yml +67 -0
- package/.github/workflows/ci.yml +41 -0
- package/.github/workflows/release.yml +57 -0
- package/AGENTS.md +94 -0
- package/CHANGELOG.md +58 -0
- package/CLAUDE.md +75 -0
- package/README.md +294 -155
- package/examples/CLAUDE.md +136 -0
- package/examples/advanced-options.ts +213 -0
- package/examples/basic-transcription.ts +66 -0
- package/examples/error-handling.ts +251 -0
- package/examples/list-models.ts +112 -0
- package/examples/remote-transcription.ts +60 -0
- package/fern/fern.config.json +4 -0
- package/fern/generators.yml +43 -0
- package/jest.config.js +11 -0
- package/package.json +26 -46
- package/regenerate.sh +45 -0
- package/scripts/fix-generated-code.sh +25 -0
- package/src/BaseClient.ts +82 -0
- package/src/Client.ts +30 -0
- package/src/api/errors/BadRequestError.ts +22 -0
- package/src/api/errors/GatewayTimeoutError.ts +22 -0
- package/src/api/errors/InternalServerError.ts +22 -0
- package/src/api/errors/NotFoundError.ts +22 -0
- package/src/api/errors/PaymentRequiredError.ts +22 -0
- package/src/api/errors/ServiceUnavailableError.ts +22 -0
- package/src/api/errors/TooManyRequestsError.ts +22 -0
- package/src/api/errors/UnauthorizedError.ts +22 -0
- package/src/api/errors/index.ts +8 -0
- package/src/api/index.ts +3 -0
- package/src/api/resources/index.ts +5 -0
- package/src/api/resources/replacementRules/client/Client.ts +148 -0
- package/src/api/resources/replacementRules/client/index.ts +1 -0
- package/src/api/resources/replacementRules/client/requests/CreateReplacementRulesetRequest.ts +25 -0
- package/src/api/resources/replacementRules/client/requests/index.ts +1 -0
- package/src/api/resources/replacementRules/index.ts +2 -0
- package/src/api/resources/replacementRules/types/CreateReplacementRulesetResponse.ts +6 -0
- package/src/api/resources/replacementRules/types/index.ts +1 -0
- package/src/api/resources/speechToText/client/Client.ts +275 -0
- package/src/api/resources/speechToText/client/index.ts +1 -0
- package/src/api/resources/speechToText/client/requests/RemoteTranscriptionConfiguration.ts +20 -0
- package/src/api/resources/speechToText/client/requests/TranscribeRequest.ts +26 -0
- package/src/api/resources/speechToText/client/requests/index.ts +2 -0
- package/src/api/resources/speechToText/index.ts +1 -0
- package/src/api/types/BaseTranscriptionConfiguration.ts +29 -0
- package/src/api/types/ErrorResponse.ts +11 -0
- package/src/api/types/ExactRule.ts +13 -0
- package/src/api/types/RegexGroupRule.ts +28 -0
- package/src/api/types/RegexRule.ts +28 -0
- package/src/api/types/ReplacementRule.ts +25 -0
- package/src/api/types/SpeechToTextModel.ts +90 -0
- package/src/api/types/TranscriptLanguageCode.ts +114 -0
- package/src/api/types/TranscriptOutputFormat.ts +18 -0
- package/src/api/types/TranscriptionDetailed.ts +19 -0
- package/src/api/types/TranscriptionModelIdentifier.ts +80 -0
- package/src/api/types/TranscriptionOnlyText.ts +11 -0
- package/src/api/types/TranscriptionProvider.ts +23 -0
- package/src/api/types/TranscriptionResponse.ts +8 -0
- package/src/api/types/TranscriptionSegment.ts +17 -0
- package/src/api/types/TranscriptionWord.ts +17 -0
- package/src/api/types/index.ts +16 -0
- package/src/auth/BearerAuthProvider.ts +37 -0
- package/src/auth/index.ts +1 -0
- package/src/core/auth/AuthProvider.ts +6 -0
- package/src/core/auth/AuthRequest.ts +9 -0
- package/src/core/auth/BasicAuth.ts +32 -0
- package/src/core/auth/BearerToken.ts +20 -0
- package/src/core/auth/NoOpAuthProvider.ts +8 -0
- package/src/core/auth/index.ts +5 -0
- package/src/core/base64.ts +27 -0
- package/src/core/exports.ts +2 -0
- package/src/core/fetcher/APIResponse.ts +23 -0
- package/src/core/fetcher/BinaryResponse.ts +34 -0
- package/src/core/fetcher/EndpointMetadata.ts +13 -0
- package/src/core/fetcher/EndpointSupplier.ts +14 -0
- package/src/core/fetcher/Fetcher.ts +391 -0
- package/src/core/fetcher/Headers.ts +93 -0
- package/src/core/fetcher/HttpResponsePromise.ts +116 -0
- package/src/core/fetcher/RawResponse.ts +61 -0
- package/src/core/fetcher/Supplier.ts +11 -0
- package/src/core/fetcher/createRequestUrl.ts +6 -0
- package/src/core/fetcher/getErrorResponseBody.ts +33 -0
- package/src/core/fetcher/getFetchFn.ts +3 -0
- package/src/core/fetcher/getHeader.ts +8 -0
- package/src/core/fetcher/getRequestBody.ts +20 -0
- package/src/core/fetcher/getResponseBody.ts +58 -0
- package/src/core/fetcher/index.ts +11 -0
- package/src/core/fetcher/makeRequest.ts +42 -0
- package/src/core/fetcher/requestWithRetries.ts +64 -0
- package/src/core/fetcher/signals.ts +26 -0
- package/src/core/file/exports.ts +1 -0
- package/src/core/file/file.ts +217 -0
- package/src/core/file/index.ts +2 -0
- package/src/core/file/types.ts +81 -0
- package/src/core/headers.ts +35 -0
- package/src/core/index.ts +7 -0
- package/src/core/json.ts +27 -0
- package/src/core/logging/exports.ts +19 -0
- package/src/core/logging/index.ts +1 -0
- package/src/core/logging/logger.ts +203 -0
- package/src/core/runtime/index.ts +1 -0
- package/src/core/runtime/runtime.ts +134 -0
- package/src/core/url/encodePathParam.ts +18 -0
- package/src/core/url/index.ts +3 -0
- package/src/core/url/join.ts +79 -0
- package/src/core/url/qs.ts +74 -0
- package/src/environments.ts +7 -0
- package/src/errors/SpeechallError.ts +58 -0
- package/src/errors/SpeechallTimeoutError.ts +13 -0
- package/src/errors/handleNonStatusCodeError.ts +37 -0
- package/src/errors/index.ts +2 -0
- package/src/exports.ts +1 -0
- package/src/index.ts +6 -0
- package/test-import.ts +17 -0
- package/tests/integration/api.test.ts +93 -0
- package/tests/unit/client.test.ts +91 -0
- package/tsconfig.json +20 -0
- package/dist/api.d.ts +0 -501
- package/dist/api.d.ts.map +0 -1
- package/dist/api.js +0 -610
- package/dist/base.d.ts +0 -32
- package/dist/base.d.ts.map +0 -1
- package/dist/base.js +0 -35
- package/dist/common.d.ts +0 -14
- package/dist/common.d.ts.map +0 -1
- package/dist/common.js +0 -91
- package/dist/configuration.d.ts +0 -23
- package/dist/configuration.d.ts.map +0 -1
- package/dist/configuration.js +0 -25
- package/dist/esm/api.js +0 -592
- package/dist/esm/base.js +0 -27
- package/dist/esm/common.js +0 -79
- package/dist/esm/configuration.js +0 -21
- package/dist/esm/example.js +0 -131
- package/dist/esm/index.js +0 -2
- package/dist/example.d.ts +0 -3
- package/dist/example.d.ts.map +0 -1
- package/dist/example.js +0 -133
- package/dist/index.d.ts +0 -3
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js +0 -18
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Advanced Options Example
|
|
3
|
+
*
|
|
4
|
+
* This example demonstrates how to use advanced transcription features including:
|
|
5
|
+
* - Speaker diarization (identifying different speakers)
|
|
6
|
+
* - Word-level timestamps
|
|
7
|
+
* - Custom vocabulary
|
|
8
|
+
* - Temperature control
|
|
9
|
+
* - Initial prompts for context
|
|
10
|
+
* - Different output formats (SRT, VTT)
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import 'dotenv/config';
|
|
14
|
+
import { SpeechallClient } from '../src/index';
|
|
15
|
+
import * as fs from 'fs';
|
|
16
|
+
import * as path from 'path';
|
|
17
|
+
|
|
18
|
+
/**
 * Runs the seven advanced transcription examples, one after another, against
 * the sample audio file that sits next to this script.
 *
 * Every example is described by one entry of the `examples` table (heading,
 * underline, the request to send and how to print the result) and executed by
 * a single runner loop. A failure in one example is reported with
 * `console.error` and the remaining examples still run.
 *
 * Exits the process with status 1 when the sample audio file does not exist.
 */
async function main() {
    const client = new SpeechallClient({
        token: process.env.SPEECHALL_API_KEY!,
    });

    const audioFilePath = path.join(__dirname, 'sample-audio.wav');

    if (!fs.existsSync(audioFilePath)) {
        console.error(`Audio file not found at: ${audioFilePath}`);
        console.error('Please provide a valid audio file path.');
        process.exit(1);
    }

    console.log('=== Advanced Transcription Examples ===\n');

    // Shared printer for the examples that dump the whole response as JSON.
    const printJson = (result: unknown): void => {
        console.log('Result:', JSON.stringify(result, null, 2));
    };

    const examples = [
        {
            // Example 1: speaker diarization and word-level timestamps
            heading: '1. Transcription with Speaker Diarization and Word Timestamps',
            underline: '-----------------------------------------------------------',
            send: (audio: fs.ReadStream) =>
                client.speechToText.transcribe(audio, {
                    model: 'deepgram.nova-2', // Deepgram supports advanced features
                    language: 'en',
                    output_format: 'json', // Detailed output with timestamps
                    diarization: true, // Enable speaker identification
                    timestamp_granularity: 'word', // Word-level timestamps (vs 'segment')
                    punctuation: true,
                }),
            show: printJson,
            trailingBlankLine: true,
        },
        {
            // Example 2: custom vocabulary for better recognition
            heading: '2. Transcription with Custom Vocabulary',
            underline: '----------------------------------------',
            send: (audio: fs.ReadStream) =>
                client.speechToText.transcribe(audio, {
                    model: 'assemblyai.best',
                    language: 'en',
                    output_format: 'json',
                    // Provide domain-specific words for better recognition
                    custom_vocabulary: ['Speechall', 'API', 'TypeScript', 'SDK'],
                    punctuation: true,
                }),
            show: printJson,
            trailingBlankLine: true,
        },
        {
            // Example 3: initial prompt for context and style
            heading: '3. Transcription with Initial Prompt (Context)',
            underline: '-----------------------------------------------',
            send: (audio: fs.ReadStream) =>
                client.speechToText.transcribe(audio, {
                    model: 'openai.whisper-1',
                    language: 'en',
                    output_format: 'text',
                    // Provide context to improve accuracy and style
                    initial_prompt: 'This is a technical discussion about software development and APIs.',
                    temperature: 0.2, // Lower temperature for more deterministic output
                }),
            show: (result: unknown): void => {
                console.log('Result:', result);
            },
            trailingBlankLine: true,
        },
        {
            // Example 4: SRT subtitles
            heading: '4. Generate SRT Subtitles',
            underline: '-------------------------',
            send: (audio: fs.ReadStream) =>
                client.speechToText.transcribe(audio, {
                    model: 'openai.whisper-1',
                    language: 'en',
                    output_format: 'srt', // SRT subtitle format
                }),
            show: (result: unknown): void => {
                console.log('SRT Output:');
                console.log(result);
                // The subtitle text could also be written to a file here,
                // e.g. with fs.writeFileSync.
            },
            trailingBlankLine: true,
        },
        {
            // Example 5: VTT subtitles
            heading: '5. Generate VTT Subtitles',
            underline: '-------------------------',
            send: (audio: fs.ReadStream) =>
                client.speechToText.transcribe(audio, {
                    model: 'openai.whisper-1',
                    language: 'en',
                    output_format: 'vtt', // WebVTT subtitle format
                }),
            show: (result: unknown): void => {
                console.log('VTT Output:');
                console.log(result);
            },
            trailingBlankLine: true,
        },
        {
            // Example 6: smart formatting (Deepgram feature)
            heading: '6. Transcription with Smart Formatting',
            underline: '---------------------------------------',
            send: (audio: fs.ReadStream) =>
                client.speechToText.transcribe(audio, {
                    model: 'deepgram.nova-2',
                    language: 'en',
                    output_format: 'json',
                    smart_format: true, // Format numbers, dates, currency, etc.
                    punctuation: true,
                }),
            show: printJson,
            trailingBlankLine: true,
        },
        {
            // Example 7: expected number of speakers
            heading: '7. Transcription with Expected Speaker Count',
            underline: '---------------------------------------------',
            send: (audio: fs.ReadStream) =>
                client.speechToText.transcribe(audio, {
                    model: 'deepgram.nova-2',
                    language: 'en',
                    output_format: 'json',
                    diarization: true,
                    speakers_expected: 2, // Hint: expect 2 speakers in the audio
                }),
            show: printJson,
            // The last example is not followed by a blank separator line.
            trailingBlankLine: false,
        },
    ];

    for (let i = 0; i < examples.length; i++) {
        const example = examples[i];

        console.log(example.heading);
        console.log(example.underline);

        // Each request gets its own stream because a read stream can only be
        // consumed once.
        let audio: fs.ReadStream | undefined;
        try {
            audio = fs.createReadStream(audioFilePath);
            const result = await example.send(audio);
            example.show(result);
            if (example.trailingBlankLine) {
                console.log('\n');
            }
        } catch (error) {
            console.error(`Example ${i + 1} failed:`, error);
        } finally {
            // Release the file handle even when the request failed before the
            // stream was read to the end; harmless once it has already closed.
            audio?.destroy();
        }
    }
}
|
|
211
|
+
|
|
212
|
+
// Run the examples. Anything that escapes main() is reported here and turned
// into a non-zero exit code rather than left as an unhandled rejection.
main().catch((error) => {
    console.error('Example script failed:');
    console.error(error);
    process.exit(1);
});
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Basic Transcription Example
|
|
3
|
+
*
|
|
4
|
+
* This example demonstrates how to transcribe a local audio file using the Speechall SDK.
|
|
5
|
+
* It shows the simplest way to get started with speech-to-text transcription.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import 'dotenv/config';
|
|
9
|
+
import { SpeechallClient } from '../src/index';
|
|
10
|
+
import * as fs from 'fs';
|
|
11
|
+
import * as path from 'path';
|
|
12
|
+
|
|
13
|
+
/**
 * Transcribes the sample audio file that sits next to this script and prints
 * the result.
 *
 * Exits with status 1 when the sample file is missing or the request fails.
 */
async function main() {
    // The API token is read from the SPEECHALL_API_KEY environment variable.
    const speechall = new SpeechallClient({
        token: process.env.SPEECHALL_API_KEY!,
    });

    // Swap this for the path of your own recording.
    const samplePath = path.join(__dirname, 'sample-audio.wav');

    const sampleExists = fs.existsSync(samplePath);
    if (!sampleExists) {
        console.error(`Audio file not found at: ${samplePath}`);
        console.error('Please provide a valid audio file path.');
        process.exit(1);
    }

    console.log('Transcribing audio file...');
    console.log(`File: ${samplePath}`);

    try {
        const audioStream = fs.createReadStream(samplePath);

        // Plain-text transcription with an explicit model and language.
        const transcript = await speechall.speechToText.transcribe(audioStream, {
            model: 'openai.whisper-1', // OpenAI's Whisper model
            language: 'en', // English
            output_format: 'text', // Plain text output
        });

        console.log('\n--- Transcription Result ---');

        // Text output arrives as a string; anything else is pretty-printed.
        const printable =
            typeof transcript === 'string'
                ? transcript
                : JSON.stringify(transcript, null, 2);
        console.log(printable);
    } catch (failure) {
        console.error('Transcription failed:');
        console.error(failure);
        process.exit(1);
    }
}
|
|
64
|
+
|
|
65
|
+
// Run the example. Anything that escapes main() is reported here and turned
// into a non-zero exit code rather than left as an unhandled rejection.
main().catch((error) => {
    console.error('Example script failed:');
    console.error(error);
    process.exit(1);
});
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Error Handling Example
|
|
3
|
+
*
|
|
4
|
+
* This example demonstrates proper error handling when using the Speechall SDK.
|
|
5
|
+
* It shows how to catch and handle different types of errors that may occur
|
|
6
|
+
* during API calls.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import 'dotenv/config';
|
|
10
|
+
import { SpeechallClient, SpeechallError } from '../src/index';
|
|
11
|
+
import * as Speechall from '../src/api/index';
|
|
12
|
+
import * as fs from 'fs';
|
|
13
|
+
import * as path from 'path';
|
|
14
|
+
|
|
15
|
+
/**
 * Walks through five error-handling scenarios: an invalid token, an invalid
 * model, a rate-limit handler, a not-found handler for a remote file, and a
 * final transcription wrapped in retry logic.
 *
 * Scenarios 1-4 catch and print their own errors. Scenario 5 lets any error
 * thrown by transcribeWithRetry propagate to the caller of main().
 */
async function main() {
    console.log('=== Error Handling Examples ===\n');

    const samplePath = path.join(__dirname, 'sample-audio.wav');

    // Prints the two fields that every handled API error reports.
    const printStatusAndMessage = (apiError: { statusCode?: number; message: string }): void => {
        console.error('Status:', apiError.statusCode);
        console.error('Message:', apiError.message);
    };

    // --- Scenario 1: authentication errors ---------------------------------
    console.log('1. Authentication Error');
    console.log('-----------------------');

    try {
        const badTokenClient = new SpeechallClient({
            token: 'invalid-token',
        });
        const audioStream = fs.createReadStream(samplePath);

        await badTokenClient.speechToText.transcribe(audioStream, {
            model: 'openai.whisper-1',
        });
    } catch (caught) {
        if (!(caught instanceof Speechall.UnauthorizedError)) {
            console.error('Unexpected error:', caught);
        } else {
            console.error('Authentication failed!');
            printStatusAndMessage(caught);
            console.error('Details:', caught.body);
        }
    }
    console.log('\n');

    // --- Scenario 2: bad request errors (invalid parameters) ---------------
    console.log('2. Bad Request Error');
    console.log('--------------------');

    try {
        const speechall = new SpeechallClient({
            token: process.env.SPEECHALL_API_KEY!,
        });
        const audioStream = fs.createReadStream(samplePath);

        // The model name is deliberately invalid so the API rejects the call.
        await speechall.speechToText.transcribe(audioStream, {
            model: 'invalid.model' as any,
        });
    } catch (caught) {
        if (caught instanceof Speechall.BadRequestError) {
            console.error('Bad request!');
            printStatusAndMessage(caught);
            console.error('Details:', caught.body);
        } else if (caught instanceof SpeechallError) {
            console.error('API Error:', caught.message);
        } else {
            console.error('Unexpected error:', caught);
        }
    }
    console.log('\n');

    // --- Scenario 3: rate limit errors -------------------------------------
    console.log('3. Rate Limit Error');
    console.log('-------------------');

    try {
        const speechall = new SpeechallClient({
            token: process.env.SPEECHALL_API_KEY!,
        });

        // Only one request is sent here; the catch block shows how a
        // rate-limit response would be handled if the API returned one.
        const audioStream = fs.createReadStream(samplePath);

        await speechall.speechToText.transcribe(audioStream, {
            model: 'openai.whisper-1',
        });
    } catch (caught) {
        if (!(caught instanceof Speechall.TooManyRequestsError)) {
            console.error('Error:', caught);
        } else {
            console.error('Rate limit exceeded!');
            printStatusAndMessage(caught);
            console.error('Please wait before retrying.');
            // Production code should back off exponentially before retrying.
        }
    }
    console.log('\n');

    // --- Scenario 4: not-found errors for a remote file --------------------
    console.log('4. File Not Found Error');
    console.log('-----------------------');

    try {
        const speechall = new SpeechallClient({
            token: process.env.SPEECHALL_API_KEY!,
        });

        // Ask the API to fetch and transcribe a file from a remote URL.
        await speechall.speechToText.transcribeRemote({
            file_url: 'https://dss-kiel.de/images/media_center/signals/lombard/male_0_kmh.mp3',
            model: 'openai.whisper-1',
        });
    } catch (caught) {
        if (!(caught instanceof Speechall.NotFoundError)) {
            console.error('Error:', caught);
        } else {
            console.error('Resource not found!');
            printStatusAndMessage(caught);
        }
    }
    console.log('\n');

    // --- Scenario 5: comprehensive handling with retry logic ---------------
    console.log('5. Comprehensive Error Handling with Retry');
    console.log('-------------------------------------------');

    const maxRetries = 3;
    await transcribeWithRetry(samplePath, maxRetries);
}
|
|
149
|
+
|
|
150
|
+
/**
 * Transcribes a local audio file, retrying transient failures with
 * exponential backoff (1s, 2s, 4s, ...).
 *
 * Rate-limit errors (TooManyRequestsError) and server-side errors
 * (InternalServerError, ServiceUnavailableError, GatewayTimeoutError) are
 * retried. Every other error is logged and rethrown immediately.
 *
 * @param audioPath - Path of the audio file to upload.
 * @param maxRetries - Number of retries allowed after the first attempt, so
 *   up to `maxRetries + 1` requests are sent. `0` means "try once".
 * @throws Error('File not found') when `audioPath` does not exist.
 * @throws The original error when the failure is not retryable.
 * @throws Error('Transcription failed after retries') when every attempt
 *   failed with a retryable error.
 */
async function transcribeWithRetry(
    audioPath: string,
    maxRetries: number = 3
): Promise<void> {
    const client = new SpeechallClient({
        token: process.env.SPEECHALL_API_KEY!,
    });

    // A missing file cannot be fixed by retrying, so reject it before the
    // retry loop instead of re-checking on every attempt.
    if (!fs.existsSync(audioPath)) {
        console.log('Audio file not found. Creating a dummy request for demonstration.');
        throw new Error('File not found');
    }

    let delay = 1000; // Start with 1 second delay

    for (let attempt = 0; attempt <= maxRetries; attempt++) {
        // Each attempt needs a fresh stream; a consumed stream cannot be re-sent.
        const audioFile = fs.createReadStream(audioPath);

        try {
            const response = await client.speechToText.transcribe(
                audioFile,
                {
                    model: 'openai.whisper-1',
                    language: 'en',
                }
            );

            console.log('Transcription successful!');
            console.log('Result:', response);
            return; // Success, exit the function

        } catch (error) {
            let label: string;
            let detail: string | undefined;

            // Classify the failure: retryable errors fall through to the
            // backoff logic below, everything else is rethrown right here.
            if (error instanceof Speechall.TooManyRequestsError) {
                label = 'Rate limited.';

            } else if (error instanceof Speechall.InternalServerError ||
                       error instanceof Speechall.ServiceUnavailableError ||
                       error instanceof Speechall.GatewayTimeoutError) {
                label = 'Server error.';
                detail = error.message;

            } else if (error instanceof Speechall.UnauthorizedError) {
                console.error('Authentication failed. Cannot retry.');
                console.error('Please check your API key.');
                throw error; // Don't retry authentication errors

            } else if (error instanceof Speechall.BadRequestError) {
                console.error('Bad request. Cannot retry.');
                console.error('Please check your request parameters.');
                throw error; // Don't retry bad requests

            } else if (error instanceof Speechall.PaymentRequiredError) {
                console.error('Payment required. Cannot retry.');
                console.error('Please check your account billing.');
                throw error; // Don't retry payment errors

            } else if (error instanceof SpeechallError) {
                console.error('API Error:', error.message);
                throw error;

            } else {
                console.error('Unexpected error:', error);
                throw error;
            }

            const retriesLeft = attempt < maxRetries;

            // Only announce a retry when one is actually going to happen.
            console.error(
                retriesLeft
                    ? `${label} Retry ${attempt + 1}/${maxRetries} after ${delay}ms...`
                    : `${label} No retries left.`
            );
            if (detail !== undefined) {
                console.error('Error:', detail);
            }

            if (!retriesLeft) {
                break;
            }

            await sleep(delay);
            delay *= 2; // Exponential backoff

        } finally {
            // Release the file handle even when the request failed before the
            // stream was read to the end; harmless once it has already closed.
            audioFile.destroy();
        }
    }

    console.error(`Max retries (${maxRetries}) exceeded. Giving up.`);
    throw new Error('Transcription failed after retries');
}
|
|
238
|
+
|
|
239
|
+
/**
 * Pauses the calling async function.
 *
 * @param ms - How long to wait, in milliseconds.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms: number): Promise<void> {
    return new Promise<void>((wake) => {
        setTimeout(wake, ms);
    });
}
|
|
245
|
+
|
|
246
|
+
// Entry point: start the examples and, if main() rejects, print the failure
// and terminate with a non-zero exit code.
main().then(undefined, (failure) => {
    console.error('Example script failed:');
    console.error(failure);
    process.exit(1);
});
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* List Available Models Example
|
|
3
|
+
*
|
|
4
|
+
* This example demonstrates how to retrieve and display all available
|
|
5
|
+
* speech-to-text models from the Speechall API.
|
|
6
|
+
* Use this to discover what models are available and their capabilities.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import 'dotenv/config';
|
|
10
|
+
import { SpeechallClient } from '../src/index';
|
|
11
|
+
|
|
12
|
+
/**
 * Fetches every speech-to-text model the API exposes and prints a summary of
 * each one: identifier, provider, description, languages, cost, availability,
 * supported features and performance characteristics.
 *
 * Optional text fields are printed only when they are non-empty. Optional
 * numeric fields are printed whenever they are present, including when the
 * value is 0. Exits with status 1 when the model list cannot be fetched.
 */
async function main() {
    // Initialize the Speechall client with your API token
    const client = new SpeechallClient({
        token: process.env.SPEECHALL_API_KEY!,
    });

    console.log('Fetching available speech-to-text models...\n');

    // Renders any truthy/falsy capability flag the same way everywhere.
    const yesNo = (flag: unknown): string => (flag ? 'Yes' : 'No');

    try {
        // Get the list of all available models
        const models = await client.speechToText.listSpeechToTextModels();

        console.log(`Found ${models.length} available models:\n`);
        console.log('='.repeat(80));

        models.forEach((model, index) => {
            console.log(`\n${index + 1}. ${model.display_name}`);
            console.log('-'.repeat(80));

            if (model.id) {
                console.log(` Identifier: ${model.id}`);
            }

            if (model.provider) {
                console.log(` Provider: ${model.provider}`);
            }

            if (model.description) {
                console.log(` Description: ${model.description}`);
            }

            if (model.supported_languages && Array.isArray(model.supported_languages)) {
                console.log(` Supported Languages: ${model.supported_languages.join(', ')}`);
            }

            // `!= null` instead of a truthiness test: a cost of 0 is a real
            // value and must still be shown.
            if (model.cost_per_second_usd != null) {
                console.log(` Cost per second: $${model.cost_per_second_usd}`);
            }

            console.log(` Available: ${yesNo(model.is_available)}`);

            console.log(' Features:');

            // These flags are optional; only report the ones the API sent
            // back as real booleans.
            const optionalFeatures: Array<[string, unknown]> = [
                ['Punctuation', model.punctuation],
                ['Speaker Diarization', model.diarization],
                ['Word Timestamps', model.word_timestamps],
                ['Custom Vocabulary', model.custom_vocabulary_support],
                ['Streamable', model.streamable],
            ];
            for (const [label, supported] of optionalFeatures) {
                if (typeof supported === 'boolean') {
                    console.log(` - ${label}: ${yesNo(supported)}`);
                }
            }

            // Subtitle format support is always printed; a missing flag reads as "No".
            console.log(` - SRT Format: ${yesNo(model.supports_srt)}`);
            console.log(` - VTT Format: ${yesNo(model.supports_vtt)}`);

            // Same reasoning as for the cost: 0 must not be hidden.
            if (model.real_time_factor != null) {
                console.log(` Real-time Factor: ${model.real_time_factor}x`);
            }

            if (model.accuracy_tier) {
                console.log(` Accuracy Tier: ${model.accuracy_tier}`);
            }

            if (model.model_type) {
                console.log(` Model Type: ${model.model_type}`);
            }
        });

        console.log('\n' + '='.repeat(80));
        console.log('\nTo use a model, reference it by its identifier (e.g., "openai.whisper-1")');

    } catch (error) {
        console.error('Failed to fetch models:');
        console.error(error);
        process.exit(1);
    }
}
|
|
110
|
+
|
|
111
|
+
// Run the example. Anything that escapes main() is reported here and turned
// into a non-zero exit code rather than left as an unhandled rejection.
main().catch((error) => {
    console.error('Example script failed:');
    console.error(error);
    process.exit(1);
});
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Remote Transcription Example
|
|
3
|
+
*
|
|
4
|
+
* This example demonstrates how to transcribe an audio file from a publicly accessible URL.
|
|
5
|
+
* This is useful when your audio files are already hosted online (e.g., on S3, CDN, etc.).
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import 'dotenv/config';
|
|
9
|
+
import { SpeechallClient } from '../src/index';
|
|
10
|
+
|
|
11
|
+
/**
 * Asks the API to transcribe an audio file hosted at a public URL and prints
 * the result.
 *
 * A string response is printed as-is. Any other response is pretty-printed as
 * JSON, followed by its `text` property when it has one. Exits with status 1
 * when the request fails.
 */
async function main() {
    // The API token is read from the SPEECHALL_API_KEY environment variable.
    const speechall = new SpeechallClient({
        token: process.env.SPEECHALL_API_KEY!,
    });

    // Placeholder URL: point this at your own publicly reachable recording.
    const audioUrl = 'https://example.com/path/to/audio.mp3';

    console.log('Transcribing remote audio file...');
    console.log(`URL: ${audioUrl}`);

    try {
        const transcript = await speechall.speechToText.transcribeRemote({
            file_url: audioUrl,
            model: 'openai.whisper-1',
            language: 'en',
            output_format: 'json', // JSON output carries detailed information
            punctuation: true, // Automatic punctuation
        });

        console.log('\n--- Transcription Result ---');

        // Plain-text responses need no further formatting.
        if (typeof transcript === 'string') {
            console.log(transcript);
            return;
        }

        // Structured responses: dump everything, then surface the text alone.
        console.log('Full response:');
        console.log(JSON.stringify(transcript, null, 2));

        const hasText = 'text' in transcript;
        if (hasText) {
            console.log('\n--- Text Only ---');
            console.log(transcript.text);
        }
    } catch (failure) {
        console.error('Transcription failed:');
        console.error(failure);
        process.exit(1);
    }
}
|
|
58
|
+
|
|
59
|
+
// Run the example. Anything that escapes main() is reported here and turned
// into a non-zero exit code rather than left as an unhandled rejection.
main().catch((error) => {
    console.error('Example script failed:');
    console.error(error);
    process.exit(1);
});
|