whisperai-sdk 1.0.2 → 2.0.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/README.md +84 -110
- package/dist/client.d.ts +31 -16
- package/dist/constant.d.ts +4 -0
- package/dist/errors.d.ts +17 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +461 -113
- package/dist/types.d.ts +138 -94
- package/package.json +2 -1
package/README.md
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
# whisperai-sdk
|
|
2
2
|
|
|
3
|
-
Unofficial TypeScript SDK for [WhisperAI](https://whisperai.com/).
|
|
3
|
+
Unofficial TypeScript SDK for [WhisperAI](https://whisperai.com/). Version 2 uses WhisperAI's signed Google Cloud Storage resumable-upload flow and requires Node.js 22 or newer.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
> This project is not affiliated with WhisperAI.
|
|
6
6
|
|
|
7
7
|
## Installation
|
|
8
8
|
|
|
@@ -10,152 +10,126 @@ Unofficial TypeScript SDK for [WhisperAI](https://whisperai.com/). This package
|
|
|
10
10
|
npm install whisperai-sdk
|
|
11
11
|
```
|
|
12
12
|
|
|
13
|
-
##
|
|
13
|
+
## Transcribe a file
|
|
14
14
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
Initialize the client with your WhisperAI credentials.
|
|
15
|
+
`transcribe()` performs the complete operation: authentication, upload, retries, finalization, status polling, and retrieval of the completed transcription.
|
|
18
16
|
|
|
19
17
|
```typescript
|
|
20
|
-
import {
|
|
18
|
+
import { readFile } from "node:fs/promises"
|
|
19
|
+
import { WhisperClient } from "whisperai-sdk"
|
|
21
20
|
|
|
22
21
|
const client = new WhisperClient({
|
|
23
22
|
login: {
|
|
24
|
-
email:
|
|
25
|
-
password:
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
// whisperUrl: "https://whisperai.com",
|
|
29
|
-
// chunkSize: 8 * 1024 * 1024 // 8MB
|
|
30
|
-
});
|
|
31
|
-
```
|
|
32
|
-
|
|
33
|
-
The client handles authentication automatically. It will log in on the first request and refresh the session if the token expires.
|
|
23
|
+
email: process.env.WHISPER_EMAIL!,
|
|
24
|
+
password: process.env.WHISPER_PASSWORD!
|
|
25
|
+
}
|
|
26
|
+
})
|
|
34
27
|
|
|
35
|
-
|
|
28
|
+
const audio = new Uint8Array(await readFile("./interview.m4a"))
|
|
29
|
+
const recording = await client.transcribe(audio, {
|
|
30
|
+
filename: "interview.m4a",
|
|
31
|
+
mimeType: "audio/x-m4a",
|
|
32
|
+
durationSeconds: 120
|
|
33
|
+
})
|
|
36
34
|
|
|
37
|
-
|
|
35
|
+
console.log(recording.transcription.content)
|
|
36
|
+
```
|
|
38
37
|
|
|
39
|
-
|
|
38
|
+
The default processing timeout is 30 minutes and the default polling interval is 2 seconds.
|
|
40
39
|
|
|
41
40
|
```typescript
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
console.log(`
|
|
49
|
-
|
|
50
|
-
// Get subscription details
|
|
51
|
-
const subscription = await client.subscriptionDetails();
|
|
41
|
+
const controller = new AbortController()
|
|
42
|
+
|
|
43
|
+
const recording = await client.transcribe(audio, metadata, {
|
|
44
|
+
timeoutMs: 45 * 60 * 1000,
|
|
45
|
+
pollIntervalMs: 3000,
|
|
46
|
+
signal: controller.signal,
|
|
47
|
+
onProgress: percentage => console.log(`Upload: ${percentage}%`)
|
|
48
|
+
})
|
|
52
49
|
```
|
|
53
50
|
|
|
54
|
-
|
|
51
|
+
## Streams
|
|
55
52
|
|
|
56
|
-
|
|
53
|
+
For streaming uploads, provide `totalSize` so the SDK can upload without buffering the entire file. If `totalSize` is omitted, the SDK first buffers the whole stream to determine its size.
|
|
57
54
|
|
|
58
55
|
```typescript
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
const result = await client.upload(buffer, {
|
|
66
|
-
filename: 'interview.mp3',
|
|
67
|
-
durationSeconds: 120, // Total duration in seconds
|
|
68
|
-
mimeType: 'audio/mpeg', // Optional
|
|
69
|
-
title: 'Interview with John Doe', // Optional
|
|
70
|
-
enableSpeakerDetection: true, // Optional
|
|
71
|
-
speakerCount: 'auto' // Optional: 'auto' or number
|
|
72
|
-
});
|
|
73
|
-
|
|
74
|
-
console.log(`Uploaded recording ID: ${result.id}`);
|
|
56
|
+
const recording = await client.transcribe(stream, {
|
|
57
|
+
filename: "meeting.webm",
|
|
58
|
+
mimeType: "audio/webm",
|
|
59
|
+
durationSeconds: 900,
|
|
60
|
+
totalSize: contentLength
|
|
61
|
+
})
|
|
75
62
|
```
|
|
76
63
|
|
|
77
|
-
|
|
64
|
+
## Start without waiting
|
|
78
65
|
|
|
79
|
-
|
|
66
|
+
Queue workers can upload and return immediately, then check the recording later.
|
|
80
67
|
|
|
81
68
|
```typescript
|
|
82
|
-
const
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
const
|
|
86
|
-
|
|
87
|
-
// Check recording status and get transcription result
|
|
88
|
-
const recording = await client.recording(recordingId);
|
|
89
|
-
|
|
90
|
-
if (recording.status === 'completed' && recording.transcription) {
|
|
91
|
-
console.log(recording.transcription.content);
|
|
92
|
-
|
|
93
|
-
// Access segments with timestamps
|
|
94
|
-
recording.transcription.segments.forEach(segment => {
|
|
95
|
-
console.log(`[${segment.start} - ${segment.end}]: ${segment.text}`);
|
|
96
|
-
});
|
|
97
|
-
}
|
|
69
|
+
const started = await client.startTranscription(audio, metadata)
|
|
70
|
+
console.log(started.id, started.status) // processing
|
|
71
|
+
|
|
72
|
+
const statuses = await client.recordingStatus([started.id])
|
|
73
|
+
const completed = await client.waitForTranscription(started.id)
|
|
98
74
|
```
|
|
99
75
|
|
|
100
|
-
|
|
76
|
+
`requestTranscription(recordingId)` is available for explicitly restarting or recovering an existing recording. A normal signed upload starts processing when the upload is completed, so it does not need this extra call.
|
|
101
77
|
|
|
102
|
-
|
|
78
|
+
## Upload metadata
|
|
103
79
|
|
|
104
|
-
|
|
105
|
-
const recordingId = 12345;
|
|
80
|
+
The SDK accepts the current WhisperAI transcription settings:
|
|
106
81
|
|
|
107
|
-
|
|
108
|
-
|
|
82
|
+
```typescript
|
|
83
|
+
await client.transcribe(audio, {
|
|
84
|
+
filename: "interview.m4a",
|
|
85
|
+
durationSeconds: 120,
|
|
86
|
+
language: "multi-auto",
|
|
87
|
+
enableSpeakerDetection: true,
|
|
88
|
+
speakerCount: "auto",
|
|
89
|
+
transcriptionStyle: "clean_readable",
|
|
90
|
+
importantTerms: "WhisperAI, Codex",
|
|
91
|
+
customPrompt: "Technical product interview",
|
|
92
|
+
speakerIdentificationEnabled: true,
|
|
93
|
+
speakerIdentificationMode: "role",
|
|
94
|
+
speakerIdentificationValues: ["Interviewer", "Guest"]
|
|
95
|
+
})
|
|
109
96
|
```
|
|
110
97
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
List and retrieve recordings.
|
|
98
|
+
## Other methods
|
|
114
99
|
|
|
115
100
|
```typescript
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
// search: "interview", // Optional search query
|
|
124
|
-
// status: "completed" // Optional status filter
|
|
125
|
-
});
|
|
126
|
-
|
|
127
|
-
console.log(`Found ${recordingsList.meta.totalItems} recordings`);
|
|
101
|
+
await client.user()
|
|
102
|
+
await client.usage()
|
|
103
|
+
await client.subscriptionDetails()
|
|
104
|
+
await client.recording(recordingId)
|
|
105
|
+
await client.recordings({ limit: 20, sort: "newest" })
|
|
106
|
+
await client.summary()
|
|
107
|
+
await client.translate(recordingId, "es")
|
|
128
108
|
```
|
|
129
109
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
Get a summary of your activity.
|
|
110
|
+
## Errors
|
|
133
111
|
|
|
134
112
|
```typescript
|
|
135
|
-
|
|
136
|
-
|
|
113
|
+
import {
|
|
114
|
+
WhisperApiError,
|
|
115
|
+
WhisperAuthError,
|
|
116
|
+
WhisperNetworkError,
|
|
117
|
+
WhisperTimeoutError,
|
|
118
|
+
WhisperTranscriptionError,
|
|
119
|
+
WhisperUploadError
|
|
120
|
+
} from "whisperai-sdk"
|
|
137
121
|
```
|
|
138
122
|
|
|
139
|
-
|
|
123
|
+
Upload diagnostics are enabled by default and are sent to WhisperAI on a best-effort basis. Disable them globally by setting `diagnostics: false` in `ClientOptions`, or for a single operation by passing `{ diagnostics: false }` in that operation's options.
|
|
140
124
|
|
|
141
|
-
|
|
125
|
+
## Live smoke test
|
|
142
126
|
|
|
143
|
-
```
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
if (error instanceof WhisperAuthError) {
|
|
150
|
-
console.error("Authentication failed. Check credentials.");
|
|
151
|
-
} else if (error instanceof WhisperNetworkError) {
|
|
152
|
-
console.error("Network issue.");
|
|
153
|
-
} else if (error instanceof WhisperApiError) {
|
|
154
|
-
console.error(`API Error ${error.status}: ${JSON.stringify(error.data)}`);
|
|
155
|
-
} else {
|
|
156
|
-
console.error("Unknown error:", error);
|
|
157
|
-
}
|
|
158
|
-
}
|
|
127
|
+
```bash
|
|
128
|
+
WHISPER_EMAIL=... \
|
|
129
|
+
WHISPER_PASSWORD=... \
|
|
130
|
+
WHISPER_AUDIO_PATH=./sample.m4a \
|
|
131
|
+
WHISPER_AUDIO_DURATION_SECONDS=10 \
|
|
132
|
+
bun test test/live.test.ts
|
|
159
133
|
```
|
|
160
134
|
|
|
161
135
|
## License
|
package/dist/client.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import type { ClientOptions,
|
|
1
|
+
import type { ClientOptions, CompletedRecordingResponse, FinalizeUploadResponse, InitMetaFile, OperationOptions, RecordingResponse, RecordingsQuery, RecordingsResponse, RecordingStatusResponse, SubscriptionDetailsResponse, SummaryResponse, TranscribeOptions, TranscriptionResponse, TranslateResponse, UsageInfo, UserInfo } from "./types.js";
|
|
2
|
+
type AudioInput = Uint8Array | ReadableStream<Uint8Array>;
|
|
2
3
|
export declare class WhisperClient {
|
|
3
4
|
private cookies?;
|
|
4
5
|
private readonly clientOptions;
|
|
@@ -7,36 +8,50 @@ export declare class WhisperClient {
|
|
|
7
8
|
user(): Promise<UserInfo>;
|
|
8
9
|
usage(): Promise<UsageInfo>;
|
|
9
10
|
subscriptionDetails(): Promise<SubscriptionDetailsResponse>;
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
11
|
+
startTranscription(file: AudioInput, meta: InitMetaFile, options?: OperationOptions): Promise<FinalizeUploadResponse>;
|
|
12
|
+
transcribe(file: AudioInput, meta: InitMetaFile, options?: TranscribeOptions): Promise<CompletedRecordingResponse>;
|
|
13
|
+
requestTranscription(recordingId: number, signal?: AbortSignal): Promise<TranscriptionResponse>;
|
|
14
|
+
recordingStatus(recordingIds: number[], signal?: AbortSignal): Promise<RecordingStatusResponse[]>;
|
|
15
|
+
waitForTranscription(recordingId: number, options?: Pick<TranscribeOptions, "pollIntervalMs" | "timeoutMs" | "signal">): Promise<CompletedRecordingResponse>;
|
|
15
16
|
translate(recordingId: number, language: string): Promise<TranslateResponse>;
|
|
16
|
-
recording(recordingId: number): Promise<RecordingResponse>;
|
|
17
|
+
recording(recordingId: number, signal?: AbortSignal): Promise<RecordingResponse>;
|
|
17
18
|
recordings(query?: RecordingsQuery): Promise<RecordingsResponse>;
|
|
18
19
|
summary(): Promise<SummaryResponse>;
|
|
19
|
-
private
|
|
20
|
-
private
|
|
21
|
-
private
|
|
22
|
-
private
|
|
20
|
+
private buildUploadMetadata;
|
|
21
|
+
private startResumableSession;
|
|
22
|
+
private uploadToGcs;
|
|
23
|
+
private uploadRangeWithRetry;
|
|
24
|
+
private putRange;
|
|
25
|
+
private probeUpload;
|
|
26
|
+
private readChunks;
|
|
23
27
|
private recall;
|
|
24
28
|
private get;
|
|
25
29
|
private post;
|
|
26
|
-
private postForm;
|
|
27
30
|
private request;
|
|
31
|
+
private responseData;
|
|
32
|
+
private sendDiagnostic;
|
|
33
|
+
private isRetryable;
|
|
34
|
+
private retryDelay;
|
|
35
|
+
private sleep;
|
|
36
|
+
private throwIfAborted;
|
|
37
|
+
private nextOffset;
|
|
38
|
+
private toArrayBuffer;
|
|
39
|
+
private errorMessage;
|
|
40
|
+
private withDiagnosticId;
|
|
41
|
+
private createDiagnosticId;
|
|
28
42
|
private get loginLink();
|
|
29
43
|
private get userLink();
|
|
30
44
|
private get usageLink();
|
|
31
45
|
private get subscriptionDetailsLink();
|
|
32
|
-
private get
|
|
33
|
-
private
|
|
34
|
-
private get
|
|
46
|
+
private get signUploadLink();
|
|
47
|
+
private completeUploadLink;
|
|
48
|
+
private get diagnosticsLink();
|
|
35
49
|
private get transcriptionLink();
|
|
50
|
+
private get recordingStatusLink();
|
|
36
51
|
private get summaryLink();
|
|
37
52
|
private get recordingsLink();
|
|
38
53
|
private translateLink;
|
|
39
54
|
private recordingLink;
|
|
40
55
|
private mergeCookies;
|
|
41
|
-
private getCountChunks;
|
|
42
56
|
}
|
|
57
|
+
export {};
|
package/dist/constant.d.ts
CHANGED
|
@@ -7,3 +7,7 @@ export declare enum WhisperStatus {
|
|
|
7
7
|
FAILED = "failed",
|
|
8
8
|
CANCELLED = "cancelled"
|
|
9
9
|
}
|
|
10
|
+
export declare const DEFAULT_UPLOAD_CHUNK_SIZE: number;
|
|
11
|
+
export declare const DEFAULT_MAX_UPLOAD_ATTEMPTS = 5;
|
|
12
|
+
export declare const DEFAULT_POLL_INTERVAL_MS = 2000;
|
|
13
|
+
export declare const DEFAULT_TRANSCRIPTION_TIMEOUT_MS: number;
|
package/dist/errors.d.ts
CHANGED
|
@@ -16,3 +16,20 @@ export declare class WhisperNetworkError extends WhisperError {
|
|
|
16
16
|
readonly code = "NETWORK_ERROR";
|
|
17
17
|
constructor(cause?: unknown);
|
|
18
18
|
}
|
|
19
|
+
export declare class WhisperUploadError extends WhisperError {
|
|
20
|
+
readonly diagnosticId?: string | undefined;
|
|
21
|
+
readonly code = "UPLOAD_ERROR";
|
|
22
|
+
constructor(message: string, diagnosticId?: string | undefined, cause?: unknown);
|
|
23
|
+
}
|
|
24
|
+
export declare class WhisperTranscriptionError extends WhisperError {
|
|
25
|
+
readonly recordingId: number;
|
|
26
|
+
readonly status: string;
|
|
27
|
+
readonly code = "TRANSCRIPTION_ERROR";
|
|
28
|
+
constructor(recordingId: number, status: string);
|
|
29
|
+
}
|
|
30
|
+
export declare class WhisperTimeoutError extends WhisperError {
|
|
31
|
+
readonly recordingId: number;
|
|
32
|
+
readonly timeoutMs: number;
|
|
33
|
+
readonly code = "TIMEOUT_ERROR";
|
|
34
|
+
constructor(recordingId: number, timeoutMs: number);
|
|
35
|
+
}
|
package/dist/index.d.ts
CHANGED