@ainative/ai-kit-video 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +325 -0
- package/dist/index.d.mts +418 -0
- package/dist/index.d.ts +418 -0
- package/dist/index.js +1329 -0
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +1309 -0
- package/dist/index.mjs.map +1 -0
- package/dist/processing/index.d.mts +292 -0
- package/dist/processing/index.d.ts +292 -0
- package/dist/processing/index.js +308 -0
- package/dist/processing/index.js.map +1 -0
- package/dist/processing/index.mjs +298 -0
- package/dist/processing/index.mjs.map +1 -0
- package/package.json +109 -0
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
/**
 * Levels of detail at which Whisper can report timestamps for a transcription.
 */
type TimestampGranularity =
    | 'word'
    | 'segment';
|
|
5
|
+
/**
 * Output formats that can be requested from a Whisper transcription.
 */
type ResponseFormat =
    | 'json'
    | 'text'
    | 'srt'
    | 'verbose_json'
    | 'vtt';
|
|
9
|
+
/**
 * A single timed segment of a transcription.
 */
interface TranscriptionSegment$1 {
    /** Numeric identifier of the segment. */
    id: number;
    /** Time at which the segment starts. */
    start: number;
    /** Time at which the segment ends. */
    end: number;
    /** Text transcribed for this segment. */
    text: string;
}
|
|
18
|
+
/**
 * Timing information for one transcribed word.
 */
interface TranscriptionWord {
    /** The word as transcribed. */
    word: string;
    /** Time at which the word starts. */
    start: number;
    /** Time at which the word ends. */
    end: number;
}
|
|
26
|
+
/**
 * Settings for a Whisper audio transcription request.
 */
interface TranscriptionOptions {
    /** OpenAI API key used to authenticate the request. */
    apiKey: string;
    /**
     * ISO-639-1 code of the language spoken in the audio (for example 'en', 'fr' or 'es').
     * Supplying it can improve accuracy and latency.
     */
    language?: string;
    /**
     * Optional text that steers the model's style or continues a previous audio segment.
     * It should be written in the same language as the audio. Typical uses:
     * - keeping terminology consistent (e.g., "Dr. Smith, AI, NLP")
     * - hinting at speakers (e.g., "Speaker 1, Speaker 2")
     * - encouraging proper punctuation and formatting
     */
    prompt?: string;
    /**
     * Format of the transcript output:
     * - 'json': basic JSON containing only the text
     * - 'text': plain text
     * - 'srt': SRT subtitles
     * - 'verbose_json': detailed JSON with timestamps and metadata
     * - 'vtt': WebVTT subtitles
     */
    response_format?: ResponseFormat;
    /**
     * Sampling temperature between 0 and 1. Higher values such as 0.8 make the
     * output more random; lower values such as 0.2 make it more focused and
     * deterministic.
     */
    temperature?: number;
    /**
     * Timestamp granularities to populate for this transcription; may contain
     * 'word', 'segment' or both. Segment timestamps add no latency, word
     * timestamps do.
     */
    timestamp_granularities?: TimestampGranularity[];
    /** ID of the model to use. Only 'whisper-1' is currently available. */
    model?: string;
}
|
|
74
|
+
/**
 * Outcome of an audio transcription.
 */
interface TranscriptionResult {
    /** The transcribed text. */
    text: string;
    /** Language of the transcription (only available with verbose_json). */
    language?: string;
    /** Duration of the audio in seconds (only available with verbose_json). */
    duration?: number;
    /** Segment-level timestamps (only available when timestamp_granularities includes 'segment'). */
    segments?: TranscriptionSegment$1[];
    /** Word-level timestamps (only available when timestamp_granularities includes 'word'). */
    words?: TranscriptionWord[];
}
|
|
99
|
+
/**
 * Sends an audio file to OpenAI's Whisper API and returns the transcription.
 *
 * @param audioFile - Audio to transcribe, as a File or Blob
 * @param options - API key plus optional Whisper parameters
 * @returns Promise that resolves to the transcription result
 *
 * @throws {Error} When the API key is missing or the API call fails
 */
declare function transcribeAudio(
    audioFile: File | Blob,
    options: TranscriptionOptions,
): Promise<TranscriptionResult>;
|
|
109
|
+
/**
 * Renders transcription segments as human-readable text, one timestamped
 * entry per segment.
 *
 * @param segments - Transcription segments with timestamps; may be omitted
 * @returns Formatted string with timestamps
 */
declare function formatSegments(
    segments?: TranscriptionSegment$1[],
): string;
|
|
116
|
+
/**
 * Pulls speaker-labelled passages out of transcribed text.
 *
 * Whisper has no native speaker diarization, but a prompt such as
 * "Speaker 1, Speaker 2" can encourage it to emit labels that this
 * function then picks up.
 *
 * @param text - Transcribed text that may contain speaker labels
 * @returns One entry per labelled passage, with the speaker name and the text
 */
declare function extractSpeakers(text: string): {
    speaker: string;
    text: string;
}[];
|
|
128
|
+
/**
 * Estimates what transcribing audio of a given length costs.
 * Uses the Whisper price of $0.006 per minute.
 *
 * @param durationSeconds - Length of the audio in seconds
 * @returns Estimated cost in USD
 */
declare function estimateTranscriptionCost(
    durationSeconds: number,
): number;
|
|
136
|
+
|
|
137
|
+
/**
 * A timed piece of transcribed text, optionally annotated with a confidence
 * score and a speaker.
 */
interface TranscriptionSegment {
    /** The transcribed text. */
    text: string;
    /** Start of the segment, in seconds. */
    start: number;
    /** End of the segment, in seconds. */
    end: number;
    /** Confidence score in the range 0-1. */
    confidence?: number;
    /** Identifier of the speaker. */
    speaker?: string;
}
|
|
152
|
+
/**
 * Switches and limits that control how transcribed text is formatted.
 */
interface TextFormattingOptions {
    /** Turn automatic punctuation on or off. */
    enablePunctuation?: boolean;
    /** Turn capitalization on or off. */
    enableCapitalization?: boolean;
    /** Turn paragraph detection on or off. */
    enableParagraphs?: boolean;
    /** Maximum length of a line. */
    maxLineLength?: number;
}
|
|
165
|
+
|
|
166
|
+
/**
|
|
167
|
+
* Text Formatter for Transcription Processing
|
|
168
|
+
* @module @ainative/video/processing
|
|
169
|
+
*/
|
|
170
|
+
|
|
171
|
+
/**
 * Formats transcribed text by adding terminal punctuation and sentence
 * capitalization.
 *
 * Options are merged over built-in defaults when the formatter is created and
 * can be changed later with `setOptions`. The paragraph and line-length
 * settings are stored and exposed through their getters.
 * NOTE(review): the bundled implementation shipped with these typings applies
 * only punctuation and capitalization when formatting - confirm before relying
 * on paragraph detection or line wrapping.
 *
 * @example
 * ```typescript
 * const formatter = new TextFormatter({
 *   enablePunctuation: true,
 *   enableCapitalization: true
 * });
 *
 * const formatted = formatter.formatText('hello world');
 * // Result: "Hello world."
 * ```
 */
declare class TextFormatter {
    private options;
    /**
     * Words that mark a sentence as a question when they come first
     */
    private static readonly QUESTION_WORDS;
    /**
     * Values used for every option the caller does not supply
     */
    private static readonly DEFAULT_OPTIONS;
    /**
     * Creates a new TextFormatter instance
     *
     * @param options - Formatting options; anything omitted uses the default
     */
    constructor(options?: TextFormattingOptions);
    /**
     * Add terminal punctuation to text
     *
     * The text is trimmed first. Text that already ends in '.', '!' or '?'
     * is returned as is; text whose first word is a question word gets a
     * question mark; everything else gets a period.
     *
     * @param text - Input text
     * @returns Text with punctuation applied, or an empty string for empty input
     */
    applyPunctuation(text: string): string;
    /**
     * Apply capitalization to text
     *
     * Capitalizes:
     * - the first letter of the text
     * - letters that follow sentence-ending punctuation and whitespace
     *
     * @param text - Input text
     * @returns Text with capitalization applied
     */
    applyCapitalization(text: string): string;
    /**
     * Format a text string with all enabled options
     *
     * Multi-line input is formatted line by line.
     *
     * @param text - Input text
     * @returns Formatted text
     */
    formatText(text: string): string;
    /**
     * Format one line of text
     *
     * @param text - Input text (single line)
     * @returns Formatted text
     */
    private formatSingleLine;
    /**
     * Format an array of transcription segments
     *
     * Only the `text` of each segment is rewritten; every other field of the
     * segment (timestamps, confidence, speaker and any additional fields the
     * caller's segment type carries, such as an `id`) is copied through
     * unchanged, which is why the method is generic over the segment type.
     *
     * @param segments - Array of transcription segments
     * @returns New array of segments with formatted text
     */
    formatSegments<T extends TranscriptionSegment>(segments: T[]): T[];
    /**
     * Get current formatting options
     *
     * @returns A copy of the current options
     */
    getOptions(): Readonly<Required<TextFormattingOptions>>;
    /**
     * Update formatting options
     *
     * @param options - New options to merge with the existing ones
     */
    setOptions(options: Partial<TextFormattingOptions>): void;
    /**
     * Check if punctuation is enabled
     *
     * @returns True if punctuation is enabled
     */
    isPunctuationEnabled(): boolean;
    /**
     * Check if capitalization is enabled
     *
     * @returns True if capitalization is enabled
     */
    isCapitalizationEnabled(): boolean;
    /**
     * Check if paragraph detection is enabled
     *
     * @returns True if paragraph detection is enabled
     */
    isParagraphsEnabled(): boolean;
    /**
     * Get maximum line length
     *
     * @returns Maximum line length in characters
     */
    getMaxLineLength(): number;
}
|
|
291
|
+
|
|
292
|
+
export { type ResponseFormat, TextFormatter, type TextFormattingOptions, type TimestampGranularity, type TranscriptionOptions, type TranscriptionResult, type TranscriptionSegment$1 as TranscriptionSegment, type TranscriptionWord, estimateTranscriptionCost, extractSpeakers, formatSegments, transcribeAudio };
|
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var OpenAI = require('openai');
|
|
4
|
+
|
|
5
|
+
// Bundler interop helper: an ES-module namespace object is returned as is,
// any other value is wrapped so that it can be read through `.default`.
function _interopDefault(e) {
  if (e && e.__esModule) {
    return e;
  }
  return { default: e };
}
|
|
6
|
+
|
|
7
|
+
var OpenAI__default = /*#__PURE__*/_interopDefault(OpenAI);
|
|
8
|
+
|
|
9
|
+
// src/processing/transcription.ts
|
|
10
|
+
/**
 * Transcribe audio using OpenAI's Whisper API.
 *
 * @param audioFile - The audio file to transcribe (File or Blob)
 * @param options - Transcription options including API key and Whisper parameters
 * @returns Promise resolving to `{ text }`, plus `language`, `duration`,
 *   `segments` and `words` whenever the API response contains them
 * @throws {Error} "OpenAI API key is required" when `options` is missing or
 *   `options.apiKey` is not a non-blank string; otherwise whatever error the
 *   API call raises
 */
async function transcribeAudio(audioFile, options) {
  // Validate before touching `options` so a missing options object or a
  // non-string key yields the documented error instead of a raw TypeError.
  const apiKey = options ? options.apiKey : void 0;
  if (typeof apiKey !== "string" || apiKey.trim() === "") {
    throw new Error("OpenAI API key is required");
  }
  const openai = new OpenAI__default.default({ apiKey });
  try {
    const granularities = options.timestamp_granularities;
    const wantsTimestamps = Array.isArray(granularities) && granularities.length > 0;
    // Timestamp granularities are only honoured for verbose_json responses, so
    // default to that format when the caller asked for timestamps but did not
    // pick a format. An explicit response_format is always respected.
    const responseFormat = options.response_format || (wantsTimestamps ? "verbose_json" : void 0);
    const params = {
      file: audioFile,
      model: options.model || "whisper-1",
      ...(options.language ? { language: options.language } : {}),
      ...(options.prompt ? { prompt: options.prompt } : {}),
      ...(responseFormat ? { response_format: responseFormat } : {}),
      // 0 is a valid temperature, so test against undefined rather than truthiness.
      ...(options.temperature !== void 0 ? { temperature: options.temperature } : {}),
      ...(wantsTimestamps ? { timestamp_granularities: granularities } : {})
    };
    const response = await openai.audio.transcriptions.create(params);
    // Plain-text style formats come back as a bare string.
    if (typeof response === "string") {
      return { text: response };
    }
    const result = { text: response.text };
    // Copy the optional fields only when the response actually carries them.
    for (const field of ["language", "duration", "segments", "words"]) {
      if (field in response) {
        result[field] = response[field];
      }
    }
    return result;
  } catch (error) {
    if (error instanceof Error) {
      throw error;
    }
    // Non-Error throwables are normalised so callers always receive an Error.
    throw new Error("Failed to transcribe audio: Unknown error");
  }
}
|
|
64
|
+
/**
 * Format transcription segments as readable text, one line per segment in the
 * form `[MM:SS.s - MM:SS.s] text`.
 *
 * @param segments - Array of transcription segments with timestamps
 * @returns The formatted lines joined with "\n", or "" when there are no segments
 */
function formatSegments(segments) {
  if (!segments || segments.length === 0) {
    return "";
  }
  return segments.map((segment) => {
    const start = formatTimestamp(segment.start);
    const end = formatTimestamp(segment.end);
    // Tolerate a segment without text instead of crashing on `.trim()`.
    const text = typeof segment.text === "string" ? segment.text.trim() : "";
    return `[${start} - ${end}] ${text}`;
  }).join("\n");
}
/**
 * Render a number of seconds as `MM:SS.s`.
 *
 * Minutes are not wrapped into hours (3600 seconds renders as "60:00.0").
 * Negative or non-finite input is rendered as "00:00.0".
 *
 * @param seconds - Time offset in seconds
 * @returns The formatted timestamp
 */
function formatTimestamp(seconds) {
  const safeSeconds = Number.isFinite(seconds) && seconds > 0 ? seconds : 0;
  // Round to tenths BEFORE splitting into minutes and seconds; rounding the
  // seconds part afterwards turns 59.96 into "00:60.0" instead of "01:00.0".
  const totalTenths = Math.round(safeSeconds * 10);
  const mins = Math.floor(totalTenths / 600);
  const secs = (totalTenths % 600) / 10;
  return `${String(mins).padStart(2, "0")}:${secs.toFixed(1).padStart(4, "0")}`;
}
|
|
79
|
+
/**
 * Extract speaker-labelled passages from transcribed text.
 *
 * A label is either "Speaker <number>:" or two capitalised words followed by a
 * colon (e.g. "John Smith:"). Each passage runs from the end of its label to
 * the start of the next label (or the end of the text), so multi-sentence
 * turns, decimals such as "3.50" and turns without closing punctuation are
 * kept whole.
 *
 * @param text - Transcribed text potentially containing speaker labels
 * @returns Array of `{ speaker, text }` in order of appearance; empty when no
 *   label is found or `text` is not a non-empty string
 */
function extractSpeakers(text) {
  if (typeof text !== "string" || text === "") {
    return [];
  }
  const labelPattern = /(Speaker \d+|[A-Z][a-z]+ [A-Z][a-z]+):\s*/g;
  const labels = [...text.matchAll(labelPattern)];
  return labels.map((label, position) => {
    const bodyStart = label.index + label[0].length;
    const next = labels[position + 1];
    const bodyEnd = next ? next.index : text.length;
    return {
      speaker: label[1].trim(),
      text: text.slice(bodyStart, bodyEnd).trim()
    };
  }).filter((turn) => turn.text !== "");
}
|
|
87
|
+
/**
 * Estimate what transcribing audio of the given length costs, at the Whisper
 * rate of $0.006 per minute.
 *
 * @param durationSeconds - Duration of audio in seconds
 * @returns Estimated cost in USD
 */
function estimateTranscriptionCost(durationSeconds) {
  const usdPerMinute = 6e-3;
  return durationSeconds / 60 * usdPerMinute;
}
|
|
92
|
+
|
|
93
|
+
// src/processing/text-formatter.ts
|
|
94
|
+
// Returns a copy of `options` without the keys whose value is undefined, so an
// explicitly passed `undefined` never overrides a default or an earlier value.
function definedOptions(options) {
  const picked = {};
  for (const [key, value] of Object.entries(options || {})) {
    if (value !== void 0) {
      picked[key] = value;
    }
  }
  return picked;
}
/**
 * Formats transcribed text by adding terminal punctuation and sentence
 * capitalization. The paragraph and line-length options are stored and exposed
 * through getters but are not applied by the formatting methods of this class.
 */
var _TextFormatter = class _TextFormatter {
  /**
   * Creates a new TextFormatter instance
   *
   * @param options - Formatting options; omitted or undefined settings fall
   *   back to DEFAULT_OPTIONS
   */
  constructor(options = {}) {
    this.options = {
      ..._TextFormatter.DEFAULT_OPTIONS,
      ...definedOptions(options)
    };
  }
  /**
   * Apply punctuation to text
   *
   * The text is trimmed first. Text already ending in '.', '!' or '?' is
   * returned unchanged; text whose first word is a question word gets '?';
   * everything else gets '.'.
   *
   * @param text - Input text
   * @returns Text with punctuation applied ("" for empty or blank input)
   */
  applyPunctuation(text) {
    if (!text) {
      return "";
    }
    const trimmed = text.trim();
    if (!trimmed) {
      return "";
    }
    if (/[.!?]$/.test(trimmed)) {
      return trimmed;
    }
    const firstWord = trimmed.toLowerCase().split(/\s+/)[0];
    const isQuestion = Boolean(firstWord) && _TextFormatter.QUESTION_WORDS.includes(firstWord);
    return trimmed + (isQuestion ? "?" : ".");
  }
  /**
   * Apply capitalization to text
   *
   * Capitalizes:
   * - the first non-whitespace character of the text
   * - lower-case letters that follow sentence-ending punctuation and whitespace
   *
   * Leading whitespace is preserved.
   *
   * @param text - Input text
   * @returns Text with capitalization applied
   */
  applyCapitalization(text) {
    if (!text) {
      return "";
    }
    // Skip leading whitespace: upper-casing charAt(0) would be a no-op for
    // text such as " hello" and leave the sentence uncapitalized.
    const firstDone = text.replace(/^(\s*)(\S)/, (_match, lead, first) => lead + first.toUpperCase());
    return firstDone.replace(/([.!?]\s+)([a-z])/g, (_match, punctuation, letter) => {
      return punctuation + letter.toUpperCase();
    });
  }
  /**
   * Format a single text string with all enabled options
   *
   * Input containing newlines is formatted line by line and re-joined with "\n".
   *
   * @param text - Input text
   * @returns Formatted text
   */
  formatText(text) {
    if (!text) {
      return "";
    }
    // split() yields a single element when there is no newline, so one code
    // path covers both single-line and multi-line input.
    return text.split("\n").map((line) => this.formatSingleLine(line)).join("\n");
  }
  /**
   * Format a single line of text
   *
   * Empty or whitespace-only lines are returned untouched so blank lines
   * survive multi-line formatting.
   *
   * @param text - Input text (single line)
   * @returns Formatted text
   */
  formatSingleLine(text) {
    if (!text || !text.trim()) {
      return text;
    }
    let result = text;
    if (this.options.enableCapitalization) {
      result = this.applyCapitalization(result);
    }
    if (this.options.enablePunctuation) {
      result = this.applyPunctuation(result);
    }
    return result;
  }
  /**
   * Format an array of transcription segments
   *
   * Applies formatting to the text of each segment; every other field of the
   * segment is copied through unchanged into a new object.
   *
   * @param segments - Array of transcription segments
   * @returns Array of formatted segments ([] for missing or empty input)
   */
  formatSegments(segments) {
    if (!segments || segments.length === 0) {
      return [];
    }
    return segments.map((segment) => ({
      ...segment,
      text: this.formatText(segment.text)
    }));
  }
  /**
   * Get current formatting options
   *
   * @returns A shallow copy of the current options
   */
  getOptions() {
    return { ...this.options };
  }
  /**
   * Update formatting options
   *
   * @param options - New options to merge with existing; undefined values are
   *   ignored so they cannot erase a setting
   */
  setOptions(options) {
    this.options = {
      ...this.options,
      ...definedOptions(options)
    };
  }
  /**
   * Check if punctuation is enabled
   *
   * @returns True if punctuation is enabled
   */
  isPunctuationEnabled() {
    return this.options.enablePunctuation;
  }
  /**
   * Check if capitalization is enabled
   *
   * @returns True if capitalization is enabled
   */
  isCapitalizationEnabled() {
    return this.options.enableCapitalization;
  }
  /**
   * Check if paragraph detection is enabled
   *
   * @returns True if paragraph detection is enabled
   */
  isParagraphsEnabled() {
    return this.options.enableParagraphs;
  }
  /**
   * Get maximum line length
   *
   * @returns Maximum line length in characters
   */
  getMaxLineLength() {
    return this.options.maxLineLength;
  }
};
/**
 * Lower-case words that mark a sentence as a question when they come first
 */
_TextFormatter.QUESTION_WORDS = [
  "how",
  "what",
  "when",
  "where",
  "who",
  "whom",
  "whose",
  "why",
  "which",
  "can",
  "could",
  "would",
  "should",
  "will",
  "do",
  "does",
  "did",
  "is",
  "are",
  "was",
  "were"
];
/**
 * Default formatting options
 */
_TextFormatter.DEFAULT_OPTIONS = {
  enablePunctuation: true,
  enableCapitalization: true,
  enableParagraphs: false,
  maxLineLength: 80
};
var TextFormatter = _TextFormatter;
|
|
301
|
+
|
|
302
|
+
exports.TextFormatter = TextFormatter;
|
|
303
|
+
exports.estimateTranscriptionCost = estimateTranscriptionCost;
|
|
304
|
+
exports.extractSpeakers = extractSpeakers;
|
|
305
|
+
exports.formatSegments = formatSegments;
|
|
306
|
+
exports.transcribeAudio = transcribeAudio;
|
|
307
|
+
//# sourceMappingURL=index.js.map
|
|
308
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/processing/transcription.ts","../../src/processing/text-formatter.ts"],"names":["OpenAI"],"mappings":";;;;;;;;;AA8HA,eAAsB,eAAA,CACpB,WACA,OAAA,EAC8B;AAE9B,EAAA,IAAI,CAAC,OAAA,CAAQ,MAAA,IAAU,QAAQ,MAAA,CAAO,IAAA,OAAW,EAAA,EAAI;AACnD,IAAA,MAAM,IAAI,MAAM,4BAA4B,CAAA;AAAA,EAC9C;AAGA,EAAA,MAAM,MAAA,GAAS,IAAIA,uBAAA,CAAO;AAAA,IACxB,QAAQ,OAAA,CAAQ;AAAA,GACjB,CAAA;AAED,EAAA,IAAI;AAEF,IAAA,MAAM,MAAA,GAAoC;AAAA,MACxC,IAAA,EAAM,SAAA;AAAA,MACN,KAAA,EAAO,QAAQ,KAAA,IAAS;AAAA,KAC1B;AAGA,IAAA,IAAI,QAAQ,QAAA,EAAU;AACpB,MAAA,MAAA,CAAO,WAAW,OAAA,CAAQ,QAAA;AAAA,IAC5B;AAEA,IAAA,IAAI,QAAQ,MAAA,EAAQ;AAClB,MAAA,MAAA,CAAO,SAAS,OAAA,CAAQ,MAAA;AAAA,IAC1B;AAEA,IAAA,IAAI,QAAQ,eAAA,EAAiB;AAC3B,MAAA,MAAA,CAAO,kBAAkB,OAAA,CAAQ,eAAA;AAAA,IACnC;AAEA,IAAA,IAAI,OAAA,CAAQ,gBAAgB,KAAA,CAAA,EAAW;AACrC,MAAA,MAAA,CAAO,cAAc,OAAA,CAAQ,WAAA;AAAA,IAC/B;AAEA,IAAA,IAAI,OAAA,CAAQ,uBAAA,IAA2B,OAAA,CAAQ,uBAAA,CAAwB,SAAS,CAAA,EAAG;AACjF,MAAA,MAAA,CAAO,0BAA0B,OAAA,CAAQ,uBAAA;AAAA,IAC3C;AAGA,IAAA,MAAM,WAAW,MAAM,MAAA,CAAO,KAAA,CAAM,cAAA,CAAe,OAAO,MAAM,CAAA;AAGhE,IAAA,IAAI,OAAO,aAAa,QAAA,EAAU;AAEhC,MAAA,OAAO,EAAE,MAAM,QAAA,EAAS;AAAA,IAC1B;AAGA,IAAA,MAAM,MAAA,GAA8B;AAAA,MAClC,MAAO,QAAA,CAAiB;AAAA,KAC1B;AAGA,IAAA,IAAI,cAAc,QAAA,EAAU;AAC1B,MAAA,MAAA,CAAO,WAAY,QAAA,CAAiB,QAAA;AAAA,IACtC;AAEA,IAAA,IAAI,cAAc,QAAA,EAAU;AAC1B,MAAA,MAAA,CAAO,WAAY,QAAA,CAAiB,QAAA;AAAA,IACtC;AAEA,IAAA,IAAI,cAAc,QAAA,EAAU;AAC1B,MAAA,MAAA,CAAO,WAAY,QAAA,CAAiB,QAAA;AAAA,IACtC;AAEA,IAAA,IAAI,WAAW,QAAA,EAAU;AACvB,MAAA,MAAA,CAAO,QAAS,QAAA,CAAiB,KAAA;AAAA,IACnC;AAEA,IAAA,OAAO,MAAA;AAAA,EACT,SAAS,KAAA,EAAO;AAEd,IAAA,IAAI,iBAAiB,KAAA,EAAO;AAC1B,MAAA,MAAM,KAAA;AAAA,IACR;AACA,IAAA,MAAM,IAAI,MAAM,2CAA2C,CAAA;AAAA,EAC7D;AACF;AAQO,SAAS,eAAe,QAAA,EAA2C;AACxE,EAAA,IAAI,CAAC,QAAA,IAAY,QAAA,CAAS,MAAA,KAAW,CAAA,EAAG;AACtC,IAAA,OAAO,EAAA;AAAA,EACT;AAEA,EAAA,OAAO,QAAA,CACJ,GAAA,CAAI,CAAC,OAAA,KAAY;AAChB,IAAA,MAAM,KAAA,GAAQ,eAAA,CAAgB,OAAA,CAAQ,KAAK,CAAA;AAC3C,IAAA,MAAM,GAAA,GAAM,eAAA,CAAgB,OAAA,CAAQ,GAAG,CAAA;AACvC,IAAA,OAAO,CAAA,CAA
A,EAAI,KAAK,CAAA,GAAA,EAAM,GAAG,KAAK,OAAA,CAAQ,IAAA,CAAK,MAAM,CAAA,CAAA;AAAA,EACnD,CAAC,CAAA,CACA,IAAA,CAAK,IAAI,CAAA;AACd;AAQA,SAAS,gBAAgB,OAAA,EAAyB;AAChD,EAAA,MAAM,IAAA,GAAO,IAAA,CAAK,KAAA,CAAM,OAAA,GAAU,EAAE,CAAA;AACpC,EAAA,MAAM,OAAO,OAAA,GAAU,EAAA;AACvB,EAAA,OAAO,GAAG,IAAA,CAAK,QAAA,EAAS,CAAE,QAAA,CAAS,GAAG,GAAG,CAAC,CAAA,CAAA,EAAI,IAAA,CAAK,QAAQ,CAAC,CAAA,CAAE,QAAA,CAAS,CAAA,EAAG,GAAG,CAAC,CAAA,CAAA;AAChF;AAUO,SAAS,gBAAgB,IAAA,EAAwD;AACtF,EAAA,MAAM,cAAA,GAAiB,0DAAA;AACvB,EAAA,MAAM,UAAU,CAAC,GAAG,IAAA,CAAK,QAAA,CAAS,cAAc,CAAC,CAAA;AAEjD,EAAA,OAAO,OAAA,CACJ,MAAA,CAAO,CAAC,KAAA,KAAU,KAAA,CAAM,CAAC,CAAA,IAAK,KAAA,CAAM,CAAC,CAAC,CAAA,CACtC,GAAA,CAAI,CAAC,KAAA,MAAW;AAAA,IACf,OAAA,EAAS,KAAA,CAAM,CAAC,CAAA,CAAG,IAAA,EAAK;AAAA,IACxB,IAAA,EAAM,KAAA,CAAM,CAAC,CAAA,CAAG,IAAA;AAAK,GACvB,CAAE,CAAA;AACN;AASO,SAAS,0BAA0B,eAAA,EAAiC;AACzE,EAAA,MAAM,UAAU,eAAA,GAAkB,EAAA;AAClC,EAAA,MAAM,aAAA,GAAgB,IAAA;AACtB,EAAA,OAAO,OAAA,GAAU,aAAA;AACnB;;;ACrPO,IAAM,cAAA,GAAN,MAAM,cAAA,CAAc;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EA2BzB,WAAA,CAAY,OAAA,GAAiC,EAAC,EAAG;AAC/C,IAAA,IAAA,CAAK,OAAA,GAAU;AAAA,MACb,GAAG,cAAA,CAAc,eAAA;AAAA,MACjB,GAAG;AAAA,KACL;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAaA,iBAAiB,IAAA,EAAsB;AAErC,IAAA,IAAI,CAAC,IAAA,EAAM;AACT,MAAA,OAAO,EAAA;AAAA,IACT;AAGA,IAAA,IAAA,GAAO,KAAK,IAAA,EAAK;AAGjB,IAAA,IAAI,CAAC,IAAA,EAAM;AACT,MAAA,OAAO,EAAA;AAAA,IACT;AAGA,IAAA,IAAI,QAAA,CAAS,IAAA,CAAK,IAAI,CAAA,EAAG;AACvB,MAAA,OAAO,IAAA;AAAA,IACT;AAGA,IAAA,MAAM,SAAA,GAAY,KAAK,WAAA,EAAY;AACnC,IAAA,MAAM,SAAA,GAAY,SAAA,CAAU,KAAA,CAAM,KAAK,EAAE,CAAC,CAAA;AAE1C,IAAA,IAAI,SAAA,IAAa,cAAA,CAAc,cAAA,CAAe,QAAA,CAAS,SAAS,CAAA,EAAG;AACjE,MAAA,OAAO,IAAA,GAAO,GAAA;AAAA,IAChB;AAGA,IAAA,OAAO,IAAA,GAAO,GAAA;AAAA,EAChB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,oBAAoB,IAAA,EAAsB;AAExC,IAAA,IAAI,CAAC,IAAA,EAAM;AACT,MAAA,OAAO,EAAA;AAAA,IACT;AAGA,IAAA,IAAI,MAAA,GAAS,KAAK,MAAA,CAAO,CAAC,EAAE,WAAA,EAAY,GAAI,IAAA,CAAK,KAAA,CAAM,CAAC,CAAA;AAGxD,IAAA,MAAA,GAAS,OAAO,OAAA,CAAQ,oBAAA,
EAAsB,CAAC,MAAA,EAAQ,aAAa,MAAA,KAAW;AAC7E,MAAA,OAAO,WAAA,GAAc,OAAO,WAAA,EAAY;AAAA,IAC1C,CAAC,CAAA;AAED,IAAA,OAAO,MAAA;AAAA,EACT;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,WAAW,IAAA,EAAsB;AAC/B,IAAA,IAAI,CAAC,IAAA,EAAM;AACT,MAAA,OAAO,EAAA;AAAA,IACT;AAEA,IAAA,IAAI,MAAA,GAAS,IAAA;AAGb,IAAA,IAAI,IAAA,CAAK,QAAA,CAAS,IAAI,CAAA,EAAG;AACvB,MAAA,MAAM,KAAA,GAAQ,IAAA,CAAK,KAAA,CAAM,IAAI,CAAA;AAC7B,MAAA,MAAA,GAAS,KAAA,CAAM,IAAI,CAAA,IAAA,KAAQ,IAAA,CAAK,iBAAiB,IAAI,CAAC,CAAA,CAAE,IAAA,CAAK,IAAI,CAAA;AACjE,MAAA,OAAO,MAAA;AAAA,IACT;AAEA,IAAA,OAAO,IAAA,CAAK,iBAAiB,MAAM,CAAA;AAAA,EACrC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQQ,iBAAiB,IAAA,EAAsB;AAC7C,IAAA,IAAI,CAAC,IAAA,IAAQ,CAAC,IAAA,CAAK,MAAK,EAAG;AACzB,MAAA,OAAO,IAAA;AAAA,IACT;AAEA,IAAA,IAAI,MAAA,GAAS,IAAA;AAGb,IAAA,IAAI,IAAA,CAAK,QAAQ,oBAAA,EAAsB;AACrC,MAAA,MAAA,GAAS,IAAA,CAAK,oBAAoB,MAAM,CAAA;AAAA,IAC1C;AAGA,IAAA,IAAI,IAAA,CAAK,QAAQ,iBAAA,EAAmB;AAClC,MAAA,MAAA,GAAS,IAAA,CAAK,iBAAiB,MAAM,CAAA;AAAA,IACvC;AAEA,IAAA,OAAO,MAAA;AAAA,EACT;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWA,eAAe,QAAA,EAA0D;AAEvE,IAAA,IAAI,CAAC,QAAA,IAAY,QAAA,CAAS,MAAA,KAAW,CAAA,EAAG;AACtC,MAAA,OAAO,EAAC;AAAA,IACV;AAEA,IAAA,OAAO,QAAA,CAAS,IAAI,CAAA,OAAA,KAAW;AAE7B,MAAA,MAAM,aAAA,GAAgB,IAAA,CAAK,UAAA,CAAW,OAAA,CAAQ,IAAI,CAAA;AAGlD,MAAA,OAAO;AAAA,QACL,GAAG,OAAA;AAAA,QACH,IAAA,EAAM;AAAA,OACR;AAAA,IACF,CAAC,CAAA;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,UAAA,GAAwD;AACtD,IAAA,OAAO,EAAE,GAAG,IAAA,CAAK,OAAA,EAAQ;AAAA,EAC3B;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,WAAW,OAAA,EAA+C;AACxD,IAAA,IAAA,CAAK,OAAA,GAAU;AAAA,MACb,GAAG,IAAA,CAAK,OAAA;AAAA,MACR,GAAG;AAAA,KACL;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,oBAAA,GAAgC;AAC9B,IAAA,OAAO,KAAK,OAAA,CAAQ,iBAAA;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,uBAAA,GAAmC;AACjC,IAAA,OAAO,KAAK,OAAA,CAAQ,oBAAA;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,mBAAA,GAA+B;AAC7B,IAAA,OAAO,KAAK,OAAA,CAAQ,gBAAA;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,gBAAA,GAA2B;AACzB,IAAA,OAAO,KAAK,OAAA,CAAQ,aAAA;AAAA,EACtB;AACF,CAAA;AAAA;AAAA;
AAAA;AA3Oa,cAAA,CAMa,cAAA,GAAiB;AAAA,EACvC,KAAA;AAAA,EAAO,MAAA;AAAA,EAAQ,MAAA;AAAA,EAAQ,OAAA;AAAA,EAAS,KAAA;AAAA,EAAO,MAAA;AAAA,EAAQ,OAAA;AAAA,EAC/C,KAAA;AAAA,EAAO,OAAA;AAAA,EAAS,KAAA;AAAA,EAAO,OAAA;AAAA,EAAS,OAAA;AAAA,EAAS,QAAA;AAAA,EACzC,MAAA;AAAA,EAAQ,IAAA;AAAA,EAAM,MAAA;AAAA,EAAQ,KAAA;AAAA,EAAO,IAAA;AAAA,EAAM,KAAA;AAAA,EAAO,KAAA;AAAA,EAAO;AACnD,CAAA;AAAA;AAAA;AAAA;AAVW,cAAA,CAea,eAAA,GAAmD;AAAA,EACzE,iBAAA,EAAmB,IAAA;AAAA,EACnB,oBAAA,EAAsB,IAAA;AAAA,EACtB,gBAAA,EAAkB,KAAA;AAAA,EAClB,aAAA,EAAe;AACjB,CAAA;AApBK,IAAM,aAAA,GAAN","file":"index.js","sourcesContent":["import OpenAI from 'openai'\nimport type { TranscriptionCreateParams } from 'openai/resources/audio/transcriptions'\n\n/**\n * Timestamp granularity options for Whisper transcription\n */\nexport type TimestampGranularity = 'word' | 'segment'\n\n/**\n * Response format options for Whisper transcription\n */\nexport type ResponseFormat = 'json' | 'text' | 'srt' | 'verbose_json' | 'vtt'\n\n/**\n * Transcription segment with timing information\n */\nexport interface TranscriptionSegment {\n id: number\n start: number\n end: number\n text: string\n}\n\n/**\n * Word-level timestamp information\n */\nexport interface TranscriptionWord {\n word: string\n start: number\n end: number\n}\n\n/**\n * Options for Whisper audio transcription\n */\nexport interface TranscriptionOptions {\n /**\n * OpenAI API key for authentication\n */\n apiKey: string\n\n /**\n * The language of the input audio in ISO-639-1 format (e.g., 'en', 'fr', 'es').\n * Providing this can improve accuracy and latency.\n */\n language?: string\n\n /**\n * Optional text to guide the model's style or continue a previous audio segment.\n * The prompt should match the audio language and can be used for:\n * - Maintaining consistent terminology (e.g., \"Dr. 
Smith, AI, NLP\")\n * - Speaker identification hints (e.g., \"Speaker 1, Speaker 2\")\n * - Ensuring proper punctuation and formatting\n */\n prompt?: string\n\n /**\n * The format of the transcript output.\n * - 'json': Returns basic JSON with text only\n * - 'text': Returns plain text\n * - 'srt': Returns SRT subtitle format\n * - 'verbose_json': Returns detailed JSON with timestamps and metadata\n * - 'vtt': Returns WebVTT subtitle format\n */\n response_format?: ResponseFormat\n\n /**\n * The sampling temperature, between 0 and 1.\n * Higher values like 0.8 will make the output more random,\n * while lower values like 0.2 will make it more focused and deterministic.\n */\n temperature?: number\n\n /**\n * The timestamp granularities to populate for this transcription.\n * Can include 'word' and/or 'segment' level timestamps.\n * Note: There is no additional latency for segment timestamps, but word timestamps\n * incur latency.\n */\n timestamp_granularities?: TimestampGranularity[]\n\n /**\n * The ID of the model to use. 
Only 'whisper-1' is currently available.\n */\n model?: string\n}\n\n/**\n * Result of audio transcription\n */\nexport interface TranscriptionResult {\n /**\n * The transcribed text\n */\n text: string\n\n /**\n * The language of the transcription (only available with verbose_json)\n */\n language?: string\n\n /**\n * Duration of the audio in seconds (only available with verbose_json)\n */\n duration?: number\n\n /**\n * Segment-level timestamps (only available when timestamp_granularities includes 'segment')\n */\n segments?: TranscriptionSegment[]\n\n /**\n * Word-level timestamps (only available when timestamp_granularities includes 'word')\n */\n words?: TranscriptionWord[]\n}\n\n/**\n * Transcribe audio using OpenAI's Whisper API\n *\n * @param audioFile - The audio file to transcribe (File or Blob)\n * @param options - Transcription options including API key and Whisper parameters\n * @returns Promise resolving to the transcription result\n *\n * @throws {Error} When API key is missing or API call fails\n */\nexport async function transcribeAudio(\n audioFile: File | Blob,\n options: TranscriptionOptions\n): Promise<TranscriptionResult> {\n // Validate API key\n if (!options.apiKey || options.apiKey.trim() === '') {\n throw new Error('OpenAI API key is required')\n }\n\n // Initialize OpenAI client\n const openai = new OpenAI({\n apiKey: options.apiKey,\n })\n\n try {\n // Build transcription parameters\n const params: TranscriptionCreateParams = {\n file: audioFile as File,\n model: options.model || 'whisper-1',\n }\n\n // Add optional parameters\n if (options.language) {\n params.language = options.language\n }\n\n if (options.prompt) {\n params.prompt = options.prompt\n }\n\n if (options.response_format) {\n params.response_format = options.response_format\n }\n\n if (options.temperature !== undefined) {\n params.temperature = options.temperature\n }\n\n if (options.timestamp_granularities && options.timestamp_granularities.length > 0) {\n 
params.timestamp_granularities = options.timestamp_granularities\n }\n\n // Call Whisper API\n const response = await openai.audio.transcriptions.create(params)\n\n // Parse and return result based on response type\n if (typeof response === 'string') {\n // For text, srt, vtt formats\n return { text: response }\n }\n\n // For json and verbose_json formats\n const result: TranscriptionResult = {\n text: (response as any).text,\n }\n\n // Add optional fields if present (from verbose_json)\n if ('language' in response) {\n result.language = (response as any).language\n }\n\n if ('duration' in response) {\n result.duration = (response as any).duration\n }\n\n if ('segments' in response) {\n result.segments = (response as any).segments\n }\n\n if ('words' in response) {\n result.words = (response as any).words\n }\n\n return result\n } catch (error) {\n // Re-throw with original error message\n if (error instanceof Error) {\n throw error\n }\n throw new Error('Failed to transcribe audio: Unknown error')\n }\n}\n\n/**\n * Utility function to format transcription segments into a readable format\n *\n * @param segments - Array of transcription segments with timestamps\n * @returns Formatted string with timestamps\n */\nexport function formatSegments(segments?: TranscriptionSegment[]): string {\n if (!segments || segments.length === 0) {\n return ''\n }\n\n return segments\n .map((segment) => {\n const start = formatTimestamp(segment.start)\n const end = formatTimestamp(segment.end)\n return `[${start} - ${end}] ${segment.text.trim()}`\n })\n .join('\\n')\n}\n\n/**\n * Format seconds into MM:SS.s format\n *\n * @param seconds - Time in seconds\n * @returns Formatted timestamp string\n */\nfunction formatTimestamp(seconds: number): string {\n const mins = Math.floor(seconds / 60)\n const secs = seconds % 60\n return `${mins.toString().padStart(2, '0')}:${secs.toFixed(1).padStart(4, '0')}`\n}\n\n/**\n * Utility function to extract speaker-labeled text from transcription\n * 
Note: Whisper doesn't natively support speaker diarization, but you can\n * guide it using prompts like \"Speaker 1, Speaker 2\" to encourage labeling.\n *\n * @param text - Transcribed text potentially containing speaker labels\n * @returns Array of speaker segments\n */\nexport function extractSpeakers(text: string): Array<{ speaker: string; text: string }> {\n const speakerPattern = /(Speaker \\d+|[A-Z][a-z]+ [A-Z][a-z]+):\\s*([^.!?]+[.!?])/g\n const matches = [...text.matchAll(speakerPattern)]\n\n return matches\n .filter((match) => match[1] && match[2])\n .map((match) => ({\n speaker: match[1]!.trim(),\n text: match[2]!.trim(),\n }))\n}\n\n/**\n * Estimate the cost of transcribing audio based on duration\n * Whisper pricing: $0.006 per minute\n *\n * @param durationSeconds - Duration of audio in seconds\n * @returns Estimated cost in USD\n */\nexport function estimateTranscriptionCost(durationSeconds: number): number {\n const minutes = durationSeconds / 60\n const costPerMinute = 0.006\n return minutes * costPerMinute\n}\n","/**\n * Text Formatter for Transcription Processing\n * @module @ainative/video/processing\n */\n\nimport type { TranscriptionSegment, TextFormattingOptions } from './types';\n\n/**\n * TextFormatter class for applying formatting to transcribed text\n *\n * Supports:\n * - Automatic punctuation\n * - Capitalization\n * - Paragraph detection\n * - Line length management\n *\n * @example\n * ```typescript\n * const formatter = new TextFormatter({\n * enablePunctuation: true,\n * enableCapitalization: true\n * });\n *\n * const formatted = formatter.formatText('hello world');\n * // Result: \"Hello world.\"\n * ```\n */\nexport class TextFormatter {\n private options: Required<TextFormattingOptions>;\n\n /**\n * Question word patterns for detecting questions\n */\n private static readonly QUESTION_WORDS = [\n 'how', 'what', 'when', 'where', 'who', 'whom', 'whose',\n 'why', 'which', 'can', 'could', 'would', 'should',\n 'will', 'do', 'does', 
'did', 'is', 'are', 'was', 'were'\n ];\n\n /**\n * Default formatting options\n */\n private static readonly DEFAULT_OPTIONS: Required<TextFormattingOptions> = {\n enablePunctuation: true,\n enableCapitalization: true,\n enableParagraphs: false,\n maxLineLength: 80\n };\n\n /**\n * Creates a new TextFormatter instance\n *\n * @param options - Formatting options\n */\n constructor(options: TextFormattingOptions = {}) {\n this.options = {\n ...TextFormatter.DEFAULT_OPTIONS,\n ...options\n };\n }\n\n /**\n * Apply punctuation to text\n *\n * Adds appropriate punctuation marks based on sentence structure:\n * - Periods for statements\n * - Question marks for questions\n * - Preserves existing punctuation\n *\n * @param text - Input text\n * @returns Text with punctuation applied\n */\n applyPunctuation(text: string): string {\n // Handle null/undefined input\n if (!text) {\n return '';\n }\n\n // Trim whitespace\n text = text.trim();\n\n // Return empty string if only whitespace\n if (!text) {\n return '';\n }\n\n // Already has ending punctuation\n if (/[.!?]$/.test(text)) {\n return text;\n }\n\n // Check if it's a question\n const lowerText = text.toLowerCase();\n const firstWord = lowerText.split(/\\s+/)[0];\n\n if (firstWord && TextFormatter.QUESTION_WORDS.includes(firstWord)) {\n return text + '?';\n }\n\n // Default to period\n return text + '.';\n }\n\n /**\n * Apply capitalization to text\n *\n * Capitalizes:\n * - First letter of text\n * - Letters after sentence-ending punctuation\n *\n * @param text - Input text\n * @returns Text with capitalization applied\n */\n applyCapitalization(text: string): string {\n // Handle empty input\n if (!text) {\n return '';\n }\n\n // Capitalize first letter\n let result = text.charAt(0).toUpperCase() + text.slice(1);\n\n // Capitalize after sentence-ending punctuation\n result = result.replace(/([.!?]\\s+)([a-z])/g, (_match, punctuation, letter) => {\n return punctuation + letter.toUpperCase();\n });\n\n return result;\n 
}\n\n /**\n * Format a single text string with all enabled options\n *\n * @param text - Input text\n * @returns Formatted text\n */\n formatText(text: string): string {\n if (!text) {\n return '';\n }\n\n let result = text;\n\n // Handle multiline text\n if (text.includes('\\n')) {\n const lines = text.split('\\n');\n result = lines.map(line => this.formatSingleLine(line)).join('\\n');\n return result;\n }\n\n return this.formatSingleLine(result);\n }\n\n /**\n * Format a single line of text\n *\n * @param text - Input text (single line)\n * @returns Formatted text\n */\n private formatSingleLine(text: string): string {\n if (!text || !text.trim()) {\n return text;\n }\n\n let result = text;\n\n // Apply capitalization first\n if (this.options.enableCapitalization) {\n result = this.applyCapitalization(result);\n }\n\n // Apply punctuation\n if (this.options.enablePunctuation) {\n result = this.applyPunctuation(result);\n }\n\n return result;\n }\n\n /**\n * Format an array of transcription segments\n *\n * Applies formatting to the text of each segment while preserving\n * timestamps, confidence scores, and speaker information.\n *\n * @param segments - Array of transcription segments\n * @returns Array of formatted segments\n */\n formatSegments(segments: TranscriptionSegment[]): TranscriptionSegment[] {\n // Handle empty array\n if (!segments || segments.length === 0) {\n return [];\n }\n\n return segments.map(segment => {\n // Format the text\n const formattedText = this.formatText(segment.text);\n\n // Return new segment with formatted text, preserving all other properties\n return {\n ...segment,\n text: formattedText\n };\n });\n }\n\n /**\n * Get current formatting options\n *\n * @returns Current options\n */\n getOptions(): Readonly<Required<TextFormattingOptions>> {\n return { ...this.options };\n }\n\n /**\n * Update formatting options\n *\n * @param options - New options to merge with existing\n */\n setOptions(options: 
Partial<TextFormattingOptions>): void {\n this.options = {\n ...this.options,\n ...options\n };\n }\n\n /**\n * Check if punctuation is enabled\n *\n * @returns True if punctuation is enabled\n */\n isPunctuationEnabled(): boolean {\n return this.options.enablePunctuation;\n }\n\n /**\n * Check if capitalization is enabled\n *\n * @returns True if capitalization is enabled\n */\n isCapitalizationEnabled(): boolean {\n return this.options.enableCapitalization;\n }\n\n /**\n * Check if paragraph detection is enabled\n *\n * @returns True if paragraph detection is enabled\n */\n isParagraphsEnabled(): boolean {\n return this.options.enableParagraphs;\n }\n\n /**\n * Get maximum line length\n *\n * @returns Maximum line length in characters\n */\n getMaxLineLength(): number {\n return this.options.maxLineLength;\n }\n}\n"]}
|