@ainative/ai-kit-video 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,292 @@
1
/**
 * Levels at which Whisper transcription timestamps can be requested:
 * one per transcript segment, or one per individual word.
 */
type TimestampGranularity = 'segment' | 'word';
5
/**
 * Output formats that can be requested from a Whisper transcription.
 */
type ResponseFormat =
  | 'json'
  | 'text'
  | 'srt'
  | 'verbose_json'
  | 'vtt';
9
/**
 * One timed segment of a transcript, as consumed by the module-level
 * `formatSegments` helper.
 */
interface TranscriptionSegment$1 {
  /** Numeric identifier of the segment */
  id: number;
  /** Segment start time in seconds */
  start: number;
  /** Segment end time in seconds */
  end: number;
  /** Text spoken during the segment */
  text: string;
}
18
/**
 * Timing information for a single transcribed word.
 */
interface TranscriptionWord {
  /** The transcribed word */
  word: string;
  /** Time at which the word starts */
  start: number;
  /** Time at which the word ends */
  end: number;
}
26
/**
 * Settings accepted by `transcribeAudio`.
 */
interface TranscriptionOptions {
  /**
   * OpenAI API key used to authenticate the request.
   * `transcribeAudio` throws when this is missing or blank.
   */
  apiKey: string;
  /**
   * Language of the input audio as an ISO-639-1 code (e.g. 'en', 'fr', 'es').
   * Supplying it can improve accuracy and latency. An empty string is ignored.
   */
  language?: string;
  /**
   * Optional text that steers the model's style or continues a previous
   * audio segment. It should be written in the audio's language. Typical uses:
   * - keeping terminology consistent (e.g. "Dr. Smith, AI, NLP")
   * - hinting at speakers (e.g. "Speaker 1, Speaker 2")
   * - encouraging proper punctuation and formatting
   */
  prompt?: string;
  /**
   * Format of the transcript output:
   * - 'json': basic JSON containing only the text
   * - 'text': plain text
   * - 'srt': SRT subtitles
   * - 'verbose_json': detailed JSON with timestamps and metadata
   * - 'vtt': WebVTT subtitles
   */
  response_format?: ResponseFormat;
  /**
   * Sampling temperature between 0 and 1. Higher values (e.g. 0.8) make the
   * output more random; lower values (e.g. 0.2) make it more focused and
   * deterministic. A value of 0 is forwarded as-is.
   */
  temperature?: number;
  /**
   * Timestamp granularities to populate ('word' and/or 'segment').
   * Segment timestamps add no latency; word timestamps do.
   * An empty array is treated the same as omitting the option.
   */
  timestamp_granularities?: TimestampGranularity[];
  /**
   * ID of the model to use; falls back to 'whisper-1' when omitted.
   * Only 'whisper-1' is currently available.
   */
  model?: string;
}
74
/**
 * Value resolved by `transcribeAudio`.
 */
interface TranscriptionResult {
  /** Full transcribed text */
  text: string;
  /** Language of the transcription (only present with verbose_json) */
  language?: string;
  /** Audio duration in seconds (only present with verbose_json) */
  duration?: number;
  /**
   * Segment-level timestamps (only present when timestamp_granularities
   * includes 'segment')
   */
  segments?: TranscriptionSegment$1[];
  /**
   * Word-level timestamps (only present when timestamp_granularities
   * includes 'word')
   */
  words?: TranscriptionWord[];
}
99
/**
 * Sends an audio file to OpenAI's Whisper API and returns the transcript.
 *
 * @param audioFile - Audio to transcribe, as a File or Blob
 * @param options - API key plus optional Whisper parameters
 * @returns Promise that resolves to the transcription result
 *
 * @throws {Error} If the API key is missing or blank, or the API call fails
 */
declare function transcribeAudio(
  audioFile: File | Blob,
  options: TranscriptionOptions,
): Promise<TranscriptionResult>;
109
/**
 * Renders transcription segments as human-readable text, one line per
 * segment in the form `[MM:SS.s - MM:SS.s] text`.
 *
 * @param segments - Timed transcription segments; may be omitted
 * @returns The rendered lines joined by newlines, or an empty string when
 *          no segments are supplied
 */
declare function formatSegments(segments?: TranscriptionSegment$1[]): string;
116
/**
 * Pulls speaker-labelled passages out of a transcript.
 *
 * Whisper has no native speaker diarization, but a prompt such as
 * "Speaker 1, Speaker 2" can encourage it to emit labels that this
 * helper then picks up.
 *
 * @param text - Transcript that may contain speaker labels
 * @returns One entry per labelled passage, holding the label and its text
 */
declare function extractSpeakers(text: string): Array<{
  speaker: string;
  text: string;
}>;
128
/**
 * Estimates what transcribing a recording will cost, using a Whisper
 * price of $0.006 per minute of audio.
 *
 * @param durationSeconds - Length of the audio in seconds
 * @returns Estimated cost in USD
 */
declare function estimateTranscriptionCost(durationSeconds: number): number;
136
+
137
/**
 * Timed transcript segment as handled by `TextFormatter.formatSegments`.
 */
interface TranscriptionSegment {
  /** Text of the segment */
  text: string;
  /** Start time, in seconds */
  start: number;
  /** End time, in seconds */
  end: number;
  /** Confidence score in the range 0-1 */
  confidence?: number;
  /** Identifier of the speaker */
  speaker?: string;
}
152
/**
 * Switches that control how `TextFormatter` processes text.
 */
interface TextFormattingOptions {
  /** Add terminal punctuation automatically (default: true) */
  enablePunctuation?: boolean;
  /** Capitalize sentence starts (default: true) */
  enableCapitalization?: boolean;
  /** Enable paragraph detection (default: false) */
  enableParagraphs?: boolean;
  /** Maximum line length in characters (default: 80) */
  maxLineLength?: number;
}
165
+
166
+ /**
167
+ * Text Formatter for Transcription Processing
168
+ * @module @ainative/video/processing
169
+ */
170
+
171
/**
 * Applies light-weight formatting to transcribed text.
 *
 * Formatting that is applied:
 * - terminal punctuation ('.' or '?')
 * - capitalization of sentence starts
 *
 * The `enableParagraphs` and `maxLineLength` options are stored and can be
 * read back through the accessors; the formatting methods in the bundled
 * implementation of this version do not consult them.
 *
 * @example
 * ```typescript
 * const formatter = new TextFormatter({
 *   enablePunctuation: true,
 *   enableCapitalization: true
 * });
 *
 * const formatted = formatter.formatText('hello world');
 * // Result: "Hello world."
 * ```
 */
declare class TextFormatter {
  private options;
  /** Words that mark a sentence as a question when they come first */
  private static readonly QUESTION_WORDS;
  /** Option values used when the caller supplies none */
  private static readonly DEFAULT_OPTIONS;
  /**
   * Builds a formatter; any option left out falls back to its default.
   *
   * @param options - Formatting options
   */
  constructor(options?: TextFormattingOptions);
  /**
   * Ensures the text ends with punctuation.
   *
   * The text is trimmed first. Text already ending in '.', '!' or '?' is
   * returned as is; otherwise '?' is appended when the first word is a
   * question word, and '.' in every other case.
   *
   * @param text - Input text
   * @returns Trimmed text with terminal punctuation ('' for empty input)
   */
  applyPunctuation(text: string): string;
  /**
   * Capitalizes the first letter of the text and the letter following
   * each sentence-ending punctuation mark.
   *
   * @param text - Input text
   * @returns Capitalized text ('' for empty input)
   */
  applyCapitalization(text: string): string;
  /**
   * Formats a string with every enabled option. Input containing newlines
   * is formatted line by line and re-joined with newlines.
   *
   * @param text - Input text
   * @returns Formatted text
   */
  formatText(text: string): string;
  /**
   * Formats one line of text.
   *
   * @param text - Input text (single line)
   * @returns Formatted text
   */
  private formatSingleLine;
  /**
   * Formats the text of every segment, leaving timestamps, confidence
   * scores and speaker information untouched.
   *
   * @param segments - Transcription segments to format
   * @returns New array of segments with formatted text
   */
  formatSegments(segments: TranscriptionSegment[]): TranscriptionSegment[];
  /**
   * Returns a copy of the options currently in effect.
   *
   * @returns Current options
   */
  getOptions(): Readonly<Required<TextFormattingOptions>>;
  /**
   * Merges the given options into the current ones.
   *
   * @param options - Options to override
   */
  setOptions(options: Partial<TextFormattingOptions>): void;
  /**
   * @returns True if automatic punctuation is enabled
   */
  isPunctuationEnabled(): boolean;
  /**
   * @returns True if capitalization is enabled
   */
  isCapitalizationEnabled(): boolean;
  /**
   * @returns True if paragraph detection is enabled
   */
  isParagraphsEnabled(): boolean;
  /**
   * @returns Maximum line length in characters
   */
  getMaxLineLength(): number;
}
291
+
292
+ export { type ResponseFormat, TextFormatter, type TextFormattingOptions, type TimestampGranularity, type TranscriptionOptions, type TranscriptionResult, type TranscriptionSegment$1 as TranscriptionSegment, type TranscriptionWord, estimateTranscriptionCost, extractSpeakers, formatSegments, transcribeAudio };
@@ -0,0 +1,308 @@
1
+ 'use strict';
2
+
3
+ var OpenAI = require('openai');
4
+
5
+ function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
6
+
7
+ var OpenAI__default = /*#__PURE__*/_interopDefault(OpenAI);
8
+
9
+ // src/processing/transcription.ts
10
/**
 * Transcribes audio through the OpenAI audio transcription endpoint.
 *
 * Only options that are actually set are forwarded: empty `language`,
 * `prompt` and `response_format` values and an empty
 * `timestamp_granularities` array are left out, while `temperature` is
 * forwarded whenever it is defined (including 0). The model defaults to
 * "whisper-1".
 *
 * @param audioFile - Audio payload handed to the API as `file`
 * @param options - API key plus optional Whisper parameters
 * @returns `{ text }` for string responses; otherwise the response's text
 *          plus whichever of language/duration/segments/words it carries
 * @throws {Error} When the API key is missing or blank, or the call fails
 */
async function transcribeAudio(audioFile, options) {
  const hasApiKey = Boolean(options.apiKey) && options.apiKey.trim() !== "";
  if (!hasApiKey) {
    throw new Error("OpenAI API key is required");
  }
  const client = new OpenAI__default.default({ apiKey: options.apiKey });
  try {
    const request = {
      file: audioFile,
      model: options.model || "whisper-1"
    };
    // Optional string parameters are forwarded only when non-empty.
    for (const field of ["language", "prompt", "response_format"]) {
      if (options[field]) {
        request[field] = options[field];
      }
    }
    // 0 is a meaningful temperature, so test against undefined explicitly.
    if (options.temperature !== void 0) {
      request.temperature = options.temperature;
    }
    const granularities = options.timestamp_granularities;
    if (granularities && granularities.length > 0) {
      request.timestamp_granularities = granularities;
    }
    const response = await client.audio.transcriptions.create(request);
    // Some response formats come back as a bare string.
    if (typeof response === "string") {
      return { text: response };
    }
    const result = { text: response.text };
    // Copy the optional metadata fields that this response actually has.
    for (const field of ["language", "duration", "segments", "words"]) {
      if (field in response) {
        result[field] = response[field];
      }
    }
    return result;
  } catch (error) {
    // Non-Error throwables are normalised; real errors propagate untouched.
    if (!(error instanceof Error)) {
      throw new Error("Failed to transcribe audio: Unknown error");
    }
    throw error;
  }
}
64
/**
 * Renders segments as `[start - end] text` lines joined by newlines.
 *
 * @param segments - Segments with `start`, `end` (seconds) and `text`
 * @returns The rendered lines, or "" when no segments are supplied
 */
function formatSegments(segments) {
  const hasSegments = Boolean(segments) && segments.length !== 0;
  if (!hasSegments) {
    return "";
  }
  const renderLine = (segment) => {
    const from = formatTimestamp(segment.start);
    const to = formatTimestamp(segment.end);
    const body = segment.text.trim();
    return `[${from} - ${to}] ${body}`;
  };
  const lines = segments.map((segment) => renderLine(segment));
  return lines.join("\n");
}
74
/**
 * Formats a duration in seconds as `MM:SS.s` (minutes are zero-padded to
 * at least two digits, seconds carry one decimal).
 *
 * Rounding the seconds part to one decimal can produce "60.0" (e.g. for
 * 59.96); that case is carried into the minutes so the result reads
 * "01:00.0" instead of "00:60.0". Negative or non-finite inputs are
 * rendered as "00:00.0".
 *
 * @param seconds - Duration in seconds
 * @returns The formatted timestamp
 */
function formatTimestamp(seconds) {
  const value = Number(seconds);
  const total = Number.isFinite(value) && value > 0 ? value : 0;
  let mins = Math.floor(total / 60);
  let secsText = (total % 60).toFixed(1);
  if (secsText === "60.0") {
    // toFixed rounded up to a full minute: carry it over.
    mins += 1;
    secsText = "0.0";
  }
  return `${mins.toString().padStart(2, "0")}:${secsText.padStart(4, "0")}`;
}
79
/**
 * Splits a transcript into speaker-labelled passages.
 *
 * A label is either "Speaker <digits>" or two capitalised words
 * ("Jane Doe"), followed by a colon. Each passage runs from its label to
 * the next label (or the end of the text), so multi-sentence turns and
 * turns without terminal punctuation are kept whole. Labels with no text
 * after them are skipped.
 *
 * @param text - Transcript that may contain speaker labels
 * @returns One `{ speaker, text }` entry per labelled passage, in order
 */
function extractSpeakers(text) {
  if (typeof text !== "string" || text === "") {
    return [];
  }
  const labelPattern = /(Speaker \d+|[A-Z][a-z]+ [A-Z][a-z]+):\s*/g;
  const labels = [];
  let match;
  while ((match = labelPattern.exec(text)) !== null) {
    labels.push({
      speaker: match[1].trim(),
      labelStart: match.index,
      textStart: match.index + match[0].length
    });
  }
  return labels.map((label, position) => {
    // A passage ends where the next label begins.
    const next = labels[position + 1];
    const textEnd = next ? next.labelStart : text.length;
    return {
      speaker: label.speaker,
      text: text.slice(label.textStart, textEnd).trim()
    };
  }).filter((entry) => entry.text !== "");
}
87
/**
 * Estimates the transcription cost at $0.006 per minute of audio.
 *
 * @param durationSeconds - Audio length in seconds
 * @returns Estimated cost in USD
 */
function estimateTranscriptionCost(durationSeconds) {
  const USD_PER_MINUTE = 6e-3;
  const SECONDS_PER_MINUTE = 60;
  return (durationSeconds / SECONDS_PER_MINUTE) * USD_PER_MINUTE;
}
92
+
93
+ // src/processing/text-formatter.ts
94
+ var _TextFormatter = class _TextFormatter {
95
+ /**
96
+ * Creates a new TextFormatter instance
97
+ *
98
+ * @param options - Formatting options
99
+ */
100
+ constructor(options = {}) {
101
+ this.options = {
102
+ ..._TextFormatter.DEFAULT_OPTIONS,
103
+ ...options
104
+ };
105
+ }
106
+ /**
107
+ * Apply punctuation to text
108
+ *
109
+ * Adds appropriate punctuation marks based on sentence structure:
110
+ * - Periods for statements
111
+ * - Question marks for questions
112
+ * - Preserves existing punctuation
113
+ *
114
+ * @param text - Input text
115
+ * @returns Text with punctuation applied
116
+ */
117
+ applyPunctuation(text) {
118
+ if (!text) {
119
+ return "";
120
+ }
121
+ text = text.trim();
122
+ if (!text) {
123
+ return "";
124
+ }
125
+ if (/[.!?]$/.test(text)) {
126
+ return text;
127
+ }
128
+ const lowerText = text.toLowerCase();
129
+ const firstWord = lowerText.split(/\s+/)[0];
130
+ if (firstWord && _TextFormatter.QUESTION_WORDS.includes(firstWord)) {
131
+ return text + "?";
132
+ }
133
+ return text + ".";
134
+ }
135
+ /**
136
+ * Apply capitalization to text
137
+ *
138
+ * Capitalizes:
139
+ * - First letter of text
140
+ * - Letters after sentence-ending punctuation
141
+ *
142
+ * @param text - Input text
143
+ * @returns Text with capitalization applied
144
+ */
145
+ applyCapitalization(text) {
146
+ if (!text) {
147
+ return "";
148
+ }
149
+ let result = text.charAt(0).toUpperCase() + text.slice(1);
150
+ result = result.replace(/([.!?]\s+)([a-z])/g, (_match, punctuation, letter) => {
151
+ return punctuation + letter.toUpperCase();
152
+ });
153
+ return result;
154
+ }
155
+ /**
156
+ * Format a single text string with all enabled options
157
+ *
158
+ * @param text - Input text
159
+ * @returns Formatted text
160
+ */
161
+ formatText(text) {
162
+ if (!text) {
163
+ return "";
164
+ }
165
+ let result = text;
166
+ if (text.includes("\n")) {
167
+ const lines = text.split("\n");
168
+ result = lines.map((line) => this.formatSingleLine(line)).join("\n");
169
+ return result;
170
+ }
171
+ return this.formatSingleLine(result);
172
+ }
173
+ /**
174
+ * Format a single line of text
175
+ *
176
+ * @param text - Input text (single line)
177
+ * @returns Formatted text
178
+ */
179
+ formatSingleLine(text) {
180
+ if (!text || !text.trim()) {
181
+ return text;
182
+ }
183
+ let result = text;
184
+ if (this.options.enableCapitalization) {
185
+ result = this.applyCapitalization(result);
186
+ }
187
+ if (this.options.enablePunctuation) {
188
+ result = this.applyPunctuation(result);
189
+ }
190
+ return result;
191
+ }
192
+ /**
193
+ * Format an array of transcription segments
194
+ *
195
+ * Applies formatting to the text of each segment while preserving
196
+ * timestamps, confidence scores, and speaker information.
197
+ *
198
+ * @param segments - Array of transcription segments
199
+ * @returns Array of formatted segments
200
+ */
201
+ formatSegments(segments) {
202
+ if (!segments || segments.length === 0) {
203
+ return [];
204
+ }
205
+ return segments.map((segment) => {
206
+ const formattedText = this.formatText(segment.text);
207
+ return {
208
+ ...segment,
209
+ text: formattedText
210
+ };
211
+ });
212
+ }
213
+ /**
214
+ * Get current formatting options
215
+ *
216
+ * @returns Current options
217
+ */
218
+ getOptions() {
219
+ return { ...this.options };
220
+ }
221
+ /**
222
+ * Update formatting options
223
+ *
224
+ * @param options - New options to merge with existing
225
+ */
226
+ setOptions(options) {
227
+ this.options = {
228
+ ...this.options,
229
+ ...options
230
+ };
231
+ }
232
+ /**
233
+ * Check if punctuation is enabled
234
+ *
235
+ * @returns True if punctuation is enabled
236
+ */
237
+ isPunctuationEnabled() {
238
+ return this.options.enablePunctuation;
239
+ }
240
+ /**
241
+ * Check if capitalization is enabled
242
+ *
243
+ * @returns True if capitalization is enabled
244
+ */
245
+ isCapitalizationEnabled() {
246
+ return this.options.enableCapitalization;
247
+ }
248
+ /**
249
+ * Check if paragraph detection is enabled
250
+ *
251
+ * @returns True if paragraph detection is enabled
252
+ */
253
+ isParagraphsEnabled() {
254
+ return this.options.enableParagraphs;
255
+ }
256
+ /**
257
+ * Get maximum line length
258
+ *
259
+ * @returns Maximum line length in characters
260
+ */
261
+ getMaxLineLength() {
262
+ return this.options.maxLineLength;
263
+ }
264
+ };
265
+ /**
266
+ * Question word patterns for detecting questions
267
+ */
268
+ _TextFormatter.QUESTION_WORDS = [
269
+ "how",
270
+ "what",
271
+ "when",
272
+ "where",
273
+ "who",
274
+ "whom",
275
+ "whose",
276
+ "why",
277
+ "which",
278
+ "can",
279
+ "could",
280
+ "would",
281
+ "should",
282
+ "will",
283
+ "do",
284
+ "does",
285
+ "did",
286
+ "is",
287
+ "are",
288
+ "was",
289
+ "were"
290
+ ];
291
+ /**
292
+ * Default formatting options
293
+ */
294
+ _TextFormatter.DEFAULT_OPTIONS = {
295
+ enablePunctuation: true,
296
+ enableCapitalization: true,
297
+ enableParagraphs: false,
298
+ maxLineLength: 80
299
+ };
300
+ var TextFormatter = _TextFormatter;
301
+
302
+ exports.TextFormatter = TextFormatter;
303
+ exports.estimateTranscriptionCost = estimateTranscriptionCost;
304
+ exports.extractSpeakers = extractSpeakers;
305
+ exports.formatSegments = formatSegments;
306
+ exports.transcribeAudio = transcribeAudio;
307
+ //# sourceMappingURL=index.js.map
308
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/processing/transcription.ts","../../src/processing/text-formatter.ts"],"names":["OpenAI"],"mappings":";;;;;;;;;AA8HA,eAAsB,eAAA,CACpB,WACA,OAAA,EAC8B;AAE9B,EAAA,IAAI,CAAC,OAAA,CAAQ,MAAA,IAAU,QAAQ,MAAA,CAAO,IAAA,OAAW,EAAA,EAAI;AACnD,IAAA,MAAM,IAAI,MAAM,4BAA4B,CAAA;AAAA,EAC9C;AAGA,EAAA,MAAM,MAAA,GAAS,IAAIA,uBAAA,CAAO;AAAA,IACxB,QAAQ,OAAA,CAAQ;AAAA,GACjB,CAAA;AAED,EAAA,IAAI;AAEF,IAAA,MAAM,MAAA,GAAoC;AAAA,MACxC,IAAA,EAAM,SAAA;AAAA,MACN,KAAA,EAAO,QAAQ,KAAA,IAAS;AAAA,KAC1B;AAGA,IAAA,IAAI,QAAQ,QAAA,EAAU;AACpB,MAAA,MAAA,CAAO,WAAW,OAAA,CAAQ,QAAA;AAAA,IAC5B;AAEA,IAAA,IAAI,QAAQ,MAAA,EAAQ;AAClB,MAAA,MAAA,CAAO,SAAS,OAAA,CAAQ,MAAA;AAAA,IAC1B;AAEA,IAAA,IAAI,QAAQ,eAAA,EAAiB;AAC3B,MAAA,MAAA,CAAO,kBAAkB,OAAA,CAAQ,eAAA;AAAA,IACnC;AAEA,IAAA,IAAI,OAAA,CAAQ,gBAAgB,KAAA,CAAA,EAAW;AACrC,MAAA,MAAA,CAAO,cAAc,OAAA,CAAQ,WAAA;AAAA,IAC/B;AAEA,IAAA,IAAI,OAAA,CAAQ,uBAAA,IAA2B,OAAA,CAAQ,uBAAA,CAAwB,SAAS,CAAA,EAAG;AACjF,MAAA,MAAA,CAAO,0BAA0B,OAAA,CAAQ,uBAAA;AAAA,IAC3C;AAGA,IAAA,MAAM,WAAW,MAAM,MAAA,CAAO,KAAA,CAAM,cAAA,CAAe,OAAO,MAAM,CAAA;AAGhE,IAAA,IAAI,OAAO,aAAa,QAAA,EAAU;AAEhC,MAAA,OAAO,EAAE,MAAM,QAAA,EAAS;AAAA,IAC1B;AAGA,IAAA,MAAM,MAAA,GAA8B;AAAA,MAClC,MAAO,QAAA,CAAiB;AAAA,KAC1B;AAGA,IAAA,IAAI,cAAc,QAAA,EAAU;AAC1B,MAAA,MAAA,CAAO,WAAY,QAAA,CAAiB,QAAA;AAAA,IACtC;AAEA,IAAA,IAAI,cAAc,QAAA,EAAU;AAC1B,MAAA,MAAA,CAAO,WAAY,QAAA,CAAiB,QAAA;AAAA,IACtC;AAEA,IAAA,IAAI,cAAc,QAAA,EAAU;AAC1B,MAAA,MAAA,CAAO,WAAY,QAAA,CAAiB,QAAA;AAAA,IACtC;AAEA,IAAA,IAAI,WAAW,QAAA,EAAU;AACvB,MAAA,MAAA,CAAO,QAAS,QAAA,CAAiB,KAAA;AAAA,IACnC;AAEA,IAAA,OAAO,MAAA;AAAA,EACT,SAAS,KAAA,EAAO;AAEd,IAAA,IAAI,iBAAiB,KAAA,EAAO;AAC1B,MAAA,MAAM,KAAA;AAAA,IACR;AACA,IAAA,MAAM,IAAI,MAAM,2CAA2C,CAAA;AAAA,EAC7D;AACF;AAQO,SAAS,eAAe,QAAA,EAA2C;AACxE,EAAA,IAAI,CAAC,QAAA,IAAY,QAAA,CAAS,MAAA,KAAW,CAAA,EAAG;AACtC,IAAA,OAAO,EAAA;AAAA,EACT;AAEA,EAAA,OAAO,QAAA,CACJ,GAAA,CAAI,CAAC,OAAA,KAAY;AAChB,IAAA,MAAM,KAAA,GAAQ,eAAA,CAAgB,OAAA,CAAQ,KAAK,CAAA;AAC3C,IAAA,MAAM,GAAA,GAAM,eAAA,CAAgB,OAAA,CAAQ,GAAG,CAAA;AACvC,IAAA,OAAO,CAAA,C
AAA,EAAI,KAAK,CAAA,GAAA,EAAM,GAAG,KAAK,OAAA,CAAQ,IAAA,CAAK,MAAM,CAAA,CAAA;AAAA,EACnD,CAAC,CAAA,CACA,IAAA,CAAK,IAAI,CAAA;AACd;AAQA,SAAS,gBAAgB,OAAA,EAAyB;AAChD,EAAA,MAAM,IAAA,GAAO,IAAA,CAAK,KAAA,CAAM,OAAA,GAAU,EAAE,CAAA;AACpC,EAAA,MAAM,OAAO,OAAA,GAAU,EAAA;AACvB,EAAA,OAAO,GAAG,IAAA,CAAK,QAAA,EAAS,CAAE,QAAA,CAAS,GAAG,GAAG,CAAC,CAAA,CAAA,EAAI,IAAA,CAAK,QAAQ,CAAC,CAAA,CAAE,QAAA,CAAS,CAAA,EAAG,GAAG,CAAC,CAAA,CAAA;AAChF;AAUO,SAAS,gBAAgB,IAAA,EAAwD;AACtF,EAAA,MAAM,cAAA,GAAiB,0DAAA;AACvB,EAAA,MAAM,UAAU,CAAC,GAAG,IAAA,CAAK,QAAA,CAAS,cAAc,CAAC,CAAA;AAEjD,EAAA,OAAO,OAAA,CACJ,MAAA,CAAO,CAAC,KAAA,KAAU,KAAA,CAAM,CAAC,CAAA,IAAK,KAAA,CAAM,CAAC,CAAC,CAAA,CACtC,GAAA,CAAI,CAAC,KAAA,MAAW;AAAA,IACf,OAAA,EAAS,KAAA,CAAM,CAAC,CAAA,CAAG,IAAA,EAAK;AAAA,IACxB,IAAA,EAAM,KAAA,CAAM,CAAC,CAAA,CAAG,IAAA;AAAK,GACvB,CAAE,CAAA;AACN;AASO,SAAS,0BAA0B,eAAA,EAAiC;AACzE,EAAA,MAAM,UAAU,eAAA,GAAkB,EAAA;AAClC,EAAA,MAAM,aAAA,GAAgB,IAAA;AACtB,EAAA,OAAO,OAAA,GAAU,aAAA;AACnB;;;ACrPO,IAAM,cAAA,GAAN,MAAM,cAAA,CAAc;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EA2BzB,WAAA,CAAY,OAAA,GAAiC,EAAC,EAAG;AAC/C,IAAA,IAAA,CAAK,OAAA,GAAU;AAAA,MACb,GAAG,cAAA,CAAc,eAAA;AAAA,MACjB,GAAG;AAAA,KACL;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAaA,iBAAiB,IAAA,EAAsB;AAErC,IAAA,IAAI,CAAC,IAAA,EAAM;AACT,MAAA,OAAO,EAAA;AAAA,IACT;AAGA,IAAA,IAAA,GAAO,KAAK,IAAA,EAAK;AAGjB,IAAA,IAAI,CAAC,IAAA,EAAM;AACT,MAAA,OAAO,EAAA;AAAA,IACT;AAGA,IAAA,IAAI,QAAA,CAAS,IAAA,CAAK,IAAI,CAAA,EAAG;AACvB,MAAA,OAAO,IAAA;AAAA,IACT;AAGA,IAAA,MAAM,SAAA,GAAY,KAAK,WAAA,EAAY;AACnC,IAAA,MAAM,SAAA,GAAY,SAAA,CAAU,KAAA,CAAM,KAAK,EAAE,CAAC,CAAA;AAE1C,IAAA,IAAI,SAAA,IAAa,cAAA,CAAc,cAAA,CAAe,QAAA,CAAS,SAAS,CAAA,EAAG;AACjE,MAAA,OAAO,IAAA,GAAO,GAAA;AAAA,IAChB;AAGA,IAAA,OAAO,IAAA,GAAO,GAAA;AAAA,EAChB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,oBAAoB,IAAA,EAAsB;AAExC,IAAA,IAAI,CAAC,IAAA,EAAM;AACT,MAAA,OAAO,EAAA;AAAA,IACT;AAGA,IAAA,IAAI,MAAA,GAAS,KAAK,MAAA,CAAO,CAAC,EAAE,WAAA,EAAY,GAAI,IAAA,CAAK,KAAA,CAAM,CAAC,CAAA;AAGxD,IAAA,MAAA,GAAS,OAAO,OAAA,CAAQ,oBAA
A,EAAsB,CAAC,MAAA,EAAQ,aAAa,MAAA,KAAW;AAC7E,MAAA,OAAO,WAAA,GAAc,OAAO,WAAA,EAAY;AAAA,IAC1C,CAAC,CAAA;AAED,IAAA,OAAO,MAAA;AAAA,EACT;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,WAAW,IAAA,EAAsB;AAC/B,IAAA,IAAI,CAAC,IAAA,EAAM;AACT,MAAA,OAAO,EAAA;AAAA,IACT;AAEA,IAAA,IAAI,MAAA,GAAS,IAAA;AAGb,IAAA,IAAI,IAAA,CAAK,QAAA,CAAS,IAAI,CAAA,EAAG;AACvB,MAAA,MAAM,KAAA,GAAQ,IAAA,CAAK,KAAA,CAAM,IAAI,CAAA;AAC7B,MAAA,MAAA,GAAS,KAAA,CAAM,IAAI,CAAA,IAAA,KAAQ,IAAA,CAAK,iBAAiB,IAAI,CAAC,CAAA,CAAE,IAAA,CAAK,IAAI,CAAA;AACjE,MAAA,OAAO,MAAA;AAAA,IACT;AAEA,IAAA,OAAO,IAAA,CAAK,iBAAiB,MAAM,CAAA;AAAA,EACrC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQQ,iBAAiB,IAAA,EAAsB;AAC7C,IAAA,IAAI,CAAC,IAAA,IAAQ,CAAC,IAAA,CAAK,MAAK,EAAG;AACzB,MAAA,OAAO,IAAA;AAAA,IACT;AAEA,IAAA,IAAI,MAAA,GAAS,IAAA;AAGb,IAAA,IAAI,IAAA,CAAK,QAAQ,oBAAA,EAAsB;AACrC,MAAA,MAAA,GAAS,IAAA,CAAK,oBAAoB,MAAM,CAAA;AAAA,IAC1C;AAGA,IAAA,IAAI,IAAA,CAAK,QAAQ,iBAAA,EAAmB;AAClC,MAAA,MAAA,GAAS,IAAA,CAAK,iBAAiB,MAAM,CAAA;AAAA,IACvC;AAEA,IAAA,OAAO,MAAA;AAAA,EACT;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWA,eAAe,QAAA,EAA0D;AAEvE,IAAA,IAAI,CAAC,QAAA,IAAY,QAAA,CAAS,MAAA,KAAW,CAAA,EAAG;AACtC,MAAA,OAAO,EAAC;AAAA,IACV;AAEA,IAAA,OAAO,QAAA,CAAS,IAAI,CAAA,OAAA,KAAW;AAE7B,MAAA,MAAM,aAAA,GAAgB,IAAA,CAAK,UAAA,CAAW,OAAA,CAAQ,IAAI,CAAA;AAGlD,MAAA,OAAO;AAAA,QACL,GAAG,OAAA;AAAA,QACH,IAAA,EAAM;AAAA,OACR;AAAA,IACF,CAAC,CAAA;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,UAAA,GAAwD;AACtD,IAAA,OAAO,EAAE,GAAG,IAAA,CAAK,OAAA,EAAQ;AAAA,EAC3B;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,WAAW,OAAA,EAA+C;AACxD,IAAA,IAAA,CAAK,OAAA,GAAU;AAAA,MACb,GAAG,IAAA,CAAK,OAAA;AAAA,MACR,GAAG;AAAA,KACL;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,oBAAA,GAAgC;AAC9B,IAAA,OAAO,KAAK,OAAA,CAAQ,iBAAA;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,uBAAA,GAAmC;AACjC,IAAA,OAAO,KAAK,OAAA,CAAQ,oBAAA;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,mBAAA,GAA+B;AAC7B,IAAA,OAAO,KAAK,OAAA,CAAQ,gBAAA;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,gBAAA,GAA2B;AACzB,IAAA,OAAO,KAAK,OAAA,CAAQ,aAAA;AAAA,EACtB;AACF,CAAA;AAAA;AAA
A;AAAA;AA3Oa,cAAA,CAMa,cAAA,GAAiB;AAAA,EACvC,KAAA;AAAA,EAAO,MAAA;AAAA,EAAQ,MAAA;AAAA,EAAQ,OAAA;AAAA,EAAS,KAAA;AAAA,EAAO,MAAA;AAAA,EAAQ,OAAA;AAAA,EAC/C,KAAA;AAAA,EAAO,OAAA;AAAA,EAAS,KAAA;AAAA,EAAO,OAAA;AAAA,EAAS,OAAA;AAAA,EAAS,QAAA;AAAA,EACzC,MAAA;AAAA,EAAQ,IAAA;AAAA,EAAM,MAAA;AAAA,EAAQ,KAAA;AAAA,EAAO,IAAA;AAAA,EAAM,KAAA;AAAA,EAAO,KAAA;AAAA,EAAO;AACnD,CAAA;AAAA;AAAA;AAAA;AAVW,cAAA,CAea,eAAA,GAAmD;AAAA,EACzE,iBAAA,EAAmB,IAAA;AAAA,EACnB,oBAAA,EAAsB,IAAA;AAAA,EACtB,gBAAA,EAAkB,KAAA;AAAA,EAClB,aAAA,EAAe;AACjB,CAAA;AApBK,IAAM,aAAA,GAAN","file":"index.js","sourcesContent":["import OpenAI from 'openai'\nimport type { TranscriptionCreateParams } from 'openai/resources/audio/transcriptions'\n\n/**\n * Timestamp granularity options for Whisper transcription\n */\nexport type TimestampGranularity = 'word' | 'segment'\n\n/**\n * Response format options for Whisper transcription\n */\nexport type ResponseFormat = 'json' | 'text' | 'srt' | 'verbose_json' | 'vtt'\n\n/**\n * Transcription segment with timing information\n */\nexport interface TranscriptionSegment {\n id: number\n start: number\n end: number\n text: string\n}\n\n/**\n * Word-level timestamp information\n */\nexport interface TranscriptionWord {\n word: string\n start: number\n end: number\n}\n\n/**\n * Options for Whisper audio transcription\n */\nexport interface TranscriptionOptions {\n /**\n * OpenAI API key for authentication\n */\n apiKey: string\n\n /**\n * The language of the input audio in ISO-639-1 format (e.g., 'en', 'fr', 'es').\n * Providing this can improve accuracy and latency.\n */\n language?: string\n\n /**\n * Optional text to guide the model's style or continue a previous audio segment.\n * The prompt should match the audio language and can be used for:\n * - Maintaining consistent terminology (e.g., \"Dr. 
Smith, AI, NLP\")\n * - Speaker identification hints (e.g., \"Speaker 1, Speaker 2\")\n * - Ensuring proper punctuation and formatting\n */\n prompt?: string\n\n /**\n * The format of the transcript output.\n * - 'json': Returns basic JSON with text only\n * - 'text': Returns plain text\n * - 'srt': Returns SRT subtitle format\n * - 'verbose_json': Returns detailed JSON with timestamps and metadata\n * - 'vtt': Returns WebVTT subtitle format\n */\n response_format?: ResponseFormat\n\n /**\n * The sampling temperature, between 0 and 1.\n * Higher values like 0.8 will make the output more random,\n * while lower values like 0.2 will make it more focused and deterministic.\n */\n temperature?: number\n\n /**\n * The timestamp granularities to populate for this transcription.\n * Can include 'word' and/or 'segment' level timestamps.\n * Note: There is no additional latency for segment timestamps, but word timestamps\n * incur latency.\n */\n timestamp_granularities?: TimestampGranularity[]\n\n /**\n * The ID of the model to use. 
Only 'whisper-1' is currently available.\n */\n model?: string\n}\n\n/**\n * Result of audio transcription\n */\nexport interface TranscriptionResult {\n /**\n * The transcribed text\n */\n text: string\n\n /**\n * The language of the transcription (only available with verbose_json)\n */\n language?: string\n\n /**\n * Duration of the audio in seconds (only available with verbose_json)\n */\n duration?: number\n\n /**\n * Segment-level timestamps (only available when timestamp_granularities includes 'segment')\n */\n segments?: TranscriptionSegment[]\n\n /**\n * Word-level timestamps (only available when timestamp_granularities includes 'word')\n */\n words?: TranscriptionWord[]\n}\n\n/**\n * Transcribe audio using OpenAI's Whisper API\n *\n * @param audioFile - The audio file to transcribe (File or Blob)\n * @param options - Transcription options including API key and Whisper parameters\n * @returns Promise resolving to the transcription result\n *\n * @throws {Error} When API key is missing or API call fails\n */\nexport async function transcribeAudio(\n audioFile: File | Blob,\n options: TranscriptionOptions\n): Promise<TranscriptionResult> {\n // Validate API key\n if (!options.apiKey || options.apiKey.trim() === '') {\n throw new Error('OpenAI API key is required')\n }\n\n // Initialize OpenAI client\n const openai = new OpenAI({\n apiKey: options.apiKey,\n })\n\n try {\n // Build transcription parameters\n const params: TranscriptionCreateParams = {\n file: audioFile as File,\n model: options.model || 'whisper-1',\n }\n\n // Add optional parameters\n if (options.language) {\n params.language = options.language\n }\n\n if (options.prompt) {\n params.prompt = options.prompt\n }\n\n if (options.response_format) {\n params.response_format = options.response_format\n }\n\n if (options.temperature !== undefined) {\n params.temperature = options.temperature\n }\n\n if (options.timestamp_granularities && options.timestamp_granularities.length > 0) {\n 
params.timestamp_granularities = options.timestamp_granularities\n }\n\n // Call Whisper API\n const response = await openai.audio.transcriptions.create(params)\n\n // Parse and return result based on response type\n if (typeof response === 'string') {\n // For text, srt, vtt formats\n return { text: response }\n }\n\n // For json and verbose_json formats\n const result: TranscriptionResult = {\n text: (response as any).text,\n }\n\n // Add optional fields if present (from verbose_json)\n if ('language' in response) {\n result.language = (response as any).language\n }\n\n if ('duration' in response) {\n result.duration = (response as any).duration\n }\n\n if ('segments' in response) {\n result.segments = (response as any).segments\n }\n\n if ('words' in response) {\n result.words = (response as any).words\n }\n\n return result\n } catch (error) {\n // Re-throw with original error message\n if (error instanceof Error) {\n throw error\n }\n throw new Error('Failed to transcribe audio: Unknown error')\n }\n}\n\n/**\n * Utility function to format transcription segments into a readable format\n *\n * @param segments - Array of transcription segments with timestamps\n * @returns Formatted string with timestamps\n */\nexport function formatSegments(segments?: TranscriptionSegment[]): string {\n if (!segments || segments.length === 0) {\n return ''\n }\n\n return segments\n .map((segment) => {\n const start = formatTimestamp(segment.start)\n const end = formatTimestamp(segment.end)\n return `[${start} - ${end}] ${segment.text.trim()}`\n })\n .join('\\n')\n}\n\n/**\n * Format seconds into MM:SS.s format\n *\n * @param seconds - Time in seconds\n * @returns Formatted timestamp string\n */\nfunction formatTimestamp(seconds: number): string {\n const mins = Math.floor(seconds / 60)\n const secs = seconds % 60\n return `${mins.toString().padStart(2, '0')}:${secs.toFixed(1).padStart(4, '0')}`\n}\n\n/**\n * Utility function to extract speaker-labeled text from transcription\n * 
Note: Whisper doesn't natively support speaker diarization, but you can\n * guide it using prompts like \"Speaker 1, Speaker 2\" to encourage labeling.\n *\n * @param text - Transcribed text potentially containing speaker labels\n * @returns Array of speaker segments\n */\nexport function extractSpeakers(text: string): Array<{ speaker: string; text: string }> {\n const speakerPattern = /(Speaker \\d+|[A-Z][a-z]+ [A-Z][a-z]+):\\s*([^.!?]+[.!?])/g\n const matches = [...text.matchAll(speakerPattern)]\n\n return matches\n .filter((match) => match[1] && match[2])\n .map((match) => ({\n speaker: match[1]!.trim(),\n text: match[2]!.trim(),\n }))\n}\n\n/**\n * Estimate the cost of transcribing audio based on duration\n * Whisper pricing: $0.006 per minute\n *\n * @param durationSeconds - Duration of audio in seconds\n * @returns Estimated cost in USD\n */\nexport function estimateTranscriptionCost(durationSeconds: number): number {\n const minutes = durationSeconds / 60\n const costPerMinute = 0.006\n return minutes * costPerMinute\n}\n","/**\n * Text Formatter for Transcription Processing\n * @module @ainative/video/processing\n */\n\nimport type { TranscriptionSegment, TextFormattingOptions } from './types';\n\n/**\n * TextFormatter class for applying formatting to transcribed text\n *\n * Supports:\n * - Automatic punctuation\n * - Capitalization\n * - Paragraph detection\n * - Line length management\n *\n * @example\n * ```typescript\n * const formatter = new TextFormatter({\n * enablePunctuation: true,\n * enableCapitalization: true\n * });\n *\n * const formatted = formatter.formatText('hello world');\n * // Result: \"Hello world.\"\n * ```\n */\nexport class TextFormatter {\n private options: Required<TextFormattingOptions>;\n\n /**\n * Question word patterns for detecting questions\n */\n private static readonly QUESTION_WORDS = [\n 'how', 'what', 'when', 'where', 'who', 'whom', 'whose',\n 'why', 'which', 'can', 'could', 'would', 'should',\n 'will', 'do', 'does', 
'did', 'is', 'are', 'was', 'were'\n ];\n\n /**\n * Default formatting options\n */\n private static readonly DEFAULT_OPTIONS: Required<TextFormattingOptions> = {\n enablePunctuation: true,\n enableCapitalization: true,\n enableParagraphs: false,\n maxLineLength: 80\n };\n\n /**\n * Creates a new TextFormatter instance\n *\n * @param options - Formatting options\n */\n constructor(options: TextFormattingOptions = {}) {\n this.options = {\n ...TextFormatter.DEFAULT_OPTIONS,\n ...options\n };\n }\n\n /**\n * Apply punctuation to text\n *\n * Adds appropriate punctuation marks based on sentence structure:\n * - Periods for statements\n * - Question marks for questions\n * - Preserves existing punctuation\n *\n * @param text - Input text\n * @returns Text with punctuation applied\n */\n applyPunctuation(text: string): string {\n // Handle null/undefined input\n if (!text) {\n return '';\n }\n\n // Trim whitespace\n text = text.trim();\n\n // Return empty string if only whitespace\n if (!text) {\n return '';\n }\n\n // Already has ending punctuation\n if (/[.!?]$/.test(text)) {\n return text;\n }\n\n // Check if it's a question\n const lowerText = text.toLowerCase();\n const firstWord = lowerText.split(/\\s+/)[0];\n\n if (firstWord && TextFormatter.QUESTION_WORDS.includes(firstWord)) {\n return text + '?';\n }\n\n // Default to period\n return text + '.';\n }\n\n /**\n * Apply capitalization to text\n *\n * Capitalizes:\n * - First letter of text\n * - Letters after sentence-ending punctuation\n *\n * @param text - Input text\n * @returns Text with capitalization applied\n */\n applyCapitalization(text: string): string {\n // Handle empty input\n if (!text) {\n return '';\n }\n\n // Capitalize first letter\n let result = text.charAt(0).toUpperCase() + text.slice(1);\n\n // Capitalize after sentence-ending punctuation\n result = result.replace(/([.!?]\\s+)([a-z])/g, (_match, punctuation, letter) => {\n return punctuation + letter.toUpperCase();\n });\n\n return result;\n 
}\n\n /**\n * Format a single text string with all enabled options\n *\n * @param text - Input text\n * @returns Formatted text\n */\n formatText(text: string): string {\n if (!text) {\n return '';\n }\n\n let result = text;\n\n // Handle multiline text\n if (text.includes('\\n')) {\n const lines = text.split('\\n');\n result = lines.map(line => this.formatSingleLine(line)).join('\\n');\n return result;\n }\n\n return this.formatSingleLine(result);\n }\n\n /**\n * Format a single line of text\n *\n * @param text - Input text (single line)\n * @returns Formatted text\n */\n private formatSingleLine(text: string): string {\n if (!text || !text.trim()) {\n return text;\n }\n\n let result = text;\n\n // Apply capitalization first\n if (this.options.enableCapitalization) {\n result = this.applyCapitalization(result);\n }\n\n // Apply punctuation\n if (this.options.enablePunctuation) {\n result = this.applyPunctuation(result);\n }\n\n return result;\n }\n\n /**\n * Format an array of transcription segments\n *\n * Applies formatting to the text of each segment while preserving\n * timestamps, confidence scores, and speaker information.\n *\n * @param segments - Array of transcription segments\n * @returns Array of formatted segments\n */\n formatSegments(segments: TranscriptionSegment[]): TranscriptionSegment[] {\n // Handle empty array\n if (!segments || segments.length === 0) {\n return [];\n }\n\n return segments.map(segment => {\n // Format the text\n const formattedText = this.formatText(segment.text);\n\n // Return new segment with formatted text, preserving all other properties\n return {\n ...segment,\n text: formattedText\n };\n });\n }\n\n /**\n * Get current formatting options\n *\n * @returns Current options\n */\n getOptions(): Readonly<Required<TextFormattingOptions>> {\n return { ...this.options };\n }\n\n /**\n * Update formatting options\n *\n * @param options - New options to merge with existing\n */\n setOptions(options: 
Partial<TextFormattingOptions>): void {\n this.options = {\n ...this.options,\n ...options\n };\n }\n\n /**\n * Check if punctuation is enabled\n *\n * @returns True if punctuation is enabled\n */\n isPunctuationEnabled(): boolean {\n return this.options.enablePunctuation;\n }\n\n /**\n * Check if capitalization is enabled\n *\n * @returns True if capitalization is enabled\n */\n isCapitalizationEnabled(): boolean {\n return this.options.enableCapitalization;\n }\n\n /**\n * Check if paragraph detection is enabled\n *\n * @returns True if paragraph detection is enabled\n */\n isParagraphsEnabled(): boolean {\n return this.options.enableParagraphs;\n }\n\n /**\n * Get maximum line length\n *\n * @returns Maximum line length in characters\n */\n getMaxLineLength(): number {\n return this.options.maxLineLength;\n }\n}\n"]}