@wovin/tranz 0.1.36 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -5
- package/dist/{audio.min.js → audio.js} +32 -18
- package/dist/index.d.ts +3 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/{index.min.js → index.js} +161 -29
- package/dist/providers.d.ts +1 -1
- package/dist/providers.d.ts.map +1 -1
- package/dist/{providers.min.js → providers.js} +68 -24
- package/dist/utils/audio/merge-results.d.ts +14 -12
- package/dist/utils/audio/merge-results.d.ts.map +1 -1
- package/dist/utils/transcription/format.d.ts +27 -0
- package/dist/utils/transcription/format.d.ts.map +1 -1
- package/dist/utils/transcription/providers.d.ts +30 -1
- package/dist/utils/transcription/providers.d.ts.map +1 -1
- package/dist/utils/transcription/transcribe.d.ts +5 -0
- package/dist/utils/transcription/transcribe.d.ts.map +1 -1
- package/package.json +10 -8
- package/src/audio.ts +25 -0
- package/src/index.ts +61 -0
- package/src/providers.ts +23 -0
- package/src/realtime.ts +58 -0
- package/src/utils/audio/index.ts +6 -0
- package/src/utils/audio/merge-results.ts +198 -0
- package/src/utils/audio/split.ts +504 -0
- package/src/utils/file-utils.ts +16 -0
- package/src/utils/transcription/format.ts +208 -0
- package/src/utils/transcription/mime-detection.ts +80 -0
- package/src/utils/transcription/providers.ts +572 -0
- package/src/utils/transcription/realtime.ts +821 -0
- package/src/utils/transcription/runtime.ts +40 -0
- package/src/utils/transcription/transcribe.ts +366 -0
- /package/dist/{realtime.min.js → realtime.js} +0 -0
|
@@ -245,6 +245,16 @@ var MistralProvider = class {
|
|
|
245
245
|
if (timestampGranularity) {
|
|
246
246
|
formData.append("timestamp_granularities", timestampGranularity);
|
|
247
247
|
}
|
|
248
|
+
if (params.contextBias && params.contextBias.length > 0) {
|
|
249
|
+
if (params.contextBias.length > VOXTRAL_LIMITS.maxContextBiasingTerms) {
|
|
250
|
+
throw new Error(
|
|
251
|
+
`contextBias has ${params.contextBias.length} terms; Voxtral limit is ${VOXTRAL_LIMITS.maxContextBiasingTerms}`
|
|
252
|
+
);
|
|
253
|
+
}
|
|
254
|
+
for (const term of params.contextBias) {
|
|
255
|
+
formData.append("context_bias[]", term);
|
|
256
|
+
}
|
|
257
|
+
}
|
|
248
258
|
const response = await fetch("https://api.mistral.ai/v1/audio/transcriptions", {
|
|
249
259
|
method: "POST",
|
|
250
260
|
headers: {
|
|
@@ -260,14 +270,27 @@ var MistralProvider = class {
|
|
|
260
270
|
if (!result?.text) {
|
|
261
271
|
return { text: "", error: "No transcription returned", rawResponse: result };
|
|
262
272
|
}
|
|
263
|
-
const
|
|
273
|
+
const segments = Array.isArray(result.segments) && result.segments.length > 0 ? result.segments.map((seg) => ({
|
|
274
|
+
startMs: Math.round((seg.start ?? 0) * 1e3),
|
|
275
|
+
endMs: Math.round((seg.end ?? 0) * 1e3),
|
|
276
|
+
text: seg.text ?? "",
|
|
277
|
+
...seg.speaker_id !== void 0 ? { diarization: seg.speaker_id } : {}
|
|
278
|
+
})) : void 0;
|
|
279
|
+
let words;
|
|
280
|
+
if (Array.isArray(result.words) && result.words.length > 0) {
|
|
281
|
+
words = result.words;
|
|
282
|
+
} else if (Array.isArray(result.segments)) {
|
|
283
|
+
const nested = result.segments.flatMap((seg) => seg.words ?? []);
|
|
284
|
+
if (nested.length > 0) words = nested;
|
|
285
|
+
}
|
|
264
286
|
const duration = result.usage?.prompt_audio_seconds;
|
|
265
287
|
return {
|
|
266
288
|
text: result.text,
|
|
267
289
|
language: result.language ?? params.language,
|
|
268
290
|
model: result.model,
|
|
269
291
|
duration,
|
|
270
|
-
words,
|
|
292
|
+
...words ? { words } : {},
|
|
293
|
+
...segments ? { segments } : {},
|
|
271
294
|
rawResponse: result
|
|
272
295
|
};
|
|
273
296
|
}
|
|
@@ -620,6 +643,10 @@ async function autoSplitAudio(audioPath, outputDir, config = {}) {
|
|
|
620
643
|
}
|
|
621
644
|
|
|
622
645
|
// src/utils/audio/merge-results.ts
|
|
646
|
+
function prefixChunkLabel(chunkIndex, value) {
|
|
647
|
+
if (value === void 0) return void 0;
|
|
648
|
+
return `chunk${chunkIndex}/${String(value)}`;
|
|
649
|
+
}
|
|
623
650
|
function mergeTranscriptionResults(results, segments) {
|
|
624
651
|
if (results.length === 0) {
|
|
625
652
|
return {
|
|
@@ -628,10 +655,7 @@ function mergeTranscriptionResults(results, segments) {
|
|
|
628
655
|
};
|
|
629
656
|
}
|
|
630
657
|
if (results.length === 1) {
|
|
631
|
-
return
|
|
632
|
-
...results[0],
|
|
633
|
-
totalSegments: 1
|
|
634
|
-
};
|
|
658
|
+
return results[0];
|
|
635
659
|
}
|
|
636
660
|
const errors = results.map((r, i) => r.error ? `Segment ${i}: ${r.error}` : null).filter(Boolean);
|
|
637
661
|
if (errors.length > 0) {
|
|
@@ -652,12 +676,26 @@ function mergeTranscriptionResults(results, segments) {
|
|
|
652
676
|
start: (word.start || 0) + segment.startSec,
|
|
653
677
|
end: (word.end || 0) + segment.startSec,
|
|
654
678
|
confidence: word.confidence,
|
|
655
|
-
speaker: word.speaker
|
|
679
|
+
speaker: prefixChunkLabel(i, word.speaker)
|
|
680
|
+
});
|
|
681
|
+
}
|
|
682
|
+
}
|
|
683
|
+
const mergedSegments = [];
|
|
684
|
+
for (let i = 0; i < results.length; i++) {
|
|
685
|
+
const result = results[i];
|
|
686
|
+
const chunkOffsetMs = Math.round(segments[i].startSec * 1e3);
|
|
687
|
+
if (!result.segments) continue;
|
|
688
|
+
for (const seg of result.segments) {
|
|
689
|
+
mergedSegments.push({
|
|
690
|
+
startMs: seg.startMs + chunkOffsetMs,
|
|
691
|
+
endMs: seg.endMs + chunkOffsetMs,
|
|
692
|
+
text: seg.text,
|
|
693
|
+
...seg.diarization !== void 0 ? { diarization: prefixChunkLabel(i, seg.diarization) } : {}
|
|
656
694
|
});
|
|
657
695
|
}
|
|
658
696
|
}
|
|
659
697
|
const totalDuration = segments.reduce((sum, seg) => sum + seg.durationSec, 0);
|
|
660
|
-
const
|
|
698
|
+
const audioChunks = results.map((r, i) => ({
|
|
661
699
|
index: i,
|
|
662
700
|
startSec: segments[i].startSec,
|
|
663
701
|
endSec: segments[i].endSec,
|
|
@@ -665,24 +703,23 @@ function mergeTranscriptionResults(results, segments) {
|
|
|
665
703
|
}));
|
|
666
704
|
const mergedRawResponse = {
|
|
667
705
|
merged: true,
|
|
668
|
-
|
|
669
|
-
|
|
706
|
+
chunkCount: results.length,
|
|
707
|
+
chunks: results.map((r, i) => ({
|
|
670
708
|
index: i,
|
|
671
709
|
startSec: segments[i].startSec,
|
|
672
710
|
rawResponse: r.rawResponse
|
|
673
|
-
}))
|
|
674
|
-
words: mergedWords
|
|
711
|
+
}))
|
|
675
712
|
};
|
|
676
713
|
const firstResult = results[0];
|
|
677
714
|
return {
|
|
678
715
|
text: mergedText,
|
|
679
|
-
words: mergedWords,
|
|
680
716
|
duration: totalDuration,
|
|
681
717
|
language: firstResult.language,
|
|
682
718
|
model: firstResult.model,
|
|
683
719
|
rawResponse: mergedRawResponse,
|
|
684
|
-
|
|
685
|
-
|
|
720
|
+
audioChunks,
|
|
721
|
+
...mergedWords.length > 0 ? { words: mergedWords } : {},
|
|
722
|
+
...mergedSegments.length > 0 ? { segments: mergedSegments } : {}
|
|
686
723
|
};
|
|
687
724
|
}
|
|
688
725
|
|
|
@@ -784,6 +821,7 @@ function createMistralTranscriber(config) {
|
|
|
784
821
|
duration: knownDuration,
|
|
785
822
|
language,
|
|
786
823
|
model = defaultModel,
|
|
824
|
+
contextBias,
|
|
787
825
|
diarize = true,
|
|
788
826
|
timestamps = language ? void 0 : "segment",
|
|
789
827
|
autoSplit,
|
|
@@ -803,9 +841,10 @@ function createMistralTranscriber(config) {
|
|
|
803
841
|
model,
|
|
804
842
|
language,
|
|
805
843
|
diarize,
|
|
806
|
-
timestampGranularity: timestamps
|
|
844
|
+
timestampGranularity: timestamps,
|
|
845
|
+
contextBias
|
|
807
846
|
});
|
|
808
|
-
return
|
|
847
|
+
return result;
|
|
809
848
|
}
|
|
810
849
|
if (audioUrl) {
|
|
811
850
|
if (autoSplit === false) {
|
|
@@ -816,9 +855,10 @@ function createMistralTranscriber(config) {
|
|
|
816
855
|
model,
|
|
817
856
|
language,
|
|
818
857
|
diarize,
|
|
819
|
-
timestampGranularity: timestamps
|
|
858
|
+
timestampGranularity: timestamps,
|
|
859
|
+
contextBias
|
|
820
860
|
});
|
|
821
|
-
return
|
|
861
|
+
return result2;
|
|
822
862
|
}
|
|
823
863
|
let duration2 = knownDuration;
|
|
824
864
|
if (duration2 === void 0) {
|
|
@@ -840,9 +880,10 @@ function createMistralTranscriber(config) {
|
|
|
840
880
|
model,
|
|
841
881
|
language,
|
|
842
882
|
diarize,
|
|
843
|
-
timestampGranularity: timestamps
|
|
883
|
+
timestampGranularity: timestamps,
|
|
884
|
+
contextBias
|
|
844
885
|
});
|
|
845
|
-
return
|
|
886
|
+
return result2;
|
|
846
887
|
}
|
|
847
888
|
log.info(`Downloading URL to temp file for processing...`);
|
|
848
889
|
const outDir2 = splitOutputDir || path3.join(os.tmpdir(), `tranz-${Date.now()}`);
|
|
@@ -855,6 +896,7 @@ function createMistralTranscriber(config) {
|
|
|
855
896
|
model,
|
|
856
897
|
diarize,
|
|
857
898
|
timestamps,
|
|
899
|
+
contextBias,
|
|
858
900
|
autoSplit: true,
|
|
859
901
|
splitOutputDir: outDir2,
|
|
860
902
|
logger: customLogger,
|
|
@@ -881,9 +923,10 @@ function createMistralTranscriber(config) {
|
|
|
881
923
|
model,
|
|
882
924
|
language,
|
|
883
925
|
diarize,
|
|
884
|
-
timestampGranularity: timestamps
|
|
926
|
+
timestampGranularity: timestamps,
|
|
927
|
+
contextBias
|
|
885
928
|
});
|
|
886
|
-
return
|
|
929
|
+
return result;
|
|
887
930
|
}
|
|
888
931
|
log.info(`Duration ${duration.toFixed(1)}s > ${maxDuration}s, splitting audio...`);
|
|
889
932
|
const outDir = splitOutputDir || path3.join(os.tmpdir(), `tranz-split-${Date.now()}`);
|
|
@@ -902,7 +945,8 @@ function createMistralTranscriber(config) {
|
|
|
902
945
|
model,
|
|
903
946
|
language,
|
|
904
947
|
diarize,
|
|
905
|
-
timestampGranularity: timestamps
|
|
948
|
+
timestampGranularity: timestamps,
|
|
949
|
+
contextBias
|
|
906
950
|
});
|
|
907
951
|
results.push(result);
|
|
908
952
|
}
|
|
@@ -11,21 +11,23 @@ export interface WordData {
|
|
|
11
11
|
start: number;
|
|
12
12
|
end: number;
|
|
13
13
|
confidence?: number;
|
|
14
|
-
speaker?: string;
|
|
14
|
+
speaker?: string | number;
|
|
15
15
|
}
|
|
16
16
|
/**
|
|
17
|
-
*
|
|
17
|
+
* Metadata describing one audio chunk in an auto-split + merge run.
|
|
18
|
+
*/
|
|
19
|
+
export interface AudioChunk {
|
|
20
|
+
index: number;
|
|
21
|
+
startSec: number;
|
|
22
|
+
endSec: number;
|
|
23
|
+
text: string;
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* Merged transcription result with chunk-level metadata.
|
|
18
27
|
*/
|
|
19
28
|
export interface MergedTranscriptionResult extends TranscriptionResult {
|
|
20
|
-
/**
|
|
21
|
-
|
|
22
|
-
index: number;
|
|
23
|
-
startSec: number;
|
|
24
|
-
endSec: number;
|
|
25
|
-
text: string;
|
|
26
|
-
}[];
|
|
27
|
-
/** Total segments that were merged */
|
|
28
|
-
totalSegments?: number;
|
|
29
|
+
/** Audio chunks that were transcribed independently and merged. Absent when no split happened. */
|
|
30
|
+
audioChunks?: AudioChunk[];
|
|
29
31
|
}
|
|
30
32
|
/**
|
|
31
33
|
* Merge multiple transcription results from audio segments into one
|
|
@@ -40,7 +42,7 @@ export declare function mergeTranscriptionResults(results: TranscriptionResult[]
|
|
|
40
42
|
* Format merged results with optional segment markers in the text
|
|
41
43
|
*
|
|
42
44
|
* @param result - Merged transcription result
|
|
43
|
-
* @param includeMarkers - Whether to include [
|
|
45
|
+
* @param includeMarkers - Whether to include [Chunk N] markers
|
|
44
46
|
* @returns Formatted text
|
|
45
47
|
*/
|
|
46
48
|
export declare function formatMergedText(result: MergedTranscriptionResult, includeMarkers?: boolean): string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"merge-results.d.ts","sourceRoot":"","sources":["../../../src/utils/audio/merge-results.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,mBAAmB,
|
|
1
|
+
{"version":3,"file":"merge-results.d.ts","sourceRoot":"","sources":["../../../src/utils/audio/merge-results.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,mBAAmB,EAAqB,MAAM,+BAA+B,CAAA;AAC3F,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAE9C;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,MAAM,CAAA;IACb,GAAG,EAAE,MAAM,CAAA;IACX,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,OAAO,CAAC,EAAE,MAAM,GAAG,MAAM,CAAA;CAC1B;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,CAAA;IACb,QAAQ,EAAE,MAAM,CAAA;IAChB,MAAM,EAAE,MAAM,CAAA;IACd,IAAI,EAAE,MAAM,CAAA;CACb;AAED;;GAEG;AACH,MAAM,WAAW,yBAA0B,SAAQ,mBAAmB;IACpE,kGAAkG;IAClG,WAAW,CAAC,EAAE,UAAU,EAAE,CAAA;CAC3B;AAYD;;;;;;;GAOG;AACH,wBAAgB,yBAAyB,CACvC,OAAO,EAAE,mBAAmB,EAAE,EAC9B,QAAQ,EAAE,YAAY,EAAE,GACvB,yBAAyB,CAmG3B;AAED;;;;;;GAMG;AACH,wBAAgB,gBAAgB,CAC9B,MAAM,EAAE,yBAAyB,EACjC,cAAc,GAAE,OAAe,GAC9B,MAAM,CAeR"}
|
|
@@ -11,4 +11,31 @@ export declare function formatTranscriptWithPauses(transcript: string, words: Ar
|
|
|
11
11
|
end: number;
|
|
12
12
|
confidence: number;
|
|
13
13
|
}>, shortPauseThreshold?: number, longPauseThreshold?: number): string;
|
|
14
|
+
import type { TranscriptionResult } from './providers.ts';
|
|
15
|
+
import type { MergedTranscriptionResult } from '../audio/merge-results.ts';
|
|
16
|
+
export interface FormatMarkdownOptions {
|
|
17
|
+
/** Silence gap (seconds) that ends a paragraph. Default 1.5. */
|
|
18
|
+
gapSec?: number;
|
|
19
|
+
/** Include `· Speaker N` in each paragraph header when diarization labels are present. Default true. */
|
|
20
|
+
speakerLabel?: boolean;
|
|
21
|
+
/** Prepend an `# <source>` title + bulleted metadata block. Default false. */
|
|
22
|
+
includeHeader?: boolean;
|
|
23
|
+
/** Source filename to use in the `# ` title and `Source:` line (when includeHeader=true). */
|
|
24
|
+
source?: string;
|
|
25
|
+
/** Total audio duration in seconds — used for `Duration:` line and for picking mm:ss vs h:mm:ss formatting. */
|
|
26
|
+
durationSec?: number;
|
|
27
|
+
}
|
|
28
|
+
/**
|
|
29
|
+
* Format a transcription result as readable Markdown with timestamped paragraphs.
|
|
30
|
+
*
|
|
31
|
+
* Groups adjacent segments into paragraphs, starting a new paragraph on either
|
|
32
|
+
* a silence gap ≥ `gapSec` OR a change in diarization label. Each paragraph is
|
|
33
|
+
* preceded by `**[mm:ss · Speaker N]**` (or `**[h:mm:ss · Speaker N]**` for
|
|
34
|
+
* audio ≥ 1h). The speaker suffix is dropped when no diarization labels are
|
|
35
|
+
* present or all segments share the same label.
|
|
36
|
+
*
|
|
37
|
+
* If `segments` is missing/empty, falls back to emitting `result.text` as a
|
|
38
|
+
* single (un-timestamped) paragraph.
|
|
39
|
+
*/
|
|
40
|
+
export declare function formatTranscriptAsMarkdown(result: TranscriptionResult | MergedTranscriptionResult, opts?: FormatMarkdownOptions): string;
|
|
14
41
|
//# sourceMappingURL=format.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"format.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/format.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,wBAAgB,0BAA0B,CACzC,UAAU,EAAE,MAAM,EAClB,KAAK,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,CAAA;CAAE,CAAC,EAC9E,mBAAmB,SAAM,EACzB,kBAAkB,SAAM,GACtB,MAAM,CAgDR"}
|
|
1
|
+
{"version":3,"file":"format.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/format.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,wBAAgB,0BAA0B,CACzC,UAAU,EAAE,MAAM,EAClB,KAAK,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,CAAA;CAAE,CAAC,EAC9E,mBAAmB,SAAM,EACzB,kBAAkB,SAAM,GACtB,MAAM,CAgDR;AAED,OAAO,KAAK,EAAE,mBAAmB,EAAqB,MAAM,gBAAgB,CAAA;AAC5E,OAAO,KAAK,EAAE,yBAAyB,EAAE,MAAM,2BAA2B,CAAA;AAE1E,MAAM,WAAW,qBAAqB;IACrC,gEAAgE;IAChE,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,wGAAwG;IACxG,YAAY,CAAC,EAAE,OAAO,CAAA;IACtB,8EAA8E;IAC9E,aAAa,CAAC,EAAE,OAAO,CAAA;IACvB,6FAA6F;IAC7F,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,+GAA+G;IAC/G,WAAW,CAAC,EAAE,MAAM,CAAA;CACpB;AAsBD;;;;;;;;;;;GAWG;AACH,wBAAgB,0BAA0B,CACzC,MAAM,EAAE,mBAAmB,GAAG,yBAAyB,EACvD,IAAI,GAAE,qBAA0B,GAC9B,MAAM,CAuFR"}
|
|
@@ -2,6 +2,26 @@
|
|
|
2
2
|
* Transcription provider types and interfaces
|
|
3
3
|
* Defines the contract for all transcription providers
|
|
4
4
|
*/
|
|
5
|
+
/**
|
|
6
|
+
* A single transcription segment (one diarized turn, or one segment-granularity unit).
|
|
7
|
+
*
|
|
8
|
+
* Timestamps are integer milliseconds — normalized at the SDK boundary so consumers
|
|
9
|
+
* can pass straight into the wovin annotation schema (see docs/annotation-schema.md).
|
|
10
|
+
*
|
|
11
|
+
* `diarization` is the anonymous, per-recording diarization label as returned by
|
|
12
|
+
* the provider (Mistral: `"speaker_1"`, Deepgram: `0`, AssemblyAI: `"A"`, …).
|
|
13
|
+
* It is NOT a real-world speaker identity — that's a separate (future) `speakerId` field.
|
|
14
|
+
*
|
|
15
|
+
* When `mergeTranscriptionResults` joins multiple chunks, `diarization` is rewritten
|
|
16
|
+
* as `` `chunk${index}/${value}` `` because per-chunk labels are not comparable
|
|
17
|
+
* across chunks.
|
|
18
|
+
*/
|
|
19
|
+
export interface TranscriptSegment {
|
|
20
|
+
startMs: number;
|
|
21
|
+
endMs: number;
|
|
22
|
+
text: string;
|
|
23
|
+
diarization?: string | number;
|
|
24
|
+
}
|
|
5
25
|
/**
|
|
6
26
|
* Result object returned from transcription operations
|
|
7
27
|
* Contains the transcribed text and optional provider-specific metadata
|
|
@@ -15,8 +35,10 @@ export interface TranscriptionResult {
|
|
|
15
35
|
error?: string;
|
|
16
36
|
/** Confidence score of the transcription (0-1) */
|
|
17
37
|
confidence?: number;
|
|
18
|
-
/** Word-level data
|
|
38
|
+
/** Word-level data — populated only when granularity='word' or the provider returns it. Left undefined otherwise (not `[]`). */
|
|
19
39
|
words?: any[];
|
|
40
|
+
/** Segment-level data — populated when granularity='segment' (or the provider returns it). */
|
|
41
|
+
segments?: TranscriptSegment[];
|
|
20
42
|
/** Duration of audio in seconds */
|
|
21
43
|
duration?: number;
|
|
22
44
|
/** Detected or specified language code */
|
|
@@ -63,6 +85,13 @@ export interface TranscribeParams {
|
|
|
63
85
|
diarize?: boolean;
|
|
64
86
|
/** Timestamp granularity for transcription (Mistral-specific) */
|
|
65
87
|
timestampGranularity?: 'segment' | 'word';
|
|
88
|
+
/**
|
|
89
|
+
* Context biasing terms (Voxtral/Mistral-specific).
|
|
90
|
+
* Up to `VOXTRAL_LIMITS.maxContextBiasingTerms` (100) custom-vocabulary terms
|
|
91
|
+
* passed to the Voxtral transcribe endpoint as `context_bias[]` form fields.
|
|
92
|
+
* Ignored by non-Mistral providers.
|
|
93
|
+
*/
|
|
94
|
+
contextBias?: string[];
|
|
66
95
|
/** Path to model file (Whisper-specific) */
|
|
67
96
|
modelPath?: string;
|
|
68
97
|
/** Output directory for results (Whisper-specific) */
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"providers.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/providers.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAUH;;;GAGG;AACH,MAAM,WAAW,mBAAmB;IAClC,mCAAmC;IACnC,IAAI,EAAE,MAAM,CAAA;IACZ,sEAAsE;IACtE,WAAW,CAAC,EAAE,GAAG,CAAA;IACjB,4CAA4C;IAC5C,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,kDAAkD;IAClD,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,
|
|
1
|
+
{"version":3,"file":"providers.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/providers.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAUH;;;;;;;;;;;;;GAaG;AACH,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE,MAAM,CAAA;IACf,KAAK,EAAE,MAAM,CAAA;IACb,IAAI,EAAE,MAAM,CAAA;IACZ,WAAW,CAAC,EAAE,MAAM,GAAG,MAAM,CAAA;CAG9B;AAED;;;GAGG;AACH,MAAM,WAAW,mBAAmB;IAClC,mCAAmC;IACnC,IAAI,EAAE,MAAM,CAAA;IACZ,sEAAsE;IACtE,WAAW,CAAC,EAAE,GAAG,CAAA;IACjB,4CAA4C;IAC5C,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,kDAAkD;IAClD,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,gIAAgI;IAChI,KAAK,CAAC,EAAE,GAAG,EAAE,CAAA;IACb,8FAA8F;IAC9F,QAAQ,CAAC,EAAE,iBAAiB,EAAE,CAAA;IAC9B,mCAAmC;IACnC,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,0CAA0C;IAC1C,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,6DAA6D;IAC7D,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AAED;;;GAGG;AACH,MAAM,WAAW,qBAAqB;IACpC,+BAA+B;IAC/B,IAAI,EAAE,MAAM,CAAA;IACZ,+DAA+D;IAC/D,mBAAmB,CAAC,EAAE,MAAM,CAAA;IAC5B;;;;OAIG;IACH,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAA;CACnE;AAED;;;GAGG;AACH,MAAM,WAAW,gBAAgB;IAC/B,2CAA2C;IAC3C,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,iCAAiC;IACjC,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,gEAAgE;IAChE,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,yDAAyD;IACzD,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,yDAAyD;IACzD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,qDAAqD;IACrD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,oDAAoD;IACpD,OAAO,CAAC,EAAE,OAAO,CAAA;IACjB,iEAAiE;IACjE,oBAAoB,CAAC,EAAE,SAAS,GAAG,MAAM,CAAA;IACzC;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,EAAE,CAAA;IACtB,4CAA4C;IAC5C,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,sDAAsD;IACtD,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,oCAAoC;IACpC,MAAM,CAAC,EAAE,GAAG,CAAA;CACb;AAED;;GAEG;AACH,MAAM,MAAM,YAAY,GAAG,SAAS,GAAG,SAAS,GAAG,SAAS,CAAA;AAE5D;;;;;GAKG;AACH,wBAAgB,cAAc,CAAC,YAAY,EAAE,YAAY,EAAE,MAAM,CAAC,EAAE,GAAG,GAAG,qBAAqB,CAW9F;AASD;;;GAGG;AACH,qBAAa,eAAgB,YAAW,qBAAqB;IAC3D,IAAI,SAAY;IAEhB,OAAO,CAAC,QAAQ,CAAQ;IAExB,MAAM,CAAC,QAAQ;;;;;;;;;;;;;;;MAkBd;gBAEW,MAAM,CAAC,EAAE,GAAG;IAKlB,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC;YA+F1D,4BAA4B;CAkC3C;AAED;;;GAGG;AACH,eAAO,MAAM,cAAc;IACzB,2EAA2E;;IAE3E,4CAA4C;;IAE5C,uCAAuC;;CAExC,CAAA;AAED,qBAAa,eAAgB,YAAW,qBAAqB;IAC3D,IAAI,SAAY;IAChB,mBAAmB,SAAqC;IAElD,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC;CAuIzE;AAED;;;GAGG;AACH,qBAAa,eAAgB,YAAW,qBAAqB;IAC3D,IAAI,SAAY;IAEV,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC;CAkGzE"}
|
|
@@ -27,6 +27,11 @@ export interface TranscribeOptions {
|
|
|
27
27
|
diarize?: boolean;
|
|
28
28
|
/** Timestamp granularity: 'word' | 'segment' (default: 'segment' when diarize=true, disabled if language set) */
|
|
29
29
|
timestamps?: 'word' | 'segment';
|
|
30
|
+
/**
|
|
31
|
+
* Context biasing terms — up to `VOXTRAL_LIMITS.maxContextBiasingTerms` (100)
|
|
32
|
+
* custom-vocabulary entries passed to Voxtral as `context_bias[]`. Mistral only.
|
|
33
|
+
*/
|
|
34
|
+
contextBias?: string[];
|
|
30
35
|
/** Auto-split long audio (default: true). For URLs, detects duration first. */
|
|
31
36
|
autoSplit?: boolean;
|
|
32
37
|
/** Output directory for split segments (default: system temp) */
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"transcribe.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/transcribe.ts"],"names":[],"mappings":"AAAA;;GAEG;AASH,OAAO,EAA6B,KAAK,yBAAyB,EAAE,MAAM,2BAA2B,CAAA;AAErG,kDAAkD;AAClD,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAA;IAC3B,IAAI,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAA;IAC3B,KAAK,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAA;CAC7B;AAQD,MAAM,WAAW,iBAAiB;IAChC,yBAAyB;IACzB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,iCAAiC;IACjC,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,gEAAgE;IAChE,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,yEAAyE;IACzE,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,mFAAmF;IACnF,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,kDAAkD;IAClD,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,iDAAiD;IACjD,OAAO,CAAC,EAAE,OAAO,CAAA;IACjB,iHAAiH;IACjH,UAAU,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;IAC/B,+EAA+E;IAC/E,SAAS,CAAC,EAAE,OAAO,CAAA;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,uCAAuC;IACvC,MAAM,CAAC,EAAE,gBAAgB,CAAA;IACzB,mCAAmC;IACnC,OAAO,CAAC,EAAE,OAAO,CAAA;CAClB;AAED,MAAM,WAAW,wBAAwB;IACvC,sBAAsB;IACtB,MAAM,EAAE,MAAM,CAAA;IACd,mDAAmD;IACnD,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AA6FD;;;;;;;;;;;;;;;;;;;GAmBG;AACH,iEAAiE;AACjE,MAAM,WAAW,kBAAkB;IACjC,UAAU,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,yBAAyB,CAAC,CAAA;CAC3E;AAED,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,wBAAwB,GAAG,kBAAkB,
|
|
1
|
+
{"version":3,"file":"transcribe.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/transcribe.ts"],"names":[],"mappings":"AAAA;;GAEG;AASH,OAAO,EAA6B,KAAK,yBAAyB,EAAE,MAAM,2BAA2B,CAAA;AAErG,kDAAkD;AAClD,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAA;IAC3B,IAAI,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAA;IAC3B,KAAK,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAA;CAC7B;AAQD,MAAM,WAAW,iBAAiB;IAChC,yBAAyB;IACzB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,iCAAiC;IACjC,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,gEAAgE;IAChE,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,yEAAyE;IACzE,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,mFAAmF;IACnF,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,kDAAkD;IAClD,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,iDAAiD;IACjD,OAAO,CAAC,EAAE,OAAO,CAAA;IACjB,iHAAiH;IACjH,UAAU,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;IAC/B;;;OAGG;IACH,WAAW,CAAC,EAAE,MAAM,EAAE,CAAA;IACtB,+EAA+E;IAC/E,SAAS,CAAC,EAAE,OAAO,CAAA;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,uCAAuC;IACvC,MAAM,CAAC,EAAE,gBAAgB,CAAA;IACzB,mCAAmC;IACnC,OAAO,CAAC,EAAE,OAAO,CAAA;CAClB;AAED,MAAM,WAAW,wBAAwB;IACvC,sBAAsB;IACtB,MAAM,EAAE,MAAM,CAAA;IACd,mDAAmD;IACnD,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AA6FD;;;;;;;;;;;;;;;;;;;GAmBG;AACH,iEAAiE;AACjE,MAAM,WAAW,kBAAkB;IACjC,UAAU,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,yBAAyB,CAAC,CAAA;CAC3E;AAED,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,wBAAwB,GAAG,kBAAkB,CAmL7F;AAED,+BAA+B;AAC/B,eAAO,MAAM,UAAU,iCAA2B,CAAA"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@wovin/tranz",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Audio transcription library with provider support and auto-splitting",
|
|
6
6
|
"author": "gotjoshua @gotjoshua",
|
|
@@ -12,29 +12,30 @@
|
|
|
12
12
|
"directory": "packages/@wovin/tranz"
|
|
13
13
|
},
|
|
14
14
|
"bugs": "https://gitlab.com/onezoomin/ztax/tranz/-/issues",
|
|
15
|
-
"main": "./dist/index.
|
|
16
|
-
"module": "./dist/index.
|
|
15
|
+
"main": "./dist/index.js",
|
|
16
|
+
"module": "./dist/index.js",
|
|
17
17
|
"types": "./dist/index.d.ts",
|
|
18
18
|
"exports": {
|
|
19
19
|
".": {
|
|
20
|
-
"import": "./dist/index.
|
|
20
|
+
"import": "./dist/index.js",
|
|
21
21
|
"types": "./dist/index.d.ts"
|
|
22
22
|
},
|
|
23
23
|
"./providers": {
|
|
24
|
-
"import": "./dist/providers.
|
|
24
|
+
"import": "./dist/providers.js",
|
|
25
25
|
"types": "./dist/providers.d.ts"
|
|
26
26
|
},
|
|
27
27
|
"./audio": {
|
|
28
|
-
"import": "./dist/audio.
|
|
28
|
+
"import": "./dist/audio.js",
|
|
29
29
|
"types": "./dist/audio.d.ts"
|
|
30
30
|
},
|
|
31
31
|
"./realtime": {
|
|
32
|
-
"import": "./dist/realtime.
|
|
32
|
+
"import": "./dist/realtime.js",
|
|
33
33
|
"types": "./dist/realtime.d.ts"
|
|
34
34
|
}
|
|
35
35
|
},
|
|
36
36
|
"files": [
|
|
37
|
-
"./dist/"
|
|
37
|
+
"./dist/",
|
|
38
|
+
"./src/"
|
|
38
39
|
],
|
|
39
40
|
"publishConfig": {
|
|
40
41
|
"access": "public"
|
|
@@ -72,6 +73,7 @@
|
|
|
72
73
|
"dev:code": "tsup --watch",
|
|
73
74
|
"dev:types": "tsc --emitDeclarationOnly --declaration --watch",
|
|
74
75
|
"clean": "rm -rf .turbo && rm -rf node_modules && rm -rf dist",
|
|
76
|
+
"test": "tsx --test test/*.test.ts",
|
|
75
77
|
"test:realtime": "tsx test/realtime-transcription.ts",
|
|
76
78
|
"test:realtime-api": "tsx test/realtime-api-test.ts"
|
|
77
79
|
}
|
package/src/audio.ts
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @wovin/tranz/audio - Audio utilities for splitting and merging
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
export {
|
|
6
|
+
autoSplitAudio,
|
|
7
|
+
analyzeSplitPoints,
|
|
8
|
+
detectSilenceRegions,
|
|
9
|
+
getAudioDuration,
|
|
10
|
+
findOptimalSplitPoints,
|
|
11
|
+
splitAudioAtPoints,
|
|
12
|
+
DEFAULT_SPLIT_CONFIG,
|
|
13
|
+
type SplitConfig,
|
|
14
|
+
type SilenceRegion,
|
|
15
|
+
type SplitPoint,
|
|
16
|
+
type AudioSegment,
|
|
17
|
+
type SplitAnalysis,
|
|
18
|
+
} from './utils/audio/split.ts'
|
|
19
|
+
|
|
20
|
+
export {
|
|
21
|
+
mergeTranscriptionResults,
|
|
22
|
+
formatMergedText,
|
|
23
|
+
type MergedTranscriptionResult,
|
|
24
|
+
type WordData,
|
|
25
|
+
} from './utils/audio/merge-results.ts'
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @wovin/tranz - Audio transcription library
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
// Transcription providers
|
|
6
|
+
export {
|
|
7
|
+
createProvider,
|
|
8
|
+
MistralProvider,
|
|
9
|
+
WhisperProvider,
|
|
10
|
+
GreenPTProvider,
|
|
11
|
+
VOXTRAL_LIMITS,
|
|
12
|
+
type ProviderName,
|
|
13
|
+
type TranscribeParams,
|
|
14
|
+
type TranscriptionResult,
|
|
15
|
+
type TranscriptionProvider,
|
|
16
|
+
type TranscriptSegment,
|
|
17
|
+
} from './utils/transcription/providers.ts'
|
|
18
|
+
|
|
19
|
+
// Audio utilities
|
|
20
|
+
export {
|
|
21
|
+
autoSplitAudio,
|
|
22
|
+
analyzeSplitPoints,
|
|
23
|
+
detectSilenceRegions,
|
|
24
|
+
getAudioDuration,
|
|
25
|
+
findOptimalSplitPoints,
|
|
26
|
+
splitAudioAtPoints,
|
|
27
|
+
DEFAULT_SPLIT_CONFIG,
|
|
28
|
+
type SplitConfig,
|
|
29
|
+
type SilenceRegion,
|
|
30
|
+
type SplitPoint,
|
|
31
|
+
type AudioSegment,
|
|
32
|
+
type SplitAnalysis,
|
|
33
|
+
} from './utils/audio/split.ts'
|
|
34
|
+
|
|
35
|
+
// Result merging
|
|
36
|
+
export {
|
|
37
|
+
mergeTranscriptionResults,
|
|
38
|
+
formatMergedText,
|
|
39
|
+
type MergedTranscriptionResult,
|
|
40
|
+
type WordData,
|
|
41
|
+
type AudioChunk,
|
|
42
|
+
} from './utils/audio/merge-results.ts'
|
|
43
|
+
|
|
44
|
+
// Transcription formatting
|
|
45
|
+
export {
|
|
46
|
+
formatTranscriptWithPauses,
|
|
47
|
+
formatTranscriptAsMarkdown,
|
|
48
|
+
type FormatMarkdownOptions,
|
|
49
|
+
} from './utils/transcription/format.ts'
|
|
50
|
+
|
|
51
|
+
// MIME type detection
|
|
52
|
+
export { detectAudioMimeType } from './utils/transcription/mime-detection.ts'
|
|
53
|
+
|
|
54
|
+
// Simple high-level API
|
|
55
|
+
export {
|
|
56
|
+
createMistralTranscriber,
|
|
57
|
+
transcribe,
|
|
58
|
+
type TranscribeOptions,
|
|
59
|
+
type MistralTranscriberConfig,
|
|
60
|
+
type MistralTranscriber,
|
|
61
|
+
} from './utils/transcription/transcribe.ts'
|
package/src/providers.ts
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @wovin/tranz/providers - Transcription provider implementations
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
export {
|
|
6
|
+
createProvider,
|
|
7
|
+
MistralProvider,
|
|
8
|
+
WhisperProvider,
|
|
9
|
+
GreenPTProvider,
|
|
10
|
+
VOXTRAL_LIMITS,
|
|
11
|
+
type ProviderName,
|
|
12
|
+
type TranscribeParams,
|
|
13
|
+
type TranscriptionResult,
|
|
14
|
+
type TranscriptionProvider,
|
|
15
|
+
type TranscriptSegment,
|
|
16
|
+
} from './utils/transcription/providers.ts'
|
|
17
|
+
|
|
18
|
+
export {
|
|
19
|
+
createMistralTranscriber,
|
|
20
|
+
transcribe,
|
|
21
|
+
type TranscribeOptions,
|
|
22
|
+
type MistralTranscriberConfig,
|
|
23
|
+
} from './utils/transcription/transcribe.ts'
|
package/src/realtime.ts
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Realtime transcription API
|
|
3
|
+
*
|
|
4
|
+
* This module provides a simple, event-driven interface for streaming audio
|
|
5
|
+
* transcription using Mistral's realtime WebSocket API.
|
|
6
|
+
*
|
|
7
|
+
* **Node.js only** - Browser support is currently disabled due to WebSocket
|
|
8
|
+
* authentication limitations with Mistral API.
|
|
9
|
+
*
|
|
10
|
+
* @example Node.js
|
|
11
|
+
* ```typescript
|
|
12
|
+
* import {
|
|
13
|
+
* createRealtimeTranscriber,
|
|
14
|
+
* captureAudioFromMicrophone,
|
|
15
|
+
* } from '@wovin/tranz/realtime'
|
|
16
|
+
*
|
|
17
|
+
* const transcriber = createRealtimeTranscriber({
|
|
18
|
+
* apiKey: process.env.MISTRAL_API_KEY,
|
|
19
|
+
* })
|
|
20
|
+
*
|
|
21
|
+
* const { stream, stop } = await captureAudioFromMicrophone(16000)
|
|
22
|
+
*
|
|
23
|
+
* try {
|
|
24
|
+
* for await (const event of transcriber.transcribe(stream)) {
|
|
25
|
+
* if (event.type === 'transcription.text.delta') {
|
|
26
|
+
* process.stdout.write(event.text)
|
|
27
|
+
* } else if (event.type === 'transcription.done') {
|
|
28
|
+
* console.log('\nComplete!')
|
|
29
|
+
* break
|
|
30
|
+
* }
|
|
31
|
+
* }
|
|
32
|
+
* } finally {
|
|
33
|
+
* stop()
|
|
34
|
+
* }
|
|
35
|
+
* ```
|
|
36
|
+
*
|
|
37
|
+
* @module @wovin/tranz/realtime
|
|
38
|
+
*/
|
|
39
|
+
|
|
40
|
+
export {
|
|
41
|
+
createRealtimeTranscriber,
|
|
42
|
+
captureAudioFromMicrophone,
|
|
43
|
+
captureAudioFromBrowser,
|
|
44
|
+
AudioEncoding,
|
|
45
|
+
type RealtimeEvent,
|
|
46
|
+
type RealtimeConfig,
|
|
47
|
+
type RealtimeTranscriber,
|
|
48
|
+
type TranscribeOptions,
|
|
49
|
+
type AudioFormat,
|
|
50
|
+
type AudioCaptureResult,
|
|
51
|
+
type SessionCreatedEvent,
|
|
52
|
+
type SessionUpdatedEvent,
|
|
53
|
+
type TranscriptionTextDeltaEvent,
|
|
54
|
+
type TranscriptionLanguageEvent,
|
|
55
|
+
type TranscriptionSegmentEvent,
|
|
56
|
+
type TranscriptionDoneEvent,
|
|
57
|
+
type ErrorEvent,
|
|
58
|
+
} from "./utils/transcription/realtime.js";
|