@wovin/tranz 0.1.35 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -245,6 +245,16 @@ var MistralProvider = class {
245
245
  if (timestampGranularity) {
246
246
  formData.append("timestamp_granularities", timestampGranularity);
247
247
  }
248
+ if (params.contextBias && params.contextBias.length > 0) {
249
+ if (params.contextBias.length > VOXTRAL_LIMITS.maxContextBiasingTerms) {
250
+ throw new Error(
251
+ `contextBias has ${params.contextBias.length} terms; Voxtral limit is ${VOXTRAL_LIMITS.maxContextBiasingTerms}`
252
+ );
253
+ }
254
+ for (const term of params.contextBias) {
255
+ formData.append("context_bias[]", term);
256
+ }
257
+ }
248
258
  const response = await fetch("https://api.mistral.ai/v1/audio/transcriptions", {
249
259
  method: "POST",
250
260
  headers: {
@@ -260,14 +270,27 @@ var MistralProvider = class {
260
270
  if (!result?.text) {
261
271
  return { text: "", error: "No transcription returned", rawResponse: result };
262
272
  }
263
- const words = result.words || result.segments?.flatMap((seg) => seg.words || []);
273
+ const segments = Array.isArray(result.segments) && result.segments.length > 0 ? result.segments.map((seg) => ({
274
+ startMs: Math.round((seg.start ?? 0) * 1e3),
275
+ endMs: Math.round((seg.end ?? 0) * 1e3),
276
+ text: seg.text ?? "",
277
+ ...seg.speaker_id !== void 0 ? { diarization: seg.speaker_id } : {}
278
+ })) : void 0;
279
+ let words;
280
+ if (Array.isArray(result.words) && result.words.length > 0) {
281
+ words = result.words;
282
+ } else if (Array.isArray(result.segments)) {
283
+ const nested = result.segments.flatMap((seg) => seg.words ?? []);
284
+ if (nested.length > 0) words = nested;
285
+ }
264
286
  const duration = result.usage?.prompt_audio_seconds;
265
287
  return {
266
288
  text: result.text,
267
289
  language: result.language ?? params.language,
268
290
  model: result.model,
269
291
  duration,
270
- words,
292
+ ...words ? { words } : {},
293
+ ...segments ? { segments } : {},
271
294
  rawResponse: result
272
295
  };
273
296
  }
@@ -620,6 +643,10 @@ async function autoSplitAudio(audioPath, outputDir, config = {}) {
620
643
  }
621
644
 
622
645
  // src/utils/audio/merge-results.ts
646
+ function prefixChunkLabel(chunkIndex, value) {
647
+ if (value === void 0) return void 0;
648
+ return `chunk${chunkIndex}/${String(value)}`;
649
+ }
623
650
  function mergeTranscriptionResults(results, segments) {
624
651
  if (results.length === 0) {
625
652
  return {
@@ -628,10 +655,7 @@ function mergeTranscriptionResults(results, segments) {
628
655
  };
629
656
  }
630
657
  if (results.length === 1) {
631
- return {
632
- ...results[0],
633
- totalSegments: 1
634
- };
658
+ return results[0];
635
659
  }
636
660
  const errors = results.map((r, i) => r.error ? `Segment ${i}: ${r.error}` : null).filter(Boolean);
637
661
  if (errors.length > 0) {
@@ -652,12 +676,26 @@ function mergeTranscriptionResults(results, segments) {
652
676
  start: (word.start || 0) + segment.startSec,
653
677
  end: (word.end || 0) + segment.startSec,
654
678
  confidence: word.confidence,
655
- speaker: word.speaker
679
+ speaker: prefixChunkLabel(i, word.speaker)
680
+ });
681
+ }
682
+ }
683
+ const mergedSegments = [];
684
+ for (let i = 0; i < results.length; i++) {
685
+ const result = results[i];
686
+ const chunkOffsetMs = Math.round(segments[i].startSec * 1e3);
687
+ if (!result.segments) continue;
688
+ for (const seg of result.segments) {
689
+ mergedSegments.push({
690
+ startMs: seg.startMs + chunkOffsetMs,
691
+ endMs: seg.endMs + chunkOffsetMs,
692
+ text: seg.text,
693
+ ...seg.diarization !== void 0 ? { diarization: prefixChunkLabel(i, seg.diarization) } : {}
656
694
  });
657
695
  }
658
696
  }
659
697
  const totalDuration = segments.reduce((sum, seg) => sum + seg.durationSec, 0);
660
- const segmentMeta = results.map((r, i) => ({
698
+ const audioChunks = results.map((r, i) => ({
661
699
  index: i,
662
700
  startSec: segments[i].startSec,
663
701
  endSec: segments[i].endSec,
@@ -665,24 +703,23 @@ function mergeTranscriptionResults(results, segments) {
665
703
  }));
666
704
  const mergedRawResponse = {
667
705
  merged: true,
668
- segmentCount: results.length,
669
- segments: results.map((r, i) => ({
706
+ chunkCount: results.length,
707
+ chunks: results.map((r, i) => ({
670
708
  index: i,
671
709
  startSec: segments[i].startSec,
672
710
  rawResponse: r.rawResponse
673
- })),
674
- words: mergedWords
711
+ }))
675
712
  };
676
713
  const firstResult = results[0];
677
714
  return {
678
715
  text: mergedText,
679
- words: mergedWords,
680
716
  duration: totalDuration,
681
717
  language: firstResult.language,
682
718
  model: firstResult.model,
683
719
  rawResponse: mergedRawResponse,
684
- segments: segmentMeta,
685
- totalSegments: results.length
720
+ audioChunks,
721
+ ...mergedWords.length > 0 ? { words: mergedWords } : {},
722
+ ...mergedSegments.length > 0 ? { segments: mergedSegments } : {}
686
723
  };
687
724
  }
688
725
 
@@ -784,6 +821,7 @@ function createMistralTranscriber(config) {
784
821
  duration: knownDuration,
785
822
  language,
786
823
  model = defaultModel,
824
+ contextBias,
787
825
  diarize = true,
788
826
  timestamps = language ? void 0 : "segment",
789
827
  autoSplit,
@@ -803,9 +841,10 @@ function createMistralTranscriber(config) {
803
841
  model,
804
842
  language,
805
843
  diarize,
806
- timestampGranularity: timestamps
844
+ timestampGranularity: timestamps,
845
+ contextBias
807
846
  });
808
- return { ...result, totalSegments: 1 };
847
+ return result;
809
848
  }
810
849
  if (audioUrl) {
811
850
  if (autoSplit === false) {
@@ -816,9 +855,10 @@ function createMistralTranscriber(config) {
816
855
  model,
817
856
  language,
818
857
  diarize,
819
- timestampGranularity: timestamps
858
+ timestampGranularity: timestamps,
859
+ contextBias
820
860
  });
821
- return { ...result2, totalSegments: 1 };
861
+ return result2;
822
862
  }
823
863
  let duration2 = knownDuration;
824
864
  if (duration2 === void 0) {
@@ -840,9 +880,10 @@ function createMistralTranscriber(config) {
840
880
  model,
841
881
  language,
842
882
  diarize,
843
- timestampGranularity: timestamps
883
+ timestampGranularity: timestamps,
884
+ contextBias
844
885
  });
845
- return { ...result2, totalSegments: 1 };
886
+ return result2;
846
887
  }
847
888
  log.info(`Downloading URL to temp file for processing...`);
848
889
  const outDir2 = splitOutputDir || path3.join(os.tmpdir(), `tranz-${Date.now()}`);
@@ -855,6 +896,7 @@ function createMistralTranscriber(config) {
855
896
  model,
856
897
  diarize,
857
898
  timestamps,
899
+ contextBias,
858
900
  autoSplit: true,
859
901
  splitOutputDir: outDir2,
860
902
  logger: customLogger,
@@ -881,9 +923,10 @@ function createMistralTranscriber(config) {
881
923
  model,
882
924
  language,
883
925
  diarize,
884
- timestampGranularity: timestamps
926
+ timestampGranularity: timestamps,
927
+ contextBias
885
928
  });
886
- return { ...result, totalSegments: 1 };
929
+ return result;
887
930
  }
888
931
  log.info(`Duration ${duration.toFixed(1)}s > ${maxDuration}s, splitting audio...`);
889
932
  const outDir = splitOutputDir || path3.join(os.tmpdir(), `tranz-split-${Date.now()}`);
@@ -902,7 +945,8 @@ function createMistralTranscriber(config) {
902
945
  model,
903
946
  language,
904
947
  diarize,
905
- timestampGranularity: timestamps
948
+ timestampGranularity: timestamps,
949
+ contextBias
906
950
  });
907
951
  results.push(result);
908
952
  }
@@ -11,21 +11,23 @@ export interface WordData {
11
11
  start: number;
12
12
  end: number;
13
13
  confidence?: number;
14
- speaker?: string;
14
+ speaker?: string | number;
15
15
  }
16
16
  /**
17
- * Merged transcription result with segment information
17
+ * Metadata describing one audio chunk in an auto-split + merge run.
18
+ */
19
+ export interface AudioChunk {
20
+ index: number;
21
+ startSec: number;
22
+ endSec: number;
23
+ text: string;
24
+ }
25
+ /**
26
+ * Merged transcription result with chunk-level metadata.
18
27
  */
19
28
  export interface MergedTranscriptionResult extends TranscriptionResult {
20
- /** Segment metadata for reference */
21
- segments?: {
22
- index: number;
23
- startSec: number;
24
- endSec: number;
25
- text: string;
26
- }[];
27
- /** Total segments that were merged */
28
- totalSegments?: number;
29
+ /** Audio chunks that were transcribed independently and merged. Absent when no split happened. */
30
+ audioChunks?: AudioChunk[];
29
31
  }
30
32
  /**
31
33
  * Merge multiple transcription results from audio segments into one
@@ -40,7 +42,7 @@ export declare function mergeTranscriptionResults(results: TranscriptionResult[]
40
42
  * Format merged results with optional segment markers in the text
41
43
  *
42
44
  * @param result - Merged transcription result
43
- * @param includeMarkers - Whether to include [Segment N] markers
45
+ * @param includeMarkers - Whether to include [Chunk N] markers
44
46
  * @returns Formatted text
45
47
  */
46
48
  export declare function formatMergedText(result: MergedTranscriptionResult, includeMarkers?: boolean): string;
@@ -1 +1 @@
1
- {"version":3,"file":"merge-results.d.ts","sourceRoot":"","sources":["../../../src/utils/audio/merge-results.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,+BAA+B,CAAA;AACxE,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAE9C;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,MAAM,CAAA;IACb,GAAG,EAAE,MAAM,CAAA;IACX,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,OAAO,CAAC,EAAE,MAAM,CAAA;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,yBAA0B,SAAQ,mBAAmB;IACpE,qCAAqC;IACrC,QAAQ,CAAC,EAAE;QACT,KAAK,EAAE,MAAM,CAAA;QACb,QAAQ,EAAE,MAAM,CAAA;QAChB,MAAM,EAAE,MAAM,CAAA;QACd,IAAI,EAAE,MAAM,CAAA;KACb,EAAE,CAAA;IACH,sCAAsC;IACtC,aAAa,CAAC,EAAE,MAAM,CAAA;CACvB;AAED;;;;;;;GAOG;AACH,wBAAgB,yBAAyB,CACvC,OAAO,EAAE,mBAAmB,EAAE,EAC9B,QAAQ,EAAE,YAAY,EAAE,GACvB,yBAAyB,CAqF3B;AAED;;;;;;GAMG;AACH,wBAAgB,gBAAgB,CAC9B,MAAM,EAAE,yBAAyB,EACjC,cAAc,GAAE,OAAe,GAC9B,MAAM,CAeR"}
1
+ {"version":3,"file":"merge-results.d.ts","sourceRoot":"","sources":["../../../src/utils/audio/merge-results.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,mBAAmB,EAAqB,MAAM,+BAA+B,CAAA;AAC3F,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAE9C;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,MAAM,CAAA;IACb,GAAG,EAAE,MAAM,CAAA;IACX,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,OAAO,CAAC,EAAE,MAAM,GAAG,MAAM,CAAA;CAC1B;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,CAAA;IACb,QAAQ,EAAE,MAAM,CAAA;IAChB,MAAM,EAAE,MAAM,CAAA;IACd,IAAI,EAAE,MAAM,CAAA;CACb;AAED;;GAEG;AACH,MAAM,WAAW,yBAA0B,SAAQ,mBAAmB;IACpE,kGAAkG;IAClG,WAAW,CAAC,EAAE,UAAU,EAAE,CAAA;CAC3B;AAYD;;;;;;;GAOG;AACH,wBAAgB,yBAAyB,CACvC,OAAO,EAAE,mBAAmB,EAAE,EAC9B,QAAQ,EAAE,YAAY,EAAE,GACvB,yBAAyB,CAmG3B;AAED;;;;;;GAMG;AACH,wBAAgB,gBAAgB,CAC9B,MAAM,EAAE,yBAAyB,EACjC,cAAc,GAAE,OAAe,GAC9B,MAAM,CAeR"}
@@ -11,4 +11,31 @@ export declare function formatTranscriptWithPauses(transcript: string, words: Ar
11
11
  end: number;
12
12
  confidence: number;
13
13
  }>, shortPauseThreshold?: number, longPauseThreshold?: number): string;
14
+ import type { TranscriptionResult } from './providers.ts';
15
+ import type { MergedTranscriptionResult } from '../audio/merge-results.ts';
16
+ export interface FormatMarkdownOptions {
17
+ /** Silence gap (seconds) that ends a paragraph. Default 1.5. */
18
+ gapSec?: number;
19
+ /** Include `· Speaker N` in each paragraph header when diarization labels are present. Default true. */
20
+ speakerLabel?: boolean;
21
+ /** Prepend an `# <source>` title + bulleted metadata block. Default false. */
22
+ includeHeader?: boolean;
23
+ /** Source filename to use in the `# ` title and `Source:` line (when includeHeader=true). */
24
+ source?: string;
25
+ /** Total audio duration in seconds — used for `Duration:` line and for picking mm:ss vs h:mm:ss formatting. */
26
+ durationSec?: number;
27
+ }
28
+ /**
29
+ * Format a transcription result as readable Markdown with timestamped paragraphs.
30
+ *
31
+ * Groups adjacent segments into paragraphs, starting a new paragraph on either
32
+ * a silence gap ≥ `gapSec` OR a change in diarization label. Each paragraph is
33
+ * preceded by `**[mm:ss · Speaker N]**` (or `**[h:mm:ss · Speaker N]**` for
34
+ * audio ≥ 1h). The speaker suffix is dropped when no diarization labels are
35
+ * present or all segments share the same label.
36
+ *
37
+ * If `segments` is missing/empty, falls back to emitting `result.text` as a
38
+ * single (un-timestamped) paragraph.
39
+ */
40
+ export declare function formatTranscriptAsMarkdown(result: TranscriptionResult | MergedTranscriptionResult, opts?: FormatMarkdownOptions): string;
14
41
  //# sourceMappingURL=format.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"format.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/format.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,wBAAgB,0BAA0B,CACzC,UAAU,EAAE,MAAM,EAClB,KAAK,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,CAAA;CAAE,CAAC,EAC9E,mBAAmB,SAAM,EACzB,kBAAkB,SAAM,GACtB,MAAM,CAgDR"}
1
+ {"version":3,"file":"format.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/format.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,wBAAgB,0BAA0B,CACzC,UAAU,EAAE,MAAM,EAClB,KAAK,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,CAAA;CAAE,CAAC,EAC9E,mBAAmB,SAAM,EACzB,kBAAkB,SAAM,GACtB,MAAM,CAgDR;AAED,OAAO,KAAK,EAAE,mBAAmB,EAAqB,MAAM,gBAAgB,CAAA;AAC5E,OAAO,KAAK,EAAE,yBAAyB,EAAE,MAAM,2BAA2B,CAAA;AAE1E,MAAM,WAAW,qBAAqB;IACrC,gEAAgE;IAChE,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,wGAAwG;IACxG,YAAY,CAAC,EAAE,OAAO,CAAA;IACtB,8EAA8E;IAC9E,aAAa,CAAC,EAAE,OAAO,CAAA;IACvB,6FAA6F;IAC7F,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,+GAA+G;IAC/G,WAAW,CAAC,EAAE,MAAM,CAAA;CACpB;AAsBD;;;;;;;;;;;GAWG;AACH,wBAAgB,0BAA0B,CACzC,MAAM,EAAE,mBAAmB,GAAG,yBAAyB,EACvD,IAAI,GAAE,qBAA0B,GAC9B,MAAM,CAuFR"}
@@ -2,6 +2,26 @@
2
2
  * Transcription provider types and interfaces
3
3
  * Defines the contract for all transcription providers
4
4
  */
5
+ /**
6
+ * A single transcription segment (one diarized turn, or one segment-granularity unit).
7
+ *
8
+ * Timestamps are integer milliseconds — normalized at the SDK boundary so consumers
9
+ * can pass straight into the wovin annotation schema (see docs/annotation-schema.md).
10
+ *
11
+ * `diarization` is the anonymous, per-recording diarization label as returned by
12
+ * the provider (Mistral: `"speaker_1"`, Deepgram: `0`, AssemblyAI: `"A"`, …).
13
+ * It is NOT a real-world speaker identity — that's a separate (future) `speakerId` field.
14
+ *
15
+ * When `mergeTranscriptionResults` joins multiple chunks, `diarization` is rewritten
16
+ * as `` `chunk${index}/${value}` `` because per-chunk labels are not comparable
17
+ * across chunks.
18
+ */
19
+ export interface TranscriptSegment {
20
+ startMs: number;
21
+ endMs: number;
22
+ text: string;
23
+ diarization?: string | number;
24
+ }
5
25
  /**
6
26
  * Result object returned from transcription operations
7
27
  * Contains the transcribed text and optional provider-specific metadata
@@ -15,8 +35,10 @@ export interface TranscriptionResult {
15
35
  error?: string;
16
36
  /** Confidence score of the transcription (0-1) */
17
37
  confidence?: number;
18
- /** Word-level data (timing, confidence, etc.) */
38
+ /** Word-level data populated only when granularity='word' or the provider returns it. Left undefined otherwise (not `[]`). */
19
39
  words?: any[];
40
+ /** Segment-level data — populated when granularity='segment' (or the provider returns it). */
41
+ segments?: TranscriptSegment[];
20
42
  /** Duration of audio in seconds */
21
43
  duration?: number;
22
44
  /** Detected or specified language code */
@@ -63,6 +85,13 @@ export interface TranscribeParams {
63
85
  diarize?: boolean;
64
86
  /** Timestamp granularity for transcription (Mistral-specific) */
65
87
  timestampGranularity?: 'segment' | 'word';
88
+ /**
89
+ * Context biasing terms (Voxtral/Mistral-specific).
90
+ * Up to `VOXTRAL_LIMITS.maxContextBiasingTerms` (100) custom-vocabulary terms
91
+ * passed to the Voxtral transcribe endpoint as `context_bias[]` form fields.
92
+ * Ignored by non-Mistral providers.
93
+ */
94
+ contextBias?: string[];
66
95
  /** Path to model file (Whisper-specific) */
67
96
  modelPath?: string;
68
97
  /** Output directory for results (Whisper-specific) */
@@ -1 +1 @@
1
- {"version":3,"file":"providers.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/providers.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAUH;;;GAGG;AACH,MAAM,WAAW,mBAAmB;IAClC,mCAAmC;IACnC,IAAI,EAAE,MAAM,CAAA;IACZ,sEAAsE;IACtE,WAAW,CAAC,EAAE,GAAG,CAAA;IACjB,4CAA4C;IAC5C,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,kDAAkD;IAClD,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,iDAAiD;IACjD,KAAK,CAAC,EAAE,GAAG,EAAE,CAAA;IACb,mCAAmC;IACnC,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,0CAA0C;IAC1C,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,6DAA6D;IAC7D,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AAED;;;GAGG;AACH,MAAM,WAAW,qBAAqB;IACpC,+BAA+B;IAC/B,IAAI,EAAE,MAAM,CAAA;IACZ,+DAA+D;IAC/D,mBAAmB,CAAC,EAAE,MAAM,CAAA;IAC5B;;;;OAIG;IACH,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAA;CACnE;AAED;;;GAGG;AACH,MAAM,WAAW,gBAAgB;IAC/B,2CAA2C;IAC3C,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,iCAAiC;IACjC,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,gEAAgE;IAChE,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,yDAAyD;IACzD,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,yDAAyD;IACzD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,qDAAqD;IACrD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,oDAAoD;IACpD,OAAO,CAAC,EAAE,OAAO,CAAA;IACjB,iEAAiE;IACjE,oBAAoB,CAAC,EAAE,SAAS,GAAG,MAAM,CAAA;IACzC,4CAA4C;IAC5C,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,sDAAsD;IACtD,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,oCAAoC;IACpC,MAAM,CAAC,EAAE,GAAG,CAAA;CACb;AAED;;GAEG;AACH,MAAM,MAAM,YAAY,GAAG,SAAS,GAAG,SAAS,GAAG,SAAS,CAAA;AAE5D;;;;;GAKG;AACH,wBAAgB,cAAc,CAAC,YAAY,EAAE,YAAY,EAAE,MAAM,CAAC,EAAE,GAAG,GAAG,qBAAqB,CAW9F;AASD;;;GAGG;AACH,qBAAa,eAAgB,YAAW,qBAAqB;IAC3D,IAAI,SAAY;IAEhB,OAAO,CAAC,QAAQ,CAAQ;IAExB,MAAM,CAAC,QAAQ;;;;;;;;;;;;;;;MAkBd;gBAEW,MAAM,CAAC,EAAE,GAAG;IAKlB,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC;YA+F1D,4BAA4B;CAkC3C;AAED;;;GAGG;AACH,eAAO,MAAM,cAAc;IACzB,2EAA2E;;IAE3E,4CAA4C;;IAE5C,uCAAuC;;CAExC,CAAA;AAED,qBAAa,eAAgB,YAAW,qBAAqB;IAC3D,IAAI,SAAY;IAChB,mBAAmB,SAAqC;IAElD,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC;CAwGzE;AAED;;;GAGG;AACH,qBAAa,eAAgB,YAAW,qBAAqB;IAC3D,IAAI,SAAY;IAEV,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC;CAkGzE"}
1
+ {"version":3,"file":"providers.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/providers.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAUH;;;;;;;;;;;;;GAaG;AACH,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE,MAAM,CAAA;IACf,KAAK,EAAE,MAAM,CAAA;IACb,IAAI,EAAE,MAAM,CAAA;IACZ,WAAW,CAAC,EAAE,MAAM,GAAG,MAAM,CAAA;CAG9B;AAED;;;GAGG;AACH,MAAM,WAAW,mBAAmB;IAClC,mCAAmC;IACnC,IAAI,EAAE,MAAM,CAAA;IACZ,sEAAsE;IACtE,WAAW,CAAC,EAAE,GAAG,CAAA;IACjB,4CAA4C;IAC5C,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,kDAAkD;IAClD,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,gIAAgI;IAChI,KAAK,CAAC,EAAE,GAAG,EAAE,CAAA;IACb,8FAA8F;IAC9F,QAAQ,CAAC,EAAE,iBAAiB,EAAE,CAAA;IAC9B,mCAAmC;IACnC,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,0CAA0C;IAC1C,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,6DAA6D;IAC7D,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AAED;;;GAGG;AACH,MAAM,WAAW,qBAAqB;IACpC,+BAA+B;IAC/B,IAAI,EAAE,MAAM,CAAA;IACZ,+DAA+D;IAC/D,mBAAmB,CAAC,EAAE,MAAM,CAAA;IAC5B;;;;OAIG;IACH,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAA;CACnE;AAED;;;GAGG;AACH,MAAM,WAAW,gBAAgB;IAC/B,2CAA2C;IAC3C,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,iCAAiC;IACjC,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,gEAAgE;IAChE,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,yDAAyD;IACzD,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,yDAAyD;IACzD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,qDAAqD;IACrD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,oDAAoD;IACpD,OAAO,CAAC,EAAE,OAAO,CAAA;IACjB,iEAAiE;IACjE,oBAAoB,CAAC,EAAE,SAAS,GAAG,MAAM,CAAA;IACzC;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,EAAE,CAAA;IACtB,4CAA4C;IAC5C,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,sDAAsD;IACtD,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,oCAAoC;IACpC,MAAM,CAAC,EAAE,GAAG,CAAA;CACb;AAED;;GAEG;AACH,MAAM,MAAM,YAAY,GAAG,SAAS,GAAG,SAAS,GAAG,SAAS,CAAA;AAE5D;;;;;GAKG;AACH,wBAAgB,cAAc,CAAC,YAAY,EAAE,YAAY,EAAE,MAAM,CAAC,EAAE,GAAG,GAAG,qBAAqB,CAW9F;AASD;;;GAGG;AACH,qBAAa,eAAgB,YAAW,qBAAqB;IAC3D,IAAI,SAAY;IAEhB,OAAO,CAAC,QAAQ,CAAQ;IAExB,MAAM,CAAC,QAAQ;;;;;;;;;;;;;;;MAkBd;gBAEW,MAAM,CAAC,EAAE,GAAG;IAKlB,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC;YA+F1D,4BAA4B;CAkC3C;AAED;;;GAGG;AACH,eAAO,MAAM,cAAc;IACzB,2EAA2E;;IAE3E,4CAA4C;;IAE5C,uCAAuC;;CAExC,CAAA;AAED,qBAAa,eAAgB,YAAW,qBAAqB;IAC3D,IAAI,SAAY;IAChB,mBAAmB,SAAqC;IAElD,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC;CAuIzE;AAED;;;GAGG;AACH,qBAAa,eAAgB,YAAW,qBAAqB;IAC3D,IAAI,SAAY;IAEV,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC;CAkGzE"}
@@ -27,6 +27,11 @@ export interface TranscribeOptions {
27
27
  diarize?: boolean;
28
28
  /** Timestamp granularity: 'word' | 'segment' (default: 'segment' when diarize=true, disabled if language set) */
29
29
  timestamps?: 'word' | 'segment';
30
+ /**
31
+ * Context biasing terms — up to `VOXTRAL_LIMITS.maxContextBiasingTerms` (100)
32
+ * custom-vocabulary entries passed to Voxtral as `context_bias[]`. Mistral only.
33
+ */
34
+ contextBias?: string[];
30
35
  /** Auto-split long audio (default: true). For URLs, detects duration first. */
31
36
  autoSplit?: boolean;
32
37
  /** Output directory for split segments (default: system temp) */
@@ -1 +1 @@
1
- {"version":3,"file":"transcribe.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/transcribe.ts"],"names":[],"mappings":"AAAA;;GAEG;AASH,OAAO,EAA6B,KAAK,yBAAyB,EAAE,MAAM,2BAA2B,CAAA;AAErG,kDAAkD;AAClD,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAA;IAC3B,IAAI,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAA;IAC3B,KAAK,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAA;CAC7B;AAQD,MAAM,WAAW,iBAAiB;IAChC,yBAAyB;IACzB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,iCAAiC;IACjC,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,gEAAgE;IAChE,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,yEAAyE;IACzE,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,mFAAmF;IACnF,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,kDAAkD;IAClD,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,iDAAiD;IACjD,OAAO,CAAC,EAAE,OAAO,CAAA;IACjB,iHAAiH;IACjH,UAAU,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;IAC/B,+EAA+E;IAC/E,SAAS,CAAC,EAAE,OAAO,CAAA;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,uCAAuC;IACvC,MAAM,CAAC,EAAE,gBAAgB,CAAA;IACzB,mCAAmC;IACnC,OAAO,CAAC,EAAE,OAAO,CAAA;CAClB;AAED,MAAM,WAAW,wBAAwB;IACvC,sBAAsB;IACtB,MAAM,EAAE,MAAM,CAAA;IACd,mDAAmD;IACnD,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AA6FD;;;;;;;;;;;;;;;;;;;GAmBG;AACH,iEAAiE;AACjE,MAAM,WAAW,kBAAkB;IACjC,UAAU,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,yBAAyB,CAAC,CAAA;CAC3E;AAED,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,wBAAwB,GAAG,kBAAkB,CA4K7F;AAED,+BAA+B;AAC/B,eAAO,MAAM,UAAU,iCAA2B,CAAA"}
1
+ {"version":3,"file":"transcribe.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/transcribe.ts"],"names":[],"mappings":"AAAA;;GAEG;AASH,OAAO,EAA6B,KAAK,yBAAyB,EAAE,MAAM,2BAA2B,CAAA;AAErG,kDAAkD;AAClD,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAA;IAC3B,IAAI,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAA;IAC3B,KAAK,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAA;CAC7B;AAQD,MAAM,WAAW,iBAAiB;IAChC,yBAAyB;IACzB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,iCAAiC;IACjC,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,gEAAgE;IAChE,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,yEAAyE;IACzE,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,mFAAmF;IACnF,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,kDAAkD;IAClD,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,iDAAiD;IACjD,OAAO,CAAC,EAAE,OAAO,CAAA;IACjB,iHAAiH;IACjH,UAAU,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;IAC/B;;;OAGG;IACH,WAAW,CAAC,EAAE,MAAM,EAAE,CAAA;IACtB,+EAA+E;IAC/E,SAAS,CAAC,EAAE,OAAO,CAAA;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,uCAAuC;IACvC,MAAM,CAAC,EAAE,gBAAgB,CAAA;IACzB,mCAAmC;IACnC,OAAO,CAAC,EAAE,OAAO,CAAA;CAClB;AAED,MAAM,WAAW,wBAAwB;IACvC,sBAAsB;IACtB,MAAM,EAAE,MAAM,CAAA;IACd,mDAAmD;IACnD,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AA6FD;;;;;;;;;;;;;;;;;;;GAmBG;AACH,iEAAiE;AACjE,MAAM,WAAW,kBAAkB;IACjC,UAAU,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,yBAAyB,CAAC,CAAA;CAC3E;AAED,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,wBAAwB,GAAG,kBAAkB,CAmL7F;AAED,+BAA+B;AAC/B,eAAO,MAAM,UAAU,iCAA2B,CAAA"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@wovin/tranz",
3
- "version": "0.1.35",
3
+ "version": "0.2.0",
4
4
  "type": "module",
5
5
  "description": "Audio transcription library with provider support and auto-splitting",
6
6
  "author": "gotjoshua @gotjoshua",
@@ -12,29 +12,30 @@
12
12
  "directory": "packages/@wovin/tranz"
13
13
  },
14
14
  "bugs": "https://gitlab.com/onezoomin/ztax/tranz/-/issues",
15
- "main": "./dist/index.min.js",
16
- "module": "./dist/index.min.js",
15
+ "main": "./dist/index.js",
16
+ "module": "./dist/index.js",
17
17
  "types": "./dist/index.d.ts",
18
18
  "exports": {
19
19
  ".": {
20
- "import": "./dist/index.min.js",
20
+ "import": "./dist/index.js",
21
21
  "types": "./dist/index.d.ts"
22
22
  },
23
23
  "./providers": {
24
- "import": "./dist/providers.min.js",
24
+ "import": "./dist/providers.js",
25
25
  "types": "./dist/providers.d.ts"
26
26
  },
27
27
  "./audio": {
28
- "import": "./dist/audio.min.js",
28
+ "import": "./dist/audio.js",
29
29
  "types": "./dist/audio.d.ts"
30
30
  },
31
31
  "./realtime": {
32
- "import": "./dist/realtime.min.js",
32
+ "import": "./dist/realtime.js",
33
33
  "types": "./dist/realtime.d.ts"
34
34
  }
35
35
  },
36
36
  "files": [
37
- "./dist/"
37
+ "./dist/",
38
+ "./src/"
38
39
  ],
39
40
  "publishConfig": {
40
41
  "access": "public"
@@ -72,6 +73,7 @@
72
73
  "dev:code": "tsup --watch",
73
74
  "dev:types": "tsc --emitDeclarationOnly --declaration --watch",
74
75
  "clean": "rm -rf .turbo && rm -rf node_modules && rm -rf dist",
76
+ "test": "tsx --test test/*.test.ts",
75
77
  "test:realtime": "tsx test/realtime-transcription.ts",
76
78
  "test:realtime-api": "tsx test/realtime-api-test.ts"
77
79
  }
package/src/audio.ts ADDED
@@ -0,0 +1,25 @@
1
+ /**
2
+ * @wovin/tranz/audio - Audio utilities for splitting and merging
3
+ */
4
+
5
+ export {
6
+ autoSplitAudio,
7
+ analyzeSplitPoints,
8
+ detectSilenceRegions,
9
+ getAudioDuration,
10
+ findOptimalSplitPoints,
11
+ splitAudioAtPoints,
12
+ DEFAULT_SPLIT_CONFIG,
13
+ type SplitConfig,
14
+ type SilenceRegion,
15
+ type SplitPoint,
16
+ type AudioSegment,
17
+ type SplitAnalysis,
18
+ } from './utils/audio/split.ts'
19
+
20
+ export {
21
+ mergeTranscriptionResults,
22
+ formatMergedText,
23
+ type MergedTranscriptionResult,
24
+ type WordData,
25
+ } from './utils/audio/merge-results.ts'
package/src/index.ts ADDED
@@ -0,0 +1,61 @@
1
+ /**
2
+ * @wovin/tranz - Audio transcription library
3
+ */
4
+
5
+ // Transcription providers
6
+ export {
7
+ createProvider,
8
+ MistralProvider,
9
+ WhisperProvider,
10
+ GreenPTProvider,
11
+ VOXTRAL_LIMITS,
12
+ type ProviderName,
13
+ type TranscribeParams,
14
+ type TranscriptionResult,
15
+ type TranscriptionProvider,
16
+ type TranscriptSegment,
17
+ } from './utils/transcription/providers.ts'
18
+
19
+ // Audio utilities
20
+ export {
21
+ autoSplitAudio,
22
+ analyzeSplitPoints,
23
+ detectSilenceRegions,
24
+ getAudioDuration,
25
+ findOptimalSplitPoints,
26
+ splitAudioAtPoints,
27
+ DEFAULT_SPLIT_CONFIG,
28
+ type SplitConfig,
29
+ type SilenceRegion,
30
+ type SplitPoint,
31
+ type AudioSegment,
32
+ type SplitAnalysis,
33
+ } from './utils/audio/split.ts'
34
+
35
+ // Result merging
36
+ export {
37
+ mergeTranscriptionResults,
38
+ formatMergedText,
39
+ type MergedTranscriptionResult,
40
+ type WordData,
41
+ type AudioChunk,
42
+ } from './utils/audio/merge-results.ts'
43
+
44
+ // Transcription formatting
45
+ export {
46
+ formatTranscriptWithPauses,
47
+ formatTranscriptAsMarkdown,
48
+ type FormatMarkdownOptions,
49
+ } from './utils/transcription/format.ts'
50
+
51
+ // MIME type detection
52
+ export { detectAudioMimeType } from './utils/transcription/mime-detection.ts'
53
+
54
+ // Simple high-level API
55
+ export {
56
+ createMistralTranscriber,
57
+ transcribe,
58
+ type TranscribeOptions,
59
+ type MistralTranscriberConfig,
60
+ type MistralTranscriber,
61
+ } from './utils/transcription/transcribe.ts'
@@ -0,0 +1,23 @@
1
+ /**
2
+ * @wovin/tranz/providers - Transcription provider implementations
3
+ */
4
+
5
+ export {
6
+ createProvider,
7
+ MistralProvider,
8
+ WhisperProvider,
9
+ GreenPTProvider,
10
+ VOXTRAL_LIMITS,
11
+ type ProviderName,
12
+ type TranscribeParams,
13
+ type TranscriptionResult,
14
+ type TranscriptionProvider,
15
+ type TranscriptSegment,
16
+ } from './utils/transcription/providers.ts'
17
+
18
+ export {
19
+ createMistralTranscriber,
20
+ transcribe,
21
+ type TranscribeOptions,
22
+ type MistralTranscriberConfig,
23
+ } from './utils/transcription/transcribe.ts'
@@ -0,0 +1,58 @@
1
+ /**
2
+ * Realtime transcription API
3
+ *
4
+ * This module provides a simple, event-driven interface for streaming audio
5
+ * transcription using Mistral's realtime WebSocket API.
6
+ *
7
+ * **Node.js only** - Browser support is currently disabled due to WebSocket
8
+ * authentication limitations with Mistral API.
9
+ *
10
+ * @example Node.js
11
+ * ```typescript
12
+ * import {
13
+ * createRealtimeTranscriber,
14
+ * captureAudioFromMicrophone,
15
+ * } from '@wovin/tranz/realtime'
16
+ *
17
+ * const transcriber = createRealtimeTranscriber({
18
+ * apiKey: process.env.MISTRAL_API_KEY,
19
+ * })
20
+ *
21
+ * const { stream, stop } = await captureAudioFromMicrophone(16000)
22
+ *
23
+ * try {
24
+ * for await (const event of transcriber.transcribe(stream)) {
25
+ * if (event.type === 'transcription.text.delta') {
26
+ * process.stdout.write(event.text)
27
+ * } else if (event.type === 'transcription.done') {
28
+ * console.log('\nComplete!')
29
+ * break
30
+ * }
31
+ * }
32
+ * } finally {
33
+ * stop()
34
+ * }
35
+ * ```
36
+ *
37
+ * @module @wovin/tranz/realtime
38
+ */
39
+
40
+ export {
41
+ createRealtimeTranscriber,
42
+ captureAudioFromMicrophone,
43
+ captureAudioFromBrowser,
44
+ AudioEncoding,
45
+ type RealtimeEvent,
46
+ type RealtimeConfig,
47
+ type RealtimeTranscriber,
48
+ type TranscribeOptions,
49
+ type AudioFormat,
50
+ type AudioCaptureResult,
51
+ type SessionCreatedEvent,
52
+ type SessionUpdatedEvent,
53
+ type TranscriptionTextDeltaEvent,
54
+ type TranscriptionLanguageEvent,
55
+ type TranscriptionSegmentEvent,
56
+ type TranscriptionDoneEvent,
57
+ type ErrorEvent,
58
+ } from "./utils/transcription/realtime.js";
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Audio utilities for tranz-cli
3
+ */
4
+
5
+ export * from './split.ts'
6
+ export * from './merge-results.ts'