@wovin/tranz 0.0.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,302 @@
+ // src/utils/audio/split.ts
+ import ffmpeg from "fluent-ffmpeg";
+ import * as fs from "fs";
+ import path from "path";
+ import { spawn } from "child_process";
+ var DEFAULT_SPLIT_CONFIG = {
+   maxDurationSec: 600,
+   // 10 minutes
+   minSilenceDurSec: 1,
+   silenceThreshold: "-35dB",
+   preferLongerSilence: true,
+   silenceBuffer: 0.2
+ };
+ async function getAudioDuration(audioPath) {
+   return new Promise((resolve, reject) => {
+     ffmpeg.ffprobe(audioPath, (err, metadata) => {
+       if (err) {
+         reject(new Error(`Failed to probe audio: ${err.message}`));
+         return;
+       }
+       const duration = metadata.format.duration;
+       if (typeof duration !== "number") {
+         reject(new Error("Could not determine audio duration"));
+         return;
+       }
+       resolve(duration);
+     });
+   });
+ }
+ async function detectSilenceRegions(audioPath, config = {}) {
+   const { minSilenceDurSec, silenceThreshold } = { ...DEFAULT_SPLIT_CONFIG, ...config };
+   return new Promise((resolve, reject) => {
+     const silenceRegions = [];
+     const args = [
+       "-i",
+       audioPath,
+       "-af",
+       `silencedetect=n=${silenceThreshold}:d=${minSilenceDurSec}`,
+       "-f",
+       "wav",
+       "-ac",
+       "1",
+       "-ar",
+       "8000",
+       "pipe:1"
+     ];
+     const proc = spawn("ffmpeg", args);
+     proc.stdout.on("data", () => {
+     });
+     proc.stderr.on("data", (data) => {
+       const lines = data.toString().split("\n");
+       for (const line of lines) {
+         if (line.includes("silence_end:")) {
+           const match = line.match(/silence_end:\s*([\d.]+)\s*\|\s*silence_duration:\s*([\d.]+)/);
+           if (match) {
+             const endSec = parseFloat(match[1]);
+             const durationSec = parseFloat(match[2]);
+             if (!isNaN(endSec) && !isNaN(durationSec)) {
+               silenceRegions.push({
+                 startSec: endSec - durationSec,
+                 endSec,
+                 durationSec
+               });
+             }
+           }
+         }
+       }
+     });
+     proc.on("close", (code) => {
+       if (code === 0 || silenceRegions.length > 0) {
+         resolve(silenceRegions);
+       } else {
+         reject(new Error(`FFmpeg exited with code ${code}`));
+       }
+     });
+     proc.on("error", (err) => {
+       reject(new Error(`Silence detection failed: ${err.message}`));
+     });
+   });
+ }
+ function findOptimalSplitPoints(silenceRegions, totalDuration, config = {}) {
+   const { maxDurationSec, preferLongerSilence, silenceBuffer } = {
+     ...DEFAULT_SPLIT_CONFIG,
+     ...config
+   };
+   if (totalDuration <= maxDurationSec) {
+     return [];
+   }
+   const numSegments = Math.ceil(totalDuration / maxDurationSec);
+   const idealSegmentDuration = totalDuration / numSegments;
+   const splitPoints = [];
+   for (let i = 1; i < numSegments; i++) {
+     const idealSplitTime = idealSegmentDuration * i;
+     const windowSize = idealSegmentDuration * 0.3;
+     const windowStart = idealSplitTime - windowSize;
+     const windowEnd = idealSplitTime + windowSize;
+     const candidateSilences = silenceRegions.filter((silence) => {
+       const silenceMid = (silence.startSec + silence.endSec) / 2;
+       return silenceMid >= windowStart && silenceMid <= windowEnd;
+     });
+     let bestSplitPoint;
+     if (candidateSilences.length > 0) {
+       let bestScore = -Infinity;
+       let bestSilence = candidateSilences[0];
+       for (const silence of candidateSilences) {
+         const silenceMid = (silence.startSec + silence.endSec) / 2;
+         const proximityScore = 1 - Math.abs(silenceMid - idealSplitTime) / windowSize;
+         const score = preferLongerSilence ? silence.durationSec * proximityScore : proximityScore;
+         if (score > bestScore) {
+           bestScore = score;
+           bestSilence = silence;
+         }
+       }
+       bestSplitPoint = {
+         timeSec: (bestSilence.startSec + bestSilence.endSec) / 2,
+         silenceDuration: bestSilence.durationSec
+       };
+     } else {
+       bestSplitPoint = {
+         timeSec: idealSplitTime,
+         silenceDuration: 0
+       };
+     }
+     splitPoints.push(bestSplitPoint);
+   }
+   return splitPoints.sort((a, b) => a.timeSec - b.timeSec);
+ }
+ async function splitAudioAtPoints(audioPath, splitPoints, totalDuration, outputDir, baseName) {
+   fs.mkdirSync(outputDir, { recursive: true });
+   const segments = [];
+   const boundaries = [0, ...splitPoints.map((sp) => sp.timeSec), totalDuration];
+   const splitPromises = [];
+   for (let i = 0; i < boundaries.length - 1; i++) {
+     const startSec = boundaries[i];
+     const endSec = boundaries[i + 1];
+     const durationSec = endSec - startSec;
+     const outputPath = path.join(outputDir, `${baseName}-segment-${i.toString().padStart(3, "0")}.wav`);
+     const segment = {
+       index: i,
+       startSec,
+       endSec,
+       durationSec,
+       outputPath
+     };
+     segments.push(segment);
+     const extractPromise = new Promise((resolve, reject) => {
+       ffmpeg(audioPath).setStartTime(startSec).setDuration(durationSec).audioFrequency(16e3).outputOptions(["-ac 1", "-c:a pcm_s16le"]).output(outputPath).on("error", (err) => reject(new Error(`Failed to extract segment ${i}: ${err.message}`))).on("end", () => resolve()).run();
+     });
+     splitPromises.push(extractPromise);
+   }
+   await Promise.all(splitPromises);
+   return segments;
+ }
+ async function autoSplitAudio(audioPath, outputDir, config = {}) {
+   const mergedConfig = { ...DEFAULT_SPLIT_CONFIG, ...config };
+   const totalDuration = await getAudioDuration(audioPath);
+   if (totalDuration <= mergedConfig.maxDurationSec) {
+     return [
+       {
+         index: 0,
+         startSec: 0,
+         endSec: totalDuration,
+         durationSec: totalDuration,
+         outputPath: audioPath
+       }
+     ];
+   }
+   const silenceRegions = await detectSilenceRegions(audioPath, mergedConfig);
+   const splitPoints = findOptimalSplitPoints(silenceRegions, totalDuration, mergedConfig);
+   const baseName = path.basename(audioPath, path.extname(audioPath));
+   const segments = await splitAudioAtPoints(
+     audioPath,
+     splitPoints,
+     totalDuration,
+     outputDir,
+     baseName
+   );
+   return segments;
+ }
+ async function analyzeSplitPoints(audioPath, config = {}) {
+   const mergedConfig = { ...DEFAULT_SPLIT_CONFIG, ...config };
+   const totalDuration = await getAudioDuration(audioPath);
+   const needsSplit = totalDuration > mergedConfig.maxDurationSec;
+   if (!needsSplit) {
+     return {
+       totalDuration,
+       numSegments: 1,
+       splitPoints: [],
+       silenceRegions: [],
+       needsSplit: false
+     };
+   }
+   const silenceRegions = await detectSilenceRegions(audioPath, mergedConfig);
+   const splitPoints = findOptimalSplitPoints(silenceRegions, totalDuration, mergedConfig);
+   return {
+     totalDuration,
+     numSegments: splitPoints.length + 1,
+     splitPoints,
+     silenceRegions,
+     needsSplit: true
+   };
+ }
+
+ // src/utils/audio/merge-results.ts
+ function mergeTranscriptionResults(results, segments) {
+   if (results.length === 0) {
+     return {
+       text: "",
+       error: "No results to merge"
+     };
+   }
+   if (results.length === 1) {
+     return {
+       ...results[0],
+       totalSegments: 1
+     };
+   }
+   const errors = results.map((r, i) => r.error ? `Segment ${i}: ${r.error}` : null).filter(Boolean);
+   if (errors.length > 0) {
+     return {
+       text: "",
+       error: `Errors in segments: ${errors.join("; ")}`
+     };
+   }
+   const mergedText = results.map((r) => r.text.trim()).join("\n\n");
+   const mergedWords = [];
+   for (let i = 0; i < results.length; i++) {
+     const result = results[i];
+     const segment = segments[i];
+     const words = result.words || result.rawResponse?.words || [];
+     for (const word of words) {
+       mergedWords.push({
+         word: word.word || word.text,
+         start: (word.start || 0) + segment.startSec,
+         end: (word.end || 0) + segment.startSec,
+         confidence: word.confidence,
+         speaker: word.speaker
+       });
+     }
+   }
+   const totalDuration = segments.reduce((sum, seg) => sum + seg.durationSec, 0);
+   const segmentMeta = results.map((r, i) => ({
+     index: i,
+     startSec: segments[i].startSec,
+     endSec: segments[i].endSec,
+     text: r.text.trim()
+   }));
+   const mergedRawResponse = {
+     merged: true,
+     segmentCount: results.length,
+     segments: results.map((r, i) => ({
+       index: i,
+       startSec: segments[i].startSec,
+       rawResponse: r.rawResponse
+     })),
+     words: mergedWords
+   };
+   const firstResult = results[0];
+   return {
+     text: mergedText,
+     words: mergedWords,
+     duration: totalDuration,
+     language: firstResult.language,
+     model: firstResult.model,
+     rawResponse: mergedRawResponse,
+     segments: segmentMeta,
+     totalSegments: results.length
+   };
+ }
+ function formatMergedText(result, includeMarkers = false) {
+   if (!result.segments || result.segments.length <= 1) {
+     return result.text;
+   }
+   if (!includeMarkers) {
+     return result.text;
+   }
+   return result.segments.map((seg, i) => {
+     const timeStr = formatTimestamp(seg.startSec);
+     return `[Segment ${i + 1} @ ${timeStr}]
+ ${seg.text}`;
+   }).join("\n\n");
+ }
+ function formatTimestamp(seconds) {
+   const hours = Math.floor(seconds / 3600);
+   const minutes = Math.floor(seconds % 3600 / 60);
+   const secs = Math.floor(seconds % 60);
+   if (hours > 0) {
+     return `${hours}:${minutes.toString().padStart(2, "0")}:${secs.toString().padStart(2, "0")}`;
+   }
+   return `${minutes}:${secs.toString().padStart(2, "0")}`;
+ }
+ export {
+   DEFAULT_SPLIT_CONFIG,
+   analyzeSplitPoints,
+   autoSplitAudio,
+   detectSilenceRegions,
+   findOptimalSplitPoints,
+   formatMergedText,
+   getAudioDuration,
+   mergeTranscriptionResults,
+   splitAudioAtPoints
+ };
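
Taken together, the two modules above form a split/transcribe/merge pipeline: `autoSplitAudio` cuts long recordings at detected silences, each segment is transcribed on its own, and `mergeTranscriptionResults` re-bases word timestamps onto the original timeline. A minimal sketch of that wiring, assuming a caller-supplied `transcribeSegment` callback (hypothetical; any per-segment provider call, such as the package's `transcribe` export, could fill that role):

```ts
import {
  autoSplitAudio,
  mergeTranscriptionResults,
  formatMergedText,
  type TranscriptionResult,
} from "@wovin/tranz";

// Hypothetical per-segment transcriber; stands in for any provider call.
type SegmentTranscriber = (wavPath: string) => Promise<TranscriptionResult>;

async function transcribeLongAudio(audioPath: string, transcribeSegment: SegmentTranscriber) {
  // Split into <= 10-minute segments at detected silences (DEFAULT_SPLIT_CONFIG).
  const segments = await autoSplitAudio(audioPath, "./segments");
  // Transcribe every segment; order is preserved, which the merge step relies on.
  const results = await Promise.all(segments.map((s) => transcribeSegment(s.outputPath)));
  // Join segment texts and shift each word's start/end by its segment's startSec.
  const merged = mergeTranscriptionResults(results, segments);
  // Optionally prefix each segment with a "[Segment N @ M:SS]" marker.
  return formatMergedText(merged, true);
}
```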
@@ -0,0 +1,9 @@
+ /**
+  * @wovin/tranz - Audio transcription library
+  */
+ export { createProvider, MistralProvider, WhisperProvider, GreenPTProvider, VOXTRAL_LIMITS, type ProviderName, type TranscribeParams, type TranscriptionResult, type TranscriptionProvider, } from './utils/transcription/providers.js';
+ export { autoSplitAudio, analyzeSplitPoints, detectSilenceRegions, getAudioDuration, findOptimalSplitPoints, splitAudioAtPoints, DEFAULT_SPLIT_CONFIG, type SplitConfig, type SilenceRegion, type SplitPoint, type AudioSegment, type SplitAnalysis, } from './utils/audio/split.js';
+ export { mergeTranscriptionResults, formatMergedText, type MergedTranscriptionResult, type WordData, } from './utils/audio/merge-results.js';
+ export { formatTranscriptWithPauses } from './utils/transcription/format.js';
+ export { createMistralTranscriber, transcribe, type TranscribeOptions, type MistralTranscriberConfig, } from './utils/transcription/transcribe.js';
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EACL,cAAc,EACd,eAAe,EACf,eAAe,EACf,eAAe,EACf,cAAc,EACd,KAAK,YAAY,EACjB,KAAK,gBAAgB,EACrB,KAAK,mBAAmB,EACxB,KAAK,qBAAqB,GAC3B,MAAM,oCAAoC,CAAA;AAG3C,OAAO,EACL,cAAc,EACd,kBAAkB,EAClB,oBAAoB,EACpB,gBAAgB,EAChB,sBAAsB,EACtB,kBAAkB,EAClB,oBAAoB,EACpB,KAAK,WAAW,EAChB,KAAK,aAAa,EAClB,KAAK,UAAU,EACf,KAAK,YAAY,EACjB,KAAK,aAAa,GACnB,MAAM,wBAAwB,CAAA;AAG/B,OAAO,EACL,yBAAyB,EACzB,gBAAgB,EAChB,KAAK,yBAAyB,EAC9B,KAAK,QAAQ,GACd,MAAM,gCAAgC,CAAA;AAGvC,OAAO,EAAE,0BAA0B,EAAE,MAAM,iCAAiC,CAAA;AAG5E,OAAO,EACL,wBAAwB,EACxB,UAAU,EACV,KAAK,iBAAiB,EACtB,KAAK,wBAAwB,GAC9B,MAAM,qCAAqC,CAAA"}