npm - @storyteller-platform/ghost-story - Versions diffs - 0.1.10 → 0.1.11 - Mend

@storyteller-platform/ghost-story 0.1.10 → 0.1.11

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.

Files changed (9)

package/dist/index.cjs +7 -0
package/dist/index.d.cts +1 -0
package/dist/index.d.ts +1 -0
package/dist/index.js +8 -0
package/dist/recognition/WhisperServerSTT.cjs +46 -4
package/dist/recognition/WhisperServerSTT.js +47 -4
package/dist/utilities/WhisperTimeline.cjs +22 -8
package/dist/utilities/WhisperTimeline.js +22 -8
package/package.json +1 -1

package/dist/index.cjs CHANGED Viewed

@@ -72,6 +72,9 @@ __export(index_exports, {
   serviceCapabilities: () => import_audio.serviceCapabilities,
   setConversionMode: () => import_config2.setConversionMode,
   setTimingEnabled: () => import_config2.setTimingEnabled,
+  spacelessScriptPattern: () => import_SpacelessScripts.spacelessScriptPattern,
+  spacelessScripts: () => import_SpacelessScripts.spacelessScripts,
+  startsWithSpacelessScript: () => import_SpacelessScripts.startsWithSpacelessScript,
   toBuffer: () => import_audio.toBuffer,
   toFilePath: () => import_audio.toFilePath,
   toReadStream: () => import_audio.toReadStream,
@@ -87,6 +90,7 @@ var import_config2 = require("./config.cjs");
 var import_OpenAICloudSTT = require("./recognition/OpenAICloudSTT.cjs");
 var import_WhisperCppSTT = require("./recognition/WhisperCppSTT.cjs");
 var import_WhisperServerSTT = require("./recognition/WhisperServerSTT.cjs");
+var import_SpacelessScripts = require("./utilities/SpacelessScripts.cjs");
 var import_Timing = require("./utilities/Timing.cjs");
 var import_Silero = require("./vad/Silero.cjs");
 // Annotate the CommonJS export names for ESM import in node:
@@ -145,6 +149,9 @@ var import_Silero = require("./vad/Silero.cjs");
   serviceCapabilities,
   setConversionMode,
   setTimingEnabled,
+  spacelessScriptPattern,
+  spacelessScripts,
+  startsWithSpacelessScript,
   toBuffer,
   toFilePath,
   toReadStream,

package/dist/index.d.cts CHANGED Viewed

@@ -9,6 +9,7 @@ export { OpenAICloudSTTOptions, RecognitionResult as OpenAIResult, inputPreferen
 export { WhisperCppModelId, WhisperCppOptions, RecognitionResult as WhisperCppResult, recognize as recognizeWhisperCpp, inputPreference as whisperCppInputPreference } from './recognition/WhisperCppSTT.cjs';
 export { WhisperServerOptions, RecognitionResult as WhisperServerResult, recognize as recognizeWhisperServer, inputPreference as whisperServerInputPreference } from './recognition/WhisperServerSTT.cjs';
 export { Timeline, TimelineEntry, TimelineEntryType } from './utilities/Timeline.cjs';
+export { spacelessScriptPattern, spacelessScripts, startsWithSpacelessScript } from './utilities/SpacelessScripts.cjs';
 export { AggregatedStats, PhaseTiming, Timing, TimingAggregator, TimingSpan, TimingSummary, createAggregator, createTiming, formatDuration, formatPercentage, formatSingleReport, printSingleReport } from './utilities/Timing.cjs';
 export { SileroOptions, VadSegment, detectVoiceActivity, ensureVadInstalled, segmentsToTimeline } from './vad/Silero.cjs';
 import 'node:fs';

package/dist/index.d.ts CHANGED Viewed

@@ -9,6 +9,7 @@ export { OpenAICloudSTTOptions, RecognitionResult as OpenAIResult, inputPreferen
 export { WhisperCppModelId, WhisperCppOptions, RecognitionResult as WhisperCppResult, recognize as recognizeWhisperCpp, inputPreference as whisperCppInputPreference } from './recognition/WhisperCppSTT.js';
 export { WhisperServerOptions, RecognitionResult as WhisperServerResult, recognize as recognizeWhisperServer, inputPreference as whisperServerInputPreference } from './recognition/WhisperServerSTT.js';
 export { Timeline, TimelineEntry, TimelineEntryType } from './utilities/Timeline.js';
+export { spacelessScriptPattern, spacelessScripts, startsWithSpacelessScript } from './utilities/SpacelessScripts.js';
 export { AggregatedStats, PhaseTiming, Timing, TimingAggregator, TimingSpan, TimingSummary, createAggregator, createTiming, formatDuration, formatPercentage, formatSingleReport, printSingleReport } from './utilities/Timing.js';
 export { SileroOptions, VadSegment, detectVoiceActivity, ensureVadInstalled, segmentsToTimeline } from './vad/Silero.js';
 import 'node:fs';

package/dist/index.js CHANGED Viewed

@@ -62,6 +62,11 @@ import {
   inputPreference as inputPreference3,
   recognize as recognize4
 } from "./recognition/WhisperServerSTT.js";
+import {
+  spacelessScriptPattern,
+  spacelessScripts,
+  startsWithSpacelessScript
+} from "./utilities/SpacelessScripts.js";
 import {
   Timing,
   TimingAggregator,
@@ -132,6 +137,9 @@ export {
   serviceCapabilities,
   setConversionMode,
   setTimingEnabled,
+  spacelessScriptPattern,
+  spacelessScripts,
+  startsWithSpacelessScript,
   toBuffer,
   toFilePath,
   toReadStream,

package/dist/recognition/WhisperServerSTT.cjs CHANGED Viewed

@@ -59,11 +59,49 @@ async function recognize(input, languageCode, timing, options) {
     if (languageCode) {
       form.append("language", languageCode);
     }
-    const url = `${opts.baseURL}${opts.inferencePath}`;
+    const baseUrl = opts.baseURL.replace(/\/+$/g, "");
+    const url = `${baseUrl}${opts.inferencePath}`;
     const headers = {};
     if (opts.apiKey) {
       headers["Authorization"] = `Bearer ${opts.apiKey}`;
     }
+    const configResponse = await fetch(`${baseUrl}/config`, {
+      headers,
+      dispatcher: (0, import_fetch.createTimeoutAgent)(opts.timeout)
+    });
+    let whisperConfig = null;
+    if (configResponse.ok) {
+      try {
+        const [config, audioLength] = await Promise.all([
+          configResponse.json(),
+          (0, import_audio.getAudioDuration)(filePath)
+        ]);
+        whisperConfig = {
+          ...config,
+          audioDuration: audioLength
+        };
+        const effectiveProcessors = (0, import_WhisperTimeline.calculateEffectiveProcessors)(
+          audioLength,
+          whisperConfig.maxProcessors
+        );
+        if (effectiveProcessors !== whisperConfig.processors) {
+          const configForm = new FormData();
+          configForm.append("processors", String(effectiveProcessors));
+          configForm.append("threads", String(whisperConfig.threads));
+          await fetch(`${baseUrl}/config`, {
+            method: "POST",
+            headers,
+            body: configForm
+          });
+          whisperConfig.processors = effectiveProcessors;
+        }
+      } catch (e) {
+        console.warn(
+          `Failed to get config from Whisper server, continuing with default config. If you aren't using ghost-story server, this is expected`,
+          e
+        );
+      }
+    }
     const response = await timing.timeAsync(
       "upload",
       async () => fetch(url, {
@@ -81,7 +119,8 @@ async function recognize(input, languageCode, timing, options) {
     const data = await response.json();
     const { timeline, transcript } = await extractTimelineAndTranscript(
       data,
-      filePath
+      filePath,
+      whisperConfig
     );
     if (!timeline) {
       throw new Error(
@@ -93,7 +132,7 @@ async function recognize(input, languageCode, timing, options) {
     await prepared.cleanup();
   }
 }
-async function extractTimelineAndTranscript(response, audioPath) {
+async function extractTimelineAndTranscript(response, audioPath, whisperConfig) {
   var _a, _b, _c, _d;
   if (response.segments.length === 0) {
     return { timeline: [], transcript: ((_a = response.text) == null ? void 0 : _a.trim()) ?? "" };
@@ -101,7 +140,10 @@ async function extractTimelineAndTranscript(response, audioPath) {
   const hasNestedWords = (((_c = (_b = response.segments[0]) == null ? void 0 : _b.words) == null ? void 0 : _c.length) ?? 0) > 0;
   if (hasNestedWords) {
     const rawSegments = (0, import_WhisperTimeline.parseWhisperServerOutput)(response.segments);
-    const splitBoundaries = await detectSplitBoundaries(rawSegments, audioPath);
+    const splitBoundaries = (whisperConfig == null ? void 0 : whisperConfig.audioDuration) ? (0, import_WhisperTimeline.calculateWhisperSplits)(
+      whisperConfig.audioDuration,
+      whisperConfig.processors
+    ) : await detectSplitBoundaries(rawSegments, audioPath);
     const timeline2 = (0, import_WhisperTimeline.extractCorrectedTimeline)(rawSegments, { splitBoundaries });
     const transcript = timeline2.map((entry) => entry.text).join(" ");
     return { timeline: timeline2, transcript };

package/dist/recognition/WhisperServerSTT.js CHANGED Viewed

@@ -9,6 +9,7 @@ import {
 } from "../audio/index.js";
 import { createTimeoutAgent } from "../fetch.js";
 import {
+  calculateEffectiveProcessors,
   calculateWhisperSplits,
   countProcessorBoundaries,
   extractCorrectedTimeline,
@@ -46,11 +47,49 @@ async function recognize(input, languageCode, timing, options) {
     if (languageCode) {
       form.append("language", languageCode);
     }
-    const url = `${opts.baseURL}${opts.inferencePath}`;
+    const baseUrl = opts.baseURL.replace(/\/+$/g, "");
+    const url = `${baseUrl}${opts.inferencePath}`;
     const headers = {};
     if (opts.apiKey) {
       headers["Authorization"] = `Bearer ${opts.apiKey}`;
     }
+    const configResponse = await fetch(`${baseUrl}/config`, {
+      headers,
+      dispatcher: createTimeoutAgent(opts.timeout)
+    });
+    let whisperConfig = null;
+    if (configResponse.ok) {
+      try {
+        const [config, audioLength] = await Promise.all([
+          configResponse.json(),
+          getAudioDuration(filePath)
+        ]);
+        whisperConfig = {
+          ...config,
+          audioDuration: audioLength
+        };
+        const effectiveProcessors = calculateEffectiveProcessors(
+          audioLength,
+          whisperConfig.maxProcessors
+        );
+        if (effectiveProcessors !== whisperConfig.processors) {
+          const configForm = new FormData();
+          configForm.append("processors", String(effectiveProcessors));
+          configForm.append("threads", String(whisperConfig.threads));
+          await fetch(`${baseUrl}/config`, {
+            method: "POST",
+            headers,
+            body: configForm
+          });
+          whisperConfig.processors = effectiveProcessors;
+        }
+      } catch (e) {
+        console.warn(
+          `Failed to get config from Whisper server, continuing with default config. If you aren't using ghost-story server, this is expected`,
+          e
+        );
+      }
+    }
     const response = await timing.timeAsync(
       "upload",
       async () => fetch(url, {
@@ -68,7 +107,8 @@ async function recognize(input, languageCode, timing, options) {
     const data = await response.json();
     const { timeline, transcript } = await extractTimelineAndTranscript(
       data,
-      filePath
+      filePath,
+      whisperConfig
     );
     if (!timeline) {
       throw new Error(
@@ -80,7 +120,7 @@ async function recognize(input, languageCode, timing, options) {
     await prepared.cleanup();
   }
 }
-async function extractTimelineAndTranscript(response, audioPath) {
+async function extractTimelineAndTranscript(response, audioPath, whisperConfig) {
   var _a, _b, _c, _d;
   if (response.segments.length === 0) {
     return { timeline: [], transcript: ((_a = response.text) == null ? void 0 : _a.trim()) ?? "" };
@@ -88,7 +128,10 @@ async function extractTimelineAndTranscript(response, audioPath) {
   const hasNestedWords = (((_c = (_b = response.segments[0]) == null ? void 0 : _b.words) == null ? void 0 : _c.length) ?? 0) > 0;
   if (hasNestedWords) {
     const rawSegments = parseWhisperServerOutput(response.segments);
-    const splitBoundaries = await detectSplitBoundaries(rawSegments, audioPath);
+    const splitBoundaries = (whisperConfig == null ? void 0 : whisperConfig.audioDuration) ? calculateWhisperSplits(
+      whisperConfig.audioDuration,
+      whisperConfig.processors
+    ) : await detectSplitBoundaries(rawSegments, audioPath);
     const timeline2 = extractCorrectedTimeline(rawSegments, { splitBoundaries });
     const transcript = timeline2.map((entry) => entry.text).join(" ");
     return { timeline: timeline2, transcript };

package/dist/utilities/WhisperTimeline.cjs CHANGED Viewed

@@ -302,14 +302,28 @@ function extractCorrectedTimeline(segments, options = {}) {
     if (!segment) continue;
     const segmentStart = segment.segmentStart;
     const segmentEnd = segment.segmentEnd < segment.segmentStart ? segment.segmentStart : segment.segmentEnd;
-    const boundary = detectProcessorBoundary(segment, state);
-    if (boundary.isBoundary) {
-      state.cumulativeOffset = getBetterCumulativeOffset(
-        state,
-        segment,
-        splitBoundaries,
-        usedSplits
-      );
+    if (splitBoundaries.length > 0) {
+      const boundary = splitBoundaries.find((boundary2) => {
+        return Math.abs(boundary2 - segmentStart) < 2;
+      });
+      if (boundary) {
+        state.cumulativeOffset = getBetterCumulativeOffset(
+          state,
+          segment,
+          splitBoundaries,
+          usedSplits
+        );
+      }
+    } else {
+      const boundary = detectProcessorBoundary(segment, state);
+      if (boundary.isBoundary) {
+        state.cumulativeOffset = getBetterCumulativeOffset(
+          state,
+          segment,
+          splitBoundaries,
+          usedSplits
+        );
+      }
     }
     const nextSegment = segments[i + 1] ?? null;
     if (nextSegment && isTimeTravelingSegment(nextSegment)) {

package/dist/utilities/WhisperTimeline.js CHANGED Viewed

@@ -272,14 +272,28 @@ function extractCorrectedTimeline(segments, options = {}) {
     if (!segment) continue;
     const segmentStart = segment.segmentStart;
     const segmentEnd = segment.segmentEnd < segment.segmentStart ? segment.segmentStart : segment.segmentEnd;
-    const boundary = detectProcessorBoundary(segment, state);
-    if (boundary.isBoundary) {
-      state.cumulativeOffset = getBetterCumulativeOffset(
-        state,
-        segment,
-        splitBoundaries,
-        usedSplits
-      );
+    if (splitBoundaries.length > 0) {
+      const boundary = splitBoundaries.find((boundary2) => {
+        return Math.abs(boundary2 - segmentStart) < 2;
+      });
+      if (boundary) {
+        state.cumulativeOffset = getBetterCumulativeOffset(
+          state,
+          segment,
+          splitBoundaries,
+          usedSplits
+        );
+      }
+    } else {
+      const boundary = detectProcessorBoundary(segment, state);
+      if (boundary.isBoundary) {
+        state.cumulativeOffset = getBetterCumulativeOffset(
+          state,
+          segment,
+          splitBoundaries,
+          usedSplits
+        );
+      }
     }
     const nextSegment = segments[i + 1] ?? null;
     if (nextSegment && isTimeTravelingSegment(nextSegment)) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@storyteller-platform/ghost-story",
-  "version": "0.1.10",
+  "version": "0.1.11",
   "description": "An easy-to-use speech toolset. Fork of the original echogarden project.",
   "author": "Thomas F. K. Jorna",
   "license": "GPL-3.0",