npm - @storyteller-platform/align - Versions diffs - 0.0.1 - Mend

@storyteller-platform/align 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (96) hide show

package/LICENSE.txt +21 -0
package/README.md +3 -0
package/dist/align/align.cjs +525 -0
package/dist/align/align.d.cts +58 -0
package/dist/align/align.d.ts +58 -0
package/dist/align/align.js +458 -0
package/dist/align/fuzzy.cjs +164 -0
package/dist/align/fuzzy.d.cts +6 -0
package/dist/align/fuzzy.d.ts +6 -0
package/dist/align/fuzzy.js +141 -0
package/dist/align/getSentenceRanges.cjs +304 -0
package/dist/align/getSentenceRanges.d.cts +31 -0
package/dist/align/getSentenceRanges.d.ts +31 -0
package/dist/align/getSentenceRanges.js +277 -0
package/dist/align/parse.cjs +63 -0
package/dist/align/parse.d.cts +30 -0
package/dist/align/parse.d.ts +30 -0
package/dist/align/parse.js +51 -0
package/dist/chunk-BIEQXUOY.js +50 -0
package/dist/cli/bin.cjs +368 -0
package/dist/cli/bin.d.cts +1 -0
package/dist/cli/bin.d.ts +1 -0
package/dist/cli/bin.js +319 -0
package/dist/common/ffmpeg.cjs +232 -0
package/dist/common/ffmpeg.d.cts +33 -0
package/dist/common/ffmpeg.d.ts +33 -0
package/dist/common/ffmpeg.js +196 -0
package/dist/common/logging.cjs +45 -0
package/dist/common/logging.d.cts +5 -0
package/dist/common/logging.d.ts +5 -0
package/dist/common/logging.js +12 -0
package/dist/common/parse.cjs +73 -0
package/dist/common/parse.d.cts +28 -0
package/dist/common/parse.d.ts +28 -0
package/dist/common/parse.js +56 -0
package/dist/common/shell.cjs +30 -0
package/dist/common/shell.d.cts +3 -0
package/dist/common/shell.d.ts +3 -0
package/dist/common/shell.js +7 -0
package/dist/index.cjs +37 -0
package/dist/index.d.cts +12 -0
package/dist/index.d.ts +12 -0
package/dist/index.js +11 -0
package/dist/markup/__tests__/markup.test.cjs +464 -0
package/dist/markup/__tests__/markup.test.d.cts +2 -0
package/dist/markup/__tests__/markup.test.d.ts +2 -0
package/dist/markup/__tests__/markup.test.js +441 -0
package/dist/markup/markup.cjs +316 -0
package/dist/markup/markup.d.cts +24 -0
package/dist/markup/markup.d.ts +24 -0
package/dist/markup/markup.js +254 -0
package/dist/markup/parse.cjs +55 -0
package/dist/markup/parse.d.cts +17 -0
package/dist/markup/parse.d.ts +17 -0
package/dist/markup/parse.js +43 -0
package/dist/markup/segmentation.cjs +87 -0
package/dist/markup/segmentation.d.cts +8 -0
package/dist/markup/segmentation.d.ts +8 -0
package/dist/markup/segmentation.js +67 -0
package/dist/markup/semantics.cjs +79 -0
package/dist/markup/semantics.d.cts +6 -0
package/dist/markup/semantics.d.ts +6 -0
package/dist/markup/semantics.js +53 -0
package/dist/process/AudioEncoding.cjs +16 -0
package/dist/process/AudioEncoding.d.cts +8 -0
package/dist/process/AudioEncoding.d.ts +8 -0
package/dist/process/AudioEncoding.js +0 -0
package/dist/process/__tests__/processAudiobook.test.cjs +232 -0
package/dist/process/__tests__/processAudiobook.test.d.cts +2 -0
package/dist/process/__tests__/processAudiobook.test.d.ts +2 -0
package/dist/process/__tests__/processAudiobook.test.js +209 -0
package/dist/process/mime.cjs +43 -0
package/dist/process/mime.d.cts +3 -0
package/dist/process/mime.d.ts +3 -0
package/dist/process/mime.js +24 -0
package/dist/process/parse.cjs +84 -0
package/dist/process/parse.d.cts +28 -0
package/dist/process/parse.d.ts +28 -0
package/dist/process/parse.js +73 -0
package/dist/process/processAudiobook.cjs +220 -0
package/dist/process/processAudiobook.d.cts +24 -0
package/dist/process/processAudiobook.d.ts +24 -0
package/dist/process/processAudiobook.js +166 -0
package/dist/process/ranges.cjs +203 -0
package/dist/process/ranges.d.cts +15 -0
package/dist/process/ranges.d.ts +15 -0
package/dist/process/ranges.js +137 -0
package/dist/transcribe/parse.cjs +149 -0
package/dist/transcribe/parse.d.cts +114 -0
package/dist/transcribe/parse.d.ts +114 -0
package/dist/transcribe/parse.js +143 -0
package/dist/transcribe/transcribe.cjs +400 -0
package/dist/transcribe/transcribe.d.cts +41 -0
package/dist/transcribe/transcribe.d.ts +41 -0
package/dist/transcribe/transcribe.js +330 -0
package/package.json +96 -0

package/dist/transcribe/transcribe.js ADDED Viewed

@@ -0,0 +1,330 @@
+import {
+  __callDispose,
+  __using
+} from "../chunk-BIEQXUOY.js";
+import { mkdir, readFile, readdir, writeFile } from "node:fs/promises";
+import * as os from "node:os";
+import { basename, extname, join, resolve } from "node:path";
+import { AsyncSemaphore } from "@esfx/async-semaphore";
+import { isAudioFile } from "@storyteller-platform/audiobook";
+import {
+  applyLegacyCpuFallback,
+  createAggregator,
+  ensureWhisperInstalled,
+  formatSingleReport,
+  recognize
+} from "@storyteller-platform/ghost-story";
+async function transcribe(input, output, locale, options) {
+  var _a;
+  if (process.env["DEBUG_TRANSCRIBE"] === "true") {
+    const inspector = await import("node:inspector");
+    inspector.open(9231, "0.0.0.0", true);
+  }
+  const semaphore = new AsyncSemaphore(options.parallelism ?? 1);
+  const controller = new AbortController();
+  const signal = AbortSignal.any([
+    options.signal ?? new AbortSignal(),
+    controller.signal
+  ]);
+  await mkdir(output, { recursive: true });
+  const allFiles = await readdir(input, { recursive: true });
+  const filenames = allFiles.filter((f) => isAudioFile(f));
+  if (!filenames.length) {
+    throw new Error(
+      `Failed to transcribe audio: found no audio files in ${input}`
+    );
+  }
+  const engine = options.engine ?? "whisper.cpp";
+  const model = options.model ?? "tiny.en";
+  if (engine === "whisper.cpp") {
+    await ensureWhisperInstalled({
+      model,
+      printOutput: ["debug", "info"].includes(
+        ((_a = options.logger) == null ? void 0 : _a.level) ?? "silent"
+      ),
+      signal
+    });
+  }
+  const transcriptions = [];
+  function aborted() {
+    return signal.aborted;
+  }
+  const perFileProgress = /* @__PURE__ */ new Map();
+  const timing = createAggregator();
+  timing.setMetadata("engine", engine);
+  timing.setMetadata("parallelization", options.parallelism ?? 1);
+  timing.setMetadata("processors", options.processors ?? 1);
+  timing.setMetadata("threads", options.threads ?? 4);
+  await Promise.all(
+    filenames.map(async (filename) => {
+      var _a2, _b, _c;
+      var _stack = [];
+      try {
+        if (aborted()) throw new Error("Aborted");
+        const filepath = join(input, filename);
+        const transcriptionFilepath = join(
+          output,
+          `${basename(filename, extname(filename))}.json`
+        );
+        try {
+          await readFile(transcriptionFilepath, {
+            encoding: "utf-8",
+            signal
+          });
+          (_a2 = options.logger) == null ? void 0 : _a2.info(`Found existing transcription for ${filepath}`);
+          transcriptions.push(transcriptionFilepath);
+        } catch {
+        }
+        if (aborted()) throw new Error("Aborted");
+        const stack = __using(_stack, new DisposableStack());
+        stack.defer(() => {
+          semaphore.release();
+        });
+        await semaphore.wait();
+        function onFileProgress(progress) {
+          var _a3, _b2;
+          perFileProgress.set(filename, progress);
+          const updatedProgress = Array.from(perFileProgress.values()).reduce((acc, p) => acc + p) / filenames.length;
+          (_a3 = options.logger) == null ? void 0 : _a3.info(
+            `Progress: ${Math.floor(updatedProgress * 100)}%`
+          );
+          (_b2 = options.onProgress) == null ? void 0 : _b2.call(options, updatedProgress);
+        }
+        const transcription = await transcribeFile(filepath, locale, {
+          ...options,
+          signal,
+          engine,
+          model,
+          processors: options.processors ?? 1,
+          threads: options.threads ?? 4,
+          onProgress: onFileProgress
+        });
+        (_b = options.logger) == null ? void 0 : _b.info(
+          formatSingleReport(
+            transcription.timing,
+            `Transcription Timing Report for ${filepath}`
+          )
+        );
+        timing.add(transcription.timing);
+        await writeFile(
+          transcriptionFilepath,
+          JSON.stringify({
+            transcript: transcription.transcript,
+            timeline: transcription.timeline
+          }),
+          { signal }
+        );
+        transcriptions.push(transcriptionFilepath);
+        (_c = options.onProgress) == null ? void 0 : _c.call(options, (transcriptions.length + 1) / filenames.length);
+      } catch (_) {
+        var _error = _, _hasError = true;
+      } finally {
+        __callDispose(_stack, _error, _hasError);
+      }
+    }).map(
+      (p) => p.catch((e) => {
+        controller.abort(e);
+        throw e;
+      })
+    )
+  );
+  return timing;
+}
+async function transcribeFile(input, locale, options) {
+  var _a, _b;
+  const audioFilepath = resolve(process.cwd(), input);
+  const sharedOptions = {
+    signal: options.signal,
+    language: locale.language
+  };
+  switch (options.engine) {
+    case "whisper.cpp": {
+      const fallbackVariant = getCpuOverrideVariant(
+        options.whisperCpuOverride ?? null
+      );
+      const whisperOptions = await ensureWhisperInstalled({
+        model: options.model,
+        variant: fallbackVariant,
+        printOutput: ["debug", "info"].includes(
+          ((_a = options.logger) == null ? void 0 : _a.level) ?? "silent"
+        ),
+        signal: options.signal
+      });
+      (_b = options.logger) == null ? void 0 : _b.info(`Transcribing audio file ${audioFilepath}`);
+      return recognize(audioFilepath, {
+        engine: options.engine,
+        options: {
+          flashAttention: true,
+          model: getWhisperCppModelId(sharedOptions.language, options.model),
+          processors: options.processors,
+          threads: options.threads,
+          onProgress: (progress) => {
+            var _a2;
+            if (options.onProgress) {
+              options.onProgress(progress);
+              return;
+            }
+            (_a2 = options.logger) == null ? void 0 : _a2.info(
+              `Transcribing ${audioFilepath} progress: ${Math.floor(progress * 100)}%`
+            );
+          },
+          ...whisperOptions
+        },
+        ...sharedOptions
+      });
+    }
+    case "google-cloud": {
+      if (!options.googleCloudApiKey) {
+        throw new Error(
+          "Failed to start transcription with engine google-cloud: missing API key"
+        );
+      }
+      return recognize(audioFilepath, {
+        engine: "google-cloud",
+        options: {
+          apiKey: options.googleCloudApiKey
+        },
+        ...sharedOptions
+      });
+    }
+    case "microsoft-azure": {
+      if (!options.azureServiceRegion) {
+        throw new Error(
+          "Failed to start transcription with engine microsoft-azure: missing service region"
+        );
+      }
+      if (!options.azureSubscriptionKey) {
+        throw new Error(
+          "Failed to start transcription with engine microsoft-azure: missing subscription key"
+        );
+      }
+      return recognize(audioFilepath, {
+        engine: "microsoft-azure",
+        options: {
+          serviceRegion: options.azureServiceRegion,
+          subscriptionKey: options.azureSubscriptionKey
+        },
+        ...sharedOptions
+      });
+    }
+    case "amazon-transcribe": {
+      if (!options.amazonTranscribeRegion) {
+        throw new Error(
+          "Failed to start transcription with engine amazon-transcribe: missing region"
+        );
+      }
+      if (!options.amazonTranscribeAccessKeyId) {
+        throw new Error(
+          "Failed to start transcription with engine amazon-transcribe: missing access key id"
+        );
+      }
+      if (!options.amazonTranscribeSecretAccessKey) {
+        throw new Error(
+          "Failed to start transcription with engine amazon-transcribe: missing access secret access key"
+        );
+      }
+      if (!options.amazonTranscribeBucketName) {
+        throw new Error(
+          "Failed to start transcription with engine amazon-transcribe: missing bucket name"
+        );
+      }
+      return recognize(audioFilepath, {
+        engine: "amazon-transcribe",
+        options: {
+          region: options.amazonTranscribeRegion,
+          accessKeyId: options.amazonTranscribeAccessKeyId,
+          secretAccessKey: options.amazonTranscribeSecretAccessKey,
+          bucketName: options.amazonTranscribeBucketName
+        },
+        ...sharedOptions
+      });
+    }
+    case "openai-cloud": {
+      return recognize(audioFilepath, {
+        engine: "openai-cloud",
+        options: {
+          ...options.openAiApiKey && { apiKey: options.openAiApiKey },
+          ...options.openAiOrganization && {
+            organization: options.openAiOrganization
+          },
+          ...options.openAiBaseUrl && { baseURL: options.openAiBaseUrl },
+          model: options.openAiModelName ?? "whisper-1"
+        },
+        ...sharedOptions
+      });
+    }
+    case "whisper-server": {
+      if (!options.whisperServerUrl) {
+        throw new Error(
+          "Failed to start transcription with engine whisper-server: missing server url"
+        );
+      }
+      return recognize(audioFilepath, {
+        engine: "whisper-server",
+        options: {
+          baseURL: options.whisperServerUrl,
+          ...options.whisperServerApiKey && {
+            apiKey: options.whisperServerApiKey
+          }
+        },
+        ...sharedOptions
+      });
+    }
+    case "deepgram": {
+      if (!options.deepgramApiKey) {
+        throw new Error(
+          "Failed to start transcription with engine deepgram: missing api key"
+        );
+      }
+      return recognize(audioFilepath, {
+        engine: "deepgram",
+        options: {
+          apiKey: options.deepgramApiKey,
+          // nova-3 is just as cheap as nova-2 and has better performance
+          model: options.deepgramModel ?? "nova-3",
+          punctuate: true
+        },
+        ...sharedOptions
+      });
+    }
+    default: {
+      throw new Error(
+        `Unknown transcription engine: ${options.engine}`
+      );
+    }
+  }
+}
+function getWhisperCppModelId(language, modelType) {
+  if (modelType === "large") return "large-v3-turbo";
+  if (language !== "en" || modelType.startsWith("large")) return modelType;
+  if (modelType.includes(".en")) return modelType;
+  const quant = modelType.indexOf("-q");
+  if (quant === -1) return `${modelType}.en`;
+  return `${modelType.slice(0, quant)}.en${modelType.slice(quant)}`;
+}
+function getCpuOverrideVariant(override) {
+  if (!override) return void 0;
+  const platform = os.platform();
+  const arch = os.arch();
+  if (platform === "linux" && arch === "x64") {
+    const variant = override === "blas" ? "linux-x64-blas" : "linux-x64-cpu";
+    return applyLegacyCpuFallback(variant);
+  }
+  if (platform === "linux" && arch === "arm64") {
+    return "linux-arm64-cpu";
+  }
+  if (platform === "darwin" && arch === "arm64") {
+    return "darwin-arm64-cpu";
+  }
+  if (platform === "darwin" && arch === "x64") {
+    return "darwin-x64-cpu";
+  }
+  if (platform === "win32") {
+    return "windows-x64-cpu";
+  }
+  return void 0;
+}
+export {
+  transcribe,
+  transcribeFile
+};

package/package.json ADDED Viewed

@@ -0,0 +1,96 @@
+{
+  "name": "@storyteller-platform/align",
+  "version": "0.0.1",
+  "description": "A library and CLI for automatically aligning audiobooks and EPUBs to produce Media Overlays",
+  "author": "Shane Friedman",
+  "license": "MIT",
+  "keywords": [
+    "epub",
+    "ebook",
+    "speech-to-text",
+    "audiobook"
+  ],
+  "repository": {
+    "type": "git",
+    "url": "https://gitlab.com/storyteller-platform/storyteller"
+  },
+  "bugs": {
+    "url": "https://gitlab.com/storyteller-platform/storyteller"
+  },
+  "engines": {
+    "node": ">=24"
+  },
+  "os": [
+    "win32",
+    "darwin",
+    "linux"
+  ],
+  "type": "module",
+  "files": [
+    "dist",
+    "README.md",
+    "LICENSE.txt"
+  ],
+  "exports": {
+    ".": {
+      "import": {
+        "types": "./dist/index.d.ts",
+        "default": "./dist/index.js"
+      },
+      "require": {
+        "types": "./dist/index.d.cts",
+        "default": "./dist/index.cjs"
+      }
+    }
+  },
+  "scripts": {
+    "compile": "NODE_OPTIONS=--experimental-import-meta-resolve ./scripts/bundle.js && node --build-sea sea-config.json",
+    "build": "tsup",
+    "prepack": "yarn build",
+    "test": "yarn tsx --test"
+  },
+  "bin": "./dist/cli/bin.js",
+  "dependencies": {
+    "@echogarden/icu-segmentation-wasm": "^0.2.2",
+    "@echogarden/text-segmentation": "^0.4.1",
+    "@esfx/async-semaphore": "^1.0.0",
+    "@optique/core": "^0.10.7",
+    "@optique/run": "^0.10.7",
+    "@storyteller-platform/audiobook": "^0.3.7",
+    "@storyteller-platform/epub": "^0.4.6",
+    "@storyteller-platform/ghost-story": "^0.1.2",
+    "chalk": "^5.4.1",
+    "cli-progress": "^3.12.0",
+    "esbuild": "^0.27.3",
+    "memoize": "^10.2.0",
+    "pino": "^10.3.1",
+    "pino-pretty": "^13.1.3",
+    "zod": "^3.24.0"
+  },
+  "devDependencies": {
+    "@storyteller-platform/eslint": "0.1.0",
+    "@storyteller-platform/tsup": "0.1.0",
+    "@tsconfig/strictest": "^2.0.5",
+    "@types/cli-progress": "^3",
+    "@types/node": "^24.0.0",
+    "eslint": "^8.0.0",
+    "tsup": "^8.5.0",
+    "tsx": "^4.19.2",
+    "typescript": "~5.8.3"
+  },
+  "publishConfig": {
+    "access": "public",
+    "exports": {
+      ".": {
+        "import": {
+          "types": "./dist/index.d.ts",
+          "default": "./dist/index.js"
+        },
+        "require": {
+          "types": "./dist/index.d.cts",
+          "default": "./dist/index.cjs"
+        }
+      }
+    }
+  }
+}