@twick/cloud-subtitle-video 0.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,128 @@
1
+ # @twick/cloud-subtitle-video
2
+
3
+ **Generate complete subtitle video projects from video URLs using Google Cloud Speech-to-Text.**
4
+
5
+ Automatically transcribes video audio, creates timed subtitle tracks, and optionally exports project JSONs to Google Cloud Storage. Perfect for programmatic subtitle generation at scale.
6
+
7
+ ## What Problem Does This Solve?
8
+
9
+ - **Automated subtitle generation** — Convert video URLs into complete Twick projects with timed subtitles
10
+ - **Word-level timing** — Precise subtitle placement using Google Speech-to-Text API
11
+ - **Serverless processing** — Deploy as AWS Lambda for automatic scaling
12
+ - **Multi-language support** — Generate subtitles in multiple languages and fonts
13
+
14
+ ## Input → Output
15
+
16
+ **Input:** Video URL + optional configuration
17
+ ```json
18
+ {
19
+ "videoUrl": "https://example.com/video.mp4",
20
+ "videoSize": { "width": 1920, "height": 1080 },
21
+ "language": "english",
22
+ "languageFont": "english",
23
+ "shouldExport": false
24
+ }
25
+ ```
26
+
27
+ **Output:** Complete Twick project JSON with video track + subtitle track
28
+ ```json
29
+ {
30
+ "properties": { "width": 1920, "height": 1080 },
31
+ "tracks": [
32
+ { "id": "video", "type": "video", "elements": [...] },
33
+ { "id": "subtitle", "type": "caption", "elements": [...] }
34
+ ],
35
+ "version": 1
36
+ }
37
+ ```
38
+
39
+ **Where it runs:** AWS Lambda container image (Linux/AMD64)
40
+
41
+ ## Installation
42
+
43
+ ```bash
44
+ npm install -D @twick/cloud-subtitle-video
45
+ ```
46
+
47
+ ## Quick Start
48
+
49
+ ### 1. Scaffold AWS Lambda Template
50
+
51
+ ```bash
52
+ npx twick-subtitle-video init
53
+ ```
54
+
55
+ ### 2. Build Docker Image
56
+
57
+ ```bash
58
+ npx twick-subtitle-video build twick-subtitle-video:latest
59
+ ```
60
+
61
+ ### 3. Configure Google Cloud
62
+
63
+ **Required:**
64
+ - Google Cloud project with Speech-to-Text API enabled
65
+ - Service account with permissions:
66
+ - `roles/speech.client` (or `speech.batchRecognize`)
67
+ - `roles/storage.objectCreator`
68
+ - `roles/storage.objectViewer`
69
+
70
+ **Environment variables:**
71
+ - `GOOGLE_CLOUD_PROJECT` (required) — Your GCP project ID
72
+ - `GOOGLE_CLOUD_STORAGE_BUCKET` (optional) — GCS bucket for exports (default: `"twick-video"`)
73
+
74
+ **Credentials (choose one):**
75
+ - **AWS Secrets Manager** (recommended for Lambda):
76
+ - `GCP_SERVICE_ACCOUNT_SECRET_NAME` — Secret name containing GCP service account JSON
77
+ - `AWS_REGION` (optional) — Region for Secrets Manager (default: `"ap-south-1"`)
78
+ - **File-based** (alternative):
79
+ - `GOOGLE_APPLICATION_CREDENTIALS` — Path to service account JSON file
80
+
81
+ ### 4. Deploy to AWS Lambda
82
+
83
+ ```bash
84
+ # Login to ECR
85
+ npx twick-subtitle-video ecr-login us-east-1 YOUR_ACCOUNT_ID
86
+
87
+ # Push to ECR
88
+ npx twick-subtitle-video push twick-subtitle-video:latest us-east-1 YOUR_ACCOUNT_ID
89
+ ```
90
+
91
+ ## Deployment (High Level)
92
+
93
+ 1. **Scaffold** the Lambda container template
94
+ 2. **Configure** Google Cloud credentials (via Secrets Manager or file mount)
95
+ 3. **Set environment variables** (GCP project, bucket, etc.)
96
+ 4. **Build and push** Docker image to ECR
97
+ 5. **Create Lambda function** using the ECR image
98
+
99
+ The Lambda handler expects:
100
+ - **Event format:** `{ videoUrl, videoSize?, language?, languageFont?, shouldExport? }`
101
+ - **Response:** Twick project JSON (or GCS URL if `shouldExport: true`)
102
+
103
+ ## Programmatic Usage
104
+
105
+ Use the core functions directly:
106
+
107
+ ```js
108
+ import { createSubtitleProject } from '@twick/cloud-subtitle-video';
109
+
110
+ const project = await createSubtitleProject({
111
+ videoUrl: 'https://example.com/video.mp4',
112
+ videoSize: { width: 1920, height: 1080 },
113
+ language: 'english',
114
+ languageFont: 'english',
115
+ });
116
+
117
+ console.log(project.tracks); // Array of video and subtitle tracks
118
+ ```
119
+
120
+ ## Technical Details
121
+
122
+ - **API:** Google Cloud Speech-to-Text API v2
123
+ - **Model:** `"long"` (optimized for longer audio)
124
+ - **Audio format:** FLAC, 16kHz, mono
125
+ - **Features:** Word-level timing offsets for precise subtitle placement
126
+ - **Auto-selection:** Synchronous (short audio) or batch (long audio >6s)
127
+
128
+ For detailed setup instructions, see the complete deployment guide in the repository.
@@ -0,0 +1,125 @@
1
+ #!/usr/bin/env node
2
+ import { fileURLToPath } from 'url';
3
+ import { dirname, join } from 'path';
4
+ import fs from 'fs';
5
+ import { spawn } from 'child_process';
6
+
7
// Resolve this script's own path (ESM modules have no __dirname) and the
// package root one directory up, where the platform/ template assets ship.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const pkgRoot = join(__dirname, '..');
10
+
11
/**
 * Scaffolds the AWS Lambda container template into `destDir`.
 *
 * Copies the Dockerfile to the destination root and handler.js into
 * `platform/aws/` so the layout matches the Dockerfile's
 * CMD ["platform/aws/handler.handler"]. Also writes a minimal package.json
 * (only when absent) so `npm ci` in the Docker build benefits from layer caching.
 *
 * @param {string} destDir - Directory to scaffold into (created if missing)
 */
function copyTemplate(destDir) {
  const templateDir = join(pkgRoot, 'platform', 'aws');

  // mkdirSync with { recursive: true } is idempotent and creates destDir
  // itself, so no existsSync guards are needed.
  const platformAwsDir = join(destDir, 'platform', 'aws');
  fs.mkdirSync(platformAwsDir, { recursive: true });

  // Dockerfile lives at the scaffold root; it references platform/aws/handler.handler.
  fs.copyFileSync(join(templateDir, 'Dockerfile'), join(destDir, 'Dockerfile'));

  // handler.js must sit under platform/aws/ to match the CMD path above.
  fs.copyFileSync(join(templateDir, 'handler.js'), join(platformAwsDir, 'handler.js'));

  // Minimal package.json to enable Docker layer caching (npm ci).
  // Written only when missing so user edits are never clobbered.
  const pkgJsonPath = join(destDir, 'package.json');
  if (!fs.existsSync(pkgJsonPath)) {
    const pkg = {
      name: 'twick-subtitle-video-runtime',
      type: 'module',
      dependencies: {
        '@twick/cloud-subtitle-video': 'latest',
        'fluent-ffmpeg': '^2.1.2',
        '@ffmpeg-installer/ffmpeg': '^1.1.0',
        '@ffprobe-installer/ffprobe': '^1.1.0',
      },
    };
    fs.writeFileSync(pkgJsonPath, JSON.stringify(pkg, null, 2));
  }
}
47
+
48
/**
 * Runs a command with inherited stdio through a shell.
 *
 * @param {string} cmd - Command name, or a full shell pipeline when args is empty
 * @param {string[]} args - Argument list (may be empty)
 * @param {Object} [opts] - Extra options forwarded to child_process.spawn
 * @returns {Promise<void>} Resolves on exit code 0, rejects otherwise
 */
function run(cmd, args, opts = {}) {
  return new Promise((resolve, reject) => {
    // shell: true lets callers pass full pipelines (e.g. `aws ... | docker login`)
    // as a single string with an empty args array; spawn accepts an empty
    // args array in both cases, so no branching is needed.
    const ps = spawn(cmd, args, { stdio: 'inherit', shell: true, ...opts });
    // Surface spawn failures (e.g. shell not found) instead of hanging forever.
    ps.on('error', reject);
    ps.on('close', (code) => (code === 0 ? resolve() : reject(new Error(`${cmd} exited ${code}`))));
  });
}
56
+
57
/**
 * CLI entry point: parses process.argv and dispatches to one of the
 * init / build / ecr-login / push commands, or prints usage.
 *
 * @returns {Promise<void>}
 * @throws {Error} On missing required arguments or an unknown command
 */
async function main() {
  const [command, ...rest] = process.argv.slice(2);

  // No command, or an explicit help flag, prints usage and exits successfully.
  if (!command || ['-h', '--help', 'help'].includes(command)) {
    console.log(`
Usage: twick-subtitle-video <command> [options]

Commands:
  init [dir]                        Scaffold AWS container template into [dir] (default: ./twick-subtitle-video-aws)
  build <image> [dir]               Docker build image from [dir] (default: ./twick-subtitle-video-aws)
  ecr-login <region> <accountId>    Login docker to ECR
  push <image> <region> <accountId> Push image to ECR (repo must exist)

Examples:
  twick-subtitle-video init
  twick-subtitle-video build my-repo:latest
  twick-subtitle-video ecr-login us-east-1 123456789012
  twick-subtitle-video push my-repo:latest us-east-1 123456789012
`);
    return;
  }

  // init: copy the packaged AWS template into a local directory.
  if (command === 'init') {
    const dir = rest[0] || 'twick-subtitle-video-aws';
    copyTemplate(dir);
    console.log(`✔ Scaffolded AWS runtime into ./${dir}`);
    return;
  }

  // build: docker build the scaffolded directory into a tagged image.
  if (command === 'build') {
    const image = rest[0];
    const dir = rest[1] || 'twick-subtitle-video-aws';
    if (!image) throw new Error('Image name required. e.g., my-repo:latest');
    // Build for linux/amd64 platform to avoid creating multi-arch manifest index
    // This reduces the number of artifacts pushed to the registry
    await run('docker', ['build', '--platform', 'linux/amd64', '-t', image, dir]);
    return;
  }

  // ecr-login: pipe an ECR auth token into `docker login` (runs as one shell string).
  if (command === 'ecr-login') {
    const region = rest[0];
    const accountId = rest[1];
    if (!region || !accountId) throw new Error('Usage: ecr-login <region> <accountId>');
    const registry = `${accountId}.dkr.ecr.${region}.amazonaws.com`;
    await run(`aws ecr get-login-password --region ${region} | docker login --username AWS --password-stdin ${registry}`, []);
    return;
  }

  // push: re-tag the local image with the ECR registry prefix and push it.
  if (command === 'push') {
    const image = rest[0];
    const region = rest[1];
    const accountId = rest[2];
    if (!image || !region || !accountId) throw new Error('Usage: push <image> <region> <accountId>');
    // NOTE(review): split(':') assumes a plain `repo:tag` name; an image name
    // containing a registry host with a port would be split wrong — confirm callers.
    const [repo, tag = 'latest'] = image.split(':');
    const registry = `${accountId}.dkr.ecr.${region}.amazonaws.com`;
    const remote = `${registry}/${repo}:${tag}`;
    await run('docker', ['tag', `${repo}:${tag}`, remote]);
    await run('docker', ['push', remote]);
    console.log(`✔ Pushed ${remote}`);
    return;
  }

  throw new Error(`Unknown command: ${command}`);
}
121
+
122
// Top-level entry: report the failure message and exit non-zero so shell
// callers and CI can detect errors.
main().catch((err) => {
  console.error(err.message || err);
  process.exit(1);
});
@@ -0,0 +1,96 @@
1
+ import fs from "fs";
2
+ import { join } from "path";
3
+ import { mkdtemp, readFile, rm } from "fs/promises";
4
+ import { tmpdir } from "os";
5
+ import { execFile } from "child_process";
6
+ import { promisify } from "util";
7
+ import { Readable, pipeline } from "stream";
8
+
9
+ // These packages provide prebuilt ffmpeg/ffprobe binaries. Types are not bundled,
10
+ // so we import them as `any` to keep TypeScript satisfied.
11
+ import ffmpeg from "@ffmpeg-installer/ffmpeg";
12
+ import ffprobe from "@ffprobe-installer/ffprobe";
13
+
14
+
15
// Promisified wrappers so ffprobe/ffmpeg invocations and stream piping can be awaited.
const execFileAsync = promisify(execFile);
const pipelineAsync = promisify(pipeline);
// Absolute paths to the prebuilt binaries shipped by the installer packages.
const ffmpegPath = ffmpeg.path;
const ffprobePath = ffprobe.path;
19
+
20
/**
 * Audio encoding configuration keyed by format name.
 * Currently supports FLAC only, tuned for the Google Speech-to-Text API
 * (16 kHz mono is the recommended input for its recognition models).
 * @type {Object<string, Object>}
 */
export const AUDIO_CONFIG = {
  FLAC: {
    codec: "flac",
    encoding: "FLAC",
    sampleRate: 16000,
    channelCount: 1,
    extension: "flac",
    contentType: "audio/flac",
  },
};
35
+
36
/**
 * Extracts audio from a video URL and converts it to a format suitable for transcription.
 * Downloads the video, extracts audio using ffmpeg, and returns the audio buffer and duration.
 *
 * @param {string} videoUrl - Publicly accessible HTTP(S) URL to the video file
 * @param {string} [format="FLAC"] - Audio output format (currently only "FLAC" supported)
 * @returns {Promise<Object>} Object containing audioBuffer (Buffer) and duration (number in seconds; 0 when ffprobe fails)
 * @throws {Error} If the format is unsupported, or video download/extraction fails
 */
export const extractAudioBufferFromVideo = async (videoUrl, format = "FLAC") => {
  // Fail fast on unknown formats instead of crashing later on an undefined config.
  const config = AUDIO_CONFIG[format];
  if (!config) {
    throw new Error(
      `Unsupported audio format: ${format}. Supported: ${Object.keys(AUDIO_CONFIG).join(", ")}`
    );
  }

  const videoResponse = await fetch(videoUrl);
  if (!videoResponse.ok) {
    throw new Error(`Failed to download video: ${videoResponse.status} ${videoResponse.statusText}`);
  }
  if (!videoResponse.body) {
    throw new Error("Video response has no body");
  }

  const tmpBase = await mkdtemp(join(tmpdir(), 'mcp-'));
  try {
    const inputPath = join(tmpBase, 'input_video');
    // Use the configured extension (".flac"), not the format key ("FLAC").
    const outputPath = join(tmpBase, `output_audio.${config.extension}`);

    // Stream the download straight to disk so large videos never sit in memory.
    const videoStream = Readable.fromWeb(videoResponse.body);
    await pipelineAsync(videoStream, fs.createWriteStream(inputPath));

    // Probe the container duration; a probe failure is non-fatal and simply
    // reports duration 0 (callers decide how to handle that).
    let duration = 0;
    try {
      const { stdout } = await execFileAsync(ffprobePath, [
        '-v', 'error',
        '-show_entries', 'format=duration',
        '-of', 'default=noprint_wrappers=1:nokey=1',
        inputPath,
      ]);
      duration = parseFloat(stdout.toString().trim()) || 0;
    } catch (err) {
      console.warn('Failed to get duration using ffprobe');
    }

    try {
      // execFile requires string arguments, so numeric config values are
      // stringified (passing the raw number 16000 throws ERR_INVALID_ARG_TYPE).
      await execFileAsync(ffmpegPath, [
        '-y',
        '-i', inputPath,
        '-vn',                               // Strip video
        '-ac', String(config.channelCount),  // Mono channel (Required for STT)
        '-ar', String(config.sampleRate),    // 16kHz is ideal for Chirp
        '-c:a', config.codec,                // Use FLAC codec
        outputPath,
      ]);
    } catch (err) {
      const stderr = err?.stderr?.toString?.().trim?.() || "";
      throw new Error(`ffmpeg extraction failed: ${stderr}`);
    }

    const audioBuffer = await readFile(outputPath);
    return { audioBuffer, duration };
  } finally {
    // Clean up the temp directory on every path — success, download failure,
    // and ffmpeg failure alike (the original leaked it when the pipeline threw).
    await rm(tmpBase, { recursive: true, force: true });
  }
};
@@ -0,0 +1,177 @@
1
+ import { Storage } from "@google-cloud/storage";
2
+ import {
3
+ SecretsManagerClient,
4
+ GetSecretValueCommand,
5
+ } from "@aws-sdk/client-secrets-manager";
6
+ import fs from "fs";
7
+
8
/**
 * Google Cloud Project ID. Can be set via GOOGLE_CLOUD_PROJECT environment variable.
 * @type {string}
 */
export const CLOUD_PROJECT_ID = process.env.GOOGLE_CLOUD_PROJECT;

/**
 * Google Cloud region for Speech-to-Text API. Currently set to "global".
 * @type {string}
 */
export const CLOUD_REGION = "global";

/**
 * AWS region, read from the AWS_REGION environment variable.
 * May be undefined; consumers fall back to "ap-south-1" where needed.
 * @type {string|undefined}
 */
export const AWS_REGION = process.env.AWS_REGION;

/**
 * Google Cloud Storage bucket name for storing audio files and project exports.
 * Can be set via GOOGLE_CLOUD_STORAGE_BUCKET environment variable.
 * Falls back to "twick-video" (the documented default) when unset.
 * @type {string}
 */
export const CLOUD_STORAGE_BUCKET = process.env.GOOGLE_CLOUD_STORAGE_BUCKET || "twick-video";

// Module-level cache so AWS Secrets Manager is queried at most once per runtime.
let googleCredentials = null;
29
+
30
/**
 * Retrieves Google Cloud service account credentials from AWS Secrets Manager.
 *
 * If GCP_SERVICE_ACCOUNT_SECRET_NAME is set, fetches the JSON credentials from AWS Secrets Manager.
 * If not set, returns undefined (useful when credentials are provided via GOOGLE_APPLICATION_CREDENTIALS).
 * Parsed credentials are cached at module level so the secret is fetched at most once per runtime.
 *
 * @returns {Promise<Object|undefined>} Parsed JSON credentials object or undefined
 * @throws {Error} If fetching from Secrets Manager fails or the secret is not valid service-account JSON
 */
export const getGoogleCredentials = async () => {
  if (googleCredentials) {
    return googleCredentials;
  }
  try {
    const secretName = process.env.GCP_SERVICE_ACCOUNT_SECRET_NAME;
    if (!secretName) {
      console.log(
        "No secret name configured, skipping Google credentials initialization"
      );
      return;
    }

    const client = new SecretsManagerClient({
      region: process.env.AWS_REGION || "ap-south-1",
    });

    const response = await client.send(
      new GetSecretValueCommand({
        SecretId: secretName,
        VersionStage: "AWSCURRENT", // VersionStage defaults to AWSCURRENT if unspecified
      })
    );

    // Binary-only secrets carry SecretBinary instead of SecretString; fail with
    // a clear message rather than letting JSON.parse choke on undefined.
    if (!response.SecretString) {
      throw new Error(
        `Secret '${secretName}' has no SecretString value; expected a service account JSON string.`
      );
    }
    const parsedCredentials = JSON.parse(response.SecretString);

    // Validate that the credentials contain required fields
    if (!parsedCredentials.client_email) {
      throw new Error(
        `Invalid Google Cloud credentials: missing 'client_email' field. ` +
        `The secret must contain a valid service account JSON with 'client_email', ` +
        `'private_key', and 'type' fields.`
      );
    }

    if (!parsedCredentials.private_key) {
      throw new Error(
        `Invalid Google Cloud credentials: missing 'private_key' field.`
      );
    }

    // Wrong type is suspicious but not fatal — warn and continue.
    if (parsedCredentials.type !== "service_account") {
      console.warn(
        `Warning: credentials type is '${parsedCredentials.type}', expected 'service_account'`
      );
    }

    googleCredentials = parsedCredentials;
    return googleCredentials;
  } catch (error) {
    console.error(
      `Failed to initialize Google credentials from secret ::`,
      error
    );
    throw error;
  }
};
95
+
96
// Lazily-created singleton Storage client, shared across invocations.
let storage = null;

/**
 * Gets or initializes the Google Cloud Storage client instance.
 * Credentials come from Secrets Manager when configured, otherwise the
 * client falls back to ambient credentials (e.g. GOOGLE_APPLICATION_CREDENTIALS).
 *
 * @returns {Promise<Storage>} Initialized Storage client
 */
const getStorage = async () => {
  if (storage === null) {
    const credentials = await getGoogleCredentials();
    storage = new Storage({
      projectId: CLOUD_PROJECT_ID,
      credentials,
    });
  }
  return storage;
};
112
+
113
/**
 * Uploads a file to Google Cloud Storage.
 *
 * @param {Object} params - Upload parameters
 * @param {Buffer|string} params.data - File data to upload (Buffer or string)
 * @param {string} [params.folder] - Optional folder path in the bucket
 * @param {string} params.fileName - Name of the file to create
 * @param {string} params.contentType - MIME type of the file
 * @param {boolean} [params.isPublic=false] - If true, returns a signed URL valid for 1 hour
 * @returns {Promise<string>} Public URL or signed URL (if isPublic=true) to the uploaded file
 */
export const uploadFile = async ({
  data,
  folder,
  fileName,
  contentType,
  isPublic = false,
}) => {
  const bucketName = CLOUD_STORAGE_BUCKET;
  const storageClient = await getStorage();

  // Object key: optional folder prefix followed by the file name.
  const destinationPath = folder ? `${folder}/${fileName}` : fileName;
  const file = storageClient.bucket(bucketName).file(destinationPath);

  // Single-request (non-resumable) upload; payloads here are small.
  await file.save(data, { contentType, resumable: false });

  if (!isPublic) {
    return `https://storage.googleapis.com/${bucketName}/${destinationPath}`;
  }

  // Generate a V4 signed URL valid for one hour instead of making the object public.
  const expires = new Date();
  expires.setHours(expires.getHours() + 1);
  const [signedUrl] = await file.getSignedUrl({
    version: "v4",
    action: "read",
    expires,
  });
  return signedUrl;
};
159
+
160
/**
 * Converts a Google Cloud Storage URL to a gs:// URI format.
 *
 * @param {string} URI - GCS URL (https://storage.googleapis.com/...) or gs:// URI
 * @returns {string} gs:// URI format
 * @throws {Error} If the URI format is invalid
 */
export const getGCSUri = (URI) => {
  const httpsPrefix = "https://storage.googleapis.com/";

  // Already in gs:// form — pass through untouched.
  if (URI.startsWith("gs://")) {
    return URI;
  }

  // Public HTTPS form — swap the host prefix for the gs:// scheme.
  if (URI.startsWith(httpsPrefix)) {
    return `gs://${URI.slice(httpsPrefix.length)}`;
  }

  throw new Error(
    `Invalid audio URI format. Expected gs://bucket/path or https://storage.googleapis.com/bucket/path, got: ${URI}`
  );
};
package/core/index.js ADDED
@@ -0,0 +1 @@
1
+ export { createSubtitleProject, exportProject } from "./workflow.js";
@@ -0,0 +1,72 @@
1
/**
 * Builds a Twick subtitle video project JSON structure from transcription results.
 *
 * @param {Object} params - Project parameters
 * @param {Array<Object>} params.subtitles - Array of subtitle objects with {t, s, e} properties (s/e in milliseconds)
 * @param {number} params.duration - Video duration in seconds
 * @param {string} params.videoUrl - Source video URL
 * @param {Object} [params.videoSize] - Video dimensions {width, height} (defaults to 720x1280)
 * @returns {Object} Twick project JSON structure with properties, tracks, and version
 */
export const buildProject = ({ subtitles, duration, videoUrl, videoSize }) => {
  const width = videoSize?.width || 720;
  const height = videoSize?.height || 1280;

  // Track 1: the source video, spanning the whole timeline.
  const videoTrack = {
    id: "video",
    type: "video",
    elements: [
      {
        id: "video",
        type: "video",
        s: 0,
        e: duration,
        props: {
          src: videoUrl,
          width,
          height,
        },
      },
    ],
  };

  // Track 2: caption styling shared via track props, one element per phrase.
  const subtitleTrack = {
    id: "subtitle",
    type: "caption",
    props: {
      capStyle: "highlight_bg",
      font: {
        size: 50,
        weight: 700,
        family: "Bangers",
      },
      colors: {
        text: "#ffffff",
        highlight: "#ff4081",
        bgColor: "#444444",
      },
      lineWidth: 0.35,
      stroke: "#000000",
      fontWeight: 700,
      shadowOffset: [-3, 3],
      shadowColor: "#000000",
      x: 0,
      y: 200,
      applyToAll: true,
    },
    // Subtitle timings arrive in milliseconds; the project format uses seconds.
    elements: subtitles.map(({ t, s, e }, index) => ({
      id: `subtitle-${index}`,
      type: "caption",
      s: s / 1000,
      e: e / 1000,
      t,
    })),
  };

  return {
    properties: { width, height },
    tracks: [videoTrack, subtitleTrack],
    version: 1,
  };
};
@@ -0,0 +1,231 @@
1
+ import { SpeechClient } from "@google-cloud/speech/build/src/v2/index.js";
2
+ import {
3
+ CLOUD_PROJECT_ID,
4
+ CLOUD_REGION,
5
+ getGCSUri,
6
+ getGoogleCredentials,
7
+ uploadFile,
8
+ } from "./gc.utils.js";
9
+ import { AUDIO_CONFIG } from "./audio.utils.js";
10
+
11
/**
 * Language code mapping for Google Speech-to-Text API.
 * Keys are the human-readable names accepted by this package's API surface;
 * values are the BCP-47 codes passed to the recognizer. Only English is mapped today.
 * @type {Object<string, string>}
 */
const LANGUAGE_CODE = {
  english: "en-US",
};

/**
 * Speech recognition model to use. "long" model is optimized for longer audio files.
 * @type {string}
 */
const MODEL = "long";
24
+
25
// Lazily-created singleton SpeechClient, reused across invocations.
let speechClient = null;

/**
 * Gets or initializes the Google Cloud Speech-to-Text client.
 * Credentials come from Secrets Manager when configured, otherwise the client
 * falls back to ambient credentials (e.g. GOOGLE_APPLICATION_CREDENTIALS).
 *
 * @returns {Promise<SpeechClient>} Initialized SpeechClient instance
 */
export const getSpeechClient = async () => {
  if (speechClient === null) {
    const credentials = await getGoogleCredentials();
    speechClient = new SpeechClient({
      projectId: CLOUD_PROJECT_ID,
      region: CLOUD_REGION,
      credentials,
    });
  }
  return speechClient;
};
42
+
43
/**
 * Recognizer resource path for Google Speech-to-Text API v2.
 * The implicit "_" recognizer lets the per-request config drive recognition
 * instead of a pre-created recognizer resource.
 * @type {string}
 */
const recognizer = `projects/${CLOUD_PROJECT_ID}/locations/${CLOUD_REGION}/recognizers/_`;
48
+
49
/**
 * Processes a single Speech-to-Text result and groups its words into
 * phrases of up to 4 words each.
 *
 * @param {Object} results - One result object from the API response
 * @returns {Array<Object>} Phrase objects: {t: text, s: startMs, e: endMs, w: word start times in ms}
 */
const processResponse = (results) => {
  // Word-level timings live on the top alternative; no words means no phrases.
  const words = results?.alternatives?.[0]?.words || [];
  if (!words.length) {
    return [];
  }

  // google.protobuf.Duration ({seconds, nanos}) -> milliseconds.
  const toMs = (offset) =>
    offset ? Number(offset.seconds || 0) * 1000 + Number(offset.nanos || 0) / 1e6 : 0;

  const timedWords = words.map(({ word, startOffset, endOffset }) => ({
    word,
    startMs: toMs(startOffset),
    endMs: toMs(endOffset),
  }));

  // Chunk into groups of 4 consecutive words; each group becomes one phrase.
  const GROUP_SIZE = 4;
  const phrases = [];
  for (let start = 0; start < timedWords.length; start += GROUP_SIZE) {
    const group = timedWords.slice(start, start + GROUP_SIZE);
    phrases.push({
      t: group.map((w) => w.word).join(" "),
      s: Math.round(group[0].startMs),
      e: Math.round(group[group.length - 1].endMs),
      w: group.map((w) => Math.round(w.startMs)),
    });
  }
  return phrases;
};
96
+
97
/**
 * Transcribes short audio (typically under 60 seconds) using Google Speech-to-Text.
 * Uses the synchronous recognize method with inline base64 audio content.
 *
 * @param {Object} params - Transcription parameters
 * @param {Buffer} params.audioBuffer - Audio data buffer (FLAC format)
 * @param {string} [params.language="english"] - Language key (e.g. "english")
 * @param {string} [params.format="FLAC"] - Audio format (currently only "FLAC" supported)
 * @returns {Promise<Array<Object>>} Array of phrase objects with text, timings, and word offsets
 * @throws {Error} If transcription fails
 */
export async function transcribeShort({
  audioBuffer,
  language = "english",
  format = "FLAC",
}) {
  const client = await getSpeechClient();

  const request = {
    recognizer,
    config: {
      explicitDecodingConfig: {
        encoding: AUDIO_CONFIG[format].encoding,
        sampleRateHertz: AUDIO_CONFIG[format].sampleRate,
        audioChannelCount: 1,
      },
      languageCodes: [LANGUAGE_CODE[language]],
      model: MODEL,
      features: {
        // Per-word offsets are required for subtitle timing downstream.
        enableWordTimeOffsets: true,
      },
    },
    // Synchronous recognition accepts the audio inline as base64.
    content: audioBuffer.toString("base64"),
  };

  try {
    const [response] = await client.recognize(request);
    // Short audio yields a single result segment.
    return processResponse(response.results?.[0]);
  } catch (err) {
    console.error("Transcription Error:", err.message);
    throw err;
  }
}
142
+
143
/**
 * Transcribes long audio (typically over 60 seconds) using Google Speech-to-Text API.
 * Uses asynchronous batchRecognize method and requires audio to be uploaded to GCS first.
 *
 * @param {Object} params - Transcription parameters
 * @param {Buffer} [params.audioBuffer] - Audio data buffer (required if audioUrl not provided)
 * @param {string} [params.audioUrl] - GCS URI (gs://) or HTTPS URL to audio file (required if audioBuffer not provided)
 * @param {string} [params.language="english"] - Language code (e.g., "english")
 * @param {string} [params.format="FLAC"] - Audio format (currently only "FLAC" supported)
 * @returns {Promise<Array<Object>>} Array of phrase objects with text, timings, and word offsets
 * @throws {Error} If transcription fails
 */
export async function transcribeLong({
  audioBuffer,
  audioUrl,
  language = "english",
  format = "FLAC",
}) {
  // batchRecognize only reads from GCS: reuse a provided URL, or upload the
  // buffer to the configured bucket first.
  let gcsUri;
  if (audioUrl) {
    gcsUri = getGCSUri(audioUrl);
  } else {
    const audioUri = await uploadFile({
      data: audioBuffer,
      folder: "audio",
      fileName: `audio-${Date.now()}.${AUDIO_CONFIG[format].extension}`,
      contentType: AUDIO_CONFIG[format].contentType,
    });
    gcsUri = getGCSUri(audioUri);
  }

  console.log("GCS URI:", gcsUri);
  const client = await getSpeechClient();

  const request = {
    recognizer: recognizer,
    config: {
      explicitDecodingConfig: {
        encoding: AUDIO_CONFIG[format].encoding,
        sampleRateHertz: AUDIO_CONFIG[format].sampleRate,
        audioChannelCount: 1,
      },
      languageCodes: [LANGUAGE_CODE[language]],
      model: MODEL,
      features: {
        // Per-word offsets are required for subtitle timing downstream.
        enableWordTimeOffsets: true,
      },
    },
    files: [
      {
        uri: gcsUri,
      },
    ],
    // Inline output config: results come back in the operation response
    // instead of being written to a GCS output bucket.
    recognitionOutputConfig: {
      inlineResponseConfig: {},
    },
  };

  try {
    console.log("Waiting for operation to complete...");
    // batchRecognize returns a long-running operation; await its completion.
    const [operation] = await client.batchRecognize(request);
    const [response] = await operation.promise();

    // Extract results for the audio URI (use the GCS URI as the key)
    const fileResult = response.results?.[gcsUri];
    if (!fileResult || !fileResult.transcript) {
      return [];
    }

    // Extract words from all results (batchRecognize can return multiple result segments)
    const allPhrases = [];
    const results = fileResult.transcript.results || [];

    for (const result of results) {
      const phrases = processResponse(result);
      console.log("Phrases:", phrases);
      console.log("Transcription Result:", result);
      allPhrases.push(...phrases);
    }

    if (allPhrases.length === 0) {
      return [];
    }
    return allPhrases;
  } catch (err) {
    console.error("Transcription Error:", err.message);
    throw err;
  }
}
@@ -0,0 +1,70 @@
1
+ import { extractAudioBufferFromVideo } from "./audio.utils.js";
2
+ import { uploadFile } from "./gc.utils.js";
3
+ import { buildProject } from "./project.utils.js";
4
+ import { transcribeLong, transcribeShort } from "./transcriber.js";
5
+
6
/**
 * Creates a complete subtitle video project from a video URL.
 * Downloads video, extracts audio, transcribes it using Google Speech-to-Text,
 * and builds a Twick project JSON structure.
 *
 * @param {Object} params - Project creation parameters
 * @param {string} params.videoUrl - Publicly accessible HTTP(S) URL to the video file
 * @param {Object} [params.videoSize] - Video dimensions {width, height} (defaults to 720x1280)
 * @param {string} [params.language="english"] - Transcription language code
 * @param {string} [params.languageFont="english"] - Font/script for subtitles
 * @returns {Promise<Object>} Twick project JSON structure
 * @throws {Error} If video processing, transcription, or project building fails
 */
export const createSubtitleProject = async (params) => {
  const { videoSize, videoUrl, language, languageFont } = params;

  // Pull the audio track out of the source video (also yields its duration).
  const { audioBuffer, duration } = await extractAudioBufferFromVideo(videoUrl);
  if (!duration) {
    throw new Error("Failed to get duration of video");
  }
  if (!audioBuffer) {
    throw new Error("Failed to get audio buffer from video");
  }

  // Clips longer than 6 seconds go through the batch API (which uploads the
  // audio to GCS); shorter ones use synchronous inline recognition.
  const subtitles =
    duration > 6
      ? await transcribeLong({ audioBuffer, language })
      : await transcribeShort({ audioBuffer, language });

  if (!subtitles.length) {
    throw new Error("No subtitles found");
  }

  const project = buildProject({
    subtitles,
    duration,
    videoUrl,
    videoSize,
    language,
    languageFont,
  });

  console.log("Project built successfully");
  return project;
};
48
+
49
/**
 * Exports a Twick project JSON to Google Cloud Storage and returns a public URL.
 * Uploads the project to GCS and generates a signed URL valid for 1 hour.
 *
 * @param {Object} project - Twick project JSON object
 * @returns {Promise<string>} Signed URL to the exported project JSON file
 * @throws {Error} If upload to GCS fails
 */
export const exportProject = async (project) => {
  const projectData = JSON.stringify(project);
  console.log("Project:", projectData);

  // isPublic requests a 1-hour signed URL so callers can fetch the export
  // without the bucket itself being world-readable.
  const exportedProjectUrl = await uploadFile({
    data: projectData,
    folder: "projects",
    fileName: `project-${Date.now()}.json`,
    contentType: "application/json",
    isPublic: true,
  });

  console.log("Project exported successfully");
  console.log("Project exported to:", exportedProjectUrl);
  return exportedProjectUrl;
};
package/package.json ADDED
@@ -0,0 +1,61 @@
1
+ {
2
+ "name": "@twick/cloud-subtitle-video",
3
+ "version": "0.15.1",
4
+ "description": "Twick cloud function for generating subtitle video from audio using Google Cloud Speech-to-Text",
5
+ "type": "module",
6
+ "main": "core/index.js",
7
+ "exports": {
8
+ ".": "./core/index.js",
9
+ "./aws": "./platform/aws/handler.js",
10
+ "./platform/aws/*": "./platform/aws/*"
11
+ },
12
+ "bin": {
13
+ "twick-subtitle-video": "bin/twick-subtitle-video.js"
14
+ },
15
+ "files": [
16
+ "core",
17
+ "core/index.js",
18
+ "platform",
19
+ "bin",
20
+ "README.md"
21
+ ],
22
+ "scripts": {
23
+ "test": "node --test test/*.test.js",
24
+ "verify:aws": "node -e \"require('fs').accessSync('platform/aws/Dockerfile'); require('fs').accessSync('platform/aws/handler.js'); console.log('AWS transcript function assets present')\"",
25
+ "pack:aws": "npm run verify:aws && npm pack",
26
+ "release:aws": "npm run verify:aws && npm publish --access public --tag aws",
27
+ "deploy:aws": "node scripts/deploy-aws.js",
28
+ "prepublishOnly": "npm run verify:aws"
29
+ },
30
+ "publishConfig": {
31
+ "access": "public",
32
+ "tag": "aws"
33
+ },
34
+ "keywords": [
35
+ "twick",
36
+ "audio",
37
+ "transcript",
38
+ "caption",
39
+ "lambda",
40
+ "aws",
41
+ "docker",
42
+ "google-cloud-speech"
43
+ ],
44
+ "author": "",
45
+ "license": "SEE LICENSE IN LICENSE.md",
46
+ "engines": {
47
+ "node": ">=20.0.0"
48
+ },
49
+ "dependencies": {
50
+ "@aws-sdk/client-secrets-manager": "^3.679.0",
51
+ "fluent-ffmpeg": "^2.1.2",
52
+ "@ffmpeg-installer/ffmpeg": "^1.1.0",
53
+ "@ffprobe-installer/ffprobe": "^1.1.0",
54
+ "@google-cloud/speech": "^7.2.1",
55
+ "@google-cloud/storage": "^7.18.0"
56
+ },
57
+ "devDependencies": {
58
+ "typescript": "~5.4.5",
59
+ "dotenv": "^16.4.5"
60
+ }
61
+ }
@@ -0,0 +1,14 @@
1
FROM --platform=linux/amd64 public.ecr.aws/lambda/nodejs:20

# Copy package manifests first so the dependency layer is cached until they change
COPY package.json package-lock.json* ./

# Install production dependencies only — devDependencies (typescript, dotenv)
# are not needed in the Lambda runtime image and only bloat it.
RUN npm install --omit=dev

# Copy source code
COPY . ./

# Default Lambda handler (module path . exported handler name)
CMD ["platform/aws/handler.handler"]
@@ -0,0 +1,105 @@
1
+ import { createSubtitleProject, exportProject } from '@twick/cloud-subtitle-video';
2
+
3
/**
 * Builds a Lambda-style JSON response with permissive CORS headers.
 *
 * @param {number} statusCode - HTTP status code for the response
 * @param {Object} body - Payload to serialize as the JSON body
 * @returns {Object} Response object with statusCode, headers, and body
 */
const jsonResponse = (statusCode, body) => {
  const headers = {
    'Content-Type': 'application/json',
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Allow-Headers': 'Content-Type',
    'Access-Control-Allow-Methods': 'POST, OPTIONS',
  };
  return { statusCode, headers, body: JSON.stringify(body) };
};
20
+
21
/**
 * AWS Lambda handler for creating subtitle video projects.
 *
 * Processes video URLs, transcribes audio using Google Speech-to-Text,
 * and optionally exports projects to Google Cloud Storage.
 *
 * @param {Object} event - Lambda event object
 * @param {string} [event.httpMethod] - HTTP method (for API Gateway integration)
 * @param {Object|string} [event.arguments] - AppSync arguments (object, or JSON string)
 * @param {string} [event.body] - JSON string body (for API Gateway)
 * @param {string} [event.body.videoUrl] - Required: Publicly accessible video URL
 * @param {Object} [event.body.videoSize] - Optional: Video dimensions {width, height}
 * @param {string} [event.body.language] - Optional: Transcription language (default: "english")
 * @param {string} [event.body.languageFont] - Optional: Font/script for subtitles
 * @param {boolean} [event.body.shouldExport] - Optional: If true, exports project to GCS
 * @returns {Promise<Object>} Lambda response object with statusCode, headers, and body
 */
export const handler = async (event) => {
  console.log('Subtitle video function invoked');
  console.log('Event:', JSON.stringify(event));

  // CORS preflight — answer immediately without touching the payload.
  if (event.httpMethod === 'OPTIONS') {
    return {
      statusCode: 204,
      headers: {
        'Access-Control-Allow-Origin': '*',
        'Access-Control-Allow-Headers': 'Content-Type',
        'Access-Control-Allow-Methods': 'POST, OPTIONS',
      },
      body: '',
    };
  }

  // Resolve the payload from AppSync arguments or an API Gateway body.
  // Both may arrive as JSON strings; a malformed body is a client error
  // (400), not a server error, so parse outside the main try block.
  let argumentsPayload;
  try {
    const rawPayload = event?.arguments ?? event?.body ?? {};
    argumentsPayload =
      typeof rawPayload === 'string' ? JSON.parse(rawPayload) : rawPayload;
  } catch (parseError) {
    return jsonResponse(400, {
      error: 'Invalid JSON in request body',
      message:
        parseError instanceof Error ? parseError.message : 'Unknown error',
    });
  }

  try {
    const { videoUrl, videoSize, language, languageFont, shouldExport } =
      argumentsPayload;

    if (!videoUrl) {
      return jsonResponse(400, {
        error: 'Missing required field: videoUrl',
        expectedFormat: {
          videoUrl:
            'Publicly reachable video URL or "gs://bucket/object" for GCS',
          videoSize: 'Optional video size (e.g., { "width": 1920, "height": 1080 })',
          language: 'Optional language (e.g., "english", "hindi")',
          languageFont: 'Optional font/script for subtitles (e.g., "english")',
        },
      });
    }

    const result = await createSubtitleProject({
      videoUrl,
      videoSize,
      language,
      languageFont,
    });

    console.log('Subtitle video project created successfully');

    if (shouldExport) {
      const exportedUrl = await exportProject(result);
      return jsonResponse(200, { url: exportedUrl });
    }
    return jsonResponse(200, { project: result });
  } catch (error) {
    console.error('Error creating subtitle video project:', error);

    return jsonResponse(500, {
      error: 'Error creating subtitle video project',
      message: error instanceof Error ? error.message : 'Unknown error',
    });
  }
};
104
+
105
+