@twick/cloud-subtitle-video 0.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,128 @@
1
+ # @twick/cloud-subtitle-video
2
+
3
+ **Generate complete subtitle video projects from video URLs using Google Cloud Speech-to-Text.**
4
+
5
+ Automatically transcribes video audio, creates timed subtitle tracks, and optionally exports project JSONs to Google Cloud Storage. Perfect for programmatic subtitle generation at scale.
6
+
7
+ ## What Problem Does This Solve?
8
+
9
+ - **Automated subtitle generation** — Convert video URLs into complete Twick projects with timed subtitles
10
+ - **Word-level timing** — Precise subtitle placement using Google Speech-to-Text API
11
+ - **Serverless processing** — Deploy as AWS Lambda for automatic scaling
12
+ - **Multi-language support** — Generate subtitles in multiple languages and fonts
13
+
14
+ ## Input → Output
15
+
16
+ **Input:** Video URL + optional configuration
17
+ ```json
18
+ {
19
+ "videoUrl": "https://example.com/video.mp4",
20
+ "videoSize": { "width": 1920, "height": 1080 },
21
+ "language": "english",
22
+ "languageFont": "english",
23
+ "shouldExport": false
24
+ }
25
+ ```
26
+
27
+ **Output:** Complete Twick project JSON with video track + subtitle track
28
+ ```json
29
+ {
30
+ "properties": { "width": 1920, "height": 1080 },
31
+ "tracks": [
32
+ { "id": "video", "type": "video", "elements": [...] },
33
+ { "id": "subtitle", "type": "caption", "elements": [...] }
34
+ ],
35
+ "version": 1
36
+ }
37
+ ```
38
+
39
+ **Where it runs:** AWS Lambda container image (Linux/AMD64)
40
+
41
+ ## Installation
42
+
43
+ ```bash
44
+ npm install -D @twick/cloud-subtitle-video
45
+ ```
46
+
47
+ ## Quick Start
48
+
49
+ ### 1. Scaffold AWS Lambda Template
50
+
51
+ ```bash
52
+ npx twick-subtitle-video init
53
+ ```
54
+
55
+ ### 2. Build Docker Image
56
+
57
+ ```bash
58
+ npx twick-subtitle-video build twick-subtitle-video:latest
59
+ ```
60
+
61
+ ### 3. Configure Google Cloud
62
+
63
+ **Required:**
64
+ - Google Cloud project with Speech-to-Text API enabled
65
+ - Service account with permissions:
66
+ - `roles/speech.client` (or `speech.batchRecognize`)
67
+ - `roles/storage.objectCreator`
68
+ - `roles/storage.objectViewer`
69
+
70
+ **Environment variables:**
71
+ - `GOOGLE_CLOUD_PROJECT` (required) — Your GCP project ID
72
+ - `GOOGLE_CLOUD_STORAGE_BUCKET` (optional) — GCS bucket for exports (default: `"twick-video"`)
73
+
74
+ **Credentials (choose one):**
75
+ - **AWS Secrets Manager** (recommended for Lambda):
76
+ - `GCP_SERVICE_ACCOUNT_SECRET_NAME` — Secret name containing GCP service account JSON
77
+ - `AWS_REGION` (optional) — Region for Secrets Manager (default: `"ap-south-1"`)
78
+ - **File-based** (alternative):
79
+ - `GOOGLE_APPLICATION_CREDENTIALS` — Path to service account JSON file
80
+
81
+ ### 4. Deploy to AWS Lambda
82
+
83
+ ```bash
84
+ # Login to ECR
85
+ npx twick-subtitle-video ecr-login us-east-1 YOUR_ACCOUNT_ID
86
+
87
+ # Push to ECR
88
+ npx twick-subtitle-video push twick-subtitle-video:latest us-east-1 YOUR_ACCOUNT_ID
89
+ ```
90
+
91
+ ## Deployment (High Level)
92
+
93
+ 1. **Scaffold** the Lambda container template
94
+ 2. **Configure** Google Cloud credentials (via Secrets Manager or file mount)
95
+ 3. **Set environment variables** (GCP project, bucket, etc.)
96
+ 4. **Build and push** Docker image to ECR
97
+ 5. **Create Lambda function** using the ECR image
98
+
99
+ The Lambda handler expects:
100
+ - **Event format:** `{ videoUrl, videoSize?, language?, languageFont?, shouldExport? }`
101
+ - **Response:** Twick project JSON (or GCS URL if `shouldExport: true`)
102
+
103
+ ## Programmatic Usage
104
+
105
+ Use the core functions directly:
106
+
107
+ ```js
108
+ import { createSubtitleProject } from '@twick/cloud-subtitle-video';
109
+
110
+ const project = await createSubtitleProject({
111
+ videoUrl: 'https://example.com/video.mp4',
112
+ videoSize: { width: 1920, height: 1080 },
113
+ language: 'english',
114
+ languageFont: 'english',
115
+ });
116
+
117
+ console.log(project.tracks); // Array of video and subtitle tracks
118
+ ```
119
+
120
+ ## Technical Details
121
+
122
+ - **API:** Google Cloud Speech-to-Text API v2
123
+ - **Model:** `"long"` (optimized for longer audio)
124
+ - **Audio format:** FLAC, 16kHz, mono
125
+ - **Features:** Word-level timing offsets for precise subtitle placement
126
+ - **Auto-selection:** Synchronous (short audio) or batch (long audio >6s)
127
+
128
+ For detailed setup instructions, see the complete deployment guide in the repository.
@@ -0,0 +1,125 @@
1
+ #!/usr/bin/env node
2
+ import { fileURLToPath } from 'url';
3
+ import { dirname, join } from 'path';
4
+ import fs from 'fs';
5
+ import { spawn } from 'child_process';
6
+
7
// Resolve this script's own path (ESM modules have no __dirname) and the
// package root one directory up, where the platform/ template assets ship.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const pkgRoot = join(__dirname, '..');
10
+
11
/**
 * Scaffolds the AWS Lambda container template into `destDir`.
 *
 * Copies the Dockerfile to the destination root and handler.js into
 * `platform/aws/` so the layout matches the Dockerfile's
 * CMD ["platform/aws/handler.handler"]. Also writes a minimal package.json
 * (only when absent) so `npm ci` in the Docker build benefits from layer caching.
 *
 * @param {string} destDir - Directory to scaffold into (created if missing)
 */
function copyTemplate(destDir) {
  const templateDir = join(pkgRoot, 'platform', 'aws');

  // mkdirSync with { recursive: true } is idempotent and creates destDir
  // itself, so no existsSync guards are needed.
  const platformAwsDir = join(destDir, 'platform', 'aws');
  fs.mkdirSync(platformAwsDir, { recursive: true });

  // Dockerfile lives at the scaffold root; it references platform/aws/handler.handler.
  fs.copyFileSync(join(templateDir, 'Dockerfile'), join(destDir, 'Dockerfile'));

  // handler.js must sit under platform/aws/ to match the CMD path above.
  fs.copyFileSync(join(templateDir, 'handler.js'), join(platformAwsDir, 'handler.js'));

  // Minimal package.json to enable Docker layer caching (npm ci).
  // Written only when missing so user edits are never clobbered.
  const pkgJsonPath = join(destDir, 'package.json');
  if (!fs.existsSync(pkgJsonPath)) {
    const pkg = {
      name: 'twick-subtitle-video-runtime',
      type: 'module',
      dependencies: {
        '@twick/cloud-subtitle-video': 'latest',
        'fluent-ffmpeg': '^2.1.2',
        '@ffmpeg-installer/ffmpeg': '^1.1.0',
        '@ffprobe-installer/ffprobe': '^1.1.0',
      },
    };
    fs.writeFileSync(pkgJsonPath, JSON.stringify(pkg, null, 2));
  }
}
47
+
48
/**
 * Runs a command with inherited stdio through a shell.
 *
 * @param {string} cmd - Command name, or a full shell pipeline when args is empty
 * @param {string[]} args - Argument list (may be empty)
 * @param {Object} [opts] - Extra options forwarded to child_process.spawn
 * @returns {Promise<void>} Resolves on exit code 0, rejects otherwise
 */
function run(cmd, args, opts = {}) {
  return new Promise((resolve, reject) => {
    // shell: true lets callers pass full pipelines (e.g. `aws ... | docker login`)
    // as a single string with an empty args array; spawn accepts an empty
    // args array in both cases, so no branching is needed.
    const ps = spawn(cmd, args, { stdio: 'inherit', shell: true, ...opts });
    // Surface spawn failures (e.g. shell not found) instead of hanging forever.
    ps.on('error', reject);
    ps.on('close', (code) => (code === 0 ? resolve() : reject(new Error(`${cmd} exited ${code}`))));
  });
}
56
+
57
/**
 * CLI entry point: parses process.argv and dispatches to one of the
 * init / build / ecr-login / push commands, or prints usage.
 *
 * @returns {Promise<void>}
 * @throws {Error} On missing required arguments or an unknown command
 */
async function main() {
  const [command, ...rest] = process.argv.slice(2);

  // No command, or an explicit help flag, prints usage and exits successfully.
  if (!command || ['-h', '--help', 'help'].includes(command)) {
    console.log(`
Usage: twick-subtitle-video <command> [options]

Commands:
  init [dir]                        Scaffold AWS container template into [dir] (default: ./twick-subtitle-video-aws)
  build <image> [dir]               Docker build image from [dir] (default: ./twick-subtitle-video-aws)
  ecr-login <region> <accountId>    Login docker to ECR
  push <image> <region> <accountId> Push image to ECR (repo must exist)

Examples:
  twick-subtitle-video init
  twick-subtitle-video build my-repo:latest
  twick-subtitle-video ecr-login us-east-1 123456789012
  twick-subtitle-video push my-repo:latest us-east-1 123456789012
`);
    return;
  }

  // init: copy the packaged AWS template into a local directory.
  if (command === 'init') {
    const dir = rest[0] || 'twick-subtitle-video-aws';
    copyTemplate(dir);
    console.log(`✔ Scaffolded AWS runtime into ./${dir}`);
    return;
  }

  // build: docker build the scaffolded directory into a tagged image.
  if (command === 'build') {
    const image = rest[0];
    const dir = rest[1] || 'twick-subtitle-video-aws';
    if (!image) throw new Error('Image name required. e.g., my-repo:latest');
    // Build for linux/amd64 platform to avoid creating multi-arch manifest index
    // This reduces the number of artifacts pushed to the registry
    await run('docker', ['build', '--platform', 'linux/amd64', '-t', image, dir]);
    return;
  }

  // ecr-login: pipe an ECR auth token into `docker login` (runs as one shell string).
  if (command === 'ecr-login') {
    const region = rest[0];
    const accountId = rest[1];
    if (!region || !accountId) throw new Error('Usage: ecr-login <region> <accountId>');
    const registry = `${accountId}.dkr.ecr.${region}.amazonaws.com`;
    await run(`aws ecr get-login-password --region ${region} | docker login --username AWS --password-stdin ${registry}`, []);
    return;
  }

  // push: re-tag the local image with the ECR registry prefix and push it.
  if (command === 'push') {
    const image = rest[0];
    const region = rest[1];
    const accountId = rest[2];
    if (!image || !region || !accountId) throw new Error('Usage: push <image> <region> <accountId>');
    // NOTE(review): split(':') assumes a plain `repo:tag` name; an image name
    // containing a registry host with a port would be split wrong — confirm callers.
    const [repo, tag = 'latest'] = image.split(':');
    const registry = `${accountId}.dkr.ecr.${region}.amazonaws.com`;
    const remote = `${registry}/${repo}:${tag}`;
    await run('docker', ['tag', `${repo}:${tag}`, remote]);
    await run('docker', ['push', remote]);
    console.log(`✔ Pushed ${remote}`);
    return;
  }

  throw new Error(`Unknown command: ${command}`);
}
121
+
122
// Top-level entry: report the failure message and exit non-zero so shell
// callers and CI can detect errors.
main().catch((err) => {
  console.error(err.message || err);
  process.exit(1);
});
@@ -0,0 +1,96 @@
1
+ import fs from "fs";
2
+ import { join } from "path";
3
+ import { mkdtemp, readFile, rm } from "fs/promises";
4
+ import { tmpdir } from "os";
5
+ import { execFile } from "child_process";
6
+ import { promisify } from "util";
7
+ import { Readable, pipeline } from "stream";
8
+
9
+ // These packages provide prebuilt ffmpeg/ffprobe binaries. Types are not bundled,
10
+ // so we import them as `any` to keep TypeScript satisfied.
11
+ import ffmpeg from "@ffmpeg-installer/ffmpeg";
12
+ import ffprobe from "@ffprobe-installer/ffprobe";
13
+
14
+
15
// Promisified wrappers so ffprobe/ffmpeg invocations and stream piping can be awaited.
const execFileAsync = promisify(execFile);
const pipelineAsync = promisify(pipeline);
// Absolute paths to the prebuilt binaries shipped by the installer packages.
const ffmpegPath = ffmpeg.path;
const ffprobePath = ffprobe.path;
19
+
20
/**
 * Audio encoding configuration keyed by format name.
 * Currently supports FLAC only, tuned for the Google Speech-to-Text API
 * (16 kHz mono is the recommended input for its recognition models).
 * @type {Object<string, Object>}
 */
export const AUDIO_CONFIG = {
  FLAC: {
    codec: "flac",
    encoding: "FLAC",
    sampleRate: 16000,
    channelCount: 1,
    extension: "flac",
    contentType: "audio/flac",
  },
};
35
+
36
/**
 * Extracts audio from a video URL and converts it to a format suitable for transcription.
 * Downloads the video, extracts audio using ffmpeg, and returns the audio buffer and duration.
 *
 * @param {string} videoUrl - Publicly accessible HTTP(S) URL to the video file
 * @param {string} [format="FLAC"] - Audio output format (currently only "FLAC" supported)
 * @returns {Promise<Object>} Object containing audioBuffer (Buffer) and duration (number in seconds; 0 when ffprobe fails)
 * @throws {Error} If the format is unsupported, or video download/extraction fails
 */
export const extractAudioBufferFromVideo = async (videoUrl, format = "FLAC") => {
  // Fail fast on unknown formats instead of crashing later on an undefined config.
  const config = AUDIO_CONFIG[format];
  if (!config) {
    throw new Error(
      `Unsupported audio format: ${format}. Supported: ${Object.keys(AUDIO_CONFIG).join(", ")}`
    );
  }

  const videoResponse = await fetch(videoUrl);
  if (!videoResponse.ok) {
    throw new Error(`Failed to download video: ${videoResponse.status} ${videoResponse.statusText}`);
  }
  if (!videoResponse.body) {
    throw new Error("Video response has no body");
  }

  const tmpBase = await mkdtemp(join(tmpdir(), 'mcp-'));
  try {
    const inputPath = join(tmpBase, 'input_video');
    // Use the configured extension (".flac"), not the format key ("FLAC").
    const outputPath = join(tmpBase, `output_audio.${config.extension}`);

    // Stream the download straight to disk so large videos never sit in memory.
    const videoStream = Readable.fromWeb(videoResponse.body);
    await pipelineAsync(videoStream, fs.createWriteStream(inputPath));

    // Probe the container duration; a probe failure is non-fatal and simply
    // reports duration 0 (callers decide how to handle that).
    let duration = 0;
    try {
      const { stdout } = await execFileAsync(ffprobePath, [
        '-v', 'error',
        '-show_entries', 'format=duration',
        '-of', 'default=noprint_wrappers=1:nokey=1',
        inputPath,
      ]);
      duration = parseFloat(stdout.toString().trim()) || 0;
    } catch (err) {
      console.warn('Failed to get duration using ffprobe');
    }

    try {
      // execFile requires string arguments, so numeric config values are
      // stringified (passing the raw number 16000 throws ERR_INVALID_ARG_TYPE).
      await execFileAsync(ffmpegPath, [
        '-y',
        '-i', inputPath,
        '-vn',                               // Strip video
        '-ac', String(config.channelCount),  // Mono channel (Required for STT)
        '-ar', String(config.sampleRate),    // 16kHz is ideal for Chirp
        '-c:a', config.codec,                // Use FLAC codec
        outputPath,
      ]);
    } catch (err) {
      const stderr = err?.stderr?.toString?.().trim?.() || "";
      throw new Error(`ffmpeg extraction failed: ${stderr}`);
    }

    const audioBuffer = await readFile(outputPath);
    return { audioBuffer, duration };
  } finally {
    // Clean up the temp directory on every path — success, download failure,
    // and ffmpeg failure alike (the original leaked it when the pipeline threw).
    await rm(tmpBase, { recursive: true, force: true });
  }
};
@@ -0,0 +1,177 @@
1
+ import { Storage } from "@google-cloud/storage";
2
+ import {
3
+ SecretsManagerClient,
4
+ GetSecretValueCommand,
5
+ } from "@aws-sdk/client-secrets-manager";
6
+ import fs from "fs";
7
+
8
/**
 * Google Cloud Project ID. Can be set via GOOGLE_CLOUD_PROJECT environment variable.
 * @type {string}
 */
export const CLOUD_PROJECT_ID = process.env.GOOGLE_CLOUD_PROJECT;

/**
 * Google Cloud region for Speech-to-Text API. Currently set to "global".
 * @type {string}
 */
export const CLOUD_REGION = "global";

/**
 * AWS region, read from the AWS_REGION environment variable.
 * May be undefined; consumers fall back to "ap-south-1" where needed.
 * @type {string|undefined}
 */
export const AWS_REGION = process.env.AWS_REGION;

/**
 * Google Cloud Storage bucket name for storing audio files and project exports.
 * Can be set via GOOGLE_CLOUD_STORAGE_BUCKET environment variable.
 * Falls back to "twick-video" (the documented default) when unset.
 * @type {string}
 */
export const CLOUD_STORAGE_BUCKET = process.env.GOOGLE_CLOUD_STORAGE_BUCKET || "twick-video";

// Module-level cache so AWS Secrets Manager is queried at most once per runtime.
let googleCredentials = null;
29
+
30
/**
 * Retrieves Google Cloud service account credentials from AWS Secrets Manager.
 *
 * If GCP_SERVICE_ACCOUNT_SECRET_NAME is set, fetches the JSON credentials from AWS Secrets Manager.
 * If not set, returns undefined (useful when credentials are provided via GOOGLE_APPLICATION_CREDENTIALS).
 * Parsed credentials are cached at module level so the secret is fetched at most once per runtime.
 *
 * @returns {Promise<Object|undefined>} Parsed JSON credentials object or undefined
 * @throws {Error} If fetching from Secrets Manager fails or the secret is not valid service-account JSON
 */
export const getGoogleCredentials = async () => {
  if (googleCredentials) {
    return googleCredentials;
  }
  try {
    const secretName = process.env.GCP_SERVICE_ACCOUNT_SECRET_NAME;
    if (!secretName) {
      console.log(
        "No secret name configured, skipping Google credentials initialization"
      );
      return;
    }

    const client = new SecretsManagerClient({
      region: process.env.AWS_REGION || "ap-south-1",
    });

    const response = await client.send(
      new GetSecretValueCommand({
        SecretId: secretName,
        VersionStage: "AWSCURRENT", // VersionStage defaults to AWSCURRENT if unspecified
      })
    );

    // Binary-only secrets carry SecretBinary instead of SecretString; fail with
    // a clear message rather than letting JSON.parse choke on undefined.
    if (!response.SecretString) {
      throw new Error(
        `Secret '${secretName}' has no SecretString value; expected a service account JSON string.`
      );
    }
    const parsedCredentials = JSON.parse(response.SecretString);

    // Validate that the credentials contain required fields
    if (!parsedCredentials.client_email) {
      throw new Error(
        `Invalid Google Cloud credentials: missing 'client_email' field. ` +
        `The secret must contain a valid service account JSON with 'client_email', ` +
        `'private_key', and 'type' fields.`
      );
    }

    if (!parsedCredentials.private_key) {
      throw new Error(
        `Invalid Google Cloud credentials: missing 'private_key' field.`
      );
    }

    // Wrong type is suspicious but not fatal — warn and continue.
    if (parsedCredentials.type !== "service_account") {
      console.warn(
        `Warning: credentials type is '${parsedCredentials.type}', expected 'service_account'`
      );
    }

    googleCredentials = parsedCredentials;
    return googleCredentials;
  } catch (error) {
    console.error(
      `Failed to initialize Google credentials from secret ::`,
      error
    );
    throw error;
  }
};
95
+
96
// Lazily-created singleton Storage client, shared across invocations.
let storage = null;

/**
 * Gets or initializes the Google Cloud Storage client instance.
 * Credentials come from Secrets Manager when configured, otherwise the
 * client falls back to ambient credentials (e.g. GOOGLE_APPLICATION_CREDENTIALS).
 *
 * @returns {Promise<Storage>} Initialized Storage client
 */
const getStorage = async () => {
  if (storage === null) {
    const credentials = await getGoogleCredentials();
    storage = new Storage({
      projectId: CLOUD_PROJECT_ID,
      credentials,
    });
  }
  return storage;
};
112
+
113
/**
 * Uploads a file to Google Cloud Storage.
 *
 * @param {Object} params - Upload parameters
 * @param {Buffer|string} params.data - File data to upload (Buffer or string)
 * @param {string} [params.folder] - Optional folder path in the bucket
 * @param {string} params.fileName - Name of the file to create
 * @param {string} params.contentType - MIME type of the file
 * @param {boolean} [params.isPublic=false] - If true, returns a signed URL valid for 1 hour
 * @returns {Promise<string>} Public URL or signed URL (if isPublic=true) to the uploaded file
 */
export const uploadFile = async ({
  data,
  folder,
  fileName,
  contentType,
  isPublic = false,
}) => {
  const bucketName = CLOUD_STORAGE_BUCKET;
  const storageClient = await getStorage();

  // Object key: optional folder prefix followed by the file name.
  const destinationPath = folder ? `${folder}/${fileName}` : fileName;
  const file = storageClient.bucket(bucketName).file(destinationPath);

  // Single-request (non-resumable) upload; payloads here are small.
  await file.save(data, { contentType, resumable: false });

  if (!isPublic) {
    return `https://storage.googleapis.com/${bucketName}/${destinationPath}`;
  }

  // Generate a V4 signed URL valid for one hour instead of making the object public.
  const expires = new Date();
  expires.setHours(expires.getHours() + 1);
  const [signedUrl] = await file.getSignedUrl({
    version: "v4",
    action: "read",
    expires,
  });
  return signedUrl;
};
159
+
160
/**
 * Converts a Google Cloud Storage URL to a gs:// URI format.
 *
 * @param {string} URI - GCS URL (https://storage.googleapis.com/...) or gs:// URI
 * @returns {string} gs:// URI format
 * @throws {Error} If the URI format is invalid
 */
export const getGCSUri = (URI) => {
  const httpsPrefix = "https://storage.googleapis.com/";

  // Already in gs:// form — pass through untouched.
  if (URI.startsWith("gs://")) {
    return URI;
  }

  // Public HTTPS form — swap the host prefix for the gs:// scheme.
  if (URI.startsWith(httpsPrefix)) {
    return `gs://${URI.slice(httpsPrefix.length)}`;
  }

  throw new Error(
    `Invalid audio URI format. Expected gs://bucket/path or https://storage.googleapis.com/bucket/path, got: ${URI}`
  );
};
package/core/index.js ADDED
@@ -0,0 +1 @@
1
+ export { createSubtitleProject, exportProject } from "./workflow.js";
@@ -0,0 +1,72 @@
1
/**
 * Builds a Twick subtitle video project JSON structure from transcription results.
 *
 * @param {Object} params - Project parameters
 * @param {Array<Object>} params.subtitles - Array of subtitle objects with {t, s, e} properties (s/e in milliseconds)
 * @param {number} params.duration - Video duration in seconds
 * @param {string} params.videoUrl - Source video URL
 * @param {Object} [params.videoSize] - Video dimensions {width, height} (defaults to 720x1280)
 * @returns {Object} Twick project JSON structure with properties, tracks, and version
 */
export const buildProject = ({ subtitles, duration, videoUrl, videoSize }) => {
  const width = videoSize?.width || 720;
  const height = videoSize?.height || 1280;

  // Track 1: the source video, spanning the whole timeline.
  const videoTrack = {
    id: "video",
    type: "video",
    elements: [
      {
        id: "video",
        type: "video",
        s: 0,
        e: duration,
        props: {
          src: videoUrl,
          width,
          height,
        },
      },
    ],
  };

  // Track 2: caption styling shared via track props, one element per phrase.
  const subtitleTrack = {
    id: "subtitle",
    type: "caption",
    props: {
      capStyle: "highlight_bg",
      font: {
        size: 50,
        weight: 700,
        family: "Bangers",
      },
      colors: {
        text: "#ffffff",
        highlight: "#ff4081",
        bgColor: "#444444",
      },
      lineWidth: 0.35,
      stroke: "#000000",
      fontWeight: 700,
      shadowOffset: [-3, 3],
      shadowColor: "#000000",
      x: 0,
      y: 200,
      applyToAll: true,
    },
    // Subtitle timings arrive in milliseconds; the project format uses seconds.
    elements: subtitles.map(({ t, s, e }, index) => ({
      id: `subtitle-${index}`,
      type: "caption",
      s: s / 1000,
      e: e / 1000,
      t,
    })),
  };

  return {
    properties: { width, height },
    tracks: [videoTrack, subtitleTrack],
    version: 1,
  };
};
@@ -0,0 +1,231 @@
1
+ import { SpeechClient } from "@google-cloud/speech/build/src/v2/index.js";
2
+ import {
3
+ CLOUD_PROJECT_ID,
4
+ CLOUD_REGION,
5
+ getGCSUri,
6
+ getGoogleCredentials,
7
+ uploadFile,
8
+ } from "./gc.utils.js";
9
+ import { AUDIO_CONFIG } from "./audio.utils.js";
10
+
11
/**
 * Language code mapping for Google Speech-to-Text API.
 * Keys are the human-readable names accepted by this package's API surface;
 * values are the BCP-47 codes passed to the recognizer. Only English is mapped today.
 * @type {Object<string, string>}
 */
const LANGUAGE_CODE = {
  english: "en-US",
};

/**
 * Speech recognition model to use. "long" model is optimized for longer audio files.
 * @type {string}
 */
const MODEL = "long";
24
+
25
// Lazily-created singleton SpeechClient, reused across invocations.
let speechClient = null;

/**
 * Gets or initializes the Google Cloud Speech-to-Text client.
 * Credentials come from Secrets Manager when configured, otherwise the client
 * falls back to ambient credentials (e.g. GOOGLE_APPLICATION_CREDENTIALS).
 *
 * @returns {Promise<SpeechClient>} Initialized SpeechClient instance
 */
export const getSpeechClient = async () => {
  if (speechClient === null) {
    const credentials = await getGoogleCredentials();
    speechClient = new SpeechClient({
      projectId: CLOUD_PROJECT_ID,
      region: CLOUD_REGION,
      credentials,
    });
  }
  return speechClient;
};
42
+
43
/**
 * Recognizer resource path for Google Speech-to-Text API v2.
 * The implicit "_" recognizer lets the per-request config drive recognition
 * instead of a pre-created recognizer resource.
 * @type {string}
 */
const recognizer = `projects/${CLOUD_PROJECT_ID}/locations/${CLOUD_REGION}/recognizers/_`;
48
+
49
/**
 * Processes a single Speech-to-Text result and groups its words into
 * phrases of up to 4 words each.
 *
 * @param {Object} results - One result object from the API response
 * @returns {Array<Object>} Phrase objects: {t: text, s: startMs, e: endMs, w: word start times in ms}
 */
const processResponse = (results) => {
  // Word-level timings live on the top alternative; no words means no phrases.
  const words = results?.alternatives?.[0]?.words || [];
  if (!words.length) {
    return [];
  }

  // google.protobuf.Duration ({seconds, nanos}) -> milliseconds.
  const toMs = (offset) =>
    offset ? Number(offset.seconds || 0) * 1000 + Number(offset.nanos || 0) / 1e6 : 0;

  const timedWords = words.map(({ word, startOffset, endOffset }) => ({
    word,
    startMs: toMs(startOffset),
    endMs: toMs(endOffset),
  }));

  // Chunk into groups of 4 consecutive words; each group becomes one phrase.
  const GROUP_SIZE = 4;
  const phrases = [];
  for (let start = 0; start < timedWords.length; start += GROUP_SIZE) {
    const group = timedWords.slice(start, start + GROUP_SIZE);
    phrases.push({
      t: group.map((w) => w.word).join(" "),
      s: Math.round(group[0].startMs),
      e: Math.round(group[group.length - 1].endMs),
      w: group.map((w) => Math.round(w.startMs)),
    });
  }
  return phrases;
};
96
+
97
/**
 * Transcribes short audio (typically under 60 seconds) using Google Speech-to-Text.
 * Uses the synchronous recognize method with inline base64 audio content.
 *
 * @param {Object} params - Transcription parameters
 * @param {Buffer} params.audioBuffer - Audio data buffer (FLAC format)
 * @param {string} [params.language="english"] - Language key (e.g. "english")
 * @param {string} [params.format="FLAC"] - Audio format (currently only "FLAC" supported)
 * @returns {Promise<Array<Object>>} Array of phrase objects with text, timings, and word offsets
 * @throws {Error} If transcription fails
 */
export async function transcribeShort({
  audioBuffer,
  language = "english",
  format = "FLAC",
}) {
  const client = await getSpeechClient();

  const request = {
    recognizer,
    config: {
      explicitDecodingConfig: {
        encoding: AUDIO_CONFIG[format].encoding,
        sampleRateHertz: AUDIO_CONFIG[format].sampleRate,
        audioChannelCount: 1,
      },
      languageCodes: [LANGUAGE_CODE[language]],
      model: MODEL,
      features: {
        // Per-word offsets are required for subtitle timing downstream.
        enableWordTimeOffsets: true,
      },
    },
    // Synchronous recognition accepts the audio inline as base64.
    content: audioBuffer.toString("base64"),
  };

  try {
    const [response] = await client.recognize(request);
    // Short audio yields a single result segment.
    return processResponse(response.results?.[0]);
  } catch (err) {
    console.error("Transcription Error:", err.message);
    throw err;
  }
}
142
+
143
/**
 * Transcribes long audio (typically over 60 seconds) using Google Speech-to-Text API.
 * Uses asynchronous batchRecognize method and requires audio to be uploaded to GCS first.
 *
 * @param {Object} params - Transcription parameters
 * @param {Buffer} [params.audioBuffer] - Audio data buffer (required if audioUrl not provided)
 * @param {string} [params.audioUrl] - GCS URI (gs://) or HTTPS URL to audio file (required if audioBuffer not provided)
 * @param {string} [params.language="english"] - Language code (e.g., "english")
 * @param {string} [params.format="FLAC"] - Audio format (currently only "FLAC" supported)
 * @returns {Promise<Array<Object>>} Array of phrase objects with text, timings, and word offsets
 * @throws {Error} If transcription fails
 */
export async function transcribeLong({
  audioBuffer,
  audioUrl,
  language = "english",
  format = "FLAC",
}) {
  // batchRecognize only reads from GCS: reuse a provided URL, or upload the
  // buffer to the configured bucket first.
  let gcsUri;
  if (audioUrl) {
    gcsUri = getGCSUri(audioUrl);
  } else {
    const audioUri = await uploadFile({
      data: audioBuffer,
      folder: "audio",
      fileName: `audio-${Date.now()}.${AUDIO_CONFIG[format].extension}`,
      contentType: AUDIO_CONFIG[format].contentType,
    });
    gcsUri = getGCSUri(audioUri);
  }

  console.log("GCS URI:", gcsUri);
  const client = await getSpeechClient();

  const request = {
    recognizer: recognizer,
    config: {
      explicitDecodingConfig: {
        encoding: AUDIO_CONFIG[format].encoding,
        sampleRateHertz: AUDIO_CONFIG[format].sampleRate,
        audioChannelCount: 1,
      },
      languageCodes: [LANGUAGE_CODE[language]],
      model: MODEL,
      features: {
        // Per-word offsets are required for subtitle timing downstream.
        enableWordTimeOffsets: true,
      },
    },
    files: [
      {
        uri: gcsUri,
      },
    ],
    // Inline output config: results come back in the operation response
    // instead of being written to a GCS output bucket.
    recognitionOutputConfig: {
      inlineResponseConfig: {},
    },
  };

  try {
    console.log("Waiting for operation to complete...");
    // batchRecognize returns a long-running operation; await its completion.
    const [operation] = await client.batchRecognize(request);
    const [response] = await operation.promise();

    // Extract results for the audio URI (use the GCS URI as the key)
    const fileResult = response.results?.[gcsUri];
    if (!fileResult || !fileResult.transcript) {
      return [];
    }

    // Extract words from all results (batchRecognize can return multiple result segments)
    const allPhrases = [];
    const results = fileResult.transcript.results || [];

    for (const result of results) {
      const phrases = processResponse(result);
      console.log("Phrases:", phrases);
      console.log("Transcription Result:", result);
      allPhrases.push(...phrases);
    }

    if (allPhrases.length === 0) {
      return [];
    }
    return allPhrases;
  } catch (err) {
    console.error("Transcription Error:", err.message);
    throw err;
  }
}
@@ -0,0 +1,70 @@
1
+ import { extractAudioBufferFromVideo } from "./audio.utils.js";
2
+ import { uploadFile } from "./gc.utils.js";
3
+ import { buildProject } from "./project.utils.js";
4
+ import { transcribeLong, transcribeShort } from "./transcriber.js";
5
+
6
/**
 * Creates a complete subtitle video project from a video URL.
 * Downloads video, extracts audio, transcribes it using Google Speech-to-Text,
 * and builds a Twick project JSON structure.
 *
 * @param {Object} params - Project creation parameters
 * @param {string} params.videoUrl - Publicly accessible HTTP(S) URL to the video file
 * @param {Object} [params.videoSize] - Video dimensions {width, height} (defaults to 720x1280)
 * @param {string} [params.language="english"] - Transcription language code
 * @param {string} [params.languageFont="english"] - Font/script for subtitles
 * @returns {Promise<Object>} Twick project JSON structure
 * @throws {Error} If video processing, transcription, or project building fails
 */
export const createSubtitleProject = async (params) => {
  const { videoSize, videoUrl, language, languageFont } = params;

  // Pull the audio track out of the source video (also yields its duration).
  const { audioBuffer, duration } = await extractAudioBufferFromVideo(videoUrl);
  if (!duration) {
    throw new Error("Failed to get duration of video");
  }
  if (!audioBuffer) {
    throw new Error("Failed to get audio buffer from video");
  }

  // Clips longer than 6 seconds go through the batch API (which uploads the
  // audio to GCS); shorter ones use synchronous inline recognition.
  const subtitles =
    duration > 6
      ? await transcribeLong({ audioBuffer, language })
      : await transcribeShort({ audioBuffer, language });

  if (!subtitles.length) {
    throw new Error("No subtitles found");
  }

  const project = buildProject({
    subtitles,
    duration,
    videoUrl,
    videoSize,
    language,
    languageFont,
  });

  console.log("Project built successfully");
  return project;
};
48
+
49
/**
 * Exports a Twick project JSON to Google Cloud Storage and returns a public URL.
 * Uploads the project to GCS and generates a signed URL valid for 1 hour.
 *
 * @param {Object} project - Twick project JSON object
 * @returns {Promise<string>} Signed URL to the exported project JSON file
 * @throws {Error} If upload to GCS fails
 */
export const exportProject = async (project) => {
  const projectData = JSON.stringify(project);
  console.log("Project:", projectData);

  // isPublic requests a 1-hour signed URL so callers can fetch the export
  // without the bucket itself being world-readable.
  const exportedProjectUrl = await uploadFile({
    data: projectData,
    folder: "projects",
    fileName: `project-${Date.now()}.json`,
    contentType: "application/json",
    isPublic: true,
  });

  console.log("Project exported successfully");
  console.log("Project exported to:", exportedProjectUrl);
  return exportedProjectUrl;
};
package/package.json ADDED
@@ -0,0 +1,61 @@
1
+ {
2
+ "name": "@twick/cloud-subtitle-video",
3
+ "version": "0.15.1",
4
+ "description": "Twick cloud function for generating subtitle video from audio using Google Cloud Speech-to-Text",
5
+ "type": "module",
6
+ "main": "core/index.js",
7
+ "exports": {
8
+ ".": "./core/index.js",
9
+ "./aws": "./platform/aws/handler.js",
10
+ "./platform/aws/*": "./platform/aws/*"
11
+ },
12
+ "bin": {
13
+ "twick-subtitle-video": "bin/twick-subtitle-video.js"
14
+ },
15
+ "files": [
16
+ "core",
17
+ "core/index.js",
18
+ "platform",
19
+ "bin",
20
+ "README.md"
21
+ ],
22
+ "scripts": {
23
+ "test": "node --test test/*.test.js",
24
+ "verify:aws": "node -e \"require('fs').accessSync('platform/aws/Dockerfile'); require('fs').accessSync('platform/aws/handler.js'); console.log('AWS transcript function assets present')\"",
25
+ "pack:aws": "npm run verify:aws && npm pack",
26
+ "release:aws": "npm run verify:aws && npm publish --access public --tag aws",
27
+ "deploy:aws": "node scripts/deploy-aws.js",
28
+ "prepublishOnly": "npm run verify:aws"
29
+ },
30
+ "publishConfig": {
31
+ "access": "public",
32
+ "tag": "aws"
33
+ },
34
+ "keywords": [
35
+ "twick",
36
+ "audio",
37
+ "transcript",
38
+ "caption",
39
+ "lambda",
40
+ "aws",
41
+ "docker",
42
+ "google-cloud-speech"
43
+ ],
44
+ "author": "",
45
+ "license": "SEE LICENSE IN LICENSE.md",
46
+ "engines": {
47
+ "node": ">=20.0.0"
48
+ },
49
+ "dependencies": {
50
+ "@aws-sdk/client-secrets-manager": "^3.679.0",
51
+ "fluent-ffmpeg": "^2.1.2",
52
+ "@ffmpeg-installer/ffmpeg": "^1.1.0",
53
+ "@ffprobe-installer/ffprobe": "^1.1.0",
54
+ "@google-cloud/speech": "^7.2.1",
55
+ "@google-cloud/storage": "^7.18.0"
56
+ },
57
+ "devDependencies": {
58
+ "typescript": "~5.4.5",
59
+ "dotenv": "^16.4.5"
60
+ }
61
+ }
@@ -0,0 +1,14 @@
1
FROM --platform=linux/amd64 public.ecr.aws/lambda/nodejs:20

# Copy package manifests first so the dependency layer is cached until they change
COPY package.json package-lock.json* ./

# Install production dependencies only — devDependencies (typescript, dotenv)
# are not needed in the Lambda runtime image and only bloat it.
RUN npm install --omit=dev

# Copy source code
COPY . ./

# Default Lambda handler (module path . exported handler name)
CMD ["platform/aws/handler.handler"]
@@ -0,0 +1,105 @@
1
+ import { createSubtitleProject, exportProject } from '@twick/cloud-subtitle-video';
2
+
3
/**
 * Builds a Lambda-style JSON response with permissive CORS headers.
 *
 * @param {number} statusCode - HTTP status code for the response
 * @param {Object} body - Payload to serialize as the JSON body
 * @returns {Object} Response object with statusCode, headers, and body
 */
const jsonResponse = (statusCode, body) => {
  const headers = {
    'Content-Type': 'application/json',
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Allow-Headers': 'Content-Type',
    'Access-Control-Allow-Methods': 'POST, OPTIONS',
  };
  return { statusCode, headers, body: JSON.stringify(body) };
};
20
+
21
/**
 * AWS Lambda handler for creating subtitle video projects.
 *
 * Processes video URLs, transcribes audio using Google Speech-to-Text,
 * and optionally exports projects to Google Cloud Storage.
 *
 * @param {Object} event - Lambda event object
 * @param {string} [event.httpMethod] - HTTP method (for API Gateway integration)
 * @param {Object|string} [event.arguments] - AppSync arguments (object, or JSON string)
 * @param {string} [event.body] - JSON string body (for API Gateway)
 * @param {string} [event.body.videoUrl] - Required: Publicly accessible video URL
 * @param {Object} [event.body.videoSize] - Optional: Video dimensions {width, height}
 * @param {string} [event.body.language] - Optional: Transcription language (default: "english")
 * @param {string} [event.body.languageFont] - Optional: Font/script for subtitles
 * @param {boolean} [event.body.shouldExport] - Optional: If true, exports project to GCS
 * @returns {Promise<Object>} Lambda response object with statusCode, headers, and body
 */
export const handler = async (event) => {
  console.log('Subtitle video function invoked');
  console.log('Event:', JSON.stringify(event));

  // CORS preflight — answer immediately without touching the payload.
  if (event.httpMethod === 'OPTIONS') {
    return {
      statusCode: 204,
      headers: {
        'Access-Control-Allow-Origin': '*',
        'Access-Control-Allow-Headers': 'Content-Type',
        'Access-Control-Allow-Methods': 'POST, OPTIONS',
      },
      body: '',
    };
  }

  // Resolve the payload from AppSync arguments or an API Gateway body.
  // Both may arrive as JSON strings; a malformed body is a client error
  // (400), not a server error, so parse outside the main try block.
  let argumentsPayload;
  try {
    const rawPayload = event?.arguments ?? event?.body ?? {};
    argumentsPayload =
      typeof rawPayload === 'string' ? JSON.parse(rawPayload) : rawPayload;
  } catch (parseError) {
    return jsonResponse(400, {
      error: 'Invalid JSON in request body',
      message:
        parseError instanceof Error ? parseError.message : 'Unknown error',
    });
  }

  try {
    const { videoUrl, videoSize, language, languageFont, shouldExport } =
      argumentsPayload;

    if (!videoUrl) {
      return jsonResponse(400, {
        error: 'Missing required field: videoUrl',
        expectedFormat: {
          videoUrl:
            'Publicly reachable video URL or "gs://bucket/object" for GCS',
          videoSize: 'Optional video size (e.g., { "width": 1920, "height": 1080 })',
          language: 'Optional language (e.g., "english", "hindi")',
          languageFont: 'Optional font/script for subtitles (e.g., "english")',
        },
      });
    }

    const result = await createSubtitleProject({
      videoUrl,
      videoSize,
      language,
      languageFont,
    });

    console.log('Subtitle video project created successfully');

    if (shouldExport) {
      const exportedUrl = await exportProject(result);
      return jsonResponse(200, { url: exportedUrl });
    }
    return jsonResponse(200, { project: result });
  } catch (error) {
    console.error('Error creating subtitle video project:', error);

    return jsonResponse(500, {
      error: 'Error creating subtitle video project',
      message: error instanceof Error ? error.message : 'Unknown error',
    });
  }
};
104
+
105
+