@vargai/sdk 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/.github/workflows/ci.yml +23 -0
  2. package/.husky/README.md +102 -0
  3. package/.husky/commit-msg +9 -0
  4. package/.husky/pre-commit +12 -0
  5. package/.husky/pre-push +9 -0
  6. package/.size-limit.json +8 -0
  7. package/.test-hooks.ts +5 -0
  8. package/CONTRIBUTING.md +150 -0
  9. package/LICENSE.md +53 -0
  10. package/README.md +7 -0
  11. package/action/captions/index.ts +202 -12
  12. package/action/captions/tiktok.ts +538 -0
  13. package/action/cut/index.ts +119 -0
  14. package/action/fade/index.ts +116 -0
  15. package/action/merge/index.ts +177 -0
  16. package/action/remove/index.ts +184 -0
  17. package/action/split/index.ts +133 -0
  18. package/action/transition/index.ts +154 -0
  19. package/action/trim/index.ts +117 -0
  20. package/bun.lock +299 -8
  21. package/cli/commands/upload.ts +215 -0
  22. package/cli/index.ts +3 -1
  23. package/commitlint.config.js +22 -0
  24. package/index.ts +12 -0
  25. package/lib/ass.ts +547 -0
  26. package/lib/fal.ts +75 -1
  27. package/lib/ffmpeg.ts +400 -0
  28. package/lib/higgsfield/example.ts +22 -29
  29. package/lib/higgsfield/index.ts +3 -2
  30. package/lib/higgsfield/soul.ts +0 -5
  31. package/lib/remotion/SKILL.md +240 -21
  32. package/lib/remotion/cli.ts +34 -0
  33. package/package.json +20 -3
  34. package/pipeline/cookbooks/scripts/animate-frames-parallel.ts +83 -0
  35. package/pipeline/cookbooks/scripts/combine-scenes.sh +53 -0
  36. package/pipeline/cookbooks/scripts/generate-frames-parallel.ts +98 -0
  37. package/pipeline/cookbooks/scripts/still-to-video.sh +37 -0
  38. package/pipeline/cookbooks/text-to-tiktok.md +669 -0
  39. package/scripts/.gitkeep +0 -0
  40. package/service/music/index.ts +29 -14
  41. package/tsconfig.json +1 -1
  42. package/utilities/s3.ts +2 -2
  43. package/HIGGSFIELD_REWRITE_SUMMARY.md +0 -300
  44. package/TEST_RESULTS.md +0 -122
  45. package/output.txt +0 -1
  46. package/scripts/produce-menopause-campaign.sh +0 -202
  47. package/test-import.ts +0 -7
  48. package/test-services.ts +0 -97
@@ -0,0 +1,23 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ lint-and-format:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+
15
+ - uses: oven-sh/setup-bun@v2
16
+ with:
17
+ bun-version: latest
18
+
19
+ - name: Install dependencies
20
+ run: bun install
21
+
22
+ - name: Check
23
+ run: bun run check
@@ -0,0 +1,102 @@
1
+ # Git Hooks Configuration
2
+
3
+ This project uses [Husky](https://typicode.github.io/husky/) to manage Git hooks for maintaining code quality and security.
4
+
5
+ ## Installed Hooks
6
+
7
+ ### `pre-commit`
8
+ Runs before each commit:
9
+ - **Gitleaks** - Scans staged files for secrets and credentials
10
+ - **Lint-staged** - Runs Biome linter/formatter on staged files
11
+
12
+ ### `commit-msg`
13
+ Validates commit messages:
14
+ - **Commitlint** - Enforces [Conventional Commits](https://www.conventionalcommits.org/) format
15
+
16
+ ### `pre-push`
17
+ Runs before pushing to remote:
18
+ - **TypeScript type checking** - Ensures no type errors before push
19
+
20
+ ## Commit Message Format
21
+
22
+ Follow the Conventional Commits specification:
23
+
24
+ ```
25
+ <type>(<scope>): <subject>
26
+
27
+ <body>
28
+
29
+ <footer>
30
+ ```
31
+
32
+ ### Types
33
+ - `feat`: New feature
34
+ - `fix`: Bug fix
35
+ - `docs`: Documentation changes
36
+ - `style`: Code style changes (formatting, etc)
37
+ - `refactor`: Code refactoring
38
+ - `perf`: Performance improvements
39
+ - `test`: Test changes
40
+ - `build`: Build system changes
41
+ - `ci`: CI/CD changes
42
+ - `chore`: Other changes
43
+ - `revert`: Revert previous commit
44
+
45
+ ### Examples
46
+ ```bash
47
+ feat: add video generation API
48
+ fix(transcribe): handle empty audio files
49
+ docs: update installation guide
50
+ refactor: simplify audio processing pipeline
51
+ ```
52
+
53
+ ## Available Scripts
54
+
55
+ ```bash
56
+ # Run linter
57
+ bun run lint
58
+
59
+ # Format code
60
+ bun run format
61
+
62
+ # Type check
63
+ bun run type-check
64
+
65
+ # Check bundle size
66
+ bun run size
67
+ ```
68
+
69
+ ## Bypassing Hooks
70
+
71
+ ⚠️ **Not recommended** - Only use when absolutely necessary:
72
+
73
+ ```bash
74
+ # Skip all hooks
75
+ git commit --no-verify -m "emergency fix"
76
+
77
+ # Disable all Husky hooks for a single command via the HUSKY env var
78
+ HUSKY=0 git commit -m "skip all hooks"
79
+ ```
80
+
81
+ ## Troubleshooting
82
+
83
+ If hooks aren't running:
84
+
85
+ ```bash
86
+ # Reinstall hooks
87
+ rm -rf .husky/_
88
+ bun run prepare
89
+ chmod +x .husky/pre-commit .husky/commit-msg .husky/pre-push
90
+ ```
91
+
92
+ ## Size Limits
93
+
94
+ Bundle size limits are defined in `.size-limit.json`. Check size before publishing:
95
+
96
+ ```bash
97
+ bun run size
98
+ ```
99
+
100
+
101
+
102
+
@@ -0,0 +1,9 @@
1
#!/usr/bin/env sh
. "$(dirname -- "$0")/_/husky.sh"

# Check commit message format.
# "$1" is the hook's first argument, forwarded to commitlint's --edit option;
# it is quoted so a path containing spaces is not split into several words.
bunx --no -- commitlint --edit "$1"
6
+
7
+
8
+
9
+
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env sh
2
+ . "$(dirname -- "$0")/_/husky.sh"
3
+
4
+ # Check for secrets with gitleaks
5
+ gitleaks protect --staged
6
+
7
+ # Run linters on staged files
8
+ bunx lint-staged
9
+
10
+
11
+
12
+
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env sh
2
+ . "$(dirname -- "$0")/_/husky.sh"
3
+
4
+ # Run type checking before push
5
+ bun run type-check
6
+
7
+
8
+
9
+
@@ -0,0 +1,8 @@
1
+ [
2
+ {
3
+ "name": "SDK Main Export",
4
+ "path": "index.ts",
5
+ "limit": "50 KB",
6
+ "ignore": ["node_modules"]
7
+ }
8
+ ]
package/.test-hooks.ts ADDED
@@ -0,0 +1,5 @@
1
+ // Test file for git hooks validation
2
+ export function testHooks() {
3
+ console.log("Testing git hooks");
4
+ return true;
5
+ }
@@ -0,0 +1,150 @@
1
+ # Contributing to @vargai/sdk
2
+
3
+ ## Development Setup
4
+
5
+ This project uses **Bun** as the runtime and package manager.
6
+
7
+ ```bash
8
+ # Install dependencies
9
+ bun install
10
+
11
+ # Run linter
12
+ bun run lint
13
+
14
+ # Format code
15
+ bun run format
16
+
17
+ # Type check
18
+ bun run type-check
19
+
20
+ # Check bundle size
21
+ bun run size
22
+ ```
23
+
24
+ ## Git Workflow
25
+
26
+ This project uses automated Git hooks powered by [Husky](https://typicode.github.io/husky/) to maintain code quality and security.
27
+
28
+ ### Automated Checks
29
+
30
+ #### Before Commit (`pre-commit`)
31
+ 1. **Gitleaks** - Scans for secrets, API keys, and credentials
32
+ 2. **Biome** - Lints and formats staged files automatically
33
+
34
+ #### Commit Message (`commit-msg`)
35
+ 3. **Commitlint** - Validates commit message format
36
+
37
+ #### Before Push (`pre-push`)
38
+ 4. **TypeScript** - Type checking to catch errors early
39
+
40
+ ### Commit Message Format
41
+
42
+ We follow [Conventional Commits](https://www.conventionalcommits.org/) specification:
43
+
44
+ ```
45
+ <type>(<scope>): <subject>
46
+ ```
47
+
48
+ **Available types:**
49
+ - `feat`: New feature
50
+ - `fix`: Bug fix
51
+ - `docs`: Documentation only changes
52
+ - `style`: Code style changes (formatting, missing semicolons, etc.)
53
+ - `refactor`: Code change that neither fixes a bug nor adds a feature
54
+ - `perf`: Performance improvements
55
+ - `test`: Adding or updating tests
56
+ - `build`: Changes to build system or dependencies
57
+ - `ci`: CI/CD configuration changes
58
+ - `chore`: Other changes that don't modify src or test files
59
+ - `revert`: Reverts a previous commit
60
+
61
+ **Examples:**
62
+ ```bash
63
+ feat: add voice cloning endpoint
64
+ feat(video): implement background removal
65
+ fix: handle null pointer in transcription
66
+ fix(captions): correct timestamp alignment
67
+ docs: update API documentation
68
+ refactor: simplify audio processing pipeline
69
+ perf(image): optimize memory usage
70
+ chore: update dependencies
71
+ ```
72
+
73
+ ### Code Style
74
+
75
+ We use [Biome](https://biomejs.dev/) for linting and formatting:
76
+
77
+ - **Automatic formatting** on commit via lint-staged
78
+ - **Manual formatting**: `bun run format`
79
+ - **Manual linting**: `bun run lint`
80
+
81
+ ### Bundle Size
82
+
83
+ SDK bundle size is monitored via size-limit:
84
+
85
+ ```bash
86
+ bun run size
87
+ ```
88
+
89
+ Current limits are defined in `.size-limit.json`.
90
+
91
+ ## Pull Request Process
92
+
93
+ 1. Fork the repository
94
+ 2. Create a feature branch: `git checkout -b feat/amazing-feature`
95
+ 3. Make your changes
96
+ 4. Commit with conventional format: `git commit -m "feat: add amazing feature"`
97
+ 5. Push to your fork: `git push origin feat/amazing-feature`
98
+ 6. Open a Pull Request
99
+
100
+ ### PR Guidelines
101
+
102
+ - ✅ All automated checks must pass
103
+ - ✅ Follow conventional commits format
104
+ - ✅ Update documentation if needed
105
+ - ✅ Add tests for new features
106
+ - ✅ Keep bundle size within limits
107
+
108
+ ## Bypassing Hooks
109
+
110
+ ⚠️ **Only in emergencies:**
111
+
112
+ ```bash
113
+ # Skip all hooks (not recommended)
114
+ git commit --no-verify -m "emergency fix"
115
+ ```
116
+
117
+ ## Troubleshooting
118
+
119
+ ### Hooks not running?
120
+
121
+ ```bash
122
+ # Reinstall hooks
123
+ bun run prepare
124
+ chmod +x .husky/*
125
+ ```
126
+
127
+ ### Type errors?
128
+
129
+ ```bash
130
+ bun run type-check
131
+ ```
132
+
133
+ ### Format issues?
134
+
135
+ ```bash
136
+ bun run format
137
+ ```
138
+
139
+ ### Secrets detected?
140
+
141
+ If gitleaks detects secrets:
142
+ 1. **DO NOT** bypass the hook
143
+ 2. Remove the secret from your code
144
+ 3. Use environment variables instead
145
+ 4. If already committed, rotate the exposed credentials
146
+
147
+ ## Questions?
148
+
149
+ Open an issue or reach out to the maintainers.
150
+
package/LICENSE.md ADDED
@@ -0,0 +1,53 @@
1
+ LICENSE
2
+
3
+ Apache License
4
+ Version 2.0, January 2004
5
+ http://www.apache.org/licenses/
6
+
7
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
8
+
9
+ 1. Definitions.
10
+
11
+ "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.
12
+
13
+ "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.
16
+
17
+ "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.
18
+
19
+ "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.
20
+
21
+ "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types.
22
+
23
+ "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below).
24
+
25
+ "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof.
26
+
27
+ "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution."
28
+
29
+ "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work.
30
+
31
+ 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form.
32
+
33
+ 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed.
34
+
35
+ 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions:
36
+
37
+ You must give any other recipients of the Work or Derivative Works a copy of this License; and
38
+ You must cause any modified files to carry prominent notices stating that You changed the files; and
39
+ You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and
40
+ If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License.
41
+ You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License.
42
+
43
+ 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions.
44
+
45
+ 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file.
46
+
47
+ 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License.
48
+
49
+ 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages.
50
+
51
+ 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.
52
+
53
+ END OF TERMS AND CONDITIONS
package/README.md CHANGED
@@ -229,3 +229,10 @@ fal.ai automatically filters content that may be nsfw:
229
229
  - ❌ "athletic wear" (vague, may trigger filter)
230
230
  - add "professional", "modest", "appropriate" to prompts
231
231
  - always check file sizes after batch generation (< 10KB = filtered)
232
+
233
+
234
+ ## License
235
+
236
+ Copyright © 2025 vargai Inc. and contributors
237
+ Licensed under the Apache License, Version 2.0.
238
+ See the [LICENSE](./LICENSE.md) file for details.
@@ -4,17 +4,26 @@
4
4
  * video captioning service
5
5
  * generates and overlays subtitles on videos using ffmpeg
6
6
  * supports auto-generation via groq/fireworks or custom srt files
7
+ *
8
+ * modes:
9
+ * - basic: standard SRT subtitles with customizable style
10
+ * - tiktok: word-by-word animated captions with bounce effects
7
11
  */
8
12
 
9
13
  import { existsSync } from "node:fs";
10
14
  import ffmpeg from "fluent-ffmpeg";
11
15
  import type { ActionMeta } from "../../cli/types";
12
16
  import { transcribe } from "../transcribe";
17
+ import {
18
+ addTikTokCaptions,
19
+ type TikTokCaptionItem,
20
+ type TikTokWordStyle,
21
+ } from "./tiktok";
13
22
 
14
23
  export const meta: ActionMeta = {
15
24
  name: "captions",
16
25
  type: "action",
17
- description: "add subtitles to video",
26
+ description: "add subtitles to video (basic or tiktok-style)",
18
27
  inputType: "video",
19
28
  outputType: "video",
20
29
  schema: {
@@ -32,10 +41,18 @@ export const meta: ActionMeta = {
32
41
  format: "file-path",
33
42
  description: "output video path",
34
43
  },
44
+ mode: {
45
+ type: "string",
46
+ enum: ["basic", "tiktok"],
47
+ default: "basic",
48
+ description:
49
+ "caption mode: basic (SRT) or tiktok (word-by-word animated)",
50
+ },
35
51
  srt: {
36
52
  type: "string",
37
53
  format: "file-path",
38
- description: "existing srt file (auto-generates if not provided)",
54
+ description:
55
+ "existing srt file (basic mode, auto-generates if not provided)",
39
56
  },
40
57
  provider: {
41
58
  type: "string",
@@ -43,28 +60,88 @@ export const meta: ActionMeta = {
43
60
  default: "fireworks",
44
61
  description: "transcription provider for auto-generation",
45
62
  },
63
+ // TikTok mode options
64
+ position: {
65
+ type: "string",
66
+ enum: ["upper-middle", "middle", "lower-middle", "top", "bottom"],
67
+ default: "upper-middle",
68
+ description: "tiktok: caption position on screen",
69
+ },
70
+ bounce: {
71
+ type: "number",
72
+ default: 1.12,
73
+ description: "tiktok: bounce scale for active word (1.0-1.5)",
74
+ },
75
+ noBounce: {
76
+ type: "boolean",
77
+ default: false,
78
+ description: "tiktok: disable bounce animation",
79
+ },
46
80
  },
47
81
  },
48
82
  output: { type: "string", format: "file-path", description: "video path" },
49
83
  },
50
84
  async run(options) {
51
- const { video, output, srt, provider } = options as {
52
- video: string;
53
- output: string;
54
- srt?: string;
55
- provider?: "groq" | "fireworks";
56
- };
57
- return addCaptions({ videoPath: video, output, srtPath: srt, provider });
85
+ const { video, output, srt, provider, mode, position, bounce, noBounce } =
86
+ options as {
87
+ video: string;
88
+ output: string;
89
+ srt?: string;
90
+ provider?: "groq" | "fireworks";
91
+ mode?: "basic" | "tiktok";
92
+ position?:
93
+ | "upper-middle"
94
+ | "middle"
95
+ | "lower-middle"
96
+ | "top"
97
+ | "bottom";
98
+ bounce?: number;
99
+ noBounce?: boolean;
100
+ };
101
+
102
+ const tiktokStyle: TikTokWordStyle | undefined =
103
+ mode === "tiktok"
104
+ ? {
105
+ position: position || "upper-middle",
106
+ bounceScale: bounce || 1.12,
107
+ useBounce: !noBounce,
108
+ }
109
+ : undefined;
110
+
111
+ return addCaptions({
112
+ videoPath: video,
113
+ output,
114
+ srtPath: srt,
115
+ provider,
116
+ mode,
117
+ tiktokStyle,
118
+ });
58
119
  },
59
120
  };
60
121
 
122
+ // re-export tiktok types for convenience
123
+ export {
124
+ addTikTokCaptions,
125
+ type TikTokCaptionItem,
126
+ type TikTokWordStyle,
127
+ } from "./tiktok";
128
+
61
129
  // types
62
130
  export interface AddCaptionsOptions {
63
131
  videoPath: string;
64
- srtPath?: string; // optional existing srt file
65
132
  output: string;
66
- provider?: "groq" | "fireworks"; // only used if srtPath not provided
133
+ /** Caption mode: basic (SRT) or tiktok (word-by-word animated) */
134
+ mode?: "basic" | "tiktok";
135
+ /** Existing srt file (auto-generates if not provided) - basic mode only */
136
+ srtPath?: string;
137
+ /** Transcription provider for auto-generation */
138
+ provider?: "groq" | "fireworks";
139
+ /** Style for basic mode */
67
140
  style?: SubtitleStyle;
141
+ /** Captions with word timings for tiktok mode (auto-generates from transcription if not provided) */
142
+ tiktokCaptions?: TikTokCaptionItem[];
143
+ /** Style for tiktok mode */
144
+ tiktokStyle?: TikTokWordStyle;
68
145
  }
69
146
 
70
147
  export interface SubtitleStyle {
@@ -92,7 +169,16 @@ const DEFAULT_STYLE: Required<SubtitleStyle> = {
92
169
  export async function addCaptions(
93
170
  options: AddCaptionsOptions,
94
171
  ): Promise<string> {
95
- const { videoPath, srtPath, output, provider = "fireworks", style } = options;
172
+ const {
173
+ videoPath,
174
+ srtPath,
175
+ output,
176
+ provider = "fireworks",
177
+ style,
178
+ mode = "basic",
179
+ tiktokCaptions,
180
+ tiktokStyle,
181
+ } = options;
96
182
 
97
183
  if (!videoPath) {
98
184
  throw new Error("videoPath is required");
@@ -104,6 +190,54 @@ export async function addCaptions(
104
190
  throw new Error(`video file not found: ${videoPath}`);
105
191
  }
106
192
 
193
+ // TikTok mode: use word-by-word animated captions
194
+ if (mode === "tiktok") {
195
+ console.log("[captions] using TikTok mode (word-by-word animated)...");
196
+
197
+ // If captions provided, use them directly
198
+ if (tiktokCaptions && tiktokCaptions.length > 0) {
199
+ return addTikTokCaptions({
200
+ videoPath,
201
+ output,
202
+ captions: tiktokCaptions,
203
+ style: tiktokStyle,
204
+ });
205
+ }
206
+
207
+ // Otherwise, auto-generate from transcription
208
+ console.log(`[captions] auto-generating word timings with ${provider}...`);
209
+
210
+ // Fireworks provides word-level timestamps, groq doesn't
211
+ if (provider === "groq") {
212
+ console.warn(
213
+ "[captions] warning: groq doesn't provide word-level timestamps, using fireworks instead",
214
+ );
215
+ }
216
+
217
+ // Import fireworks directly for word-level data
218
+ const { transcribeWithFireworks } = await import("../../lib/fireworks");
219
+
220
+ const data = await transcribeWithFireworks({ audioPath: videoPath });
221
+
222
+ if (!data.words || data.words.length === 0) {
223
+ throw new Error("transcription returned no word data");
224
+ }
225
+
226
+ // Convert fireworks words to tiktok captions
227
+ // Group words into phrases (max ~5-7 words per phrase)
228
+ const phrases = groupWordsIntoPhrases(data.words, 6);
229
+
230
+ console.log(`[captions] generated ${phrases.length} caption phrases`);
231
+
232
+ return addTikTokCaptions({
233
+ videoPath,
234
+ output,
235
+ captions: phrases,
236
+ style: tiktokStyle,
237
+ });
238
+ }
239
+
240
+ // Basic mode: use SRT subtitles
107
241
  console.log("[captions] adding captions to video...");
108
242
 
109
243
  // determine srt file path
@@ -162,6 +296,62 @@ export async function addCaptions(
162
296
  });
163
297
  }
164
298
 
299
/**
 * Group word-level timings into short phrases for TikTok captions.
 *
 * A phrase is closed as soon as it holds `maxWordsPerPhrase` words, or as soon
 * as its latest word ends a sentence (`.`, `!` or `?`, optionally followed by
 * closing quotes/brackets or trailing whitespace). Words left over after the
 * last boundary form one final phrase.
 *
 * @param words - word timings, processed in the order given (presumably
 *   chronological - confirm against the transcription provider)
 * @param maxWordsPerPhrase - word count at which a phrase is closed; values
 *   of 1 or less yield one word per phrase
 * @returns one item per phrase: `text` is the words joined by single spaces,
 *   `start` is the first word's start, `end` is the last word's end, and
 *   `words` holds fresh `{ word, start, end }` copies. Empty input gives `[]`.
 */
function groupWordsIntoPhrases(
  words: Array<{ word: string; start: number; end: number }>,
  maxWordsPerPhrase: number,
): TikTokCaptionItem[] {
  // Built once per call instead of once per word. Besides bare terminal
  // punctuation it also accepts a closing quote/bracket or trailing
  // whitespace after it, so tokens such as `world."` still end a sentence.
  const sentenceEnd = /[.!?]["'”’)\]]*\s*$/;

  const phrases: TikTokCaptionItem[] = [];
  let pending: Array<{ word: string; start: number; end: number }> = [];

  // Turn the buffered words into a phrase (if any) and reset the buffer.
  const flush = (): void => {
    const first = pending[0];
    const last = pending[pending.length - 1];
    if (first && last) {
      phrases.push({
        text: pending.map((w) => w.word).join(" "),
        start: first.start,
        end: last.end,
        // Copy only the timing fields so extra properties on the input
        // objects do not leak into the caption data.
        words: pending.map(({ word, start, end }) => ({ word, start, end })),
      });
    }
    pending = [];
  };

  for (const word of words) {
    pending.push(word);

    // Start a new phrase after reaching max words or at sentence boundaries
    if (pending.length >= maxWordsPerPhrase || sentenceEnd.test(word.word)) {
      flush();
    }
  }

  // Add remaining words
  flush();

  return phrases;
}
354
+
165
355
  // cli
166
356
  if (import.meta.main) {
167
357
  const { runCli } = await import("../../cli/runner");