@opensassi/opencode 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +4 -2
- package/package.json +2 -1
- package/skills/demo-video/SKILL.md +264 -0
- package/skills/demo-video/scripts/assemble.cjs +152 -0
- package/skills/demo-video/scripts/capture-browser.sh +64 -0
- package/skills/demo-video/scripts/capture-html.sh +48 -0
- package/skills/demo-video/scripts/generate-subs.cjs +75 -0
- package/skills/demo-video/scripts/generate-tts.sh +28 -0
- package/skills/demo-video/scripts/render-slide.cjs +61 -0
- package/skills/demo-video/scripts/render-terminal.cjs +138 -0
- package/skills/demo-video/scripts/setup.sh +44 -0
- package/skills/demo-video/test/assemble.test.js +100 -0
- package/skills/demo-video/test/capture-browser.test.js +71 -0
- package/skills/demo-video/test/capture-html.test.js +72 -0
- package/skills/demo-video/test/e2e-test.sh +302 -0
- package/skills/demo-video/test/fixtures/demo-scenes.json +36 -0
- package/skills/demo-video/test/fixtures/hello.output +2 -0
- package/skills/demo-video/test/fixtures/hello.timing +13 -0
- package/skills/demo-video/test/generate-subs.test.js +67 -0
- package/skills/demo-video/test/generate-tts.test.js +58 -0
- package/skills/demo-video/test/helpers/run-script.js +33 -0
- package/skills/demo-video/test/integration.test.js +110 -0
- package/skills/demo-video/test/jest.config.cjs +6 -0
- package/skills/demo-video/test/render-slide.test.js +79 -0
- package/skills/demo-video/test/render-terminal.test.js +87 -0
- package/skills/demo-video/test/setup.test.js +55 -0
- package/skills/opensassi/SKILL.md +14 -6
- package/skills-index.json +5 -0
package/AGENTS.md
CHANGED
|
@@ -9,6 +9,7 @@ All skills, scripts, and tooling are delivered via the npm package.
|
|
|
9
9
|
|---------|----------|
|
|
10
10
|
| `asm-optimizer` | SIMD/assembly optimization framework |
|
|
11
11
|
| `daily-evaluation` | Aggregate session evaluations into dashboards |
|
|
12
|
+
| `demo-video` | Produce narrated demo videos with multi-language subtitles |
|
|
12
13
|
| `git` | Rebase-based single-commit-per-session workflow |
|
|
13
14
|
| `issue` | GitHub issue management |
|
|
14
15
|
| `npm-optimizer` | Port an npm package to a C++ native addon |
|
|
@@ -23,8 +24,9 @@ All skills, scripts, and tooling are delivered via the npm package.
|
|
|
23
24
|
## Workflow
|
|
24
25
|
|
|
25
26
|
1. `skill opensassi` — Load the bootstrap skill. It exposes the full skills-index as a reference table.
|
|
26
|
-
2. Run `
|
|
27
|
-
3. Use the skill's commands. Scripts are run via `
|
|
27
|
+
2. Run `npm run opencode -- <skill-name>` to load any sub-skill. The agent reads the output as the skill's full instructions.
|
|
28
|
+
3. Use the skill's commands. Scripts are run via `npm run opencode -- run <path>` or `npm run opencode -- run --skill <name> <path>`.
|
|
29
|
+
- *Consumers of the published package use `npx @opensassi/opencode` instead of `npm run opencode --`.*
|
|
28
30
|
|
|
29
31
|
## Design Constraints
|
|
30
32
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@opensassi/opencode",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "Agent skill harness for opencode — bootstrap, system-design, git workflow, profiling, and more",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -22,6 +22,7 @@
|
|
|
22
22
|
"!**/*.spec.md"
|
|
23
23
|
],
|
|
24
24
|
"scripts": {
|
|
25
|
+
"opencode": "node bin/opencode.js",
|
|
25
26
|
"extract": "node scripts/extract-artifacts.js",
|
|
26
27
|
"extract:file": "node scripts/extract-artifacts.js --file",
|
|
27
28
|
"test-artifacts": "node scripts/test-artifacts.js",
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: demo-video
|
|
3
|
+
description: Produce narrated, edited demo videos from project outlines. Captures terminal TUI sessions via script(1), browser interactions via Playwright, then assembles clips with edge-tts narration and multi-language subtitles via ffmpeg.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Skill: demo-video
|
|
7
|
+
|
|
8
|
+
## Persona
|
|
9
|
+
|
|
10
|
+
You are a **senior DevOps and multimedia automation engineer**. Your role is to produce polished, narrated demonstration videos of software projects — from a high-level outline through to a final MP4 file with multi-language subtitles. You automate every step: planning scenes, recording terminal TUI output, capturing browser interactions, generating text-to-speech narration, and assembling the final video with transitions and subtitles.
|
|
11
|
+
|
|
12
|
+
You work **methodically** — always verify dependencies first, generate the scene file from an outline, record each scene independently, then assemble. You never produce a video without first checking the project's README, structure, and key commands.
|
|
13
|
+
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
## On Activation
|
|
17
|
+
|
|
18
|
+
1. **Read project context** — Read the README, package.json, recent git log, and key source files to understand what the project does and what should be shown.
|
|
19
|
+
2. **Check dependencies** — Run `setup.sh` to verify ffmpeg, edge-tts, and Playwright are available.
|
|
20
|
+
3. **Check for existing scene file** — Look for `demo-scenes.json` in the project root. If it exists, ask whether to re-use, revise, or regenerate.
|
|
21
|
+
4. **Report readiness** — Print the project name, a one-line summary, dependency status, and whether a scene file already exists.
|
|
22
|
+
5. **List available commands** — Show the 3 workflow phases and 8 available scripts.
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Dependencies
|
|
27
|
+
|
|
28
|
+
The host machine must have:
|
|
29
|
+
|
|
30
|
+
| Tool | Install | Purpose |
|
|
31
|
+
|------|---------|---------|
|
|
32
|
+
| `ffmpeg ≥ 6.0` | `apt/brew/choco install ffmpeg` | Clip extraction, transitions, audio mixing, encoding |
|
|
33
|
+
| `edge-tts` | `pip install edge-tts` | Text-to-speech narration (Microsoft Edge engine) |
|
|
34
|
+
| `playwright` | `npx playwright install chromium` | Browser automation + HTML-to-video capture |
|
|
35
|
+
|
|
36
|
+
Run `setup.sh` to check all three and print install guidance for any missing tools.
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## Scene JSON Format
|
|
41
|
+
|
|
42
|
+
The scene file (`demo-scenes.json`) is the single source of truth. Generate it from the project outline, then pass it to the `record` and `produce` phases.
|
|
43
|
+
|
|
44
|
+
### Scene Types
|
|
45
|
+
|
|
46
|
+
| Type | Purpose |
|
|
47
|
+
|------|---------|
|
|
48
|
+
| `terminal_command` | Shows a command being executed inside a terminal TUI |
|
|
49
|
+
| `browser` | Opens a URL in Playwright, captures the viewport |
|
|
50
|
+
| `narration_only` | Full-screen slide with bullet points and TTS |
|
|
51
|
+
|
|
52
|
+
### Top-Level Structure
|
|
53
|
+
|
|
54
|
+
```json
|
|
55
|
+
{
|
|
56
|
+
"metadata": {
|
|
57
|
+
"title": "Demo Title",
|
|
58
|
+
"output_file": "demo-final.mp4",
|
|
59
|
+
"resolution": "1920x1080",
|
|
60
|
+
"frame_rate": 30
|
|
61
|
+
},
|
|
62
|
+
"languages": {
|
|
63
|
+
"en": { "voice": "en-US-AriaNeural" },
|
|
64
|
+
"zh": { "voice": "zh-CN-XiaoxiaoNeural" }
|
|
65
|
+
},
|
|
66
|
+
"scenes": [
|
|
67
|
+
{ "id": 1, "type": "terminal_command", ... },
|
|
68
|
+
{ "id": 2, "type": "browser", ... },
|
|
69
|
+
{ "id": 3, "type": "narration_only", ... }
|
|
70
|
+
]
|
|
71
|
+
}
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Scene Fields
|
|
75
|
+
|
|
76
|
+
**Common to all types:**
|
|
77
|
+
|
|
78
|
+
| Field | Type | Required | Description |
|
|
79
|
+
|-------|------|----------|-------------|
|
|
80
|
+
| `id` | integer | yes | Unique scene number |
|
|
81
|
+
| `type` | string | yes | `terminal_command`, `browser`, or `narration_only` |
|
|
82
|
+
| `narration` | object | yes | Map of language code → narration text (e.g. `{"en": "...", "zh": "..."}`) |
|
|
83
|
+
| `transition` | string | no | `fade`, `wipe_left`, `wipe_right`, `none` (default `fade`) |
|
|
84
|
+
| `transition_duration` | number | no | Transition length in seconds (default 0.5) |
|
|
85
|
+
|
|
86
|
+
**`terminal_command`:**
|
|
87
|
+
|
|
88
|
+
| Field | Type | Required | Description |
|
|
89
|
+
|-------|------|----------|-------------|
|
|
90
|
+
| `command` | string | yes | Shell command to execute |
|
|
91
|
+
| `cwd` | string | no | Working directory (default project root) |
|
|
92
|
+
| `speed` | number | no | Replay speed multiplier (default 3, range 1-10) |
|
|
93
|
+
| `duration` | number | no | Scene length in seconds (default: auto from command run time) |
|
|
94
|
+
|
|
95
|
+
**`browser`:**
|
|
96
|
+
|
|
97
|
+
| Field | Type | Required | Description |
|
|
98
|
+
|-------|------|----------|-------------|
|
|
99
|
+
| `url` | string | yes | URL to open |
|
|
100
|
+
| `duration` | number | yes | How long to show the browser (seconds) |
|
|
101
|
+
| `actions` | array | no | List of `{type, selector}` actions (click, scroll, type) |
|
|
102
|
+
|
|
103
|
+
**`narration_only`:**
|
|
104
|
+
|
|
105
|
+
| Field | Type | Required | Description |
|
|
106
|
+
|-------|------|----------|-------------|
|
|
107
|
+
| `bullets` | array | yes | Bullet point strings |
|
|
108
|
+
| `duration` | number | yes | Slide duration in seconds |
|
|
109
|
+
| `background_color` | string | no | CSS color (default `#1e1e2e`) |
|
|
110
|
+
| `text_color` | string | no | CSS color (default `#cdd6f4`) |
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
|
|
114
|
+
## Workflow
|
|
115
|
+
|
|
116
|
+
### Phase 1: Plan
|
|
117
|
+
|
|
118
|
+
**Goal:** Generate `demo-scenes.json` from a high-level outline.
|
|
119
|
+
|
|
120
|
+
1. Read the project's README, package.json, recent git log, and source layout.
|
|
121
|
+
2. Draft a scene-by-scene outline. Each scene should show one aspect of the project.
|
|
122
|
+
3. For each `terminal_command` scene, decide what command to run and what it will demonstrate.
|
|
123
|
+
4. For each `browser` scene, decide what URL to open and what UI interaction to show.
|
|
124
|
+
5. Write narration text in English for each scene.
|
|
125
|
+
6. **Translate narration** into all target languages (the agent's own LLM capacity handles translation).
|
|
126
|
+
7. Write the complete `demo-scenes.json` to disk.
|
|
127
|
+
|
|
128
|
+
**Output:** `demo-scenes.json`
|
|
129
|
+
|
|
130
|
+
### Phase 2: Record
|
|
131
|
+
|
|
132
|
+
**Goal:** Capture one video clip per scene.
|
|
133
|
+
|
|
134
|
+
For each scene in `demo-scenes.json`:
|
|
135
|
+
|
|
136
|
+
#### Record: `terminal_command`
|
|
137
|
+
|
|
138
|
+
1. Run the command with `script --timing` to capture authentic TUI output:
|
|
139
|
+
```bash
|
|
140
|
+
script --timing=/tmp/demo/scene_N.timing \
|
|
141
|
+
--flush /tmp/demo/scene_N.output \
|
|
142
|
+
-c "cd <cwd> && <command>"
|
|
143
|
+
```
|
|
144
|
+
2. Generate an accelerated terminal replay HTML:
|
|
145
|
+
```bash
|
|
146
|
+
npm run opencode -- run --skill demo-video render-terminal.cjs \
|
|
147
|
+
--timing /tmp/demo/scene_N.timing \
|
|
148
|
+
--output /tmp/demo/scene_N.output \
|
|
149
|
+
--command "<command>" \
|
|
150
|
+
--speed <speed> \
|
|
151
|
+
--html /tmp/demo/scene_N.html
|
|
152
|
+
```
|
|
153
|
+
3. Capture the HTML as a video clip via Playwright:
|
|
154
|
+
```bash
|
|
155
|
+
npm run opencode -- run --skill demo-video capture-html.sh \
|
|
156
|
+
--html /tmp/demo/scene_N.html \
|
|
157
|
+
--duration <duration> \
|
|
158
|
+
--output /tmp/demo/clips/scene_N.mp4
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
#### Record: `browser`
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
npm run opencode -- run --skill demo-video capture-browser.sh \
|
|
165
|
+
--url "<url>" \
|
|
166
|
+
--actions '<json>' \
|
|
167
|
+
--duration <duration> \
|
|
168
|
+
--output /tmp/demo/clips/scene_N.mp4
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
#### Record: `narration_only`
|
|
172
|
+
|
|
173
|
+
1. Generate an HTML slide:
|
|
174
|
+
```bash
|
|
175
|
+
npm run opencode -- run --skill demo-video render-slide.cjs \
|
|
176
|
+
--title "<scene title>" \
|
|
177
|
+
--bullets '<json>' \
|
|
178
|
+
--background "#1e1e2e" \
|
|
179
|
+
--foreground "#cdd6f4" \
|
|
180
|
+
--output /tmp/demo/scene_N.html
|
|
181
|
+
```
|
|
182
|
+
2. Capture as video:
|
|
183
|
+
```bash
|
|
184
|
+
npm run opencode -- run --skill demo-video capture-html.sh \
|
|
185
|
+
--html /tmp/demo/scene_N.html \
|
|
186
|
+
--duration <duration> \
|
|
187
|
+
--output /tmp/demo/clips/scene_N.mp4
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
**Output:** `/tmp/demo/clips/scene_1.mp4`, `scene_2.mp4`, ...
|
|
191
|
+
|
|
192
|
+
### Phase 3: Produce
|
|
193
|
+
|
|
194
|
+
**Goal:** Generate TTS audio, subtitle files, and assemble the final video.
|
|
195
|
+
|
|
196
|
+
#### Step 1 — Generate TTS audio (English only)
|
|
197
|
+
|
|
198
|
+
For each scene with non-empty English narration:
|
|
199
|
+
```bash
|
|
200
|
+
npm run opencode -- run --skill demo-video generate-tts.sh \
|
|
201
|
+
--text "<english narration text>" \
|
|
202
|
+
--voice en-US-AriaNeural \
|
|
203
|
+
--output /tmp/demo/audio/scene_N.mp3
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
#### Step 2 — Generate subtitle files (all languages)
|
|
207
|
+
|
|
208
|
+
```bash
|
|
209
|
+
npm run opencode -- run --skill demo-video generate-subs.cjs \
|
|
210
|
+
--scenes /tmp/demo/scenes.json \
|
|
211
|
+
--languages '["zh","es","de"]' \
|
|
212
|
+
--output-dir /tmp/demo/subs/
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
Each language produces `demo.{lang}.srt` with subtitle timing derived from the scene durations and narration text.
|
|
216
|
+
|
|
217
|
+
#### Step 3 — Assemble final video
|
|
218
|
+
|
|
219
|
+
```bash
|
|
220
|
+
npm run opencode -- run --skill demo-video assemble.cjs \
|
|
221
|
+
--manifest /tmp/demo/manifest.json \
|
|
222
|
+
--output /tmp/demo/demo-final.mp4
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
The manifest is a JSON file listing all scene clips, transitions, and audio references. The assembly script:
|
|
226
|
+
1. Pads or trims each clip to match its narration audio duration
|
|
227
|
+
2. Applies crossfade transitions between consecutive clips
|
|
228
|
+
3. Mixes narration audio into the video track
|
|
229
|
+
4. Encodes as H.264 AAC in MP4 container
|
|
230
|
+
|
|
231
|
+
**Output:** `demo-final.mp4` + `demo-final.zh.srt` + `demo-final.es.srt` + ...
|
|
232
|
+
|
|
233
|
+
---
|
|
234
|
+
|
|
235
|
+
## Script Reference
|
|
236
|
+
|
|
237
|
+
All scripts are invoked via:
|
|
238
|
+
```bash
|
|
239
|
+
npm run opencode -- run --skill demo-video <script-name> [args...]
|
|
240
|
+
```
|
|
241
|
+
(Published consumers use `npx @opensassi/opencode run --skill demo-video <script-name> [args...]`)
|
|
242
|
+
|
|
243
|
+
| Script | Purpose |
|
|
244
|
+
|--------|---------|
|
|
245
|
+
| `setup.sh` | Check ffmpeg, edge-tts, Playwright; print install guidance |
|
|
246
|
+
| `render-terminal.cjs` | Convert `script --timing` output to accelerated terminal HTML replay |
|
|
247
|
+
| `render-slide.cjs` | Generate narration HTML slide with styled bullet points |
|
|
248
|
+
| `capture-html.sh` | Open HTML page in Playwright, record viewport as MP4 |
|
|
249
|
+
| `capture-browser.sh` | Navigate to URL in Playwright with optional interactions, record as MP4 |
|
|
250
|
+
| `generate-tts.sh` | Generate TTS MP3 via edge-tts from text |
|
|
251
|
+
| `generate-subs.cjs` | Generate SRT subtitle files from multi-language scene narration |
|
|
252
|
+
| `assemble.cjs` | Full ffmpeg assembly: concatenate clips, transitions, mix audio, encode |
|
|
253
|
+
|
|
254
|
+
---
|
|
255
|
+
|
|
256
|
+
## Design Principles
|
|
257
|
+
|
|
258
|
+
- **Clips are independent** — Each scene is captured separately. A failed scene can be retried without re-recording the entire demo.
|
|
259
|
+
- **Timing collapse** — LLM thinking time and command execution pauses are collapsed via the `speed` multiplier (default 3×). The visual shows the TUI replaying faster than real time.
|
|
260
|
+
- **Subtitle-first for non-English** — One English audio track. Other languages get SRT subtitle files generated by the agent from translations. Languages with TTS support can optionally get full audio in a future enhancement.
|
|
261
|
+
- **Agent translates** — The agent's own LLM capacity generates all non-English narration text during the Plan phase. No external translation API is required.
|
|
262
|
+
- **Zero desktop recording** — No x11grab, no avfoundation, no screen noise. Terminal output is captured via `script --timing` and rendered as styled HTML. Browser content is captured directly via Playwright.
|
|
263
|
+
- **Cross-platform by default** — Playwright works on Linux, macOS, and Windows. No platform-specific capture code. ffmpeg and edge-tts are available on all three.
|
|
264
|
+
- **`--keep-raw`** — Pass `--keep-raw` to the assemble step to preserve intermediate clips, audio, and subtitle files for inspection or re-processing.
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
const fs = require('fs')
|
|
3
|
+
const path = require('path')
|
|
4
|
+
const { execSync } = require('child_process')
|
|
5
|
+
|
|
6
|
+
// Parse CLI arguments of the form `--key value`, plus bare boolean flags.
//
// The documented interface includes `[--keep-raw]` as a value-less flag; the
// previous fixed-stride (`i += 2`) parser either left it undefined or swallowed
// the following flag as its value. A flag whose next token is missing or is
// another `--option` is now recorded as the string 'true', which matches the
// `args['keep-raw'] === 'true'` checks used elsewhere in this script.
//
// Returns: plain object mapping flag name (without `--`) → string value.
// Exits with status 1 and prints usage when --mode or --output is missing.
function parseArgs() {
  const args = {}
  let i = 2
  while (i < process.argv.length) {
    const key = process.argv[i].replace(/^--/, '')
    const next = process.argv[i + 1]
    if (next === undefined || next.startsWith('--')) {
      // Bare flag with no value (e.g. --keep-raw): treat as boolean 'true'.
      args[key] = 'true'
      i += 1
    } else {
      args[key] = next
      i += 2
    }
  }
  if (!args.mode || !args.output) {
    console.error('Usage: assemble.cjs --mode <video-only|audio-only|final> --manifest <json> --output <file> [--keep-raw]')
    console.error(' --mode video-only : concat scene clips, no audio')
    console.error(' --mode audio-only : concat TTS audio files sequentially')
    console.error(' --mode final : mix existing video_master + audio_master')
    process.exit(1)
  }
  return args
}
|
|
21
|
+
|
|
22
|
+
// Probe a media file's duration in seconds via ffprobe.
// Returns 0 when ffprobe is missing, fails, or prints something unparseable.
function getDuration(file) {
  const cmd = `ffprobe -v error -show_entries format=duration -of csv=p=0 "${file}"`
  try {
    const raw = execSync(cmd, { encoding: 'utf-8', timeout: 10000 })
    const parsed = parseFloat(raw.trim())
    return parsed || 0
  } catch {
    return 0
  }
}
|
|
31
|
+
|
|
32
|
+
// Concatenate per-scene clips into a single silent video master.
//
// manifest: { scenes: [{ clip: <path>, duration: <sec> }, ...] } — scenes
//   without a `clip` are skipped.
// outputPath: destination file; intermediates are written to its directory.
// keepRaw: when falsy, intermediate adjusted clips and the raw concat are
//   deleted after the final encode.
//
// Each clip is first length-matched to its scene's target duration:
// shorter clips freeze their last frame (tpad), longer clips are sped up
// (setpts), near-matches are copied as-is.
// Throws when no scene provides a usable clip.
function assembleVideo(manifest, outputPath, keepRaw) {
  const scenes = manifest.scenes || []
  const workdir = path.dirname(path.resolve(outputPath))

  // Pad/trim each clip to match target duration
  const clipInfo = []
  for (let i = 0; i < scenes.length; i++) {
    const s = scenes[i]
    if (!s.clip) { clipInfo.push(null); continue }
    const clipDur = getDuration(s.clip)
    const targetDur = s.duration || clipDur
    const adjusted = path.join(workdir, `adjusted_${i}.mkv`)
    const pad = targetDur - clipDur
    if (pad > 0.1) {
      // Clip shorter than the scene: clone the last frame to fill the gap.
      execSync(`ffmpeg -y -i "${s.clip}" -filter:v "tpad=stop_mode=clone:stop_duration=${pad}" -an "${adjusted}"`, { stdio: 'ignore', timeout: 60000 })
    } else if (pad < -0.1) {
      // Clip longer than the scene: compress its timeline to fit.
      execSync(`ffmpeg -y -i "${s.clip}" -filter:v "setpts=${(targetDur / clipDur).toFixed(3)}*PTS" -an "${adjusted}"`, { stdio: 'ignore', timeout: 60000 })
    } else {
      fs.copyFileSync(s.clip, adjusted)
    }
    clipInfo.push({ clip: adjusted, duration: targetDur })
  }

  const valid = clipInfo.filter(c => c !== null)
  if (valid.length === 0) throw new Error('No valid clips')

  // Fast first-pass concat, then a higher-quality final encode.
  const raw = path.join(workdir, 'video_raw.mkv')
  const inputArgs = valid.map(c => `-i "${c.clip}"`).join(' ')
  const labels = valid.map((_, i) => `[${i}:v]`).join('')
  execSync(
    `ffmpeg -y ${inputArgs} -filter_complex "${labels}concat=n=${valid.length}:v=1:a=0[out]" -map "[out]" -c:v libx264 -preset veryfast -crf 28 "${raw}"`,
    { stdio: 'ignore', timeout: 120000 }
  )

  const outResolved = path.resolve(outputPath)
  execSync(`ffmpeg -y -i "${raw}" -c:v libx264 -preset medium -crf 18 "${outResolved}"`, { stdio: 'ignore', timeout: 120000 })
  console.log(`Video master: ${outResolved}`)

  if (!keepRaw) {
    // BUGFIX: the previous cleanup also ran `fs.unlinkSync(concatList)`, but
    // `concatList` is never defined in this function (it exists only in
    // assembleAudio) — that line threw a ReferenceError on every cleanup.
    for (const c of valid) { try { fs.unlinkSync(c.clip) } catch {} }
    try { fs.unlinkSync(raw) } catch {}
  }
}
|
|
76
|
+
|
|
77
|
+
// Concatenate per-scene TTS audio tracks into one AAC audio master.
// manifest: { scenes: [{ audio: <path> }, ...] } — scenes without audio are
// skipped. Throws when no scene carries an audio file.
function assembleAudio(manifest, outputPath, keepRaw) {
  const workdir = path.dirname(path.resolve(outputPath))
  const tracks = (manifest.scenes || []).map(s => s.audio).filter(Boolean)
  if (tracks.length === 0) throw new Error('No audio files in manifest')

  // ffmpeg's concat demuxer consumes a text file of `file '<path>'` lines.
  const listFile = path.join(workdir, 'audio_concat.txt')
  fs.writeFileSync(listFile, tracks.map(f => `file '${f}'`).join('\n'), 'utf-8')

  const outResolved = path.resolve(outputPath)
  execSync(`ffmpeg -y -f concat -safe 0 -i "${listFile}" -c:a aac -b:a 128k "${outResolved}"`, { stdio: 'ignore', timeout: 120000 })
  console.log(`Audio master: ${outResolved}`)

  if (!keepRaw) {
    try { fs.unlinkSync(listFile) } catch {}
  }
}
|
|
96
|
+
|
|
97
|
+
// Mux a finished video master with a finished audio master into the final MP4.
// args: { video, audio, output, 'keep-raw' } (paths; keep-raw as string 'true').
// Throws when either input is missing or the two track lengths disagree by
// more than half a second. The final pass is a stream copy that only moves
// the moov atom to the front (+faststart) for web playback.
function assembleFinal(args) {
  const videoPath = path.resolve(args.video)
  const audioPath = path.resolve(args.audio)
  const outputPath = path.resolve(args.output)
  const keepRaw = args['keep-raw'] === 'true'

  if (!fs.existsSync(videoPath)) throw new Error(`Video not found: ${videoPath}`)
  if (!fs.existsSync(audioPath)) throw new Error(`Audio not found: ${audioPath}`)

  // Sanity check: narration and visuals must already be length-matched.
  const vDur = getDuration(videoPath)
  const aDur = getDuration(audioPath)
  const diff = Math.abs(vDur - aDur)
  if (diff > 0.5) {
    throw new Error(`Video (${vDur.toFixed(2)}s) and audio (${aDur.toFixed(2)}s) mismatch by ${diff.toFixed(2)}s`)
  }

  const workdir = path.dirname(outputPath)
  const muxed = path.join(workdir, 'mixed.mkv')
  execSync(
    `ffmpeg -y -i "${videoPath}" -i "${audioPath}" -c:v copy -c:a aac -b:a 128k -shortest "${muxed}"`,
    { stdio: 'ignore', timeout: 120000 }
  )

  execSync(
    `ffmpeg -y -i "${muxed}" -c copy -movflags +faststart "${outputPath}"`,
    { stdio: 'ignore', timeout: 60000 }
  )
  console.log(`Final: ${outputPath}`)

  if (!keepRaw) {
    try { fs.unlinkSync(muxed) } catch {}
  }
}
|
|
132
|
+
|
|
133
|
+
// CLI dispatcher: route --mode to the matching assembly step.
function main() {
  const args = parseArgs()
  const keepRaw = args['keep-raw'] === 'true'
  const loadManifest = () =>
    JSON.parse(fs.readFileSync(path.resolve(args.manifest), 'utf-8'))

  switch (args.mode) {
    case 'video-only':
      assembleVideo(loadManifest(), args.output, keepRaw)
      break
    case 'audio-only':
      assembleAudio(loadManifest(), args.output, keepRaw)
      break
    case 'final':
      assembleFinal(args)
      break
    default:
      console.error(`Unknown mode: ${args.mode}`)
      process.exit(1)
  }
}
|
|
150
|
+
|
|
151
|
+
// Run as a CLI when invoked directly; export internals for unit tests.
if (require.main === module) {
  main()
}

module.exports = { assembleVideo, assembleAudio, assembleFinal, getDuration }
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# capture-browser.sh — open a URL in headless Chromium (Playwright), optionally
# run a list of interactions, and record the 1920x1080 viewport as a clip.
set -euo pipefail

usage() {
  echo "Usage: capture-browser.sh --url <url> [--actions <json>] --duration <sec> --output <file>"
  exit 1
}

URL=""
ACTIONS="[]"
DURATION=""
OUTPUT=""

while [ $# -gt 0 ]; do
  case "$1" in
    --url) URL="$2"; shift 2 ;;
    --actions) ACTIONS="$2"; shift 2 ;;
    --duration) DURATION="$2"; shift 2 ;;
    --output) OUTPUT="$2"; shift 2 ;;
    *) echo "Unknown option: $1"; usage ;;
  esac
done

if [ -z "$URL" ] || [ -z "$DURATION" ] || [ -z "$OUTPUT" ]; then
  usage
fi

mkdir -p "$(dirname "$OUTPUT")"

# NOTE: ${URL}, ${ACTIONS}, ${DURATION} and ${OUTPUT} are expanded by bash
# into the inline Node script below; inner JS '$' usage must stay escaped.
node -e "
const { chromium } = require('playwright');
const actions = ${ACTIONS};
(async () => {
  const browser = await chromium.launch({ headless: true });
  const context = await browser.newContext({
    viewport: { width: 1920, height: 1080 },
    recordVideo: { dir: '$(dirname "$OUTPUT")', size: { width: 1920, height: 1080 } }
  });
  const page = await context.newPage();
  const startMs = Date.now();
  await page.goto('${URL}', { waitUntil: 'networkidle' });

  for (const action of actions) {
    if (action.type === 'scroll') {
      // BUGFIX: scroll actions carry no selector, so the old 'if (!el)
      // continue' guard silently skipped every scroll. Scroll the window
      // directly instead of requiring an element.
      await page.evaluate((s) => window.scrollBy(0, s), action.y || 500);
    } else {
      const el = action.selector ? await page.\$(action.selector) : null;
      if (!el) continue;
      if (action.type === 'click') await el.click();
      else if (action.type === 'type' && action.text) await el.fill(action.text);
    }
    await page.waitForTimeout(500);
  }

  // Hold the page until the requested clip duration has elapsed.
  const elapsed = (Date.now() - startMs) / 1000;
  const remaining = Math.max(0, ${DURATION} - elapsed);
  await page.waitForTimeout(remaining * 1000);
  await context.close();
  await browser.close();
  const videoPath = await page.video().path();
  require('fs').renameSync(videoPath, '${OUTPUT}');
  console.log('Captured: ${OUTPUT}');
})().catch(e => { console.error('FAILED:', e.message); process.exit(1); });
" || {
  echo "FAILED: Could not capture $URL"
  exit 1
}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# capture-html.sh — open a local HTML file in headless Chromium (Playwright)
# and record the 1920x1080 viewport for a fixed duration.
set -euo pipefail

usage() {
  echo "Usage: capture-html.sh --html <file> --duration <sec> --output <file>"
  exit 1
}

HTML=""
DURATION=""
OUTPUT=""

while [ $# -gt 0 ]; do
  case "$1" in
    --html) HTML="$2"; shift 2 ;;
    --duration) DURATION="$2"; shift 2 ;;
    --output) OUTPUT="$2"; shift 2 ;;
    *) echo "Unknown option: $1"; usage ;;
  esac
done

if [ -z "$HTML" ] || [ -z "$DURATION" ] || [ -z "$OUTPUT" ]; then
  usage
fi
if [ ! -f "$HTML" ]; then
  echo "HTML file not found: $HTML"
  exit 1
fi

mkdir -p "$(dirname "$OUTPUT")"

node -e "
const { chromium } = require('playwright');
const path = require('path');
const fs = require('fs');
(async () => {
  const browser = await chromium.launch({ headless: true });
  const context = await browser.newContext({
    viewport: { width: 1920, height: 1080 },
    deviceScaleFactor: 1,
    recordVideo: { dir: '$(dirname "$OUTPUT")', size: { width: 1920, height: 1080 } }
  });
  const page = await context.newPage();
  // BUGFIX: resolve to an absolute path first — 'file://' + a relative path
  // is an invalid file URL, which broke captures when --html was relative.
  await page.goto('file://' + path.resolve('${HTML}'));
  await page.waitForTimeout(${DURATION} * 1000);
  await context.close();
  await browser.close();
  const videoPath = await page.video().path();
  const outPath = path.resolve('${OUTPUT}');
  fs.renameSync(videoPath, outPath);
  console.log('Captured: ' + outPath);
})().catch(e => { console.error('FAILED:', e.message); process.exit(1); });
"
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
const fs = require('fs')
|
|
3
|
+
const path = require('path')
|
|
4
|
+
|
|
5
|
+
// Parse `--flag value` pairs from argv.
// Exits with status 1 and prints usage if --scenes, --languages, or
// --output-dir is missing.
// BUGFIX: the usage line previously named the script `generate-subs.js`;
// the actual file is `generate-subs.cjs`.
function parseArgs() {
  const args = {}
  for (let i = 2; i < process.argv.length; i += 2) {
    const key = process.argv[i].replace(/^--/, '')
    args[key] = process.argv[i + 1]
  }
  if (!args.scenes || !args.languages || !args['output-dir']) {
    console.error('Usage: generate-subs.cjs --scenes <json> --languages <json> --output-dir <dir>')
    process.exit(1)
  }
  return args
}
|
|
17
|
+
|
|
18
|
+
// Format a time offset in seconds as an SRT timestamp: HH:MM:SS,mmm.
//
// BUGFIX: the SubRip (.srt) format mandates a comma separator and 3-digit
// milliseconds; the previous HH:MM:SS.cc (dot + centiseconds) output is
// rejected by strict players. Working in integer milliseconds also avoids
// floating-point truncation artifacts (e.g. 0.07 * 100 → 6.999…).
function formatTime(seconds) {
  const totalMs = Math.round(seconds * 1000)
  const h = Math.floor(totalMs / 3600000)
  const m = Math.floor((totalMs % 3600000) / 60000)
  const s = Math.floor((totalMs % 60000) / 1000)
  const ms = totalMs % 1000
  const pad = (n, w) => String(n).padStart(w, '0')
  return `${pad(h, 2)}:${pad(m, 2)}:${pad(s, 2)},${pad(ms, 3)}`
}
|
|
26
|
+
|
|
27
|
+
// Build SRT cue text for one language from the scene list.
// Falls back to the English narration when a scene has no text for `lang`;
// scenes with no narration at all still advance the clock so later cues
// keep their timing. Scene duration defaults to 5 seconds.
function generateSrt(scenes, lang) {
  const out = []
  let clock = 0
  let cue = 0

  for (const scene of scenes) {
    const narration = scene.narration || {}
    const text = narration[lang] || narration.en || ''
    const span = scene.duration || 5
    const begin = clock
    clock += span
    if (!text) continue
    cue += 1
    out.push(String(cue), `${formatTime(begin)} --> ${formatTime(clock)}`, text, '')
  }

  return out.join('\n')
}
|
|
50
|
+
|
|
51
|
+
// CLI entry: read the scene file and write one demo.<lang>.srt per language.
function main() {
  const args = parseArgs()
  const scenesPath = path.resolve(args.scenes)
  const languages = JSON.parse(args.languages)
  const outputDir = path.resolve(args['output-dir'])

  if (!fs.existsSync(scenesPath)) {
    console.error(`Scenes file not found: ${scenesPath}`)
    process.exit(1)
  }

  const scenes = JSON.parse(fs.readFileSync(scenesPath, 'utf-8')).scenes || []
  fs.mkdirSync(outputDir, { recursive: true })

  languages.forEach((lang) => {
    const target = path.join(outputDir, `demo.${lang}.srt`)
    fs.writeFileSync(target, generateSrt(scenes, lang), 'utf-8')
    console.log(`Wrote: ${target}`)
  })
}
|
|
73
|
+
|
|
74
|
+
// Run as a CLI when invoked directly; export generateSrt for unit tests.
if (require.main === module) {
  main()
}

module.exports = { generateSrt }
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# generate-tts.sh — synthesize a narration MP3 from text via Microsoft
# edge-tts (installed as the Python module `edge_tts`).
set -euo pipefail

usage() {
  echo "Usage: generate-tts.sh --text <text> --voice <name> --output <file>"
  exit 1
}

TEXT=""
VOICE="en-US-AriaNeural"
OUTPUT=""

while [ $# -gt 0 ]; do
  case "$1" in
    --text)   TEXT="$2";   shift 2 ;;
    --voice)  VOICE="$2";  shift 2 ;;
    --output) OUTPUT="$2"; shift 2 ;;
    *) echo "Unknown option: $1"; usage ;;
  esac
done

# --text and --output are mandatory; --voice falls back to its default above.
if [ -z "$TEXT" ] || [ -z "$OUTPUT" ]; then
  usage
fi

mkdir -p "$(dirname "$OUTPUT")"

python3 -m edge_tts --voice "$VOICE" --text "$TEXT" --write-media "$OUTPUT"

echo "Generated: $OUTPUT"
|