@argo-video/cli 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +2 -2
- package/dist/asset-server.d.ts +7 -0
- package/dist/asset-server.d.ts.map +1 -0
- package/dist/asset-server.js +66 -0
- package/dist/asset-server.js.map +1 -0
- package/dist/captions.d.ts +17 -0
- package/dist/captions.d.ts.map +1 -0
- package/dist/captions.js +23 -0
- package/dist/captions.js.map +1 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +87 -0
- package/dist/cli.js.map +1 -0
- package/dist/config.d.ts +44 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +74 -0
- package/dist/config.js.map +1 -0
- package/dist/export.d.ts +18 -0
- package/dist/export.d.ts.map +1 -0
- package/dist/export.js +64 -0
- package/dist/export.js.map +1 -0
- package/dist/fixtures.d.ts +13 -0
- package/dist/fixtures.d.ts.map +1 -0
- package/dist/fixtures.js +36 -0
- package/dist/fixtures.js.map +1 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +14 -0
- package/dist/index.js.map +1 -0
- package/dist/init.d.ts +2 -0
- package/dist/init.d.ts.map +1 -0
- package/{src/init.ts → dist/init.js} +39 -54
- package/dist/init.js.map +1 -0
- package/dist/narration.d.ts +9 -0
- package/dist/narration.d.ts.map +1 -0
- package/dist/narration.js +27 -0
- package/dist/narration.js.map +1 -0
- package/dist/overlays/index.d.ts +8 -0
- package/dist/overlays/index.d.ts.map +1 -0
- package/dist/overlays/index.js +34 -0
- package/dist/overlays/index.js.map +1 -0
- package/dist/overlays/manifest.d.ts +5 -0
- package/dist/overlays/manifest.d.ts.map +1 -0
- package/dist/overlays/manifest.js +52 -0
- package/dist/overlays/manifest.js.map +1 -0
- package/dist/overlays/motion.d.ts +4 -0
- package/dist/overlays/motion.d.ts.map +1 -0
- package/dist/overlays/motion.js +25 -0
- package/dist/overlays/motion.js.map +1 -0
- package/dist/overlays/templates.d.ts +7 -0
- package/dist/overlays/templates.d.ts.map +1 -0
- package/dist/overlays/templates.js +98 -0
- package/dist/overlays/templates.js.map +1 -0
- package/dist/overlays/types.d.ts +42 -0
- package/dist/overlays/types.d.ts.map +1 -0
- package/dist/overlays/types.js +25 -0
- package/dist/overlays/types.js.map +1 -0
- package/dist/overlays/zones.d.ts +15 -0
- package/dist/overlays/zones.d.ts.map +1 -0
- package/dist/overlays/zones.js +69 -0
- package/dist/overlays/zones.js.map +1 -0
- package/dist/pipeline.d.ts +3 -0
- package/dist/pipeline.d.ts.map +1 -0
- package/dist/pipeline.js +93 -0
- package/dist/pipeline.js.map +1 -0
- package/dist/record.d.ts +14 -0
- package/dist/record.d.ts.map +1 -0
- package/dist/record.js +100 -0
- package/dist/record.js.map +1 -0
- package/dist/tts/align.d.ts +17 -0
- package/dist/tts/align.d.ts.map +1 -0
- package/dist/tts/align.js +40 -0
- package/dist/tts/align.js.map +1 -0
- package/dist/tts/cache.d.ts +31 -0
- package/dist/tts/cache.d.ts.map +1 -0
- package/dist/tts/cache.js +51 -0
- package/dist/tts/cache.js.map +1 -0
- package/dist/tts/engine.d.ts +41 -0
- package/dist/tts/engine.d.ts.map +1 -0
- package/dist/tts/engine.js +108 -0
- package/dist/tts/engine.js.map +1 -0
- package/dist/tts/generate.d.ts +20 -0
- package/dist/tts/generate.d.ts.map +1 -0
- package/dist/tts/generate.js +58 -0
- package/dist/tts/generate.js.map +1 -0
- package/dist/tts/kokoro.d.ts +13 -0
- package/dist/tts/kokoro.d.ts.map +1 -0
- package/dist/tts/kokoro.js +46 -0
- package/dist/tts/kokoro.js.map +1 -0
- package/package.json +13 -1
- package/.claude/settings.local.json +0 -34
- package/DESIGN.md +0 -261
- package/docs/enhancement-proposal.md +0 -262
- package/docs/superpowers/plans/2026-03-12-argo.md +0 -208
- package/docs/superpowers/plans/2026-03-12-editorial-overlay-system.md +0 -1560
- package/docs/superpowers/plans/2026-03-13-npm-rename-skill-showcase.md +0 -499
- package/docs/superpowers/specs/2026-03-13-npm-rename-skill-showcase-design.md +0 -109
- package/skills/argo-demo-creator.md +0 -355
- package/src/asset-server.ts +0 -81
- package/src/captions.ts +0 -36
- package/src/cli.ts +0 -97
- package/src/config.ts +0 -125
- package/src/export.ts +0 -93
- package/src/fixtures.ts +0 -50
- package/src/index.ts +0 -41
- package/src/narration.ts +0 -31
- package/src/overlays/index.ts +0 -54
- package/src/overlays/manifest.ts +0 -68
- package/src/overlays/motion.ts +0 -27
- package/src/overlays/templates.ts +0 -121
- package/src/overlays/types.ts +0 -73
- package/src/overlays/zones.ts +0 -82
- package/src/pipeline.ts +0 -120
- package/src/record.ts +0 -123
- package/src/tts/align.ts +0 -75
- package/src/tts/cache.ts +0 -65
- package/src/tts/engine.ts +0 -147
- package/src/tts/generate.ts +0 -83
- package/src/tts/kokoro.ts +0 -51
- package/tests/asset-server.test.ts +0 -67
- package/tests/captions.test.ts +0 -76
- package/tests/cli.test.ts +0 -131
- package/tests/config.test.ts +0 -150
- package/tests/e2e/fake-server.ts +0 -45
- package/tests/e2e/record.e2e.test.ts +0 -131
- package/tests/export.test.ts +0 -155
- package/tests/fixtures.test.ts +0 -74
- package/tests/init.test.ts +0 -77
- package/tests/narration.test.ts +0 -120
- package/tests/overlays/index.test.ts +0 -73
- package/tests/overlays/manifest.test.ts +0 -120
- package/tests/overlays/motion.test.ts +0 -34
- package/tests/overlays/templates.test.ts +0 -69
- package/tests/overlays/types.test.ts +0 -36
- package/tests/overlays/zones.test.ts +0 -49
- package/tests/pipeline.test.ts +0 -177
- package/tests/record.test.ts +0 -87
- package/tests/tts/align.test.ts +0 -118
- package/tests/tts/cache.test.ts +0 -110
- package/tests/tts/engine.test.ts +0 -204
- package/tests/tts/generate.test.ts +0 -177
- package/tests/tts/kokoro.test.ts +0 -25
- package/tsconfig.json +0 -19
package/src/pipeline.ts
DELETED
|
@@ -1,120 +0,0 @@
|
|
|
1
|
-
import { readFileSync, writeFileSync } from 'node:fs';
|
|
2
|
-
import { execFileSync } from 'node:child_process';
|
|
3
|
-
import { join } from 'node:path';
|
|
4
|
-
import { generateClips } from './tts/generate.js';
|
|
5
|
-
import { record } from './record.js';
|
|
6
|
-
import { alignClips, type ClipInfo, type SceneTiming } from './tts/align.js';
|
|
7
|
-
import { parseWavHeader, createWavBuffer } from './tts/engine.js';
|
|
8
|
-
import { exportVideo, checkFfmpeg } from './export.js';
|
|
9
|
-
import type { ArgoConfig } from './config.js';
|
|
10
|
-
|
|
11
|
-
/**
 * Asks `ffprobe` for the container duration of a video file.
 *
 * @param videoPath - Path passed to ffprobe as the input file.
 * @returns The duration in milliseconds, rounded to the nearest integer.
 * @throws Error when ffprobe cannot be executed or exits with an error, or
 *   when its output does not parse to a positive number.
 */
function getVideoDurationMs(videoPath: string): number {
  const probeArgs = [
    '-v', 'error',
    '-show_entries', 'format=duration',
    '-of', 'csv=p=0',
    videoPath,
  ];

  let seconds: string;
  try {
    seconds = execFileSync('ffprobe', probeArgs, { encoding: 'utf-8' }).trim();
  } catch (err) {
    const reason = (err as Error).message;
    throw new Error(
      `Failed to get video duration from ${videoPath}. ` +
        `Ensure ffprobe is installed (it usually comes with ffmpeg). ` +
        `Original error: ${reason}`
    );
  }

  // ffprobe prints seconds as a decimal string; anything unparsable becomes NaN.
  const millis = Math.round(parseFloat(seconds) * 1000);
  const usable = !isNaN(millis) && millis > 0;
  if (!usable) {
    throw new Error(
      `ffprobe returned invalid duration "${seconds}" for ${videoPath}. The video file may be corrupt.`
    );
  }

  return millis;
}
|
|
34
|
-
|
|
35
|
-
/** WAV `audioFormat` tag for IEEE float samples (the tag `createWavBuffer` writes). */
const WAV_FORMAT_IEEE_FLOAT = 3;

/**
 * Reads one generated clip from disk and decodes its samples.
 *
 * Only mono 32-bit IEEE float WAV data is accepted, because the samples are
 * read with `readFloatLE` at a 4-byte stride and mixed as a single channel.
 * Any other encoding would decode to noise, so it is rejected up front.
 *
 * @param scene - Scene name the clip belongs to.
 * @param clipPath - Path of the WAV file to load.
 * @returns The decoded clip plus the sample rate declared in its header.
 * @throws Error when the WAV header is invalid or the encoding is unsupported.
 */
function loadClip(scene: string, clipPath: string): { clip: ClipInfo; sampleRate: number } {
  const wavBuf = readFileSync(clipPath);
  const header = parseWavHeader(wavBuf);

  if (
    header.audioFormat !== WAV_FORMAT_IEEE_FLOAT ||
    header.bitsPerSample !== 32 ||
    header.numChannels !== 1
  ) {
    throw new Error(
      `Unsupported WAV encoding in ${clipPath} (scene "${scene}"): expected mono 32-bit IEEE float, ` +
        `got audioFormat=${header.audioFormat}, bitsPerSample=${header.bitsPerSample}, ` +
        `numChannels=${header.numChannels}.`
    );
  }

  // Never trust the declared size beyond what is actually in the buffer, and
  // floor to whole samples so a ragged tail cannot produce a fractional length.
  const availableBytes = Math.max(0, wavBuf.length - header.dataOffset);
  const sampleCount = Math.floor(Math.min(header.dataSize, availableBytes) / 4);
  const samples = new Float32Array(sampleCount);
  for (let i = 0; i < sampleCount; i++) {
    samples[i] = wavBuf.readFloatLE(header.dataOffset + i * 4);
  }

  return {
    clip: { scene, durationMs: header.durationMs, samples },
    sampleRate: header.sampleRate,
  };
}

/**
 * Runs the four pipeline steps for one demo: generate TTS clips, record the
 * browser demo, align the narration against the recorded scene timing, and
 * export the final video.
 *
 * Intermediate files are read from and written to `.argo/<demoName>/`
 * relative to the current working directory.
 *
 * @param demoName - Demo identifier; selects `<demosDir>/<demoName>.voiceover.json`.
 * @param config - The subset of the Argo configuration the pipeline needs.
 * @returns The path of the exported video as reported by `exportVideo`.
 * @throws Error when `baseURL` or the TTS engine is missing, when no clips
 *   are generated, when a clip has an unsupported encoding or the clips
 *   disagree on sample rate, or when any step fails.
 */
export async function runPipeline(
  demoName: string,
  config: Pick<ArgoConfig, 'baseURL' | 'demosDir' | 'outputDir' | 'tts' | 'video' | 'export'>
): Promise<string> {
  if (!config.baseURL) {
    throw new Error(
      'baseURL is required but not set. Set it in argo.config.js or pass --config.'
    );
  }
  if (!config.tts.engine) {
    throw new Error('TTS engine is not configured. Ensure config.tts.engine is set.');
  }

  checkFfmpeg();

  const argoDir = join('.argo', demoName);
  const manifestPath = `${config.demosDir}/${demoName}.voiceover.json`;

  // Step 1: Generate TTS clips
  console.log('Step 1/4: Generating TTS clips...');
  const clipResults = await generateClips({
    manifestPath,
    demoName,
    engine: config.tts.engine,
    projectRoot: '.',
    defaults: { voice: config.tts.defaultVoice, speed: config.tts.defaultSpeed },
  });

  if (clipResults.length === 0) {
    throw new Error(
      `No TTS clips were generated from ${manifestPath}. ` +
        `Ensure the manifest contains at least one entry.`
    );
  }

  // Step 2: Record browser demo
  console.log('Step 2/4: Recording browser demo...');
  const { timingPath } = await record(demoName, {
    demosDir: config.demosDir,
    baseURL: config.baseURL,
    video: { width: config.video.width, height: config.video.height },
  });

  // Step 3: Align clips with timing
  console.log('Step 3/4: Aligning narration with video...');
  const parsedTiming: unknown = JSON.parse(readFileSync(timingPath, 'utf-8'));
  if (parsedTiming === null || typeof parsedTiming !== 'object' || Array.isArray(parsedTiming)) {
    throw new Error(
      `Timing file ${timingPath} must contain a JSON object mapping scene names to timestamps.`
    );
  }
  const timing = parsedTiming as SceneTiming;

  // Load WAV clips into memory and make sure they can be mixed together.
  const loaded = clipResults.map((cr) => loadClip(cr.scene, cr.clipPath));
  const clips: ClipInfo[] = loaded.map((entry) => entry.clip);
  const sampleRate = loaded[0].sampleRate;
  const mismatch = loaded.find((entry) => entry.sampleRate !== sampleRate);
  if (mismatch) {
    throw new Error(
      `TTS clips use different sample rates (${sampleRate} Hz and ${mismatch.sampleRate} Hz ` +
        `for scene "${mismatch.clip.scene}"); they cannot be mixed into one narration track.`
    );
  }

  // Use actual video duration for alignment
  const videoPath = join(argoDir, 'video.webm');
  const totalDurationMs = getVideoDurationMs(videoPath);

  // Mix and write at the clips' own rate instead of assuming 24 kHz.
  const aligned = alignClips(timing, clips, totalDurationMs, sampleRate);
  const alignedWav = createWavBuffer(aligned.samples, sampleRate);
  const alignedPath = join(argoDir, 'narration-aligned.wav');
  writeFileSync(alignedPath, alignedWav);

  // Step 4: Export final video
  console.log('Step 4/4: Exporting final video...');
  const outputPath = await exportVideo({
    demoName,
    argoDir: '.argo',
    outputDir: config.outputDir,
    preset: config.export.preset,
    crf: config.export.crf,
    fps: config.video.fps,
  });

  console.log(`Done! Video saved to: ${outputPath}`);
  return outputPath;
}
|
package/src/record.ts
DELETED
|
@@ -1,123 +0,0 @@
|
|
|
1
|
-
import { execFile } from 'node:child_process';
|
|
2
|
-
import { mkdirSync, readdirSync, copyFileSync, existsSync, rmSync, writeFileSync } from 'node:fs';
|
|
3
|
-
import path from 'node:path';
|
|
4
|
-
import { startAssetServer, type AssetServer } from './asset-server.js';
|
|
5
|
-
import { loadOverlayManifest, hasImageAssets } from './overlays/manifest.js';
|
|
6
|
-
|
|
7
|
-
/** Settings for a single recording run. */
export interface RecordOptions {
  /**
   * Directory holding the demo specs. It is used as the Playwright `testDir`
   * and is where `<demoName>.overlays.json` and the `assets` folder are looked up.
   */
  demosDir: string;
  /** Base URL written into the generated Playwright config and exported as `BASE_URL`. */
  baseURL: string;
  /** Pixel dimensions applied to both the browser viewport and the recorded video. */
  video: { width: number; height: number };
}

/** Locations of the artifacts produced by a successful recording. */
export interface RecordResult {
  /** Path of the recorded video copied into `.argo/<demoName>/`. */
  videoPath: string;
  /** Path of the `.timing.json` file expected in `.argo/<demoName>/`. */
  timingPath: string;
}
|
|
17
|
-
|
|
18
|
-
/**
 * Looks for a recorded `.webm` file anywhere below a results directory.
 *
 * @param testResultsDir - Directory to search recursively.
 * @returns The path of the first entry whose name ends with `.webm`, joined
 *   onto `testResultsDir`, or `undefined` when the directory does not exist
 *   or holds no such entry.
 */
function findVideoInResults(testResultsDir: string): string | undefined {
  if (!existsSync(testResultsDir)) {
    return undefined;
  }

  // readdirSync may yield Buffers depending on options; normalise to strings.
  const relativeHit = readdirSync(testResultsDir, { recursive: true })
    .map((entry) => (typeof entry === 'string' ? entry : entry.toString()))
    .find((entryName) => entryName.endsWith('.webm'));

  return relativeHit === undefined ? undefined : path.join(testResultsDir, relativeHit);
}
|
|
28
|
-
|
|
29
|
-
/**
 * Builds the source text of a Playwright config module used for recording.
 *
 * The generated config has a single `demos` project that matches
 * `**\/*.demo.ts` under the resolved demos directory, always records video,
 * and uses the same dimensions for the viewport and the video. String values
 * are embedded with `JSON.stringify` so they are emitted as quoted literals.
 *
 * @param options - Recording options supplying `demosDir`, `baseURL` and video size.
 * @param outputDir - Directory Playwright should write its test results to.
 * @returns The config file contents, ending with a newline.
 */
function createPlaywrightConfig(options: RecordOptions, outputDir: string): string {
  const { width, height } = options.video;
  const frame = `{ width: ${width}, height: ${height} }`;
  const testDir = JSON.stringify(path.resolve(options.demosDir));
  const baseURL = JSON.stringify(options.baseURL);
  const resultsDir = JSON.stringify(outputDir);

  const lines = [
    `import { defineConfig } from '@playwright/test';`,
    ``,
    `export default defineConfig({`,
    `  preserveOutput: 'always',`,
    `  outputDir: ${resultsDir},`,
    `  projects: [`,
    `    {`,
    `      name: 'demos',`,
    `      testDir: ${testDir},`,
    `      testMatch: '**/*.demo.ts',`,
    `      use: {`,
    `        baseURL: ${baseURL},`,
    `        viewport: ${frame},`,
    `        video: {`,
    `          mode: 'on',`,
    `          size: ${frame},`,
    `        },`,
    `      },`,
    `    },`,
    `  ],`,
    `});`,
    ``,
  ];

  return lines.join('\n');
}
|
|
56
|
-
|
|
57
|
-
/**
 * Runs `npx playwright <args>` and waits for it to exit.
 *
 * @param args - Arguments placed after `playwright` on the command line.
 * @param env - Environment for the child process.
 * @throws Error containing the combined stdout/stderr of the run, or the
 *   spawn error's own message when the process produced no output.
 */
function runPlaywright(args: string[], env: NodeJS.ProcessEnv): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    execFile('npx', ['playwright', ...args], { env }, (error, stdout, stderr) => {
      if (error) {
        const output = [stdout, stderr].filter(Boolean).join('\n');
        // A failed spawn (e.g. npx missing) yields no output; surface the cause instead.
        reject(new Error(`Playwright recording failed:\n${output || error.message}`));
        return;
      }
      resolve();
    });
  });
}

/**
 * Records one demo with Playwright and collects its artifacts.
 *
 * Steps, all relative to the current working directory:
 * 1. Writes a generated Playwright config into `.argo/<demoName>/`.
 * 2. Deletes `test-results/` and any previous `.timing.json` so leftovers
 *    from an earlier run cannot be mistaken for this run's output.
 * 3. Starts the asset server when the overlay manifest has image assets.
 * 4. Runs Playwright for the `demos` project, filtered by `demoName`.
 * 5. Copies the recorded `.webm` to `.argo/<demoName>/video.webm` and checks
 *    that the timing file was produced.
 *
 * @param demoName - Name of the demo to record.
 * @param options - Demo directory, base URL and video dimensions.
 * @returns Paths of the copied video and of the timing file.
 * @throws Error when Playwright fails, when no video is found, when the
 *   timing file is missing, or when copying the video fails.
 */
export async function record(demoName: string, options: RecordOptions): Promise<RecordResult> {
  const argoDir = path.join('.argo', demoName);
  mkdirSync(argoDir, { recursive: true });

  const videoPath = path.join(argoDir, 'video.webm');
  const timingPath = path.join(argoDir, '.timing.json');
  const testResultsDir = path.resolve('test-results');
  const recordConfigPath = path.join(argoDir, 'playwright.record.config.mjs');

  writeFileSync(recordConfigPath, createPlaywrightConfig(options, testResultsDir), 'utf-8');

  // Clean test-results to avoid picking up stale videos
  rmSync(testResultsDir, { recursive: true, force: true });
  // Likewise drop an old timing file so the existence check below only
  // succeeds when this run actually wrote one.
  rmSync(timingPath, { force: true });

  // Start asset server if overlay manifest has image assets
  let assetServer: AssetServer | undefined;
  const overlayManifestPath = path.join(options.demosDir, `${demoName}.overlays.json`);
  const overlayEntries = await loadOverlayManifest(overlayManifestPath);
  if (overlayEntries && hasImageAssets(overlayEntries)) {
    const assetDir = path.join(options.demosDir, 'assets');
    assetServer = await startAssetServer(assetDir);
  }

  try {
    await runPlaywright(
      ['test', '--config', recordConfigPath, '--grep', demoName, '--project', 'demos'],
      {
        ...process.env,
        ARGO_DEMO_NAME: demoName,
        ARGO_OUTPUT_DIR: argoDir,
        BASE_URL: options.baseURL,
        ARGO_ASSET_URL: assetServer?.url ?? '',
      },
    );

    // Everything below runs inside the async function, so a failing copy
    // rejects the returned promise instead of throwing from a callback.

    // Copy the video from test-results/ to .argo/<demo>/video.webm
    const found = findVideoInResults(testResultsDir);
    if (!found) {
      throw new Error(
        `No video recording found in test-results/. ` +
          `Ensure playwright.config.ts has video: 'on' or video: { mode: 'on' }.`
      );
    }
    copyFileSync(found, videoPath);

    // Verify timing file was written by the narration fixture
    if (!existsSync(timingPath)) {
      throw new Error(
        `No timing file found at ${timingPath}. ` +
          `Ensure the demo uses the argo test fixture with narration.mark() calls.`
      );
    }

    return { videoPath, timingPath };
  } finally {
    if (assetServer) await assetServer.close();
  }
}
|
package/src/tts/align.ts
DELETED
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
/** Maps a scene name to its timestamp in milliseconds on the output timeline. */
export type SceneTiming = Record<string, number>;

/** One narration clip ready to be placed on the timeline. */
export interface ClipInfo {
  /** Scene name used to look the clip up in the timing map. */
  scene: string;
  /** Clip length in milliseconds; determines where the placement ends. */
  durationMs: number;
  /** Mono samples that are mixed into the output buffer. */
  samples: Float32Array;
}

/** Where a clip ended up on the timeline. */
export interface Placement {
  scene: string;
  startMs: number;
  endMs: number;
}

/** Result of aligning a set of clips. */
export interface AlignResult {
  /** One placement per matched clip, ordered by scene timestamp. */
  placements: Placement[];
  /** Mixed narration track covering the whole requested duration. */
  samples: Float32Array;
}

/** Gap inserted after the previous clip when a clip has to be pushed back. */
const OVERLAP_GAP_MS = 100;

/**
 * Places narration clips on a silent track at their scene timestamps.
 *
 * Clips are ordered by timestamp. A clip whose timestamp falls before the end
 * of the previous clip is delayed to start `OVERLAP_GAP_MS` after that end.
 * Samples that would land past the end of the track are dropped. Clips whose
 * scene has no usable entry in `timing` are skipped with a console warning.
 *
 * @param timing - Scene name to timestamp (ms) map. Only own properties with
 *   finite numeric values count as a match.
 * @param clips - Clips to place; the array itself is not modified.
 * @param totalDurationMs - Length of the output track in milliseconds.
 * @param sampleRate - Samples per second used to convert times to offsets.
 * @returns The placements and the mixed track.
 */
export function alignClips(
  timing: SceneTiming,
  clips: ClipInfo[],
  totalDurationMs: number,
  sampleRate = 24_000,
): AlignResult {
  // `in` would also match inherited keys such as "toString" or "constructor",
  // so only accept own properties that hold a real timestamp.
  const hasTimestamp = (scene: string): boolean =>
    Object.prototype.hasOwnProperty.call(timing, scene) && Number.isFinite(timing[scene]);

  // 1. Split clips into those with a timestamp and those without.
  const matched: ClipInfo[] = [];
  const skippedScenes: string[] = [];
  for (const clip of clips) {
    if (hasTimestamp(clip.scene)) {
      matched.push(clip);
    } else {
      skippedScenes.push(clip.scene);
    }
  }
  if (skippedScenes.length > 0) {
    const names = skippedScenes.join(', ');
    console.warn(
      `Warning: ${skippedScenes.length} clip(s) have no matching scene in timing and will be skipped: ${names}. ` +
        `Check that voiceover manifest scene names match narration.mark() calls.`
    );
  }

  // 2. Sort by scene timestamp ascending (matched is a fresh array).
  matched.sort((a, b) => timing[a.scene] - timing[b.scene]);

  // 3. Create the silent output track.
  const totalSamples = Math.round((totalDurationMs / 1000) * sampleRate);
  const output = new Float32Array(totalSamples);

  // 4. Place each clip, preventing overlap, and mix it into the track.
  const placements: Placement[] = [];
  let previousEndMs = 0;

  for (const clip of matched) {
    let startMs = timing[clip.scene];

    // If this would overlap the previous clip, push forward
    if (placements.length > 0 && startMs < previousEndMs) {
      startMs = previousEndMs + OVERLAP_GAP_MS;
    }

    const endMs = startMs + clip.durationMs;
    placements.push({ scene: clip.scene, startMs, endMs });
    previousEndMs = endMs;

    const startSample = Math.round((startMs / 1000) * sampleRate);
    for (let j = 0; j < clip.samples.length && startSample + j < totalSamples; j++) {
      output[startSample + j] += clip.samples[j];
    }
  }

  return { placements, samples: output };
}
|
package/src/tts/cache.ts
DELETED
|
@@ -1,65 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Content-addressed clip cache for Argo TTS output.
|
|
3
|
-
*/
|
|
4
|
-
|
|
5
|
-
import crypto from 'node:crypto';
|
|
6
|
-
import fs from 'node:fs';
|
|
7
|
-
import path from 'node:path';
|
|
8
|
-
|
|
9
|
-
/** One voiceover manifest entry; all four fields feed the cache key. */
export interface ManifestEntry {
  scene: string;
  text: string;
  voice?: string;
  speed?: number;
}

/**
 * On-disk cache of generated clips, stored as
 * `<projectRoot>/.argo/<demoName>/clips/<sha256>.wav`.
 *
 * The file name is the SHA-256 of the entry's `scene`, `text`, `voice` and
 * `speed`, so changing any of them results in a different cache file.
 */
export class ClipCache {
  private readonly projectRoot: string;

  /**
   * @param projectRoot - Directory under which the `.argo` folder lives.
   */
  constructor(projectRoot: string) {
    this.projectRoot = projectRoot;
  }

  /**
   * Returns the full file path for a cached clip.
   * The path is computed only; the file may not exist.
   */
  getClipPath(demoName: string, entry: ManifestEntry): string {
    const hash = this.computeHash(entry);
    return path.join(this.projectRoot, '.argo', demoName, 'clips', `${hash}.wav`);
  }

  /**
   * Checks whether a clip is already cached on disk.
   */
  isCached(demoName: string, entry: ManifestEntry): boolean {
    return fs.existsSync(this.getClipPath(demoName, entry));
  }

  /**
   * Returns the cached WAV buffer, or null if not cached.
   *
   * @throws Any read error other than the file not existing.
   */
  getCachedClip(demoName: string, entry: ManifestEntry): Buffer | null {
    const clipPath = this.getClipPath(demoName, entry);
    try {
      // Read directly rather than exists-then-read, so a file removed in
      // between cannot turn a cache miss into an exception.
      return fs.readFileSync(clipPath);
    } catch (err) {
      if ((err as NodeJS.ErrnoException).code === 'ENOENT') {
        return null;
      }
      throw err;
    }
  }

  /**
   * Writes a WAV buffer to the cache, creating directories as needed.
   *
   * The data goes to a temporary sibling file first and is then renamed into
   * place, so an interrupted write never leaves a truncated `.wav` that later
   * runs would treat as a valid cached clip.
   */
  cacheClip(demoName: string, entry: ManifestEntry, wavBuffer: Buffer): void {
    const clipPath = this.getClipPath(demoName, entry);
    fs.mkdirSync(path.dirname(clipPath), { recursive: true });

    const tempPath = `${clipPath}.${crypto.randomBytes(6).toString('hex')}.tmp`;
    try {
      fs.writeFileSync(tempPath, wavBuffer);
      fs.renameSync(tempPath, clipPath);
    } catch (err) {
      fs.rmSync(tempPath, { force: true });
      throw err;
    }
  }

  /** Hashes the fields that identify a clip; undefined fields are omitted by JSON.stringify. */
  private computeHash(entry: ManifestEntry): string {
    const { scene, text, voice, speed } = entry;
    return crypto
      .createHash('sha256')
      .update(JSON.stringify({ scene, text, voice, speed }))
      .digest('hex');
  }
}
|
package/src/tts/engine.ts
DELETED
|
@@ -1,147 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* TTS Engine interface and WAV utilities for Argo.
|
|
3
|
-
*/
|
|
4
|
-
|
|
5
|
-
/** Per-call synthesis settings; every field is optional. */
export interface TTSEngineOptions {
  /** Voice identifier understood by the engine. */
  voice?: string;
  /** Speaking-rate setting passed through to the engine. */
  speed?: number;
  /** Language hint for the engine. */
  lang?: string;
}

/** Contract implemented by every text-to-speech backend. */
export interface TTSEngine {
  /**
   * Synthesizes speech for `text`.
   *
   * @returns A promise for the generated audio as a WAV file buffer.
   */
  generate(text: string, options: TTSEngineOptions): Promise<Buffer>;
}
|
|
14
|
-
|
|
15
|
-
/**
 * Serialises samples into a complete WAV file held in memory.
 *
 * Layout: a 44-byte RIFF/WAVE header (16-byte `fmt ` chunk, format tag 3 =
 * IEEE float, one channel, 32 bits per sample) followed by the samples as
 * little-endian 32-bit floats.
 *
 * @param samples - Mono audio samples.
 * @param sampleRate - Samples per second written to the header (default 24000).
 * @returns A buffer of `44 + samples.length * 4` bytes.
 */
export function createWavBuffer(samples: Float32Array, sampleRate = 24000): Buffer {
  const HEADER_BYTES = 44;
  const CHANNELS = 1;
  const BITS = 32;

  const sampleBytes = BITS / 8;
  const frameBytes = CHANNELS * sampleBytes;
  const payloadBytes = samples.length * sampleBytes;

  const wav = Buffer.alloc(HEADER_BYTES + payloadBytes);

  // RIFF container: the size field excludes the 8-byte "RIFF"+size prefix.
  wav.write('RIFF', 0, 'ascii');
  wav.writeUInt32LE(wav.length - 8, 4);
  wav.write('WAVE', 8, 'ascii');

  // Format description.
  wav.write('fmt ', 12, 'ascii');
  wav.writeUInt32LE(16, 16); // length of the fmt payload
  wav.writeUInt16LE(3, 20); // format tag 3 = IEEE float
  wav.writeUInt16LE(CHANNELS, 22);
  wav.writeUInt32LE(sampleRate, 24);
  wav.writeUInt32LE(sampleRate * frameBytes, 28); // bytes per second
  wav.writeUInt16LE(frameBytes, 32);
  wav.writeUInt16LE(BITS, 34);

  // Audio payload.
  wav.write('data', 36, 'ascii');
  wav.writeUInt32LE(payloadBytes, 40);
  samples.forEach((value, index) => {
    wav.writeFloatLE(value, HEADER_BYTES + index * sampleBytes);
  });

  return wav;
}
|
|
56
|
-
|
|
57
|
-
/** Fields extracted from a WAV file by `parseWavHeader`. */
export interface WavHeader {
  sampleRate: number;
  numChannels: number;
  bitsPerSample: number;
  /** Format tag from the fmt chunk (3 is what `createWavBuffer` writes for IEEE float). */
  audioFormat: number;
  /** Number of audio bytes actually available in the buffer. */
  dataSize: number;
  /** Byte offset of the first audio byte within the buffer. */
  dataOffset: number;
  /** Duration derived from `dataSize`, the sample width, channel count and sample rate. */
  durationMs: number;
}

/**
 * Parses a WAV file header. Searches for the 'data' chunk rather than
 * assuming a fixed offset.
 *
 * The `fmt ` chunk must be the first chunk (at byte 12). Chunks are walked
 * with RIFF word alignment: a chunk with an odd size is followed by one pad
 * byte. If the data chunk declares more bytes than the buffer holds, the
 * reported `dataSize` is clamped to what is present.
 *
 * @param wav - Complete WAV file contents.
 * @returns The parsed header fields.
 * @throws Error when the buffer is shorter than 44 bytes, lacks the RIFF /
 *   WAVE / `fmt ` markers, has a zero sample rate, channel count or sample
 *   width, or contains no data chunk.
 */
export function parseWavHeader(wav: Buffer): WavHeader {
  if (wav.length < 44) {
    throw new Error('Buffer too small to be a valid WAV file');
  }
  if (wav.toString('ascii', 0, 4) !== 'RIFF') {
    throw new Error('Not a valid WAV file: missing RIFF header');
  }
  if (wav.toString('ascii', 8, 12) !== 'WAVE') {
    throw new Error('Not a valid WAV file: missing WAVE marker');
  }

  // Validate and parse fmt chunk (expected at byte 12)
  if (wav.toString('ascii', 12, 16) !== 'fmt ') {
    throw new Error('Not a valid WAV file: fmt chunk not found at expected offset');
  }
  const audioFormat = wav.readUInt16LE(20);
  const numChannels = wav.readUInt16LE(22);
  const sampleRate = wav.readUInt32LE(24);
  const bitsPerSample = wav.readUInt16LE(34);

  // These are divisors below; zero would yield NaN or Infinity durations.
  if (sampleRate === 0 || numChannels === 0 || bitsPerSample === 0) {
    throw new Error(
      'Not a valid WAV file: fmt chunk has a zero sample rate, channel count or bits per sample'
    );
  }

  // Search for 'data' chunk
  let offset = 12; // after 'WAVE'
  let dataSize = 0;
  let dataOffset = 0;

  // `<=` so that a chunk header occupying the final 8 bytes (an empty data
  // chunk, as in a 44-byte file) is still examined.
  while (offset + 8 <= wav.length) {
    const chunkId = wav.toString('ascii', offset, offset + 4);
    const chunkSize = wav.readUInt32LE(offset + 4);

    if (chunkId === 'data') {
      dataOffset = offset + 8;
      // Do not report more audio than the buffer actually contains.
      dataSize = Math.min(chunkSize, wav.length - dataOffset);
      break;
    }

    // Skip header + payload + the pad byte that follows odd-sized chunks.
    offset += 8 + chunkSize + (chunkSize & 1);
  }

  if (dataOffset === 0) {
    throw new Error('No data chunk found in WAV file');
  }

  const bytesPerSample = bitsPerSample / 8;
  const totalSamples = dataSize / (bytesPerSample * numChannels);
  const durationMs = (totalSamples / sampleRate) * 1000;

  return {
    sampleRate,
    numChannels,
    bitsPerSample,
    audioFormat,
    dataSize,
    dataOffset,
    durationMs,
  };
}
|
|
127
|
-
|
|
128
|
-
/**
 * Builds a fake TTS engine for tests.
 *
 * Every `generate` call is appended to the returned `calls` array and
 * resolves to a WAV buffer of all-zero samples at 24000 Hz whose length
 * corresponds to `durationMs`.
 *
 * @param durationMs - Length of the silent clip each call produces (default 500).
 * @returns An engine object that also exposes the recorded `calls`.
 */
export function createMockTTSEngine(
  durationMs = 500,
): TTSEngine & { calls: Array<{ text: string; options: TTSEngineOptions }> } {
  const MOCK_SAMPLE_RATE = 24000;
  const recorded: Array<{ text: string; options: TTSEngineOptions }> = [];

  const generate = async (text: string, options: TTSEngineOptions): Promise<Buffer> => {
    recorded.push({ text, options });
    // A freshly allocated Float32Array is all zeros, i.e. silence.
    const silence = new Float32Array(Math.round((durationMs / 1000) * MOCK_SAMPLE_RATE));
    return createWavBuffer(silence, MOCK_SAMPLE_RATE);
  };

  return { calls: recorded, generate };
}
|
package/src/tts/generate.ts
DELETED
|
@@ -1,83 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* TTS clip generation with manifest parsing and cache integration.
|
|
3
|
-
*/
|
|
4
|
-
|
|
5
|
-
import fs from 'node:fs';
|
|
6
|
-
import type { TTSEngine } from './engine.js';
|
|
7
|
-
import { ClipCache, type ManifestEntry } from './cache.js';
|
|
8
|
-
|
|
9
|
-
/** Inputs for `generateClips`. */
export interface GenerateClipsOptions {
  /** Path of the JSON voiceover manifest to read. */
  manifestPath: string;
  /** Demo name; selects the cache folder for the clips. */
  demoName: string;
  /** Engine used to synthesize clips that are not cached yet. */
  engine: TTSEngine;
  /** Root directory handed to the clip cache. */
  projectRoot: string;
  /** Fallback voice and speed for manifest entries that do not set them. */
  defaults?: { voice?: string; speed?: number };
}

/** One generated (or cached) clip. */
export interface ClipResult {
  /** Scene name taken from the manifest entry. */
  scene: string;
  /** Path of the clip's WAV file inside the cache. */
  clipPath: string;
}
|
|
21
|
-
|
|
22
|
-
/**
 * Validates one raw manifest item and converts it to a `ManifestEntry`.
 *
 * @param raw - The item as parsed from JSON.
 * @param index - Zero-based position in the manifest, used in error messages.
 * @param manifestPath - Manifest path, used in error messages.
 * @throws Error when the item is not an object, lacks string `scene`/`text`,
 *   or has a `voice`/`speed` of the wrong type.
 */
function parseManifestEntry(raw: unknown, index: number, manifestPath: string): ManifestEntry {
  const where = `(entry ${index} in ${manifestPath})`;
  const missing = `Manifest entry missing required field: scene and text are required ${where}`;

  // null and primitives would otherwise crash on property access.
  if (raw === null || typeof raw !== 'object') {
    throw new Error(missing);
  }

  const { scene, text, voice: rawVoice, speed: rawSpeed } = raw as Record<string, unknown>;
  if (typeof scene !== 'string' || typeof text !== 'string') {
    throw new Error(missing);
  }

  // null is treated like "not set" so that the defaults apply.
  let voice: string | undefined;
  if (rawVoice !== undefined && rawVoice !== null) {
    if (typeof rawVoice !== 'string') {
      throw new Error(`Manifest entry has invalid "voice": expected a string ${where}`);
    }
    voice = rawVoice;
  }

  let speed: number | undefined;
  if (rawSpeed !== undefined && rawSpeed !== null) {
    if (typeof rawSpeed !== 'number' || !Number.isFinite(rawSpeed)) {
      throw new Error(`Manifest entry has invalid "speed": expected a finite number ${where}`);
    }
    speed = rawSpeed;
  }

  return { scene, text, voice, speed };
}

/**
 * Produces one WAV clip per voiceover manifest entry, reusing cached clips.
 *
 * The manifest must be a JSON array of objects with string `scene` and
 * `text` fields and optional `voice` / `speed`. All entries are validated
 * before any clip is generated. Clips are generated one after another, in
 * manifest order, and written to the clip cache.
 *
 * @param options - Manifest location, demo name, engine, cache root and defaults.
 * @returns One result per manifest entry, in manifest order.
 * @throws Error when the manifest is missing, is not valid JSON, is not an
 *   array, or contains an invalid entry; engine errors propagate unchanged.
 */
export async function generateClips(options: GenerateClipsOptions): Promise<ClipResult[]> {
  const { manifestPath, demoName, engine, projectRoot, defaults } = options;

  // 1. Check manifest exists
  if (!fs.existsSync(manifestPath)) {
    throw new Error(`Manifest file not found: ${manifestPath}`);
  }

  // 2. Read and parse JSON
  let parsed: unknown;
  try {
    parsed = JSON.parse(fs.readFileSync(manifestPath, 'utf-8'));
  } catch (err) {
    if (err instanceof SyntaxError) {
      throw new Error(`Failed to parse manifest ${manifestPath}: ${err.message}`);
    }
    throw err;
  }

  if (!Array.isArray(parsed)) {
    throw new Error(`Manifest ${manifestPath} must contain a JSON array`);
  }

  // 3. Validate every entry up front and fill in defaults, so a bad entry
  //    late in the manifest fails before any synthesis work is done.
  const entries: ManifestEntry[] = parsed.map((raw: unknown, index: number) => {
    const entry = parseManifestEntry(raw, index, manifestPath);
    return {
      ...entry,
      voice: entry.voice ?? defaults?.voice,
      speed: entry.speed ?? defaults?.speed,
    };
  });

  const cache = new ClipCache(projectRoot);
  const results: ClipResult[] = [];

  for (const manifestEntry of entries) {
    const clipPath = cache.getClipPath(demoName, manifestEntry);

    // 4. Check cache or generate
    if (!cache.isCached(demoName, manifestEntry)) {
      const wavBuffer = await engine.generate(manifestEntry.text, {
        voice: manifestEntry.voice,
        speed: manifestEntry.speed,
      });
      cache.cacheClip(demoName, manifestEntry, wavBuffer);
    }

    results.push({ scene: manifestEntry.scene, clipPath });
  }

  return results;
}
|
package/src/tts/kokoro.ts
DELETED
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
import type { TTSEngine, TTSEngineOptions } from './engine.js';
|
|
2
|
-
|
|
3
|
-
/**
 * TTS engine backed by the `kokoro-js` package.
 *
 * The package and model are loaded lazily on the first `generate` call and
 * reused afterwards. Calls made while the load is still in progress share
 * the same pending load instead of starting another one.
 */
export class KokoroEngine implements TTSEngine {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- kokoro-js is imported dynamically
  private ttsPromise: Promise<any> | null = null;
  private readonly modelId: string;
  private readonly dtype: string;

  /**
   * @param options - Optional model id (default `onnx-community/Kokoro-82M-ONNX`)
   *   and dtype (default `fp32`).
   */
  constructor(options?: { modelId?: string; dtype?: string }) {
    this.modelId = options?.modelId ?? 'onnx-community/Kokoro-82M-ONNX';
    this.dtype = options?.dtype ?? 'fp32';
  }

  /** Returns the shared model load, starting it if necessary. */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private getTTS(): Promise<any> {
    if (!this.ttsPromise) {
      this.ttsPromise = this.loadTTS().catch((err: unknown) => {
        // Forget a failed load so a later call can try again.
        this.ttsPromise = null;
        throw err;
      });
    }
    return this.ttsPromise;
  }

  /** Imports kokoro-js and loads the configured model. */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private async loadTTS(): Promise<any> {
    try {
      const { KokoroTTS } = await import('kokoro-js');
      return await KokoroTTS.from_pretrained(this.modelId, {
        dtype: this.dtype as 'fp32' | 'fp16' | 'q8' | 'q4' | 'q4f16',
      });
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      throw new Error(
        `Failed to initialize Kokoro TTS (model: ${this.modelId}, dtype: ${this.dtype}). ` +
          `This may require an internet connection for first-time model download. ` +
          `Original error: ${reason}`
      );
    }
  }

  /**
   * Synthesizes `text` and returns it as a WAV buffer.
   *
   * @param text - Text to speak; must contain non-whitespace characters.
   * @param options - Voice (default `af_heart`) and speed (default 1.0).
   * @throws Error when the text is empty, the model cannot be loaded, or
   *   kokoro-js returns audio without Float32Array samples or with an
   *   invalid sample rate.
   */
  async generate(text: string, options: TTSEngineOptions): Promise<Buffer> {
    if (!text?.trim()) throw new Error('TTS text must not be empty');
    const tts = await this.getTTS();
    const audio = await tts.generate(text, {
      voice: options.voice ?? 'af_heart',
      speed: options.speed ?? 1.0,
    });
    // The samples are looked up under either property name.
    const samples = audio.data ?? audio.audio;
    if (!samples || !(samples instanceof Float32Array)) {
      throw new Error(
        'kokoro-js returned unexpected audio format: neither .data nor .audio contains Float32Array samples. ' +
          'Check that your kokoro-js version is compatible.'
      );
    }
    const sampleRate = audio.sampling_rate;
    if (typeof sampleRate !== 'number' || sampleRate <= 0) {
      throw new Error(`kokoro-js returned invalid sample rate: ${sampleRate}.`);
    }
    const { createWavBuffer } = await import('./engine.js');
    return createWavBuffer(samples, sampleRate);
  }
}
|