@argo-video/cli 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +2 -2
  3. package/dist/asset-server.d.ts +7 -0
  4. package/dist/asset-server.d.ts.map +1 -0
  5. package/dist/asset-server.js +69 -0
  6. package/dist/asset-server.js.map +1 -0
  7. package/dist/captions.d.ts +17 -0
  8. package/dist/captions.d.ts.map +1 -0
  9. package/dist/captions.js +23 -0
  10. package/dist/captions.js.map +1 -0
  11. package/dist/cli.d.ts +3 -0
  12. package/dist/cli.d.ts.map +1 -0
  13. package/dist/cli.js +87 -0
  14. package/dist/cli.js.map +1 -0
  15. package/dist/config.d.ts +49 -0
  16. package/dist/config.d.ts.map +1 -0
  17. package/dist/config.js +76 -0
  18. package/dist/config.js.map +1 -0
  19. package/dist/export.d.ts +19 -0
  20. package/dist/export.d.ts.map +1 -0
  21. package/dist/export.js +66 -0
  22. package/dist/export.js.map +1 -0
  23. package/dist/fixtures.d.ts +13 -0
  24. package/dist/fixtures.d.ts.map +1 -0
  25. package/dist/fixtures.js +49 -0
  26. package/dist/fixtures.js.map +1 -0
  27. package/dist/index.d.ts +8 -0
  28. package/dist/index.d.ts.map +1 -0
  29. package/dist/index.js +14 -0
  30. package/dist/index.js.map +1 -0
  31. package/dist/init.d.ts +2 -0
  32. package/dist/init.d.ts.map +1 -0
  33. package/{src/init.ts → dist/init.js} +39 -54
  34. package/dist/init.js.map +1 -0
  35. package/dist/narration.d.ts +32 -0
  36. package/dist/narration.d.ts.map +1 -0
  37. package/dist/narration.js +86 -0
  38. package/dist/narration.js.map +1 -0
  39. package/dist/overlays/index.d.ts +13 -0
  40. package/dist/overlays/index.d.ts.map +1 -0
  41. package/dist/overlays/index.js +45 -0
  42. package/dist/overlays/index.js.map +1 -0
  43. package/dist/overlays/manifest.d.ts +5 -0
  44. package/dist/overlays/manifest.d.ts.map +1 -0
  45. package/dist/overlays/manifest.js +52 -0
  46. package/dist/overlays/manifest.js.map +1 -0
  47. package/dist/overlays/motion.d.ts +4 -0
  48. package/dist/overlays/motion.d.ts.map +1 -0
  49. package/dist/overlays/motion.js +25 -0
  50. package/dist/overlays/motion.js.map +1 -0
  51. package/dist/overlays/templates.d.ts +8 -0
  52. package/dist/overlays/templates.d.ts.map +1 -0
  53. package/dist/overlays/templates.js +102 -0
  54. package/dist/overlays/templates.js.map +1 -0
  55. package/dist/overlays/types.d.ts +46 -0
  56. package/dist/overlays/types.d.ts.map +1 -0
  57. package/dist/overlays/types.js +25 -0
  58. package/dist/overlays/types.js.map +1 -0
  59. package/dist/overlays/zones.d.ts +23 -0
  60. package/dist/overlays/zones.d.ts.map +1 -0
  61. package/dist/overlays/zones.js +117 -0
  62. package/dist/overlays/zones.js.map +1 -0
  63. package/dist/pipeline.d.ts +3 -0
  64. package/dist/pipeline.d.ts.map +1 -0
  65. package/dist/pipeline.js +109 -0
  66. package/dist/pipeline.js.map +1 -0
  67. package/dist/record.d.ts +15 -0
  68. package/dist/record.d.ts.map +1 -0
  69. package/dist/record.js +110 -0
  70. package/dist/record.js.map +1 -0
  71. package/dist/tts/align.d.ts +26 -0
  72. package/dist/tts/align.d.ts.map +1 -0
  73. package/dist/tts/align.js +53 -0
  74. package/dist/tts/align.js.map +1 -0
  75. package/dist/tts/cache.d.ts +31 -0
  76. package/dist/tts/cache.d.ts.map +1 -0
  77. package/dist/tts/cache.js +51 -0
  78. package/dist/tts/cache.js.map +1 -0
  79. package/dist/tts/engine.d.ts +41 -0
  80. package/dist/tts/engine.d.ts.map +1 -0
  81. package/dist/tts/engine.js +108 -0
  82. package/dist/tts/engine.js.map +1 -0
  83. package/dist/tts/generate.d.ts +21 -0
  84. package/dist/tts/generate.d.ts.map +1 -0
  85. package/dist/tts/generate.js +61 -0
  86. package/dist/tts/generate.js.map +1 -0
  87. package/dist/tts/kokoro.d.ts +30 -0
  88. package/dist/tts/kokoro.d.ts.map +1 -0
  89. package/dist/tts/kokoro.js +66 -0
  90. package/dist/tts/kokoro.js.map +1 -0
  91. package/package.json +13 -1
  92. package/.claude/settings.local.json +0 -34
  93. package/DESIGN.md +0 -261
  94. package/docs/enhancement-proposal.md +0 -262
  95. package/docs/superpowers/plans/2026-03-12-argo.md +0 -208
  96. package/docs/superpowers/plans/2026-03-12-editorial-overlay-system.md +0 -1560
  97. package/docs/superpowers/plans/2026-03-13-npm-rename-skill-showcase.md +0 -499
  98. package/docs/superpowers/specs/2026-03-13-npm-rename-skill-showcase-design.md +0 -109
  99. package/skills/argo-demo-creator.md +0 -355
  100. package/src/asset-server.ts +0 -81
  101. package/src/captions.ts +0 -36
  102. package/src/cli.ts +0 -97
  103. package/src/config.ts +0 -125
  104. package/src/export.ts +0 -93
  105. package/src/fixtures.ts +0 -50
  106. package/src/index.ts +0 -41
  107. package/src/narration.ts +0 -31
  108. package/src/overlays/index.ts +0 -54
  109. package/src/overlays/manifest.ts +0 -68
  110. package/src/overlays/motion.ts +0 -27
  111. package/src/overlays/templates.ts +0 -121
  112. package/src/overlays/types.ts +0 -73
  113. package/src/overlays/zones.ts +0 -82
  114. package/src/pipeline.ts +0 -120
  115. package/src/record.ts +0 -123
  116. package/src/tts/align.ts +0 -75
  117. package/src/tts/cache.ts +0 -65
  118. package/src/tts/engine.ts +0 -147
  119. package/src/tts/generate.ts +0 -83
  120. package/src/tts/kokoro.ts +0 -51
  121. package/tests/asset-server.test.ts +0 -67
  122. package/tests/captions.test.ts +0 -76
  123. package/tests/cli.test.ts +0 -131
  124. package/tests/config.test.ts +0 -150
  125. package/tests/e2e/fake-server.ts +0 -45
  126. package/tests/e2e/record.e2e.test.ts +0 -131
  127. package/tests/export.test.ts +0 -155
  128. package/tests/fixtures.test.ts +0 -74
  129. package/tests/init.test.ts +0 -77
  130. package/tests/narration.test.ts +0 -120
  131. package/tests/overlays/index.test.ts +0 -73
  132. package/tests/overlays/manifest.test.ts +0 -120
  133. package/tests/overlays/motion.test.ts +0 -34
  134. package/tests/overlays/templates.test.ts +0 -69
  135. package/tests/overlays/types.test.ts +0 -36
  136. package/tests/overlays/zones.test.ts +0 -49
  137. package/tests/pipeline.test.ts +0 -177
  138. package/tests/record.test.ts +0 -87
  139. package/tests/tts/align.test.ts +0 -118
  140. package/tests/tts/cache.test.ts +0 -110
  141. package/tests/tts/engine.test.ts +0 -204
  142. package/tests/tts/generate.test.ts +0 -177
  143. package/tests/tts/kokoro.test.ts +0 -25
  144. package/tsconfig.json +0 -19
package/src/pipeline.ts DELETED
@@ -1,120 +0,0 @@
1
- import { readFileSync, writeFileSync } from 'node:fs';
2
- import { execFileSync } from 'node:child_process';
3
- import { join } from 'node:path';
4
- import { generateClips } from './tts/generate.js';
5
- import { record } from './record.js';
6
- import { alignClips, type ClipInfo, type SceneTiming } from './tts/align.js';
7
- import { parseWavHeader, createWavBuffer } from './tts/engine.js';
8
- import { exportVideo, checkFfmpeg } from './export.js';
9
- import type { ArgoConfig } from './config.js';
10
-
11
/**
 * Asks `ffprobe` for the container duration of a video file.
 *
 * @param videoPath - Path handed to `ffprobe` as the input file.
 * @returns The duration rounded to whole milliseconds.
 * @throws Error when `ffprobe` cannot be run or fails, or when its output
 *   does not parse to a positive number.
 */
function getVideoDurationMs(videoPath: string): number {
  const probeArgs = ['-v', 'error', '-show_entries', 'format=duration', '-of', 'csv=p=0', videoPath];

  // Run ffprobe and translate any spawn/exit failure into a descriptive error.
  const readDurationField = (): string => {
    try {
      return execFileSync('ffprobe', probeArgs, { encoding: 'utf-8' }).trim();
    } catch (err) {
      throw new Error(
        `Failed to get video duration from ${videoPath}. ` +
          `Ensure ffprobe is installed (it usually comes with ffmpeg). ` +
          `Original error: ${(err as Error).message}`
      );
    }
  };

  const rawSeconds = readDurationField();
  const millis = Math.round(parseFloat(rawSeconds) * 1000);

  // `!(x > 0)` is true for NaN as well as for zero and negative values.
  if (!(millis > 0)) {
    throw new Error(
      `ffprobe returned invalid duration "${rawSeconds}" for ${videoPath}. The video file may be corrupt.`
    );
  }
  return millis;
}
34
-
35
/**
 * Reads one generated clip from disk and decodes its samples.
 *
 * Only mono 32-bit IEEE-float WAV data (format tag 3) is accepted, because the
 * decoder below reads the data chunk as consecutive little-endian floats.
 */
function loadFloat32Clip(result: { scene: string; clipPath: string }): {
  clip: ClipInfo;
  sampleRate: number;
} {
  const wavBuf = readFileSync(result.clipPath);
  const header = parseWavHeader(wavBuf);

  if (header.audioFormat !== 3 || header.bitsPerSample !== 32 || header.numChannels !== 1) {
    throw new Error(
      `Unsupported WAV format for scene "${result.scene}" (${result.clipPath}): ` +
        `expected mono 32-bit float, got format ${header.audioFormat}, ` +
        `${header.bitsPerSample}-bit, ${header.numChannels} channel(s).`
    );
  }

  // Never trust the declared chunk size beyond what the file actually holds,
  // and floor so a ragged byte count cannot produce a fractional array length.
  const availableBytes = Math.max(0, Math.min(header.dataSize, wavBuf.length - header.dataOffset));
  const sampleCount = Math.floor(availableBytes / 4);
  const samples = new Float32Array(sampleCount);
  for (let i = 0; i < sampleCount; i++) {
    samples[i] = wavBuf.readFloatLE(header.dataOffset + i * 4);
  }

  return {
    clip: { scene: result.scene, durationMs: header.durationMs, samples },
    sampleRate: header.sampleRate,
  };
}

/**
 * Runs the four pipeline steps for one demo: generate TTS clips, record the
 * browser demo, align the narration to the recorded timing, and export the
 * final video.
 *
 * @param demoName - Demo identifier; selects `<demosDir>/<demoName>.voiceover.json`
 *   and the `.argo/<demoName>` working directory.
 * @param config - The subset of the Argo configuration the pipeline reads.
 * @returns The path returned by `exportVideo`.
 * @throws Error when `baseURL` or the TTS engine is missing, when no clips are
 *   generated, when a clip is not mono float32 or the clips disagree on sample
 *   rate, when the timing file is not a JSON object, or when any step fails.
 */
export async function runPipeline(
  demoName: string,
  config: Pick<ArgoConfig, 'baseURL' | 'demosDir' | 'outputDir' | 'tts' | 'video' | 'export'>
): Promise<string> {
  if (!config.baseURL) {
    throw new Error(
      'baseURL is required but not set. Set it in argo.config.js or pass --config.'
    );
  }
  if (!config.tts.engine) {
    throw new Error('TTS engine is not configured. Ensure config.tts.engine is set.');
  }

  checkFfmpeg();

  const argoDir = join('.argo', demoName);
  const manifestPath = `${config.demosDir}/${demoName}.voiceover.json`;

  // Step 1: Generate TTS clips
  console.log('Step 1/4: Generating TTS clips...');
  const clipResults = await generateClips({
    manifestPath,
    demoName,
    engine: config.tts.engine,
    projectRoot: '.',
    defaults: { voice: config.tts.defaultVoice, speed: config.tts.defaultSpeed },
  });

  if (clipResults.length === 0) {
    throw new Error(
      `No TTS clips were generated from ${config.demosDir}/${demoName}.voiceover.json. ` +
        `Ensure the manifest contains at least one entry.`
    );
  }

  // Step 2: Record browser demo
  console.log('Step 2/4: Recording browser demo...');
  const { timingPath } = await record(demoName, {
    demosDir: config.demosDir,
    baseURL: config.baseURL,
    video: { width: config.video.width, height: config.video.height },
  });

  // Step 3: Align clips with timing
  console.log('Step 3/4: Aligning narration with video...');
  const parsedTiming: unknown = JSON.parse(readFileSync(timingPath, 'utf-8'));
  if (typeof parsedTiming !== 'object' || parsedTiming === null || Array.isArray(parsedTiming)) {
    throw new Error(`Timing file ${timingPath} must contain a JSON object of scene timestamps.`);
  }
  const timing = parsedTiming as SceneTiming;

  // Load WAV clips into memory, remembering each clip's own sample rate.
  const loaded = clipResults.map((cr) => loadFloat32Clip(cr));
  const sampleRate = loaded[0]?.sampleRate ?? 24_000;
  const mismatched = loaded.find((entry) => entry.sampleRate !== sampleRate);
  if (mismatched) {
    // Mixing clips of different rates into one buffer would change their pitch.
    throw new Error(
      `TTS clips use different sample rates (${sampleRate} Hz and ${mismatched.sampleRate} Hz ` +
        `for scene "${mismatched.clip.scene}"). All clips must share one sample rate.`
    );
  }
  const clips: ClipInfo[] = loaded.map((entry) => entry.clip);

  // Use actual video duration for alignment
  const videoPath = join(argoDir, 'video.webm');
  const totalDurationMs = getVideoDurationMs(videoPath);

  // Align and encode at the clips' real sample rate instead of assuming 24 kHz.
  const aligned = alignClips(timing, clips, totalDurationMs, sampleRate);
  const alignedWav = createWavBuffer(aligned.samples, sampleRate);
  const alignedPath = join(argoDir, 'narration-aligned.wav');
  writeFileSync(alignedPath, alignedWav);

  // Step 4: Export final video
  console.log('Step 4/4: Exporting final video...');
  const outputPath = await exportVideo({
    demoName,
    argoDir: '.argo',
    outputDir: config.outputDir,
    preset: config.export.preset,
    crf: config.export.crf,
    fps: config.video.fps,
  });

  console.log(`Done! Video saved to: ${outputPath}`);
  return outputPath;
}
package/src/record.ts DELETED
@@ -1,123 +0,0 @@
1
- import { execFile } from 'node:child_process';
2
- import { mkdirSync, readdirSync, copyFileSync, existsSync, rmSync, writeFileSync } from 'node:fs';
3
- import path from 'node:path';
4
- import { startAssetServer, type AssetServer } from './asset-server.js';
5
- import { loadOverlayManifest, hasImageAssets } from './overlays/manifest.js';
6
-
7
/** Inputs accepted by `record`. */
export interface RecordOptions {
  /**
   * Directory containing the demo files. It becomes the Playwright `testDir`
   * and is the base for `<demo>.overlays.json` and the `assets` folder.
   */
  demosDir: string;
  /** Written into the Playwright config as `use.baseURL` and exported to the child process as `BASE_URL`. */
  baseURL: string;
  /** Pixel dimensions applied to both the viewport and the recorded video. */
  video: { width: number; height: number };
}

/** Paths reported by a successful `record` call. */
export interface RecordResult {
  /** Where the recording was copied to (`.argo/<demo>/video.webm`). */
  videoPath: string;
  /** Where the timing file is expected (`.argo/<demo>/.timing.json`). */
  timingPath: string;
}
17
-
18
/**
 * Looks for a `.webm` entry anywhere beneath a results directory.
 *
 * @param testResultsDir - Directory that is listed recursively.
 * @returns The first listed entry whose name ends in `.webm`, joined onto
 *   `testResultsDir`; `undefined` when the directory does not exist or no
 *   entry matches.
 */
function findVideoInResults(testResultsDir: string): string | undefined {
  if (!existsSync(testResultsDir)) {
    return undefined;
  }

  const firstWebm = readdirSync(testResultsDir, { recursive: true })
    .map((entry) => (typeof entry === 'string' ? entry : entry.toString()))
    .find((name) => name.endsWith('.webm'));

  return firstWebm === undefined ? undefined : path.join(testResultsDir, firstWebm);
}
28
-
29
/**
 * Builds the source text of a Playwright config module for one recording run.
 *
 * The config has a single `demos` project that matches `**\/*.demo.ts` under
 * the resolved demos directory and records video at the requested size.
 *
 * @param options - Supplies the demos directory, base URL and video size.
 * @param outputDir - Written into the config as Playwright's `outputDir`.
 * @returns The module source, terminated by a newline.
 */
function createPlaywrightConfig(options: RecordOptions, outputDir: string): string {
  const testDir = JSON.stringify(path.resolve(options.demosDir));
  const size = `{ width: ${options.video.width}, height: ${options.video.height} }`;

  // String values go through JSON.stringify so they are emitted as quoted,
  // escaped literals in the generated module.
  const sourceLines = [
    `import { defineConfig } from '@playwright/test';`,
    ``,
    `export default defineConfig({`,
    `preserveOutput: 'always',`,
    `outputDir: ${JSON.stringify(outputDir)},`,
    `projects: [`,
    `{`,
    `name: 'demos',`,
    `testDir: ${testDir},`,
    `testMatch: '**/*.demo.ts',`,
    `use: {`,
    `baseURL: ${JSON.stringify(options.baseURL)},`,
    `viewport: ${size},`,
    `video: {`,
    `mode: 'on',`,
    `size: ${size},`,
    `},`,
    `},`,
    `},`,
    `],`,
    `});`,
    ``,
  ];

  return sourceLines.join('\n');
}
56
-
57
/**
 * Records one demo by running Playwright in a child process.
 *
 * Steps: write a generated Playwright config into `.argo/<demoName>`, clear
 * `test-results`, start the asset server when the overlay manifest has image
 * assets, run `npx playwright test`, then copy the recorded `.webm` to
 * `.argo/<demoName>/video.webm` and confirm the timing file exists.
 *
 * @param demoName - Demo identifier; used for the working directory, the
 *   `--grep` filter and the `ARGO_DEMO_NAME` environment variable.
 * @param options - Demos directory, base URL and video size.
 * @returns Paths of the copied video and the timing file.
 * @throws Error when Playwright fails, no recording is found, copying the
 *   recording fails, or the timing file is missing.
 */
export async function record(demoName: string, options: RecordOptions): Promise<RecordResult> {
  const argoDir = path.join('.argo', demoName);
  mkdirSync(argoDir, { recursive: true });

  const videoPath = path.join(argoDir, 'video.webm');
  const timingPath = path.join(argoDir, '.timing.json');
  const testResultsDir = path.resolve('test-results');
  const recordConfigPath = path.join(argoDir, 'playwright.record.config.mjs');

  writeFileSync(recordConfigPath, createPlaywrightConfig(options, testResultsDir), 'utf-8');

  // Clean test-results to avoid picking up stale videos
  rmSync(testResultsDir, { recursive: true, force: true });
  // NOTE(review): a timing file left by an earlier run is not removed here, so
  // the existence check below cannot tell a stale file from a fresh one — confirm
  // whether the narration fixture always rewrites it.

  // Start asset server if overlay manifest has image assets
  let assetServer: AssetServer | undefined;
  const overlayManifestPath = path.join(options.demosDir, `${demoName}.overlays.json`);
  const overlayEntries = await loadOverlayManifest(overlayManifestPath);
  if (overlayEntries && hasImageAssets(overlayEntries)) {
    const assetDir = path.join(options.demosDir, 'assets');
    assetServer = await startAssetServer(assetDir);
  }

  try {
    return await new Promise<RecordResult>((resolve, reject) => {
      execFile('npx', ['playwright', 'test', '--config', recordConfigPath, '--grep', demoName, '--project', 'demos'], {
        env: {
          ...process.env,
          ARGO_DEMO_NAME: demoName,
          ARGO_OUTPUT_DIR: argoDir,
          BASE_URL: options.baseURL,
          ARGO_ASSET_URL: assetServer?.url ?? '',
        },
      }, (error, stdout, stderr) => {
        // This callback runs outside the promise executor, so anything thrown
        // here (for example by copyFileSync) must be routed to reject() by hand.
        try {
          if (error) {
            const output = [stdout, stderr].filter(Boolean).join('\n');
            // Spawn failures such as a missing `npx` produce no output at all;
            // fall back to the error's own message so the failure is explained.
            reject(new Error(`Playwright recording failed:\n${output || error.message}`));
            return;
          }

          // Copy the video from test-results/ to .argo/<demo>/video.webm
          const found = findVideoInResults(testResultsDir);
          if (!found) {
            reject(new Error(
              `No video recording found in test-results/. ` +
              `Ensure playwright.config.ts has video: 'on' or video: { mode: 'on' }.`
            ));
            return;
          }
          copyFileSync(found, videoPath);

          // Verify timing file was written by the narration fixture
          if (!existsSync(timingPath)) {
            reject(new Error(
              `No timing file found at ${timingPath}. ` +
              `Ensure the demo uses the argo test fixture with narration.mark() calls.`
            ));
            return;
          }

          resolve({ videoPath, timingPath });
        } catch (err) {
          reject(err instanceof Error ? err : new Error(String(err)));
        }
      });
    });
  } finally {
    // Shut the asset server down whether the recording succeeded or not.
    if (assetServer) await assetServer.close();
  }
}
package/src/tts/align.ts DELETED
@@ -1,75 +0,0 @@
1
/** Maps a scene name to the timestamp, in milliseconds, at which that scene starts. */
export type SceneTiming = Record<string, number>;

/** One narration clip that is ready to be placed on the timeline. */
export interface ClipInfo {
  /** Scene name; looked up in the {@link SceneTiming} map. */
  scene: string;
  /** Clip length in milliseconds, used to compute where the clip ends. */
  durationMs: number;
  /** Audio samples that are mixed into the output buffer. */
  samples: Float32Array;
}

/** Where a clip was finally placed on the timeline. */
export interface Placement {
  /** Scene name of the placed clip. */
  scene: string;
  /** Start position in milliseconds (may be later than the scene's timestamp). */
  startMs: number;
  /** `startMs` plus the clip's `durationMs`. */
  endMs: number;
}

/** Outcome of aligning clips against a scene timing map. */
export interface AlignResult {
  /** One placement per matched clip, in timeline order. */
  placements: Placement[];
  /** The mixed output buffer covering the whole timeline. */
  samples: Float32Array;
}
19
-
20
/** Gap, in milliseconds, left after the previous clip when a clip is pushed later to avoid overlap. */
const OVERLAP_GAP_MS = 100;

/**
 * Places narration clips on a timeline and mixes them into one sample buffer.
 *
 * Clips are matched to `timing` by scene name, ordered by timestamp, and each
 * one starts at its scene's timestamp unless that would overlap the previous
 * clip, in which case it starts `OVERLAP_GAP_MS` after the previous clip ends.
 * Samples that would fall past the end of the timeline are dropped.
 *
 * A clip is only matched when `timing` has the scene as an own property with a
 * finite numeric value; inherited keys such as `toString` never match. Clips
 * that do not match are skipped and reported with a console warning.
 *
 * @param timing - Scene name to start timestamp (ms).
 * @param clips - Clips to place; the array is not modified.
 * @param totalDurationMs - Timeline length, which fixes the output buffer size.
 * @param sampleRate - Samples per second for the output buffer and the clips.
 * @returns The placements in timeline order and the mixed samples.
 */
export function alignClips(
  timing: SceneTiming,
  clips: ClipInfo[],
  totalDurationMs: number,
  sampleRate = 24_000,
): AlignResult {
  // 1. Split clips into those with a usable timestamp and those without.
  const matched: Array<{ clip: ClipInfo; sceneMs: number }> = [];
  const unmatched: ClipInfo[] = [];
  for (const clip of clips) {
    // Own-property lookup: `in` would also accept Object.prototype members.
    const sceneMs = Object.prototype.hasOwnProperty.call(timing, clip.scene)
      ? timing[clip.scene]
      : undefined;
    if (typeof sceneMs === 'number' && Number.isFinite(sceneMs)) {
      matched.push({ clip, sceneMs });
    } else {
      unmatched.push(clip);
    }
  }

  if (unmatched.length > 0) {
    const names = unmatched.map((c) => c.scene).join(', ');
    console.warn(
      `Warning: ${unmatched.length} clip(s) have no matching scene in timing and will be skipped: ${names}. ` +
        `Check that voiceover manifest scene names match narration.mark() calls.`
    );
  }

  // 2. Sort by scene timestamp ascending
  matched.sort((a, b) => a.sceneMs - b.sceneMs);

  // 3. Place each clip, preventing overlap
  const placements: Placement[] = [];
  let previousEndMs = 0;
  for (const { clip, sceneMs } of matched) {
    const overlapsPrevious = placements.length > 0 && sceneMs < previousEndMs;
    const startMs = overlapsPrevious ? previousEndMs + OVERLAP_GAP_MS : sceneMs;
    const endMs = startMs + clip.durationMs;
    placements.push({ scene: clip.scene, startMs, endMs });
    previousEndMs = endMs;
  }

  // 4. Create silence buffer
  const totalSamples = Math.round((totalDurationMs / 1000) * sampleRate);
  const output = new Float32Array(totalSamples);

  // 5. Mix each clip's samples into output (placements[i] belongs to matched[i]).
  placements.forEach((placement, i) => {
    const { samples } = matched[i].clip;
    const startSample = Math.round((placement.startMs / 1000) * sampleRate);
    for (let j = 0; j < samples.length && startSample + j < totalSamples; j++) {
      output[startSample + j] += samples[j];
    }
  });

  return { placements, samples: output };
}
package/src/tts/cache.ts DELETED
@@ -1,65 +0,0 @@
1
- /**
2
- * Content-addressed clip cache for Argo TTS output.
3
- */
4
-
5
- import crypto from 'node:crypto';
6
- import fs from 'node:fs';
7
- import path from 'node:path';
8
-
9
/**
 * One voiceover manifest entry. All four fields feed the cache hash, so a
 * change to any of them yields a different cached clip file.
 */
export interface ManifestEntry {
  /** Scene name the clip belongs to. */
  scene: string;
  /** Narration text for the scene. */
  text: string;
  /** Optional voice identifier. */
  voice?: string;
  /** Optional speaking-speed value. */
  speed?: number;
}
15
-
16
/**
 * File-backed cache of generated clips. Each clip lives at
 * `<projectRoot>/.argo/<demoName>/clips/<sha256>.wav`, where the hash covers
 * the entry's scene, text, voice and speed.
 */
export class ClipCache {
  private readonly projectRoot: string;

  /**
   * @param projectRoot - Directory under which the `.argo` folder is placed.
   */
  constructor(projectRoot: string) {
    this.projectRoot = projectRoot;
  }

  /**
   * Returns the full file path for a cached clip.
   */
  getClipPath(demoName: string, entry: ManifestEntry): string {
    const hash = this.computeHash(entry);
    return path.join(this.projectRoot, '.argo', demoName, 'clips', `${hash}.wav`);
  }

  /**
   * Checks whether a clip is already cached on disk.
   */
  isCached(demoName: string, entry: ManifestEntry): boolean {
    return fs.existsSync(this.getClipPath(demoName, entry));
  }

  /**
   * Returns the cached WAV buffer, or null if not cached.
   *
   * The file is read directly rather than checked first, so a clip that
   * disappears between a check and the read still yields null.
   *
   * @throws Any read error other than "file does not exist".
   */
  getCachedClip(demoName: string, entry: ManifestEntry): Buffer | null {
    try {
      return fs.readFileSync(this.getClipPath(demoName, entry));
    } catch (err) {
      const code = (err as NodeJS.ErrnoException).code;
      if (code === 'ENOENT' || code === 'ENOTDIR') {
        return null;
      }
      throw err;
    }
  }

  /**
   * Writes a WAV buffer to the cache, creating directories as needed.
   *
   * The data goes to a temporary sibling file that is then renamed into
   * place, so an interrupted write cannot leave a truncated `.wav` that
   * `isCached` would later report as valid.
   */
  cacheClip(demoName: string, entry: ManifestEntry, wavBuffer: Buffer): void {
    const clipPath = this.getClipPath(demoName, entry);
    fs.mkdirSync(path.dirname(clipPath), { recursive: true });

    const tempPath = `${clipPath}.${process.pid}.tmp`;
    try {
      fs.writeFileSync(tempPath, wavBuffer);
      fs.renameSync(tempPath, clipPath);
    } catch (err) {
      // Best-effort cleanup of the partial temp file before reporting the failure.
      fs.rmSync(tempPath, { force: true });
      throw err;
    }
  }

  /** SHA-256 hex digest of the JSON form of `{ scene, text, voice, speed }`. */
  private computeHash(entry: ManifestEntry): string {
    const { scene, text, voice, speed } = entry;
    return crypto
      .createHash('sha256')
      .update(JSON.stringify({ scene, text, voice, speed }))
      .digest('hex');
  }
}
package/src/tts/engine.ts DELETED
@@ -1,147 +0,0 @@
1
- /**
2
- * TTS Engine interface and WAV utilities for Argo.
3
- */
4
-
5
/** Per-call settings handed to a {@link TTSEngine}. Every field is optional. */
export interface TTSEngineOptions {
  /** Voice identifier. */
  voice?: string;
  /** Speaking-speed value. */
  speed?: number;
  /** Language identifier. */
  lang?: string;
}

/** Contract every text-to-speech backend implements. */
export interface TTSEngine {
  /**
   * Synthesizes speech for `text`.
   *
   * @returns A promise for the audio as a `Buffer`.
   */
  generate(text: string, options: TTSEngineOptions): Promise<Buffer>;
}
14
-
15
/**
 * Serializes samples into a complete WAV file held in memory.
 *
 * Layout: a 44-byte header (RIFF/WAVE, a 16-byte `fmt ` chunk declaring mono
 * 32-bit IEEE float, then the `data` chunk header) followed by the samples as
 * little-endian floats.
 *
 * @param samples - Sample values to write.
 * @param sampleRate - Samples per second recorded in the header.
 * @returns The WAV file bytes.
 */
export function createWavBuffer(samples: Float32Array, sampleRate = 24000): Buffer {
  const HEADER_BYTES = 44;
  const CHANNELS = 1;
  const BITS = 32;
  const sampleBytes = BITS / 8;
  const frameBytes = CHANNELS * sampleBytes;
  const payloadBytes = samples.length * sampleBytes;

  const wav = Buffer.alloc(HEADER_BYTES + payloadBytes);

  // Small sequential writers so the header reads top-to-bottom in file order.
  let cursor = 0;
  const tag = (fourCc: string): void => {
    wav.write(fourCc, cursor, 'ascii');
    cursor += 4;
  };
  const u32 = (value: number): void => {
    wav.writeUInt32LE(value, cursor);
    cursor += 4;
  };
  const u16 = (value: number): void => {
    wav.writeUInt16LE(value, cursor);
    cursor += 2;
  };

  // RIFF container: size field excludes the 8 bytes of 'RIFF' + size itself.
  tag('RIFF');
  u32(HEADER_BYTES + payloadBytes - 8);
  tag('WAVE');

  // Format description.
  tag('fmt ');
  u32(16); // fmt chunk size
  u16(3); // audioFormat = 3 (IEEE float)
  u16(CHANNELS);
  u32(sampleRate);
  u32(sampleRate * frameBytes); // byte rate
  u16(frameBytes); // block align
  u16(BITS);

  // Sample payload.
  tag('data');
  u32(payloadBytes);
  for (const sample of samples) {
    wav.writeFloatLE(sample, cursor);
    cursor += sampleBytes;
  }

  return wav;
}
56
-
57
/** Fields extracted from a WAV file by {@link parseWavHeader}. */
export interface WavHeader {
  /** Samples per second, from the fmt chunk. */
  sampleRate: number;
  /** Channel count, from the fmt chunk. */
  numChannels: number;
  /** Bits per sample, from the fmt chunk. */
  bitsPerSample: number;
  /** WAV format tag, from the fmt chunk (the writer in this module uses 3 for IEEE float). */
  audioFormat: number;
  /** Size in bytes declared by the data chunk header. */
  dataSize: number;
  /** Byte offset of the first sample, immediately after the data chunk header. */
  dataOffset: number;
  /** Duration derived from `dataSize`, the sample width, channel count and sample rate. */
  durationMs: number;
}

/**
 * Parses a WAV file header. Searches for the 'data' chunk rather than
 * assuming a fixed offset.
 *
 * The fmt chunk must directly follow the RIFF/WAVE preamble (byte 12). Other
 * chunks between it and the data chunk are skipped, honouring the RIFF rule
 * that an odd-sized chunk is followed by one pad byte. A file whose data
 * chunk is empty (header only) is valid and reports a duration of 0.
 *
 * @param wav - Complete WAV file contents.
 * @returns The parsed header fields.
 * @throws Error when the buffer is shorter than 44 bytes, a marker is
 *   missing, the fmt chunk declares zero channels / sample rate / bits per
 *   sample, or no data chunk is found.
 */
export function parseWavHeader(wav: Buffer): WavHeader {
  if (wav.length < 44) {
    throw new Error('Buffer too small to be a valid WAV file');
  }
  if (wav.toString('ascii', 0, 4) !== 'RIFF') {
    throw new Error('Not a valid WAV file: missing RIFF header');
  }
  if (wav.toString('ascii', 8, 12) !== 'WAVE') {
    throw new Error('Not a valid WAV file: missing WAVE marker');
  }

  // Validate and parse fmt chunk (expected at byte 12)
  if (wav.toString('ascii', 12, 16) !== 'fmt ') {
    throw new Error('Not a valid WAV file: fmt chunk not found at expected offset');
  }
  const audioFormat = wav.readUInt16LE(20);
  const numChannels = wav.readUInt16LE(22);
  const sampleRate = wav.readUInt32LE(24);
  const bitsPerSample = wav.readUInt16LE(34);

  // Any of these being zero would make the duration NaN or Infinity.
  if (numChannels === 0 || sampleRate === 0 || bitsPerSample === 0) {
    throw new Error(
      'Not a valid WAV file: fmt chunk declares zero channels, sample rate, or bits per sample'
    );
  }

  // Search for 'data' chunk
  let offset = 12; // after 'WAVE'
  let dataSize = 0;
  let dataOffset = 0;

  // `<=` so a chunk header occupying the final 8 bytes (empty data chunk) is seen.
  while (offset + 8 <= wav.length) {
    const chunkId = wav.toString('ascii', offset, offset + 4);
    const chunkSize = wav.readUInt32LE(offset + 4);

    if (chunkId === 'data') {
      dataSize = chunkSize;
      dataOffset = offset + 8;
      break;
    }

    // RIFF chunks are word-aligned: an odd-sized chunk has one pad byte after it.
    offset += 8 + chunkSize + (chunkSize % 2);
  }

  if (dataOffset === 0) {
    throw new Error('No data chunk found in WAV file');
  }

  const bytesPerSample = bitsPerSample / 8;
  const totalSamples = dataSize / (bytesPerSample * numChannels);
  const durationMs = (totalSamples / sampleRate) * 1000;

  return {
    sampleRate,
    numChannels,
    bitsPerSample,
    audioFormat,
    dataSize,
    dataOffset,
    durationMs,
  };
}
127
-
128
/**
 * Builds a stand-in TTS engine for tests. Every `generate` call is appended
 * to the returned `calls` array and resolves to a WAV buffer of all-zero
 * samples at 24 kHz lasting `durationMs`.
 *
 * @param durationMs - Length of the silent clip each call produces.
 * @returns An engine plus the `calls` log of `{ text, options }` records.
 */
export function createMockTTSEngine(
  durationMs = 500,
): TTSEngine & { calls: Array<{ text: string; options: TTSEngineOptions }> } {
  const MOCK_SAMPLE_RATE = 24000;
  const recorded: Array<{ text: string; options: TTSEngineOptions }> = [];

  const generate = async (text: string, options: TTSEngineOptions): Promise<Buffer> => {
    recorded.push({ text, options });
    // A freshly allocated Float32Array is zero-filled, i.e. silence.
    const silence = new Float32Array(Math.round((durationMs / 1000) * MOCK_SAMPLE_RATE));
    return createWavBuffer(silence, MOCK_SAMPLE_RATE);
  };

  return { calls: recorded, generate };
}
package/src/tts/generate.ts DELETED
@@ -1,83 +0,0 @@
1
- /**
2
- * TTS clip generation with manifest parsing and cache integration.
3
- */
4
-
5
- import fs from 'node:fs';
6
- import type { TTSEngine } from './engine.js';
7
- import { ClipCache, type ManifestEntry } from './cache.js';
8
-
9
/** Inputs for `generateClips`. */
export interface GenerateClipsOptions {
  /** Path of the JSON voiceover manifest to read. */
  manifestPath: string;
  /** Demo identifier; passed to the clip cache to locate clip files. */
  demoName: string;
  /** Engine used to synthesize clips that are not cached yet. */
  engine: TTSEngine;
  /** Root directory handed to the clip cache. */
  projectRoot: string;
  /** Fallback voice and speed for manifest entries that omit them. */
  defaults?: { voice?: string; speed?: number };
}

/** One generated (or cached) clip. */
export interface ClipResult {
  /** Scene name copied from the manifest entry. */
  scene: string;
  /** Path of the clip file in the cache. */
  clipPath: string;
}
21
-
22
/** Returns `value` as a string, `undefined` when absent/null, and throws for any other type. */
function readOptionalString(value: unknown, field: string, scene: string): string | undefined {
  if (value === undefined || value === null) return undefined;
  if (typeof value !== 'string') {
    throw new Error(`Manifest entry "${scene}" has invalid ${field}: expected a string`);
  }
  return value;
}

/** Returns `value` as a finite number, `undefined` when absent/null, and throws for any other value. */
function readOptionalNumber(value: unknown, field: string, scene: string): number | undefined {
  if (value === undefined || value === null) return undefined;
  if (typeof value !== 'number' || !Number.isFinite(value)) {
    throw new Error(`Manifest entry "${scene}" has invalid ${field}: expected a finite number`);
  }
  return value;
}

/** Validates one raw manifest element and fills in voice/speed defaults. */
function toManifestEntry(
  raw: unknown,
  defaults: { voice?: string; speed?: number } | undefined,
): ManifestEntry {
  // Guard against null and primitives before touching any property.
  const fields = typeof raw === 'object' && raw !== null ? (raw as Record<string, unknown>) : {};
  const { scene, text } = fields;
  if (typeof scene !== 'string' || typeof text !== 'string') {
    throw new Error('Manifest entry missing required field: scene and text are required');
  }
  return {
    scene,
    text,
    voice: readOptionalString(fields.voice, 'voice', scene) ?? defaults?.voice,
    speed: readOptionalNumber(fields.speed, 'speed', scene) ?? defaults?.speed,
  };
}

/**
 * Produces one clip file per manifest entry, reusing cached clips.
 *
 * The manifest must be a JSON array of objects with string `scene` and `text`
 * fields and optional `voice` (string) and `speed` (finite number). Every
 * entry is validated before any clip is generated. Entries are then processed
 * in order: a clip missing from the cache is synthesized with `engine` and
 * stored; a cached clip is reused as is.
 *
 * @param options - Manifest location, demo name, engine, project root and
 *   optional voice/speed defaults.
 * @returns One `{ scene, clipPath }` per manifest entry, in manifest order.
 * @throws Error when the manifest is missing, is not valid JSON, is not an
 *   array, or contains an invalid entry; engine and file-system errors
 *   propagate unchanged.
 */
export async function generateClips(options: GenerateClipsOptions): Promise<ClipResult[]> {
  const { manifestPath, demoName, engine, projectRoot, defaults } = options;

  // 1. Check manifest exists
  if (!fs.existsSync(manifestPath)) {
    throw new Error(`Manifest file not found: ${manifestPath}`);
  }

  // 2. Read and parse JSON
  let parsed: unknown;
  try {
    parsed = JSON.parse(fs.readFileSync(manifestPath, 'utf-8'));
  } catch (err) {
    if (err instanceof SyntaxError) {
      throw new Error(`Failed to parse manifest ${manifestPath}: ${err.message}`);
    }
    throw err;
  }

  if (!Array.isArray(parsed)) {
    throw new Error(`Manifest ${manifestPath} must contain a JSON array`);
  }

  // 3/4. Validate every entry up front and apply defaults.
  const entries = parsed.map((raw: unknown) => toManifestEntry(raw, defaults));

  const cache = new ClipCache(projectRoot);
  const results: ClipResult[] = [];

  // Clips are generated one at a time, in manifest order.
  for (const manifestEntry of entries) {
    const clipPath = cache.getClipPath(demoName, manifestEntry);

    // 5/6. Check cache or generate
    if (!cache.isCached(demoName, manifestEntry)) {
      const wavBuffer = await engine.generate(manifestEntry.text, {
        voice: manifestEntry.voice,
        speed: manifestEntry.speed,
      });
      cache.cacheClip(demoName, manifestEntry, wavBuffer);
    }

    results.push({ scene: manifestEntry.scene, clipPath });
  }

  return results;
}
package/src/tts/kokoro.ts DELETED
@@ -1,51 +0,0 @@
1
- import type { TTSEngine, TTSEngineOptions } from './engine.js';
2
-
3
/**
 * {@link TTSEngine} backed by the `kokoro-js` package. The package is imported
 * and the model loaded on the first `generate` call, then reused.
 */
export class KokoroEngine implements TTSEngine {
  private tts: any = null;
  private modelId: string;
  private dtype: string;

  /**
   * @param options - Optional overrides; `modelId` falls back to
   *   `onnx-community/Kokoro-82M-ONNX` and `dtype` to `fp32`.
   */
  constructor(options?: { modelId?: string; dtype?: string }) {
    const requestedModel = options?.modelId;
    const requestedDtype = options?.dtype;
    this.modelId = requestedModel ?? 'onnx-community/Kokoro-82M-ONNX';
    this.dtype = requestedDtype ?? 'fp32';
  }

  /** Loads the model on first use and returns the cached instance afterwards. */
  private async getTTS(): Promise<any> {
    if (!this.tts) {
      try {
        const kokoro = await import('kokoro-js');
        this.tts = await kokoro.KokoroTTS.from_pretrained(this.modelId, {
          dtype: this.dtype as 'fp32' | 'fp16' | 'q8' | 'q4' | 'q4f16',
        });
      } catch (err) {
        throw new Error(
          `Failed to initialize Kokoro TTS (model: ${this.modelId}, dtype: ${this.dtype}). ` +
            `This may require an internet connection for first-time model download. ` +
            `Original error: ${(err as Error).message}`
        );
      }
    }
    return this.tts;
  }

  /**
   * Synthesizes `text` and returns it as a WAV buffer.
   *
   * @param text - Text to speak; must contain non-whitespace characters.
   * @param options - `voice` defaults to `af_heart`, `speed` to `1.0`.
   * @throws Error when the text is empty, the model cannot be initialized, or
   *   the synthesized audio lacks Float32Array samples or a positive numeric
   *   sample rate.
   */
  async generate(text: string, options: TTSEngineOptions): Promise<Buffer> {
    if (!text?.trim()) {
      throw new Error('TTS text must not be empty');
    }

    const model = await this.getTTS();
    const voice = options.voice ?? 'af_heart';
    const speed = options.speed ?? 1.0;
    const audio = await model.generate(text, { voice, speed });

    // The samples are read from `.data`, falling back to `.audio`.
    const pcm = audio.data ?? audio.audio;
    if (!pcm || !(pcm instanceof Float32Array)) {
      throw new Error(
        'kokoro-js returned unexpected audio format: neither .data nor .audio contains Float32Array samples. ' +
          'Check that your kokoro-js version is compatible.'
      );
    }

    const rate = audio.sampling_rate;
    if (typeof rate !== 'number' || rate <= 0) {
      throw new Error(`kokoro-js returned invalid sample rate: ${rate}.`);
    }

    const wavTools = await import('./engine.js');
    return wavTools.createWavBuffer(pcm, rate);
  }
}