@laitszkin/apollo-toolkit 3.13.2 → 3.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154) hide show
  1. package/AGENTS.md +7 -7
  2. package/CHANGELOG.md +27 -0
  3. package/CLAUDE.md +8 -8
  4. package/analyse-app-logs/SKILL.md +3 -3
  5. package/bin/apollo-toolkit.ts +7 -0
  6. package/codex/codex-memory-manager/SKILL.md +2 -2
  7. package/codex/learn-skill-from-conversations/SKILL.md +3 -3
  8. package/dist/bin/apollo-toolkit.d.ts +2 -0
  9. package/dist/bin/apollo-toolkit.js +7 -0
  10. package/dist/lib/cli.d.ts +41 -0
  11. package/dist/lib/cli.js +655 -0
  12. package/dist/lib/installer.d.ts +59 -0
  13. package/dist/lib/installer.js +404 -0
  14. package/dist/lib/tool-runner.d.ts +19 -0
  15. package/dist/lib/tool-runner.js +536 -0
  16. package/dist/lib/tools/architecture.d.ts +2 -0
  17. package/dist/lib/tools/architecture.js +34 -0
  18. package/dist/lib/tools/create-specs.d.ts +2 -0
  19. package/dist/lib/tools/create-specs.js +175 -0
  20. package/dist/lib/tools/docs-to-voice.d.ts +2 -0
  21. package/dist/lib/tools/docs-to-voice.js +705 -0
  22. package/dist/lib/tools/enforce-video-aspect-ratio.d.ts +2 -0
  23. package/dist/lib/tools/enforce-video-aspect-ratio.js +312 -0
  24. package/dist/lib/tools/extract-conversations.d.ts +2 -0
  25. package/dist/lib/tools/extract-conversations.js +105 -0
  26. package/dist/lib/tools/extract-pdf-text.d.ts +2 -0
  27. package/dist/lib/tools/extract-pdf-text.js +92 -0
  28. package/dist/lib/tools/filter-logs.d.ts +2 -0
  29. package/dist/lib/tools/filter-logs.js +94 -0
  30. package/dist/lib/tools/find-github-issues.d.ts +2 -0
  31. package/dist/lib/tools/find-github-issues.js +176 -0
  32. package/dist/lib/tools/generate-storyboard-images.d.ts +2 -0
  33. package/dist/lib/tools/generate-storyboard-images.js +419 -0
  34. package/dist/lib/tools/log-cli-utils.d.ts +35 -0
  35. package/dist/lib/tools/log-cli-utils.js +233 -0
  36. package/dist/lib/tools/open-github-issue.d.ts +2 -0
  37. package/dist/lib/tools/open-github-issue.js +750 -0
  38. package/dist/lib/tools/read-github-issue.d.ts +2 -0
  39. package/dist/lib/tools/read-github-issue.js +134 -0
  40. package/dist/lib/tools/render-error-book.d.ts +2 -0
  41. package/dist/lib/tools/render-error-book.js +265 -0
  42. package/dist/lib/tools/render-katex.d.ts +2 -0
  43. package/dist/lib/tools/render-katex.js +294 -0
  44. package/dist/lib/tools/review-threads.d.ts +2 -0
  45. package/dist/lib/tools/review-threads.js +491 -0
  46. package/dist/lib/tools/search-logs.d.ts +2 -0
  47. package/dist/lib/tools/search-logs.js +164 -0
  48. package/dist/lib/tools/sync-memory-index.d.ts +2 -0
  49. package/dist/lib/tools/sync-memory-index.js +113 -0
  50. package/dist/lib/tools/validate-openai-agent-config.d.ts +2 -0
  51. package/dist/lib/tools/validate-openai-agent-config.js +184 -0
  52. package/dist/lib/tools/validate-skill-frontmatter.d.ts +2 -0
  53. package/dist/lib/tools/validate-skill-frontmatter.js +118 -0
  54. package/dist/lib/types.d.ts +82 -0
  55. package/dist/lib/types.js +2 -0
  56. package/dist/lib/updater.d.ts +34 -0
  57. package/dist/lib/updater.js +112 -0
  58. package/dist/lib/utils/format.d.ts +2 -0
  59. package/dist/lib/utils/format.js +6 -0
  60. package/dist/lib/utils/terminal.d.ts +12 -0
  61. package/dist/lib/utils/terminal.js +26 -0
  62. package/docs-to-voice/SKILL.md +0 -1
  63. package/generate-spec/SKILL.md +1 -1
  64. package/katex/SKILL.md +1 -2
  65. package/lib/cli.ts +780 -0
  66. package/lib/installer.ts +466 -0
  67. package/lib/tool-runner.ts +561 -0
  68. package/lib/tools/architecture.ts +34 -0
  69. package/lib/tools/create-specs.ts +204 -0
  70. package/lib/tools/docs-to-voice.ts +799 -0
  71. package/lib/tools/enforce-video-aspect-ratio.ts +368 -0
  72. package/lib/tools/extract-conversations.ts +114 -0
  73. package/lib/tools/extract-pdf-text.ts +99 -0
  74. package/lib/tools/filter-logs.ts +118 -0
  75. package/lib/tools/find-github-issues.ts +211 -0
  76. package/lib/tools/generate-storyboard-images.ts +455 -0
  77. package/lib/tools/log-cli-utils.ts +262 -0
  78. package/lib/tools/open-github-issue.ts +930 -0
  79. package/lib/tools/read-github-issue.ts +179 -0
  80. package/lib/tools/render-error-book.ts +300 -0
  81. package/lib/tools/render-katex.ts +325 -0
  82. package/lib/tools/review-threads.ts +590 -0
  83. package/lib/tools/search-logs.ts +200 -0
  84. package/lib/tools/sync-memory-index.ts +114 -0
  85. package/lib/tools/validate-openai-agent-config.ts +209 -0
  86. package/lib/tools/validate-skill-frontmatter.ts +124 -0
  87. package/lib/types.ts +90 -0
  88. package/lib/updater.ts +165 -0
  89. package/lib/utils/format.ts +7 -0
  90. package/lib/utils/terminal.ts +22 -0
  91. package/open-github-issue/SKILL.md +2 -2
  92. package/optimise-skill/SKILL.md +1 -1
  93. package/package.json +13 -4
  94. package/resources/project-architecture/assets/architecture.css +764 -0
  95. package/resources/project-architecture/assets/viewer.client.js +144 -0
  96. package/resources/project-architecture/index.html +42 -0
  97. package/review-spec-related-changes/SKILL.md +1 -1
  98. package/solve-issues-found-during-review/SKILL.md +2 -1
  99. package/tsconfig.json +28 -0
  100. package/analyse-app-logs/scripts/__pycache__/filter_logs_by_time.cpython-312.pyc +0 -0
  101. package/analyse-app-logs/scripts/__pycache__/log_cli_utils.cpython-312.pyc +0 -0
  102. package/analyse-app-logs/scripts/__pycache__/search_logs.cpython-312.pyc +0 -0
  103. package/analyse-app-logs/scripts/filter_logs_by_time.py +0 -64
  104. package/analyse-app-logs/scripts/log_cli_utils.py +0 -112
  105. package/analyse-app-logs/scripts/search_logs.py +0 -137
  106. package/analyse-app-logs/tests/test_filter_logs_by_time.py +0 -95
  107. package/analyse-app-logs/tests/test_search_logs.py +0 -100
  108. package/codex/codex-memory-manager/scripts/extract_recent_conversations.py +0 -369
  109. package/codex/codex-memory-manager/scripts/sync_memory_index.py +0 -130
  110. package/codex/codex-memory-manager/tests/test_extract_recent_conversations.py +0 -177
  111. package/codex/codex-memory-manager/tests/test_memory_template.py +0 -37
  112. package/codex/codex-memory-manager/tests/test_sync_memory_index.py +0 -84
  113. package/codex/learn-skill-from-conversations/scripts/extract_recent_conversations.py +0 -369
  114. package/codex/learn-skill-from-conversations/tests/test_extract_recent_conversations.py +0 -177
  115. package/docs-to-voice/scripts/__pycache__/docs_to_voice.cpython-312.pyc +0 -0
  116. package/docs-to-voice/scripts/docs_to_voice.py +0 -1385
  117. package/docs-to-voice/scripts/docs_to_voice.sh +0 -11
  118. package/docs-to-voice/tests/test_docs_to_voice_api_max_chars.py +0 -210
  119. package/docs-to-voice/tests/test_docs_to_voice_sentence_timeline.py +0 -115
  120. package/docs-to-voice/tests/test_docs_to_voice_settings.py +0 -43
  121. package/docs-to-voice/tests/test_docs_to_voice_shell_wrapper.py +0 -51
  122. package/docs-to-voice/tests/test_docs_to_voice_speech_rate.py +0 -57
  123. package/generate-spec/scripts/__pycache__/create-specs.cpython-312.pyc +0 -0
  124. package/generate-spec/scripts/create-specs +0 -215
  125. package/generate-spec/tests/test_create_specs.py +0 -200
  126. package/init-project-html/scripts/architecture-bootstrap-render.js +0 -16
  127. package/init-project-html/scripts/architecture.js +0 -296
  128. package/katex/scripts/__pycache__/render_katex.cpython-312.pyc +0 -0
  129. package/katex/scripts/render_katex.py +0 -247
  130. package/katex/scripts/render_katex.sh +0 -11
  131. package/katex/tests/test_render_katex.py +0 -174
  132. package/learning-error-book/scripts/render_error_book_json_to_pdf.py +0 -590
  133. package/learning-error-book/tests/test_render_error_book_json_to_pdf.py +0 -134
  134. package/open-github-issue/scripts/__pycache__/open_github_issue.cpython-312.pyc +0 -0
  135. package/open-github-issue/scripts/open_github_issue.py +0 -705
  136. package/open-github-issue/tests/test_open_github_issue.py +0 -381
  137. package/openai-text-to-image-storyboard/scripts/generate_storyboard_images.py +0 -763
  138. package/openai-text-to-image-storyboard/tests/test_generate_storyboard_images.py +0 -177
  139. package/read-github-issue/scripts/__pycache__/find_issues.cpython-312.pyc +0 -0
  140. package/read-github-issue/scripts/__pycache__/read_issue.cpython-312.pyc +0 -0
  141. package/read-github-issue/scripts/find_issues.py +0 -148
  142. package/read-github-issue/scripts/read_issue.py +0 -108
  143. package/read-github-issue/tests/test_find_issues.py +0 -127
  144. package/read-github-issue/tests/test_read_issue.py +0 -109
  145. package/resolve-review-comments/scripts/__pycache__/review_threads.cpython-312.pyc +0 -0
  146. package/resolve-review-comments/scripts/review_threads.py +0 -425
  147. package/resolve-review-comments/tests/test_review_threads.py +0 -74
  148. package/scripts/validate_openai_agent_config.py +0 -209
  149. package/scripts/validate_skill_frontmatter.py +0 -131
  150. package/text-to-short-video/scripts/__pycache__/enforce_video_aspect_ratio.cpython-312.pyc +0 -0
  151. package/text-to-short-video/scripts/enforce_video_aspect_ratio.py +0 -350
  152. package/text-to-short-video/tests/test_enforce_video_aspect_ratio.py +0 -194
  153. package/weekly-financial-event-report/scripts/extract_pdf_text_pdfkit.swift +0 -99
  154. package/weekly-financial-event-report/tests/test_extract_pdf_text_pdfkit.py +0 -64
@@ -0,0 +1,799 @@
1
+ import { spawn, execSync } from 'node:child_process';
+ import fs from 'node:fs';
+ import http from 'node:http';
+ import https from 'node:https';
+ import os from 'node:os';
+ import path from 'node:path';
+ import type { ToolContext } from '../types';
7
+
8
+ const DEFAULT_API_ENDPOINT =
9
+ 'https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation';
10
+ const DEFAULT_API_MODEL = 'qwen3-tts';
11
+ const DEFAULT_API_VOICE = 'Cherry';
12
+
13
+ interface DocsToVoiceArgs {
14
+ inputText: string | null;
15
+ inputFile: string | null;
16
+ projectDir: string;
17
+ projectName: string | null;
18
+ outputName: string | null;
19
+ mode: string;
20
+ voice: string | null;
21
+ rate: string | null;
22
+ speechRate: string | null;
23
+ apiEndpoint: string;
24
+ apiModel: string;
25
+ apiVoice: string;
26
+ apiKey: string | null;
27
+ maxChars: string | null;
28
+ noAutoProsody: boolean;
29
+ force: boolean;
30
+ help: boolean;
31
+ }
32
+
33
+ interface TimelineEntry {
34
+ index: number;
35
+ text: string;
36
+ startSeconds: number;
37
+ endSeconds: number;
38
+ startMs: number;
39
+ endMs: number;
40
+ }
41
+
42
+ function parseArgs(args: string[]): DocsToVoiceArgs {
43
+ const parsed: DocsToVoiceArgs = {
44
+ inputText: null,
45
+ inputFile: null,
46
+ projectDir: '.',
47
+ projectName: null,
48
+ outputName: null,
49
+ mode: 'say',
50
+ voice: null,
51
+ rate: null,
52
+ speechRate: null,
53
+ apiEndpoint: DEFAULT_API_ENDPOINT,
54
+ apiModel: DEFAULT_API_MODEL,
55
+ apiVoice: DEFAULT_API_VOICE,
56
+ apiKey: null,
57
+ maxChars: null,
58
+ noAutoProsody: false,
59
+ force: false,
60
+ help: false,
61
+ };
62
+
63
+ for (let i = 0; i < args.length; i++) {
64
+ const arg = args[i];
65
+ if (arg === '--help' || arg === '-h') {
66
+ parsed.help = true;
67
+ continue;
68
+ }
69
+ if (arg.startsWith('--')) {
70
+ const eqIndex = arg.indexOf('=');
71
+ let key: string;
72
+ let value: string;
73
+
74
+ if (eqIndex !== -1) {
75
+ key = arg.slice(2, eqIndex);
76
+ value = arg.slice(eqIndex + 1);
77
+ } else {
78
+ key = arg.slice(2);
79
+ value = args[++i] || '';
80
+ }
81
+
82
+ switch (key) {
83
+ case 'text':
84
+ parsed.inputText = value;
85
+ break;
86
+ case 'input':
87
+ case 'input-file':
88
+ parsed.inputFile = value;
89
+ break;
90
+ case 'project-dir':
91
+ parsed.projectDir = value;
92
+ break;
93
+ case 'project-name':
94
+ parsed.projectName = value;
95
+ break;
96
+ case 'output-name':
97
+ parsed.outputName = value;
98
+ break;
99
+ case 'engine':
100
+ case 'mode':
101
+ parsed.mode = value.toLowerCase();
102
+ break;
103
+ case 'voice':
104
+ parsed.voice = value;
105
+ break;
106
+ case 'rate':
107
+ parsed.rate = value;
108
+ break;
109
+ case 'speech-rate':
110
+ parsed.speechRate = value;
111
+ break;
112
+ case 'api-endpoint':
113
+ parsed.apiEndpoint = value;
114
+ break;
115
+ case 'api-model':
116
+ parsed.apiModel = value;
117
+ break;
118
+ case 'api-voice':
119
+ parsed.apiVoice = value;
120
+ break;
121
+ case 'api-key':
122
+ parsed.apiKey = value;
123
+ break;
124
+ case 'max-chars':
125
+ parsed.maxChars = value;
126
+ break;
127
+ case 'no-auto-prosody':
128
+ parsed.noAutoProsody = true;
129
+ break;
130
+ case 'force':
131
+ parsed.force = true;
132
+ break;
133
+ }
134
+ }
135
+ }
136
+
137
+ return parsed;
138
+ }
139
+
140
+ function readInputText(opts: DocsToVoiceArgs): string {
141
+ if (opts.inputFile) {
142
+ const inputPath = path.resolve(opts.inputFile);
143
+ if (!fs.existsSync(inputPath)) {
144
+ throw new Error(`Input file not found: ${inputPath}`);
145
+ }
146
+ return fs.readFileSync(inputPath, 'utf-8');
147
+ }
148
+ return opts.inputText || '';
149
+ }
150
+
151
+ function splitSentences(rawText: string): string[] {
152
+ const endings = new Set(['。', '!', '?', '!', '?', ';', ';']);
153
+ const sentences: string[] = [];
154
+
155
+ for (const rawLine of rawText.split('\n')) {
156
+ const line = rawLine.trim();
157
+ if (!line) continue;
158
+
159
+ let current: string[] = [];
160
+ for (const char of line) {
161
+ current.push(char);
162
+ if (endings.has(char)) {
163
+ const sentence = current.join('').trim();
164
+ if (sentence) sentences.push(sentence);
165
+ current = [];
166
+ }
167
+ }
168
+ const tail = current.join('').trim();
169
+ if (tail) sentences.push(tail);
170
+ }
171
+
172
+ return sentences;
173
+ }
174
+
175
+ function sentenceWeight(sentence: string): number {
176
+ const compact = sentence.replace(/\s+/g, '');
177
+ if (!compact) return 1.0;
178
+
179
+ let total = 0.0;
180
+ for (const char of compact) {
181
+ if (/[A-Za-z0-9]/.test(char)) {
182
+ total += 0.55;
183
+ } else if (/[一-鿿]/.test(char)) {
184
+ total += 1.0;
185
+ } else if (',,、::'.includes(char)) {
186
+ total += 0.25;
187
+ } else if ('。.!!??;;'.includes(char)) {
188
+ total += 0.45;
189
+ } else {
190
+ total += 0.65;
191
+ }
192
+ }
193
+ return Math.max(total, 1.0);
194
+ }
195
+
196
+ function srtTime(seconds: number): string {
197
+ const ms = Math.max(0, Math.round(seconds * 1000));
198
+ const h = Math.floor(ms / 3600000);
199
+ const m = Math.floor((ms % 3600000) / 60000);
200
+ const s = Math.floor((ms % 60000) / 1000);
201
+ const ml = ms % 1000;
202
+ return `${String(h).padStart(2, '0')}:${String(m).padStart(2, '0')}:${String(s).padStart(2, '0')},${String(ml).padStart(3, '0')}`;
203
+ }
204
+
205
+ function readDurationSeconds(filePath: string): number | null {
206
+ try {
207
+ const ext = path.extname(filePath).toLowerCase();
208
+ if (ext === '.wav') {
209
+ const header = Buffer.alloc(44);
210
+ const fd = fs.openSync(filePath, 'r');
211
+ fs.readSync(fd, header, 0, 44, 0);
212
+ fs.closeSync(fd);
213
+ const dataSize = header.readUInt32LE(40);
214
+ const sampleRate = header.readUInt32LE(24);
215
+ const channels = header.readUInt16LE(22);
216
+ const bitsPerSample = header.readUInt16LE(34);
217
+ const bytesPerSec = sampleRate * channels * (bitsPerSample / 8);
218
+ if (bytesPerSec > 0) {
219
+ return dataSize / bytesPerSec;
220
+ }
221
+ }
222
+ } catch {
223
+ // fallback to afinfo
224
+ }
225
+
226
+ // Try afinfo on macOS
227
+ try {
228
+ const output = execSync(`afinfo "${filePath}" 2>/dev/null`, {
229
+ encoding: 'utf-8',
230
+ timeout: 5000,
231
+ });
232
+ const match = output.match(/estimated duration:\s*([0-9.]+)\s*sec/i) ||
233
+ output.match(/duration:\s*([0-9.]+)\s*sec/i);
234
+ if (match) return parseFloat(match[1]);
235
+ } catch {
236
+ // ignore
237
+ }
238
+
239
+ return null;
240
+ }
241
+
242
/**
 * Write `<audio>.timeline.json` and `<audio>.srt` sentence-timing files
 * next to the generated audio file.
 *
 * Sentence boundaries come from splitSentences(sourceText). Start/end times
 * are derived one of two ways:
 *  - when `sentenceDurations` is supplied and matches the sentence count,
 *    each measured duration is scaled so the sum matches the real audio
 *    length ("sentence-audio" mode);
 *  - otherwise durations are estimated proportionally from per-sentence
 *    character weights ("estimated" mode).
 * The last entry is always pinned to the total audio duration.
 *
 * @param sourceText        original text the audio was generated from
 * @param audioPath         path to the finished audio file
 * @param sentenceDurations measured per-sentence durations in seconds, or null
 */
function writeTimelineFiles(
  sourceText: string,
  audioPath: string,
  sentenceDurations: number[] | null,
): void {
  const sentences = splitSentences(sourceText);
  if (sentences.length === 0) {
    // No recognizable sentences: treat the whole trimmed text as one entry.
    const stripped = sourceText.trim();
    if (stripped) sentences.push(stripped);
  }
  if (sentences.length === 0) return;

  // Fallback estimate of 2s per sentence when the duration probe fails.
  const durationSeconds = readDurationSeconds(audioPath) || sentences.length * 2;

  const entries: TimelineEntry[] = [];
  let cursor = 0;

  if (sentenceDurations && sentenceDurations.length === sentences.length) {
    // Measured mode: rescale measured durations so their sum matches the
    // actual output duration (concat/encoding can shift totals slightly).
    const totalDuration = sentenceDurations.reduce((a, b) => a + b, 0);
    const scale = totalDuration > 0 ? durationSeconds / totalDuration : 1;

    for (let i = 0; i < sentences.length; i++) {
      // Last sentence is forced to end exactly at the audio's end.
      const end = i === sentences.length - 1
        ? durationSeconds
        : cursor + sentenceDurations[i] * scale;
      entries.push({
        index: i + 1,
        text: sentences[i],
        startSeconds: Math.round(cursor * 1000) / 1000,
        endSeconds: Math.round(Math.max(end, cursor) * 1000) / 1000,
        startMs: Math.round(cursor * 1000),
        endMs: Math.round(Math.max(end, cursor) * 1000),
      });
      // Math.max guards against a negative/zero-length interval.
      cursor = Math.max(end, cursor);
    }
  } else {
    // Estimated mode: distribute total duration by character weight.
    const weights = sentences.map(sentenceWeight);
    const totalWeight = weights.reduce((a, b) => a + b, 0) || sentences.length;

    for (let i = 0; i < sentences.length; i++) {
      const end = i === sentences.length - 1
        ? durationSeconds
        : cursor + (durationSeconds * weights[i] / totalWeight);
      entries.push({
        index: i + 1,
        text: sentences[i],
        startSeconds: Math.round(cursor * 1000) / 1000,
        endSeconds: Math.round(Math.max(end, cursor) * 1000) / 1000,
        startMs: Math.round(cursor * 1000),
        endMs: Math.round(Math.max(end, cursor) * 1000),
      });
      cursor = Math.max(end, cursor);
    }
  }

  // Ensure last entry ends at total duration
  if (entries.length > 0) {
    entries[entries.length - 1].endSeconds = Math.round(durationSeconds * 1000) / 1000;
    entries[entries.length - 1].endMs = Math.round(durationSeconds * 1000);
  }

  // Sibling path without the audio extension, e.g. out.wav -> out.
  const timelineBase = audioPath.replace(/\.[^.]+$/, '');

  // Write JSON timeline
  const jsonPayload = {
    audio_file: path.basename(audioPath),
    audio_path: audioPath,
    audio_duration_seconds: Math.round(durationSeconds * 1000) / 1000,
    timing_mode: sentenceDurations ? 'sentence-audio' : 'estimated',
    generated_at: new Date().toISOString(),
    sentences: entries,
  };
  fs.writeFileSync(`${timelineBase}.timeline.json`, JSON.stringify(jsonPayload, null, 2) + '\n', 'utf-8');

  // Write SRT (cue number, time range, text, blank separator)
  const srtLines: string[] = [];
  for (const entry of entries) {
    srtLines.push(String(entry.index));
    srtLines.push(`${srtTime(entry.startSeconds)} --> ${srtTime(entry.endSeconds)}`);
    srtLines.push(entry.text);
    srtLines.push('');
  }
  fs.writeFileSync(`${timelineBase}.srt`, srtLines.join('\n').trim() + '\n', 'utf-8');
}
326
+
327
+ function buildAutoProsodyText(rawText: string): string {
328
+ return rawText
329
+ .replace(/\n{2,}/g, ' [[slnc 260]] ')
330
+ .replace(/\n/g, ' [[slnc 90]] ')
331
+ .replace(/[,,、::]/g, (m) => `${m} [[slnc 120]] `)
332
+ .replace(/[。.]/g, (m) => `${m} [[slnc 180]] `)
333
+ .replace(/[??]/g, (m) => `${m} [[slnc 190]] `)
334
+ .replace(/[!!]/g, (m) => `${m} [[slnc 150]] `)
335
+ .replace(/[ \t]{2,}/g, ' ');
336
+ }
337
+
338
+ function applySpeechRateToAudio(outputPath: string, speechRate: number): void {
339
+ if (Math.abs(speechRate - 1.0) < 1e-9) return;
340
+
341
+ const tmpPath = `${outputPath}.rate_tmp${path.extname(outputPath)}`;
342
+ try {
343
+ execSync(
344
+ `ffmpeg -hide_banner -loglevel error -y -i "${outputPath}" -filter:a "atempo=${speechRate}" "${tmpPath}"`,
345
+ { stdio: 'ignore', timeout: 120000 },
346
+ );
347
+ if (fs.existsSync(tmpPath) && fs.statSync(tmpPath).size > 0) {
348
+ fs.renameSync(tmpPath, outputPath);
349
+ }
350
+ } catch (err: unknown) {
351
+ if (fs.existsSync(tmpPath)) fs.unlinkSync(tmpPath);
352
+ throw new Error(
353
+ `ffmpeg failed while applying --speech-rate: ${err instanceof Error ? err.message : 'unknown error'}`,
354
+ );
355
+ }
356
+ }
357
+
358
+ function splitTextForTts(text: string, maxChars: number | null): string[] {
359
+ text = text.replace(/\r\n/g, '\n').replace(/\r/g, '\n').trim();
360
+ if (!text) return [];
361
+
362
+ if (!maxChars || text.length <= maxChars) return [text];
363
+
364
+ const chunks: string[] = [];
365
+ const paragraphs = text.split(/\n{2,}/).map((p) => p.trim()).filter(Boolean);
366
+
367
+ for (const paragraph of paragraphs) {
368
+ const sentences = paragraph
369
+ .split(/(?<=[。!?!?;;.!?])/)
370
+ .map((s) => s.trim())
371
+ .filter(Boolean);
372
+
373
+ let current = '';
374
+ for (const sentence of sentences) {
375
+ if (sentence.length > maxChars) {
376
+ if (current) {
377
+ chunks.push(current);
378
+ current = '';
379
+ }
380
+ // split oversized sentence
381
+ for (let i = 0; i < sentence.length; i += maxChars) {
382
+ chunks.push(sentence.slice(i, i + maxChars));
383
+ }
384
+ continue;
385
+ }
386
+
387
+ const candidate = current ? `${current} ${sentence}` : sentence;
388
+ if (candidate.length <= maxChars) {
389
+ current = candidate;
390
+ } else {
391
+ chunks.push(current);
392
+ current = sentence;
393
+ }
394
+ }
395
+ if (current) chunks.push(current);
396
+ }
397
+
398
+ return chunks;
399
+ }
400
+
401
+ function concatAudioFiles(partPaths: string[], outputPath: string): void {
402
+ if (partPaths.length === 0) {
403
+ throw new Error('No chunk audio generated for concatenation.');
404
+ }
405
+ if (partPaths.length === 1) {
406
+ fs.copyFileSync(partPaths[0], outputPath);
407
+ return;
408
+ }
409
+
410
+ // Use ffmpeg concat
411
+ const listContent = partPaths.map((p) => `file '${p.replace(/'/g, "'\\''")}'`).join('\n');
412
+ const listFile = path.join(fs.mkdtempSync('docs-to-voice-'), 'concat.txt');
413
+ fs.mkdirSync(path.dirname(listFile), { recursive: true });
414
+ fs.writeFileSync(listFile, listContent + '\n', 'utf-8');
415
+
416
+ try {
417
+ execSync(
418
+ `ffmpeg -hide_banner -loglevel error -y -f concat -safe 0 -i "${listFile}" -c:a copy "${outputPath}"`,
419
+ { stdio: 'ignore', timeout: 120000 },
420
+ );
421
+ } catch (err: unknown) {
422
+ throw new Error(
423
+ `ffmpeg concat failed: ${err instanceof Error ? err.message : 'unknown error'}`,
424
+ );
425
+ } finally {
426
+ try { fs.unlinkSync(listFile); fs.rmdirSync(path.dirname(listFile)); } catch { /* ignore */ }
427
+ }
428
+ }
429
+
430
+ function downloadBinary(url: string, outputPath: string): Promise<void> {
431
+ return new Promise((resolve, reject) => {
432
+ const protocol = url.startsWith('https') ? https : http;
433
+ protocol.get(url, { timeout: 300000 }, (response) => {
434
+ const chunks: Buffer[] = [];
435
+ response.on('data', (chunk: Buffer) => chunks.push(chunk));
436
+ response.on('end', () => {
437
+ fs.writeFileSync(outputPath, Buffer.concat(chunks));
438
+ resolve();
439
+ });
440
+ response.on('error', reject);
441
+ }).on('error', reject);
442
+ });
443
+ }
444
+
445
+ function requestAlibabaCloudTTS(
446
+ endpoint: string,
447
+ apiKey: string,
448
+ model: string,
449
+ voice: string,
450
+ text: string,
451
+ ): Promise<{ audioUrl?: string; audioData?: string; audioFormat?: string }> {
452
+ return new Promise((resolve, reject) => {
453
+ const payload = JSON.stringify({
454
+ model,
455
+ input: { text, voice },
456
+ });
457
+
458
+ const urlObj = new URL(endpoint);
459
+ const client = urlObj.protocol === 'https:' ? https : http;
460
+
461
+ const options = {
462
+ hostname: urlObj.hostname,
463
+ port: urlObj.port || (urlObj.protocol === 'https:' ? 443 : 80),
464
+ path: urlObj.pathname,
465
+ method: 'POST',
466
+ headers: {
467
+ 'Authorization': `Bearer ${apiKey}`,
468
+ 'Content-Type': 'application/json',
469
+ },
470
+ timeout: 300000,
471
+ };
472
+
473
+ const req = client.request(options, (res) => {
474
+ const chunks: Buffer[] = [];
475
+ res.on('data', (chunk: Buffer) => chunks.push(chunk));
476
+ res.on('end', () => {
477
+ const rawPayload = Buffer.concat(chunks).toString('utf-8');
478
+ try {
479
+ const responseJson = JSON.parse(rawPayload);
480
+ const output = responseJson.output || {};
481
+ const audio = output.audio || {};
482
+ const audioUrl = audio.url || '';
483
+ const audioData = audio.data || '';
484
+ const audioFormat = audio.format || audio.mime_type || '';
485
+
486
+ if (!audioUrl && !audioData) {
487
+ reject(new Error('API response does not contain output.audio.url or output.audio.data'));
488
+ return;
489
+ }
490
+
491
+ resolve({ audioUrl, audioData, audioFormat });
492
+ } catch {
493
+ reject(new Error('API response is not valid JSON.'));
494
+ }
495
+ });
496
+ res.on('error', reject);
497
+ });
498
+
499
+ req.on('error', reject);
500
+ req.on('timeout', () => { req.destroy(); reject(new Error('API request timed out')); });
501
+ req.write(payload);
502
+ req.end();
503
+ });
504
+ }
505
+
506
/**
 * CLI entry point for the docs-to-voice tool.
 *
 * Converts input text (from --text or --input-file) into an audio file under
 * `<projectDir>/audio/<projectName>/`, plus `.timeline.json` and `.srt`
 * sentence-timing sidecars, using one of two engines:
 *  - `say`: the macOS `say` command (optionally chunked and concatenated);
 *  - `api`: per-sentence requests to the Alibaba Cloud TTS API.
 *
 * @param args    raw CLI arguments (after the subcommand name)
 * @param context tool context supplying stdout/stderr streams
 * @returns process exit code: 0 on success, 1 on any error
 */
export async function docsToVoiceHandler(args: string[], context: ToolContext): Promise<number> {
  // Fall back to the process streams when the context does not supply them.
  const stdout = context.stdout || process.stdout;
  const stderr = context.stderr || process.stderr;

  try {
    const opts = parseArgs(args);

    if (opts.help) {
      stdout.write(`Usage: apltk docs-to-voice [options]

Convert text into audio and sentence timelines.

Options:
  --input, --input-file <path>  Path to input text file
  --text <string>               Raw text input
  --project-dir <path>          Root project directory (default: .)
  --project-name <name>         Folder name under DIR/audio/
  --output-name <name>          Output filename
  --engine, --mode <mode>       TTS mode: say (default) | api
  --voice <name>                macOS say voice
  --rate <wpm>                  macOS say rate
  --speech-rate <factor>        Speech rate multiplier (e.g. 1.2)
  --api-endpoint <url>          Alibaba Cloud TTS endpoint
  --api-model <name>            Alibaba Cloud model (default: qwen3-tts)
  --api-voice <name>            Alibaba Cloud voice (default: Cherry)
  --api-key <key>               Alibaba Cloud API key
  --max-chars <n>               Max chars per TTS chunk (0 disables)
  --no-auto-prosody             Disable punctuation pause enhancement
  --force                       Overwrite existing files
`);
      return 0;
    }

    if (opts.mode !== 'say' && opts.mode !== 'api') {
      stderr.write('Error: --mode must be one of: say, api\n');
      return 1;
    }

    const sourceText = readInputText(opts);
    if (!sourceText.trim()) {
      stderr.write('Error: No text content found for conversion.\n');
      return 1;
    }

    // Resolve output directory: <projectDir>/audio/<projectName>.
    // projectName defaults to the basename of the resolved project dir.
    const projectDir = path.resolve(opts.projectDir);
    const projectName = opts.projectName || path.basename(projectDir);
    if (!projectName) {
      stderr.write('Error: Unable to determine project name.\n');
      return 1;
    }

    const outputDir = path.join(projectDir, 'audio', projectName);
    fs.mkdirSync(outputDir, { recursive: true });

    // Filesystem-safe timestamp, e.g. 2024-01-02T03-04-05 (first 19 chars).
    const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19);
    const outputName = opts.outputName || `voice-${timestamp}`;
    // A dot anywhere in the name is treated as "caller supplied an extension".
    const hasExtension = outputName.includes('.');

    if (opts.mode === 'say') {
      // macOS say mode
      const textChunks = splitTextForTts(sourceText, opts.maxChars ? parseInt(opts.maxChars, 10) || null : null);
      if (textChunks.length === 0) {
        stderr.write('Error: No text content found for conversion.\n');
        return 1;
      }

      // Check if `say` is available
      try {
        execSync('which say', { stdio: 'ignore' });
      } catch {
        stderr.write("Error: macOS 'say' command not found.\n");
        return 1;
      }

      const finalOutputName = hasExtension ? outputName : `${outputName}.aiff`;
      const outputPath = path.join(outputDir, finalOutputName);

      if (fs.existsSync(outputPath) && !opts.force) {
        stderr.write(`Error: Output already exists: ${outputPath}. Use --force to overwrite.\n`);
        return 1;
      }

      // Build prosody-enhanced text (pause markers) unless disabled.
      const chunks = opts.noAutoProsody ? textChunks : textChunks.map(buildAutoProsodyText);

      if (chunks.length === 1) {
        // Single say command
        const sayArgs = ['-o', outputPath];
        if (opts.voice) sayArgs.push('-v', opts.voice);
        if (opts.rate) sayArgs.push('-r', opts.rate);

        // Text is passed via a temp file (-f) to avoid shell-escaping issues.
        // NOTE(review): mkdtempSync with a bare prefix creates the temp dir
        // relative to cwd — consider path.join(os.tmpdir(), ...). The
        // follow-up mkdirSync is redundant (mkdtempSync already creates it).
        const tmpFile = path.join(fs.mkdtempSync('docs-to-voice-'), 'input.txt');
        fs.mkdirSync(path.dirname(tmpFile), { recursive: true });
        fs.writeFileSync(tmpFile, chunks[0], 'utf-8');
        sayArgs.push('-f', tmpFile);

        try {
          // NOTE(review): quoting only args containing spaces is fragile for
          // paths with other shell metacharacters; spawn with an argv array
          // would be safer.
          execSync(`say ${sayArgs.map((a) => (a.includes(' ') ? `"${a}"` : a)).join(' ')}`, {
            stdio: 'ignore',
            timeout: 300000,
          });
        } catch (err: unknown) {
          const msg = err instanceof Error ? err.message : 'unknown error';
          throw new Error(`say mode failed: ${msg}`);
        } finally {
          try { fs.unlinkSync(tmpFile); fs.rmdirSync(path.dirname(tmpFile)); } catch { /* ignore */ }
        }
      } else {
        // Multiple chunks: generate a part per chunk, then concat with ffmpeg.
        // NOTE(review): same cwd-relative mkdtempSync caveat as above.
        const tempDir = fs.mkdtempSync('docs-to-voice-say-');
        const partPaths: string[] = [];
        const partExt = path.extname(outputPath) || '.aiff';

        try {
          for (let i = 0; i < chunks.length; i++) {
            // Zero-padded part names keep ffmpeg concat order stable.
            const partPath = path.join(tempDir, `part-${String(i + 1).padStart(4, '0')}${partExt}`);
            const sayArgs = ['-o', partPath];
            if (opts.voice) sayArgs.push('-v', opts.voice);
            if (opts.rate) sayArgs.push('-r', opts.rate);

            const tmpFile = path.join(tempDir, `chunk-${i}.txt`);
            fs.writeFileSync(tmpFile, chunks[i], 'utf-8');
            sayArgs.push('-f', tmpFile);

            execSync(
              `say ${sayArgs.map((a) => (a.includes(' ') ? `"${a}"` : a)).join(' ')}`,
              { stdio: 'ignore', timeout: 300000 },
            );
            partPaths.push(partPath);
          }

          concatAudioFiles(partPaths, outputPath);
        } finally {
          try { fs.rmSync(tempDir, { recursive: true }); } catch { /* ignore */ }
        }
      }

      // Apply speech rate if requested (ffmpeg atempo re-encode in place).
      if (opts.speechRate) {
        const rate = parseFloat(opts.speechRate);
        if (rate > 0) applySpeechRateToAudio(outputPath, rate);
      }

      // Write timeline files (estimated timings — no per-sentence durations
      // are available in say mode).
      writeTimelineFiles(sourceText, outputPath, null);
      stdout.write(`${outputPath}\n`);
    } else {
      // API mode
      const apiKey = opts.apiKey;
      if (!apiKey) {
        stderr.write('Error: --api-key is required for api mode.\n');
        return 1;
      }

      const sentences = splitSentences(sourceText);
      if (sentences.length === 0) {
        stderr.write('Error: No text content found for conversion.\n');
        return 1;
      }

      const maxChars = opts.maxChars ? parseInt(opts.maxChars, 10) || null : null;

      // Build request items from sentences. Each item maps back to its
      // sentence index so per-sentence durations can be accumulated even
      // when an oversized sentence is split across several requests.
      interface RequestItem {
        sentenceIndex: number;
        text: string;
      }
      const requestItems: RequestItem[] = [];
      for (let si = 0; si < sentences.length; si++) {
        const sentence = sentences[si];
        if (maxChars && sentence.length > maxChars) {
          for (let i = 0; i < sentence.length; i += maxChars) {
            requestItems.push({ sentenceIndex: si, text: sentence.slice(i, i + maxChars) });
          }
        } else {
          requestItems.push({ sentenceIndex: si, text: sentence });
        }
      }

      if (requestItems.length === 0) {
        stderr.write('Error: No text content found for conversion.\n');
        return 1;
      }

      // NOTE(review): cwd-relative temp dir again — consider os.tmpdir().
      const tempDir = fs.mkdtempSync('docs-to-voice-api-');
      const partPaths: string[] = [];
      let partExt = '';
      // Per-sentence accumulated durations, and whether each sentence's
      // duration could actually be measured from its audio parts.
      const sentenceDurations = new Array(sentences.length).fill(0);
      const sentenceDurationKnown = new Array(sentences.length).fill(true);

      try {
        // Sequential requests: one TTS call (plus optional download) per item.
        for (let i = 0; i < requestItems.length; i++) {
          const item = requestItems[i];
          const apiResult = await requestAlibabaCloudTTS(
            opts.apiEndpoint,
            apiKey,
            opts.apiModel,
            opts.apiVoice,
            item.text,
          );

          // The first chunk's reported format decides the output extension.
          const currentExt = apiResult.audioFormat || 'wav';
          if (!partExt) partExt = currentExt;

          const partPath = path.join(tempDir, `part-${String(i + 1).padStart(4, '0')}.${currentExt}`);
          if (apiResult.audioUrl) {
            await downloadBinary(apiResult.audioUrl, partPath);
          } else if (apiResult.audioData) {
            fs.writeFileSync(partPath, Buffer.from(apiResult.audioData, 'base64'));
          } else {
            throw new Error('No audio data in API response.');
          }

          if (!fs.existsSync(partPath) || fs.statSync(partPath).size === 0) {
            throw new Error(`Failed to generate audio chunk ${i + 1}.`);
          }
          partPaths.push(partPath);

          // Accumulate measured duration per sentence; mark the sentence as
          // "unknown" if any of its parts could not be measured.
          const partDuration = readDurationSeconds(partPath);
          if (partDuration === null || partDuration <= 0) {
            sentenceDurationKnown[item.sentenceIndex] = false;
          } else {
            sentenceDurations[item.sentenceIndex] += partDuration;
          }
        }

        const finalOutputName = hasExtension
          ? outputName
          : `${outputName}.${partExt || 'wav'}`;
        const outputPath = path.join(outputDir, finalOutputName);

        if (fs.existsSync(outputPath) && !opts.force) {
          stderr.write(`Error: Output already exists: ${outputPath}. Use --force to overwrite.\n`);
          return 1;
        }

        concatAudioFiles(partPaths, outputPath);

        // Build timeline durations: use measured per-sentence durations when
        // all are known; otherwise distribute the unmeasured remainder of
        // the total output duration across unknown sentences by text weight.
        let timelineDurations: number[] | null = null;
        const unknownIndexes = sentenceDurationKnown
          .map((known, idx) => (known ? -1 : idx))
          .filter((idx) => idx >= 0);

        if (unknownIndexes.length === 0 && sentenceDurations.reduce((a, b) => a + b, 0) > 0) {
          timelineDurations = sentenceDurations;
        } else if (unknownIndexes.length > 0) {
          const outputDuration = readDurationSeconds(outputPath);
          const knownTotal = sentenceDurations.reduce(
            (sum, val, idx) => (sentenceDurationKnown[idx] ? sum + val : sum),
            0,
          );

          if (outputDuration && outputDuration > knownTotal) {
            const remaining = outputDuration - knownTotal;
            const unknownWeights = unknownIndexes.map((idx) => sentenceWeight(sentences[idx]));
            const totalUnknownWeight = unknownWeights.reduce((a, b) => a + b, 0);

            if (totalUnknownWeight > 0) {
              for (let wi = 0; wi < unknownIndexes.length; wi++) {
                sentenceDurations[unknownIndexes[wi]] +=
                  remaining * (unknownWeights[wi] / totalUnknownWeight);
              }
              timelineDurations = sentenceDurations;
            }
          }
        }

        // Apply speech rate if requested; measured durations shrink by the
        // same factor so the timeline stays in sync with the faster audio.
        if (opts.speechRate) {
          const rate = parseFloat(opts.speechRate);
          if (rate > 0) {
            applySpeechRateToAudio(outputPath, rate);
            if (timelineDurations) {
              timelineDurations = timelineDurations.map((d) => d / rate);
            }
          }
        }

        writeTimelineFiles(sourceText, outputPath, timelineDurations);
        stdout.write(`${outputPath}\n`);
      } finally {
        try { fs.rmSync(tempDir, { recursive: true }); } catch { /* ignore */ }
      }
    }

    return 0;
  } catch (err: unknown) {
    // All failures funnel here: print one error line and exit non-zero.
    const msg = err instanceof Error ? err.message : 'Unknown error';
    stderr.write(`Error: ${msg}\n`);
    return 1;
  }
}