@storyteller-platform/align 0.1.41 → 0.1.48

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/cli/bin.cjs CHANGED
@@ -359,13 +359,12 @@ async function main() {
359
359
  if (parsed.textRef === "id-fragment") {
360
360
  logger.info("Marking up EPUB...");
361
361
  startProgressBar();
362
- const markedup2 = parsed.markedup ?? (0, import_node_path.join)(os.tmpdir(), `stalign-markedup-${(0, import_node_crypto.randomUUID)()}.epub`);
363
362
  if (!parsed.markedup) {
364
363
  stack.defer(() => {
365
- (0, import_node_fs.rmSync)(markedup2, { recursive: true, force: true });
364
+ (0, import_node_fs.rmSync)(markedup, { recursive: true, force: true });
366
365
  });
367
366
  }
368
- const markupTiming = await (0, import_markup.markup)(input, markedup2, {
367
+ const markupTiming = await (0, import_markup.markup)(input, markedup, {
369
368
  granularity: parsed.granularity,
370
369
  primaryLocale,
371
370
  logger,
@@ -376,12 +375,12 @@ async function main() {
376
375
  }
377
376
  });
378
377
  resetProgressBar();
379
- logger.info(`Markup complete, marked up EPUB saved to ${markedup2}.`);
378
+ logger.info(`Markup complete, marked up EPUB saved to ${markedup}.`);
380
379
  if (parsed.time) {
381
380
  markupTiming.print();
382
381
  }
383
382
  } else {
384
- logger.info("Skipping markup, text-range-type set to text-fragment");
383
+ logger.info("Skipping markup, text-ref set to text-fragment");
385
384
  }
386
385
  logger.info("Aligning EPUB with audiobook...");
387
386
  startProgressBar();
package/dist/cli/bin.js CHANGED
@@ -312,13 +312,12 @@ async function main() {
312
312
  if (parsed.textRef === "id-fragment") {
313
313
  logger.info("Marking up EPUB...");
314
314
  startProgressBar();
315
- const markedup2 = parsed.markedup ?? join(os.tmpdir(), `stalign-markedup-${randomUUID()}.epub`);
316
315
  if (!parsed.markedup) {
317
316
  stack.defer(() => {
318
- rmSync(markedup2, { recursive: true, force: true });
317
+ rmSync(markedup, { recursive: true, force: true });
319
318
  });
320
319
  }
321
- const markupTiming = await markup(input, markedup2, {
320
+ const markupTiming = await markup(input, markedup, {
322
321
  granularity: parsed.granularity,
323
322
  primaryLocale,
324
323
  logger,
@@ -329,12 +328,12 @@ async function main() {
329
328
  }
330
329
  });
331
330
  resetProgressBar();
332
- logger.info(`Markup complete, marked up EPUB saved to ${markedup2}.`);
331
+ logger.info(`Markup complete, marked up EPUB saved to ${markedup}.`);
333
332
  if (parsed.time) {
334
333
  markupTiming.print();
335
334
  }
336
335
  } else {
337
- logger.info("Skipping markup, text-range-type set to text-fragment");
336
+ logger.info("Skipping markup, text-ref set to text-fragment");
338
337
  }
339
338
  logger.info("Aligning EPUB with audiobook...");
340
339
  startProgressBar();
@@ -28,8 +28,11 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
28
28
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
29
  var ffmpeg_exports = {};
30
30
  __export(ffmpeg_exports, {
31
+ MP3_CBR_BITRATES: () => MP3_CBR_BITRATES,
31
32
  getTrackDuration: () => getTrackDuration,
32
33
  getTrackInfo: () => getTrackInfo,
34
+ isVbrMp3: () => isVbrMp3,
35
+ selectCbrBitrate: () => selectCbrBitrate,
33
36
  splitFile: () => splitFile,
34
37
  transcodeFile: () => transcodeFile
35
38
  });
@@ -43,14 +46,11 @@ var import_mime = require("../process/mime.cjs");
43
46
  var import_shell = require("./shell.cjs");
44
47
  const execPromise = (0, import_node_util.promisify)(import_node_child_process.exec);
45
48
  async function execCmd(command, logger, signal) {
46
- let stdout = "";
47
- let stderr = "";
48
49
  try {
49
- ;
50
- ({ stdout, stderr } = await execPromise(command, {
50
+ const { stdout } = await execPromise(command, {
51
51
  maxBuffer: 50 * 1024 * 1024,
52
52
  signal: signal ?? void 0
53
- }));
53
+ });
54
54
  return stdout;
55
55
  } catch (error) {
56
56
  if (error instanceof RangeError && error.message.includes("stdout maxBuffer length exceeded")) {
@@ -58,14 +58,16 @@ async function execCmd(command, logger, signal) {
58
58
  "stdout maxBuffer length exceeded. This likely means that youre trying to process a very large file, and the ffmpeg process is running out of memory. Maybe check the image size of your cover art."
59
59
  );
60
60
  }
61
+ const execErr = error;
61
62
  logger?.error(error);
62
- logger?.info(stdout);
63
- throw new Error(stderr);
63
+ if (execErr.stdout) logger?.info(execErr.stdout);
64
+ const errorDetail = execErr.stderr || execErr.stdout || `Command failed: ${command}`;
65
+ throw new Error(errorDetail);
64
66
  }
65
67
  }
66
68
  const getTrackInfo = (0, import_memoize.default)(async function getTrackInfo2(path, logger) {
67
69
  const stdout = await execCmd(
68
- `ffprobe -i ${(0, import_shell.quotePath)(path)} -show_format -of json`,
70
+ `ffprobe -v error -i ${(0, import_shell.quotePath)(path)} -show_format -of json`,
69
71
  logger
70
72
  );
71
73
  const info = JSON.parse(stdout);
@@ -75,6 +77,52 @@ async function getTrackDuration(path, logger) {
75
77
  const info = await getTrackInfo(path, logger);
76
78
  return info["duration"];
77
79
  }
80
+ const MP3_CBR_BITRATES = [
81
+ 64e3,
82
+ 8e4,
83
+ 96e3,
84
+ 112e3,
85
+ 128e3,
86
+ 16e4,
87
+ 192e3,
88
+ 224e3,
89
+ 256e3,
90
+ 32e4
91
+ ];
92
+ const VBR_PROBE_PACKET_COUNT = 50;
93
+ const MP3_CBR_MAX_DISTINCT_SIZES = 2;
94
+ const VBR_PROBE_MIN_SEEKABLE_SECONDS = 180;
95
+ async function probeAudioDuration(path) {
96
+ const stdout = await execCmd(
97
+ `ffprobe -i ${(0, import_shell.quotePath)(path)} -v error -show_entries format=duration -output_format json`
98
+ );
99
+ const { format } = JSON.parse(stdout);
100
+ const duration = Number(format?.duration);
101
+ return Number.isFinite(duration) && duration > 0 ? duration : null;
102
+ }
103
+ async function probePacketSizes(path, startSeconds) {
104
+ const interval = startSeconds > 0 ? `${startSeconds}%+#${VBR_PROBE_PACKET_COUNT}` : `%+#${VBR_PROBE_PACKET_COUNT}`;
105
+ const stdout = await execCmd(
106
+ `ffprobe -i ${(0, import_shell.quotePath)(path)} -v error -select_streams a:0 -read_intervals "${interval}" -show_entries packet=size -output_format json`
107
+ );
108
+ const { packets } = JSON.parse(stdout);
109
+ return (packets ?? []).map((packet) => Number(packet.size)).filter((size) => Number.isFinite(size) && size > 0);
110
+ }
111
+ async function isVbrMp3(path) {
112
+ if ((0, import_node_path.extname)(path).toLowerCase() !== ".mp3") return false;
113
+ const duration = await probeAudioDuration(path);
114
+ const startSeconds = duration && duration > VBR_PROBE_MIN_SEEKABLE_SECONDS ? Math.floor(duration / 3) : 0;
115
+ let sizes = await probePacketSizes(path, startSeconds);
116
+ if (sizes.length === 0 && startSeconds > 0) {
117
+ sizes = await probePacketSizes(path, 0);
118
+ }
119
+ if (sizes.length === 0) return false;
120
+ const distinctSizes = new Set(sizes).size;
121
+ return distinctSizes > MP3_CBR_MAX_DISTINCT_SIZES;
122
+ }
123
+ function selectCbrBitrate(averageBitrate) {
124
+ return MP3_CBR_BITRATES.find((tier) => tier >= averageBitrate) ?? MP3_CBR_BITRATES.at(-1) ?? MP3_CBR_BITRATES[0];
125
+ }
78
126
  function parseTrackInfo(format) {
79
127
  return {
80
128
  filename: format.filename,
@@ -137,15 +185,16 @@ async function constructExtractCoverArtCommand(source, destExtension) {
137
185
  ];
138
186
  return `${command} ${args.join(" ")} | `;
139
187
  }
140
- function commonFfmpegArguments(sourceExtension, destExtension, codec, bitrate) {
188
+ function commonFfmpegArguments(options) {
189
+ const { sourceExtension, destExtension, codec, bitrate } = options;
141
190
  const args = ["-vn"];
142
191
  if (codec) {
143
- args.push(
144
- "-c:a",
145
- codec,
146
- ...codec === "libopus" ? ["-b:a", bitrate && /^\d+[kK]$/i.test(bitrate) ? bitrate : "32K"] : [],
147
- ...codec === "libmp3lame" && bitrate ? ["-q:a", bitrate] : []
148
- );
192
+ args.push("-c:a", codec);
193
+ if (codec === "libopus") {
194
+ args.push("-b:a", bitrate && /^\d+[kK]$/i.test(bitrate) ? bitrate : "32K");
195
+ } else if (codec === "libmp3lame" && bitrate) {
196
+ args.push("-b:a", bitrate);
197
+ }
149
198
  } else if ((0, import_mime.areSameType)(sourceExtension, destExtension) || destExtension == ".mp4") {
150
199
  args.push("-c:a", "copy");
151
200
  }
@@ -169,12 +218,12 @@ async function splitFile(input, output, start, end, encoding, signal, logger) {
169
218
  end,
170
219
  "-i",
171
220
  (0, import_shell.quotePath)(input),
172
- ...commonFfmpegArguments(
173
- (0, import_node_path.extname)(input),
174
- (0, import_node_path.extname)(output),
175
- encoding?.codec ?? null,
176
- encoding?.bitrate ?? null
177
- ),
221
+ ...commonFfmpegArguments({
222
+ sourceExtension: (0, import_node_path.extname)(input),
223
+ destExtension: (0, import_node_path.extname)(output),
224
+ codec: encoding?.codec ?? null,
225
+ bitrate: encoding?.bitrate ?? null
226
+ }),
178
227
  (0, import_shell.quotePath)(output)
179
228
  ];
180
229
  const coverArtCommand = await constructExtractCoverArtCommand(
@@ -204,12 +253,12 @@ async function transcodeFile(input, output, encoding, signal, logger) {
204
253
  "-nostdin",
205
254
  "-i",
206
255
  (0, import_shell.quotePath)(input),
207
- ...commonFfmpegArguments(
208
- (0, import_node_path.extname)(input),
209
- (0, import_node_path.extname)(output),
210
- encoding?.codec ?? null,
211
- encoding?.bitrate ?? null
212
- ),
256
+ ...commonFfmpegArguments({
257
+ sourceExtension: (0, import_node_path.extname)(input),
258
+ destExtension: (0, import_node_path.extname)(output),
259
+ codec: encoding?.codec ?? null,
260
+ bitrate: encoding?.bitrate ?? null
261
+ }),
213
262
  (0, import_shell.quotePath)(output)
214
263
  ];
215
264
  const coverArtCommand = await constructExtractCoverArtCommand(
@@ -225,8 +274,11 @@ async function transcodeFile(input, output, encoding, signal, logger) {
225
274
  }
226
275
  // Annotate the CommonJS export names for ESM import in node:
227
276
  0 && (module.exports = {
277
+ MP3_CBR_BITRATES,
228
278
  getTrackDuration,
229
279
  getTrackInfo,
280
+ isVbrMp3,
281
+ selectCbrBitrate,
230
282
  splitFile,
231
283
  transcodeFile
232
284
  });
@@ -3,6 +3,22 @@ import { AudioEncoding } from '../process/AudioEncoding.cjs';
3
3
 
4
4
  declare const getTrackInfo: (path: string, logger?: Logger) => Promise<TrackInfo>;
5
5
  declare function getTrackDuration(path: string, logger?: Logger): Promise<number>;
6
+ /**
7
+ * CBR bitrates (bps) offered for MP3 output, roughly matching LAME -V9..-V0
8
+ */
9
+ declare const MP3_CBR_BITRATES: readonly [64000, 80000, 96000, 112000, 128000, 160000, 192000, 224000, 256000, 320000];
10
+ /**
11
+ * Detect whether an MP3 file uses a variable bitrate
12
+ * Does this by sampling the first few packets and checking if the sizes are different
13
+ * CBR MP3 files will have the same packet size for the entire file
14
+ *
15
+ * Can't really trust the reported bitrate to tell CBR from VBR
16
+ * LAME writes a Xing header carrying the *average* bitrate,
17
+ * which ffprobe surfaces as a normal per-stream `bit_rate`,
18
+ * so a VBR file looks identical to a CBR one by that measure.
19
+ */
20
+ declare function isVbrMp3(path: string): Promise<boolean>;
21
+ declare function selectCbrBitrate(averageBitrate: number): number;
6
22
  type TrackInfo = {
7
23
  filename: string;
8
24
  nbStreams: number;
@@ -30,4 +46,4 @@ type TrackInfo = {
30
46
  declare function splitFile(input: string, output: string, start: number, end: number, encoding?: AudioEncoding | null, signal?: AbortSignal | null, logger?: Logger | null): Promise<boolean>;
31
47
  declare function transcodeFile(input: string, output: string, encoding?: AudioEncoding | null, signal?: AbortSignal | null, logger?: Logger | null): Promise<true | undefined>;
32
48
 
33
- export { getTrackDuration, getTrackInfo, splitFile, transcodeFile };
49
+ export { MP3_CBR_BITRATES, getTrackDuration, getTrackInfo, isVbrMp3, selectCbrBitrate, splitFile, transcodeFile };
@@ -3,6 +3,22 @@ import { AudioEncoding } from '../process/AudioEncoding.js';
3
3
 
4
4
  declare const getTrackInfo: (path: string, logger?: Logger) => Promise<TrackInfo>;
5
5
  declare function getTrackDuration(path: string, logger?: Logger): Promise<number>;
6
+ /**
7
+ * CBR bitrates (bps) offered for MP3 output, roughly matching LAME -V9..-V0
8
+ */
9
+ declare const MP3_CBR_BITRATES: readonly [64000, 80000, 96000, 112000, 128000, 160000, 192000, 224000, 256000, 320000];
10
+ /**
11
+ * Detect whether an MP3 file uses a variable bitrate
12
+ * Does this by sampling the first few packets and checking if the sizes are different
13
+ * CBR MP3 files will have the same packet size for the entire file
14
+ *
15
+ * Can't really trust the reported bitrate to tell CBR from VBR
16
+ * LAME writes a Xing header carrying the *average* bitrate,
17
+ * which ffprobe surfaces as a normal per-stream `bit_rate`,
18
+ * so a VBR file looks identical to a CBR one by that measure.
19
+ */
20
+ declare function isVbrMp3(path: string): Promise<boolean>;
21
+ declare function selectCbrBitrate(averageBitrate: number): number;
6
22
  type TrackInfo = {
7
23
  filename: string;
8
24
  nbStreams: number;
@@ -30,4 +46,4 @@ type TrackInfo = {
30
46
  declare function splitFile(input: string, output: string, start: number, end: number, encoding?: AudioEncoding | null, signal?: AbortSignal | null, logger?: Logger | null): Promise<boolean>;
31
47
  declare function transcodeFile(input: string, output: string, encoding?: AudioEncoding | null, signal?: AbortSignal | null, logger?: Logger | null): Promise<true | undefined>;
32
48
 
33
- export { getTrackDuration, getTrackInfo, splitFile, transcodeFile };
49
+ export { MP3_CBR_BITRATES, getTrackDuration, getTrackInfo, isVbrMp3, selectCbrBitrate, splitFile, transcodeFile };
@@ -8,14 +8,11 @@ import { areSameType } from "../process/mime.js";
8
8
  import { quotePath } from "./shell.js";
9
9
  const execPromise = promisify(exec);
10
10
  async function execCmd(command, logger, signal) {
11
- let stdout = "";
12
- let stderr = "";
13
11
  try {
14
- ;
15
- ({ stdout, stderr } = await execPromise(command, {
12
+ const { stdout } = await execPromise(command, {
16
13
  maxBuffer: 50 * 1024 * 1024,
17
14
  signal: signal ?? void 0
18
- }));
15
+ });
19
16
  return stdout;
20
17
  } catch (error) {
21
18
  if (error instanceof RangeError && error.message.includes("stdout maxBuffer length exceeded")) {
@@ -23,14 +20,16 @@ async function execCmd(command, logger, signal) {
23
20
  "stdout maxBuffer length exceeded. This likely means that youre trying to process a very large file, and the ffmpeg process is running out of memory. Maybe check the image size of your cover art."
24
21
  );
25
22
  }
23
+ const execErr = error;
26
24
  logger?.error(error);
27
- logger?.info(stdout);
28
- throw new Error(stderr);
25
+ if (execErr.stdout) logger?.info(execErr.stdout);
26
+ const errorDetail = execErr.stderr || execErr.stdout || `Command failed: ${command}`;
27
+ throw new Error(errorDetail);
29
28
  }
30
29
  }
31
30
  const getTrackInfo = memoize(async function getTrackInfo2(path, logger) {
32
31
  const stdout = await execCmd(
33
- `ffprobe -i ${quotePath(path)} -show_format -of json`,
32
+ `ffprobe -v error -i ${quotePath(path)} -show_format -of json`,
34
33
  logger
35
34
  );
36
35
  const info = JSON.parse(stdout);
@@ -40,6 +39,52 @@ async function getTrackDuration(path, logger) {
40
39
  const info = await getTrackInfo(path, logger);
41
40
  return info["duration"];
42
41
  }
42
+ const MP3_CBR_BITRATES = [
43
+ 64e3,
44
+ 8e4,
45
+ 96e3,
46
+ 112e3,
47
+ 128e3,
48
+ 16e4,
49
+ 192e3,
50
+ 224e3,
51
+ 256e3,
52
+ 32e4
53
+ ];
54
+ const VBR_PROBE_PACKET_COUNT = 50;
55
+ const MP3_CBR_MAX_DISTINCT_SIZES = 2;
56
+ const VBR_PROBE_MIN_SEEKABLE_SECONDS = 180;
57
+ async function probeAudioDuration(path) {
58
+ const stdout = await execCmd(
59
+ `ffprobe -i ${quotePath(path)} -v error -show_entries format=duration -output_format json`
60
+ );
61
+ const { format } = JSON.parse(stdout);
62
+ const duration = Number(format?.duration);
63
+ return Number.isFinite(duration) && duration > 0 ? duration : null;
64
+ }
65
+ async function probePacketSizes(path, startSeconds) {
66
+ const interval = startSeconds > 0 ? `${startSeconds}%+#${VBR_PROBE_PACKET_COUNT}` : `%+#${VBR_PROBE_PACKET_COUNT}`;
67
+ const stdout = await execCmd(
68
+ `ffprobe -i ${quotePath(path)} -v error -select_streams a:0 -read_intervals "${interval}" -show_entries packet=size -output_format json`
69
+ );
70
+ const { packets } = JSON.parse(stdout);
71
+ return (packets ?? []).map((packet) => Number(packet.size)).filter((size) => Number.isFinite(size) && size > 0);
72
+ }
73
+ async function isVbrMp3(path) {
74
+ if (extname(path).toLowerCase() !== ".mp3") return false;
75
+ const duration = await probeAudioDuration(path);
76
+ const startSeconds = duration && duration > VBR_PROBE_MIN_SEEKABLE_SECONDS ? Math.floor(duration / 3) : 0;
77
+ let sizes = await probePacketSizes(path, startSeconds);
78
+ if (sizes.length === 0 && startSeconds > 0) {
79
+ sizes = await probePacketSizes(path, 0);
80
+ }
81
+ if (sizes.length === 0) return false;
82
+ const distinctSizes = new Set(sizes).size;
83
+ return distinctSizes > MP3_CBR_MAX_DISTINCT_SIZES;
84
+ }
85
+ function selectCbrBitrate(averageBitrate) {
86
+ return MP3_CBR_BITRATES.find((tier) => tier >= averageBitrate) ?? MP3_CBR_BITRATES.at(-1) ?? MP3_CBR_BITRATES[0];
87
+ }
43
88
  function parseTrackInfo(format) {
44
89
  return {
45
90
  filename: format.filename,
@@ -102,15 +147,16 @@ async function constructExtractCoverArtCommand(source, destExtension) {
102
147
  ];
103
148
  return `${command} ${args.join(" ")} | `;
104
149
  }
105
- function commonFfmpegArguments(sourceExtension, destExtension, codec, bitrate) {
150
+ function commonFfmpegArguments(options) {
151
+ const { sourceExtension, destExtension, codec, bitrate } = options;
106
152
  const args = ["-vn"];
107
153
  if (codec) {
108
- args.push(
109
- "-c:a",
110
- codec,
111
- ...codec === "libopus" ? ["-b:a", bitrate && /^\d+[kK]$/i.test(bitrate) ? bitrate : "32K"] : [],
112
- ...codec === "libmp3lame" && bitrate ? ["-q:a", bitrate] : []
113
- );
154
+ args.push("-c:a", codec);
155
+ if (codec === "libopus") {
156
+ args.push("-b:a", bitrate && /^\d+[kK]$/i.test(bitrate) ? bitrate : "32K");
157
+ } else if (codec === "libmp3lame" && bitrate) {
158
+ args.push("-b:a", bitrate);
159
+ }
114
160
  } else if (areSameType(sourceExtension, destExtension) || destExtension == ".mp4") {
115
161
  args.push("-c:a", "copy");
116
162
  }
@@ -134,12 +180,12 @@ async function splitFile(input, output, start, end, encoding, signal, logger) {
134
180
  end,
135
181
  "-i",
136
182
  quotePath(input),
137
- ...commonFfmpegArguments(
138
- extname(input),
139
- extname(output),
140
- encoding?.codec ?? null,
141
- encoding?.bitrate ?? null
142
- ),
183
+ ...commonFfmpegArguments({
184
+ sourceExtension: extname(input),
185
+ destExtension: extname(output),
186
+ codec: encoding?.codec ?? null,
187
+ bitrate: encoding?.bitrate ?? null
188
+ }),
143
189
  quotePath(output)
144
190
  ];
145
191
  const coverArtCommand = await constructExtractCoverArtCommand(
@@ -169,12 +215,12 @@ async function transcodeFile(input, output, encoding, signal, logger) {
169
215
  "-nostdin",
170
216
  "-i",
171
217
  quotePath(input),
172
- ...commonFfmpegArguments(
173
- extname(input),
174
- extname(output),
175
- encoding?.codec ?? null,
176
- encoding?.bitrate ?? null
177
- ),
218
+ ...commonFfmpegArguments({
219
+ sourceExtension: extname(input),
220
+ destExtension: extname(output),
221
+ codec: encoding?.codec ?? null,
222
+ bitrate: encoding?.bitrate ?? null
223
+ }),
178
224
  quotePath(output)
179
225
  ];
180
226
  const coverArtCommand = await constructExtractCoverArtCommand(
@@ -189,8 +235,11 @@ async function transcodeFile(input, output, encoding, signal, logger) {
189
235
  return true;
190
236
  }
191
237
  export {
238
+ MP3_CBR_BITRATES,
192
239
  getTrackDuration,
193
240
  getTrackInfo,
241
+ isVbrMp3,
242
+ selectCbrBitrate,
194
243
  splitFile,
195
244
  transcodeFile
196
245
  };
@@ -126,6 +126,27 @@ async function processAudiobook(input, output, options) {
126
126
  );
127
127
  return timing;
128
128
  }
129
+ async function resolveVbrEncoding(filepath, userEncoding, logger) {
130
+ if (userEncoding?.codec && userEncoding.codec !== "libmp3lame") {
131
+ return userEncoding;
132
+ }
133
+ const sourceIsMp3 = (0, import_node_path.extname)(filepath).toLowerCase() === ".mp3";
134
+ if (!userEncoding?.codec && !sourceIsMp3) {
135
+ return userEncoding;
136
+ }
137
+ if (!userEncoding?.codec && !await (0, import_ffmpeg.isVbrMp3)(filepath)) {
138
+ return userEncoding;
139
+ }
140
+ const trackInfo = await (0, import_ffmpeg.getTrackInfo)(filepath, logger ?? void 0);
141
+ const targetBitrate = (0, import_ffmpeg.selectCbrBitrate)(trackInfo.bitRate);
142
+ logger?.info(
143
+ `Forcing CBR MP3 for ${filepath} (avg ${trackInfo.bitRate}bps) at ${targetBitrate / 1e3}k`
144
+ );
145
+ return {
146
+ codec: "libmp3lame",
147
+ bitrate: `${targetBitrate / 1e3}k`
148
+ };
149
+ }
129
150
  async function processFile(input, output, prefix, options) {
130
151
  var _stack = [];
131
152
  try {
@@ -144,13 +165,26 @@ async function processFile(input, output, prefix, options) {
144
165
  options.signal,
145
166
  options.logger
146
167
  );
168
+ const vbrEncodings = /* @__PURE__ */ new Map();
169
+ const uniqueFilepaths = [...new Set(ranges.map((r) => r.filepath))];
170
+ await Promise.all(
171
+ uniqueFilepaths.map(async (filepath) => {
172
+ const result = await resolveVbrEncoding(
173
+ filepath,
174
+ options.encoding,
175
+ options.logger
176
+ );
177
+ vbrEncodings.set(filepath, result);
178
+ })
179
+ );
147
180
  await Promise.all(
148
181
  ranges.map(async (range, index) => {
149
182
  var _stack2 = [];
150
183
  try {
184
+ const effectiveEncoding = vbrEncodings.has(range.filepath) ? vbrEncodings.get(range.filepath) : options.encoding;
151
185
  const outputExtension = determineExtension(
152
186
  range.filepath,
153
- options.encoding?.codec
187
+ effectiveEncoding?.codec
154
188
  );
155
189
  const outputFilename = `${prefix}${(index + 1).toString().padStart(5, "0")}${outputExtension}`;
156
190
  const outputFilepath = (0, import_node_path.join)(output, outputFilename);
@@ -168,7 +202,7 @@ async function processFile(input, output, prefix, options) {
168
202
  outputFilepath,
169
203
  range.start,
170
204
  range.end,
171
- options.encoding,
205
+ effectiveEncoding,
172
206
  options.signal,
173
207
  options.logger
174
208
  );
@@ -19,7 +19,12 @@ import {
19
19
  createAggregator,
20
20
  createTiming
21
21
  } from "@storyteller-platform/ghost-story";
22
- import { splitFile } from "../common/ffmpeg.js";
22
+ import {
23
+ getTrackInfo,
24
+ isVbrMp3,
25
+ selectCbrBitrate,
26
+ splitFile
27
+ } from "../common/ffmpeg.js";
23
28
  import { getSafeChapterRanges } from "./ranges.js";
24
29
  async function processAudiobook(input, output, options) {
25
30
  const timing = createAggregator();
@@ -73,6 +78,27 @@ async function processAudiobook(input, output, options) {
73
78
  );
74
79
  return timing;
75
80
  }
81
+ async function resolveVbrEncoding(filepath, userEncoding, logger) {
82
+ if (userEncoding?.codec && userEncoding.codec !== "libmp3lame") {
83
+ return userEncoding;
84
+ }
85
+ const sourceIsMp3 = extname(filepath).toLowerCase() === ".mp3";
86
+ if (!userEncoding?.codec && !sourceIsMp3) {
87
+ return userEncoding;
88
+ }
89
+ if (!userEncoding?.codec && !await isVbrMp3(filepath)) {
90
+ return userEncoding;
91
+ }
92
+ const trackInfo = await getTrackInfo(filepath, logger ?? void 0);
93
+ const targetBitrate = selectCbrBitrate(trackInfo.bitRate);
94
+ logger?.info(
95
+ `Forcing CBR MP3 for ${filepath} (avg ${trackInfo.bitRate}bps) at ${targetBitrate / 1e3}k`
96
+ );
97
+ return {
98
+ codec: "libmp3lame",
99
+ bitrate: `${targetBitrate / 1e3}k`
100
+ };
101
+ }
76
102
  async function processFile(input, output, prefix, options) {
77
103
  var _stack = [];
78
104
  try {
@@ -91,13 +117,26 @@ async function processFile(input, output, prefix, options) {
91
117
  options.signal,
92
118
  options.logger
93
119
  );
120
+ const vbrEncodings = /* @__PURE__ */ new Map();
121
+ const uniqueFilepaths = [...new Set(ranges.map((r) => r.filepath))];
122
+ await Promise.all(
123
+ uniqueFilepaths.map(async (filepath) => {
124
+ const result = await resolveVbrEncoding(
125
+ filepath,
126
+ options.encoding,
127
+ options.logger
128
+ );
129
+ vbrEncodings.set(filepath, result);
130
+ })
131
+ );
94
132
  await Promise.all(
95
133
  ranges.map(async (range, index) => {
96
134
  var _stack2 = [];
97
135
  try {
136
+ const effectiveEncoding = vbrEncodings.has(range.filepath) ? vbrEncodings.get(range.filepath) : options.encoding;
98
137
  const outputExtension = determineExtension(
99
138
  range.filepath,
100
- options.encoding?.codec
139
+ effectiveEncoding?.codec
101
140
  );
102
141
  const outputFilename = `${prefix}${(index + 1).toString().padStart(5, "0")}${outputExtension}`;
103
142
  const outputFilepath = join(output, outputFilename);
@@ -115,7 +154,7 @@ async function processFile(input, output, prefix, options) {
115
154
  outputFilepath,
116
155
  range.start,
117
156
  range.end,
118
- options.encoding,
157
+ effectiveEncoding,
119
158
  options.signal,
120
159
  options.logger
121
160
  );
@@ -88,10 +88,19 @@ async function generateReadiumManifest(epub, options = {}) {
88
88
  const dir = await epub.getBaseDirection();
89
89
  const epubMetadata = await epub.getMetadata();
90
90
  const vocab = await epub.getPackageVocabularyPrefixes();
91
- const duration = epubMetadata.find(
92
- ({ properties }) => properties["property"] === "media:duration"
93
- )?.value;
94
- const durationMs = duration !== void 0 ? (0, import_smil_clockvalue.default)(duration) : void 0;
91
+ let duration = void 0;
92
+ const refinesDurationMap = /* @__PURE__ */ new Map();
93
+ for (const dur of epubMetadata) {
94
+ if (dur.properties["property"] !== "media:duration") continue;
95
+ if (!dur.properties["refines"]) {
96
+ duration = dur.value ? (0, import_smil_clockvalue.default)(dur.value) / 1e3 : void 0;
97
+ continue;
98
+ }
99
+ const value = dur.value ? (0, import_smil_clockvalue.default)(dur.value) / 1e3 : void 0;
100
+ if (value) {
101
+ refinesDurationMap.set(dur.properties["refines"], value);
102
+ }
103
+ }
95
104
  const otherMetadata = epubMetadata.filter(
96
105
  (meta) => (meta.properties["property"]?.split(":")[0] ?? "") in vocab
97
106
  ).map((meta) => {
@@ -130,8 +139,8 @@ async function generateReadiumManifest(epub, options = {}) {
130
139
  ...dir !== "auto" && {
131
140
  readingProgression: dir === "ltr" ? import_shared.ReadingProgression.ltr : import_shared.ReadingProgression.rtl
132
141
  },
133
- // TODO: is this meant to be in milliseconds (as here) or seconds?
134
- ...durationMs !== void 0 && { duration: durationMs },
142
+ // it's seconds
143
+ ...duration !== void 0 && { duration },
135
144
  ...numberOfPages !== void 0 && { numberOfPages },
136
145
  otherMetadata: Object.fromEntries(otherMetadata)
137
146
  });
@@ -184,16 +193,13 @@ async function generateReadiumManifest(epub, options = {}) {
184
193
  if (!item.mediaOverlay) return link;
185
194
  const mediaOverlayItem = epubManifest[item.id];
186
195
  if (!mediaOverlayItem) return link;
187
- const refinedBy = epubMetadata.find(
188
- ({ properties }) => properties["property"] === "media:duration" && properties["refines"] === `#${mediaOverlayItem.id}`
189
- );
190
- if (!refinedBy?.value) return link;
191
- const itemDuration = (0, import_smil_clockvalue.default)(refinedBy.value);
196
+ const duration2 = refinesDurationMap.get(`#${item.mediaOverlay}`) || refinesDurationMap.get(`#${mediaOverlayItem.id}`);
197
+ if (!duration2) return link;
192
198
  return new import_shared.Link({
193
199
  href: link.href,
194
200
  type: link.mediaType.string,
195
201
  ...link.properties && { properties: link.properties },
196
- duration: itemDuration
202
+ duration: duration2
197
203
  });
198
204
  })
199
205
  );
@@ -71,10 +71,19 @@ async function generateReadiumManifest(epub, options = {}) {
71
71
  const dir = await epub.getBaseDirection();
72
72
  const epubMetadata = await epub.getMetadata();
73
73
  const vocab = await epub.getPackageVocabularyPrefixes();
74
- const duration = epubMetadata.find(
75
- ({ properties }) => properties["property"] === "media:duration"
76
- )?.value;
77
- const durationMs = duration !== void 0 ? clockvalue(duration) : void 0;
74
+ let duration = void 0;
75
+ const refinesDurationMap = /* @__PURE__ */ new Map();
76
+ for (const dur of epubMetadata) {
77
+ if (dur.properties["property"] !== "media:duration") continue;
78
+ if (!dur.properties["refines"]) {
79
+ duration = dur.value ? clockvalue(dur.value) / 1e3 : void 0;
80
+ continue;
81
+ }
82
+ const value = dur.value ? clockvalue(dur.value) / 1e3 : void 0;
83
+ if (value) {
84
+ refinesDurationMap.set(dur.properties["refines"], value);
85
+ }
86
+ }
78
87
  const otherMetadata = epubMetadata.filter(
79
88
  (meta) => (meta.properties["property"]?.split(":")[0] ?? "") in vocab
80
89
  ).map((meta) => {
@@ -113,8 +122,8 @@ async function generateReadiumManifest(epub, options = {}) {
113
122
  ...dir !== "auto" && {
114
123
  readingProgression: dir === "ltr" ? ReadingProgression.ltr : ReadingProgression.rtl
115
124
  },
116
- // TODO: is this meant to be in milliseconds (as here) or seconds?
117
- ...durationMs !== void 0 && { duration: durationMs },
125
+ // it's seconds
126
+ ...duration !== void 0 && { duration },
118
127
  ...numberOfPages !== void 0 && { numberOfPages },
119
128
  otherMetadata: Object.fromEntries(otherMetadata)
120
129
  });
@@ -167,16 +176,13 @@ async function generateReadiumManifest(epub, options = {}) {
167
176
  if (!item.mediaOverlay) return link;
168
177
  const mediaOverlayItem = epubManifest[item.id];
169
178
  if (!mediaOverlayItem) return link;
170
- const refinedBy = epubMetadata.find(
171
- ({ properties }) => properties["property"] === "media:duration" && properties["refines"] === `#${mediaOverlayItem.id}`
172
- );
173
- if (!refinedBy?.value) return link;
174
- const itemDuration = clockvalue(refinedBy.value);
179
+ const duration2 = refinesDurationMap.get(`#${item.mediaOverlay}`) || refinesDurationMap.get(`#${mediaOverlayItem.id}`);
180
+ if (!duration2) return link;
175
181
  return new Link({
176
182
  href: link.href,
177
183
  type: link.mediaType.string,
178
184
  ...link.properties && { properties: link.properties },
179
- duration: itemDuration
185
+ duration: duration2
180
186
  });
181
187
  })
182
188
  );
@@ -101,8 +101,9 @@ async function transcribe(input, output, locale, options) {
101
101
  const engine = options.engine ?? "whisper.cpp";
102
102
  const model = options.model ?? "tiny.en";
103
103
  if (engine === "whisper.cpp") {
104
+ const resolvedModel = getWhisperCppModelId(locale.language, model);
104
105
  await (0, import_ghost_story.ensureWhisperInstalled)({
105
- model,
106
+ model: resolvedModel,
106
107
  printOutput: ["debug", "info"].includes(
107
108
  options.logger?.level ?? "silent"
108
109
  ),
@@ -205,8 +206,12 @@ async function transcribeFile(input, locale, options) {
205
206
  const fallbackVariant = getCpuOverrideVariant(
206
207
  options.whisperCpuOverride ?? null
207
208
  );
209
+ const resolvedModel = getWhisperCppModelId(
210
+ sharedOptions.language,
211
+ options.model
212
+ );
208
213
  const whisperOptions = await (0, import_ghost_story.ensureWhisperInstalled)({
209
- model: options.model,
214
+ model: resolvedModel,
210
215
  variant: fallbackVariant,
211
216
  printOutput: ["debug", "info"].includes(
212
217
  options.logger?.level ?? "silent"
@@ -218,7 +223,7 @@ async function transcribeFile(input, locale, options) {
218
223
  engine: options.engine,
219
224
  options: {
220
225
  flashAttention: true,
221
- model: getWhisperCppModelId(sharedOptions.language, options.model),
226
+ model: resolvedModel,
222
227
  processors: options.processors,
223
228
  threads: options.threads,
224
229
  onProgress: (progress) => {
@@ -32,8 +32,9 @@ async function transcribe(input, output, locale, options) {
32
32
  const engine = options.engine ?? "whisper.cpp";
33
33
  const model = options.model ?? "tiny.en";
34
34
  if (engine === "whisper.cpp") {
35
+ const resolvedModel = getWhisperCppModelId(locale.language, model);
35
36
  await ensureWhisperInstalled({
36
- model,
37
+ model: resolvedModel,
37
38
  printOutput: ["debug", "info"].includes(
38
39
  options.logger?.level ?? "silent"
39
40
  ),
@@ -136,8 +137,12 @@ async function transcribeFile(input, locale, options) {
136
137
  const fallbackVariant = getCpuOverrideVariant(
137
138
  options.whisperCpuOverride ?? null
138
139
  );
140
+ const resolvedModel = getWhisperCppModelId(
141
+ sharedOptions.language,
142
+ options.model
143
+ );
139
144
  const whisperOptions = await ensureWhisperInstalled({
140
- model: options.model,
145
+ model: resolvedModel,
141
146
  variant: fallbackVariant,
142
147
  printOutput: ["debug", "info"].includes(
143
148
  options.logger?.level ?? "silent"
@@ -149,7 +154,7 @@ async function transcribeFile(input, locale, options) {
149
154
  engine: options.engine,
150
155
  options: {
151
156
  flashAttention: true,
152
- model: getWhisperCppModelId(sharedOptions.language, options.model),
157
+ model: resolvedModel,
153
158
  processors: options.processors,
154
159
  threads: options.threads,
155
160
  onProgress: (progress) => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@storyteller-platform/align",
3
- "version": "0.1.41",
3
+ "version": "0.1.48",
4
4
  "description": "A library and CLI for automatically aligning audiobooks and EPUBs to produce Media Overlays",
5
5
  "author": "Shane Friedman",
6
6
  "license": "MIT",
@@ -70,8 +70,8 @@
70
70
  "@optique/core": "^0.10.7",
71
71
  "@optique/run": "^0.10.7",
72
72
  "@readium/shared": "^2.2.0",
73
- "@storyteller-platform/audiobook": "^0.4.0",
74
- "@storyteller-platform/epub": "^0.6.0",
73
+ "@storyteller-platform/audiobook": "^0.4.1",
74
+ "@storyteller-platform/epub": "^0.6.2",
75
75
  "@storyteller-platform/ghost-story": "^0.1.11",
76
76
  "@storyteller-platform/transliteration": "^3.1.2",
77
77
  "chalk": "^5.4.1",