@storyteller-platform/align 0.1.21 → 0.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,124 @@
1
+ import "../chunk-BIEQXUOY.js";
2
+ import { enumerate, min } from "itertools";
3
+ import { runes } from "runes2";
4
/**
 * Trie over every contiguous run of runes in a list of text spans, used to
 * build a text fragment directive string (`:~:text=[prefix-,]start[,end]`)
 * that points at one span.
 *
 * Spans are lower-cased once in the constructor. Every node stores the
 * `{ span, pos }` pairs at which its rune sequence ends; `pos` is a rune index
 * (an index into `runes(span)`), NOT a UTF-16 offset.
 */
class TextFragmentTrie {
  /** Root of the trie: no parent, empty value, no indices. */
  root = new Node(null, "");
  /** The input spans after `toLocaleLowerCase(locale)`, in input order. */
  spans;

  /**
   * Inserts every substring (rune by rune) of every span into the trie.
   *
   * @param casedSpans Text spans. `spans[i - 1]` is treated as the text that
   *   precedes `spans[i]` when a prefix is needed.
   * @param locale Locale used for lower-casing; defaults to `en-Latn-US`.
   */
  constructor(casedSpans, locale = new Intl.Locale("en-Latn-US")) {
    this.spans = casedSpans.map((span) => span.toLocaleLowerCase(locale));
    for (const [i, span] of enumerate(this.spans)) {
      // One entry per start position seen so far; entry k is the node reached
      // by the substring that started at rune k and ends at the current rune.
      const parents = [this.root];
      for (const [j, char] of enumerate(runes(span))) {
        for (const [k, parent] of enumerate(parents)) {
          const newNode = new Node(parent, char, { span: i, pos: j });
          let node = parent.children.find((child) => child.eq(newNode));
          if (!node) {
            node = newNode;
            parent.children.push(node);
          } else {
            node.indices.push({ span: i, pos: j });
          }
          parents[k] = node;
        }
        // The next rune also starts a brand-new substring at the root.
        parents.push(this.root);
      }
    }
  }

  /**
   * Builds a fragment directive for the span at `spanIndex`.
   *
   * Walks down from the root, at each level following the child that occurs
   * in the span with the smallest recorded `pos`. As soon as a node with
   * exactly one recorded occurrence is reached, that node is used as the
   * start text. If no such node exists, the deepest node reached is used and
   * a disambiguating prefix is looked up from the preceding span.
   *
   * @param spanIndex Index into the spans passed to the constructor.
   * @returns A string beginning with `:~:text=`.
   */
  findMinimalFragment(spanIndex) {
    let node = this.root;
    while (node.children.length) {
      const candidates = node.children.filter(
        (child2) => child2.indices.some(
          ({ span: childSpanIndex }) => childSpanIndex === spanIndex
        )
      );
      const child = min(
        candidates,
        // Every candidate has an index for spanIndex (see filter above).
        // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
        (c) => c.indices.find((i) => i.span === spanIndex).pos
      );
      if (!child) {
        // The span's text is exhausted without becoming unique.
        return this.nodeToFragment(node, spanIndex, true);
      }
      if (child.indices.length === 1) {
        return this.nodeToFragment(child, spanIndex);
      }
      node = child;
    }
    return this.nodeToFragment(node, spanIndex, true);
  }

  /**
   * Serialises `node` (a path from the root) into a fragment directive for
   * the span at `spanIndex`.
   *
   * @param node Trie node whose root-to-node path is the start text.
   * @param spanIndex Index of the span the fragment should identify.
   * @param findPrefix When truthy, compute a `prefix-,` part from the end of
   *   the preceding span that tells this occurrence apart from the
   *   occurrences of the same text in other spans.
   * @returns `:~:text=` followed by the encoded prefix (optional), start and
   *   end (optional) parts.
   */
  nodeToFragment(node, spanIndex, findPrefix) {
    const span = this.spans[spanIndex];
    let fragment = ":~:text=";
    let prefix = "";
    if (findPrefix) {
      const prev = this.spans[spanIndex - 1];
      if (prev) {
        // For every occurrence of the same text in another span, collect the
        // text that precedes it (previous span + head of that span).
        const prefixes = node.indices.filter(({ span: s }) => s !== spanIndex).map(({ span: spanIndex2, pos }) => {
          // `pos` is the rune index of the occurrence's LAST rune. Step back
          // one rune per trie level to reach the index of its FIRST rune.
          // Counting runes (not UTF-16 lengths) keeps this correct for runes
          // longer than one code unit.
          let startNode2 = node;
          let startRune = pos;
          while (startNode2.parent && startNode2.parent !== this.root) {
            startRune -= 1;
            startNode2 = startNode2.parent;
          }
          const prev2 = this.spans[spanIndex2 - 1];
          const span2 = this.spans[spanIndex2];
          return (prev2 ?? "") + runes(span2).slice(0, startRune).join("");
        });
        const reversedPrefixes = prefixes.map((p) => runes(p).toReversed());
        // Grow the prefix backwards from the end of the preceding span until
        // no competing occurrence is preceded by the same text.
        for (const [i2, char] of enumerate(runes(prev).toReversed())) {
          prefix = char + prefix;
          for (const [j, p] of enumerate([...reversedPrefixes.toReversed()])) {
            if (p[i2] !== char) {
              reversedPrefixes.splice(reversedPrefixes.length - 1 - j, 1);
            }
          }
          if (reversedPrefixes.length === 0) {
            break;
          }
        }
      }
    }
    if (prefix) {
      fragment += `${encodeTextFragmentPart(prefix)}-,`;
    }
    // Concatenate the node values along the path root -> node.
    let startNode = node;
    let start = "";
    while (startNode) {
      start = startNode.value + start;
      startNode = startNode.parent;
    }
    fragment += encodeTextFragmentPart(start);
    // NOTE(review): `start` already contains `node.value`, so this skips one
    // additional rune-length after the start text — confirm this is intended.
    const remainingSentence = span.slice(start.length + node.value.length);
    // Shortest suffix of the remaining text whose first occurrence in that
    // text is the suffix itself.
    let end = "";
    let i = remainingSentence.length - 1;
    while (remainingSentence.indexOf(end) !== i + 1 && i >= node.value.length) {
      end = remainingSentence.slice(i);
      i--;
    }
    // The loop above works in UTF-16 code units and can stop in the middle of
    // a surrogate pair, leaving `end` starting with a lone low surrogate,
    // which makes encodeURIComponent throw URIError. Pull in the matching
    // high surrogate; the longer suffix is still unique in the remaining text.
    const endFirstUnit = end.charCodeAt(0);
    const unitBeforeEnd = remainingSentence.charCodeAt(i);
    if (
      endFirstUnit >= 0xdc00 && endFirstUnit <= 0xdfff &&
      unitBeforeEnd >= 0xd800 && unitBeforeEnd <= 0xdbff
    ) {
      end = remainingSentence.slice(i);
    }
    if (end) {
      fragment += `,${encodeTextFragmentPart(end)}`;
    }
    return fragment;
  }
}
105
/**
 * Percent-encodes one part (prefix, start or end) of a `:~:text=` directive.
 *
 * On top of `encodeURIComponent`, `-` is escaped as `%2d` because the
 * directive syntax uses `-` and `,` as separators. `encodeURIComponent`
 * already escapes `,` (as `%2C`) and `&`, so only `-` needs extra handling.
 *
 * Unpaired surrogates are replaced with U+FFFD first, because
 * `encodeURIComponent` throws `URIError` on them.
 *
 * @param {string} part Raw text of the directive part.
 * @returns {string} The encoded text, safe to embed in the directive.
 */
function encodeTextFragmentPart(part) {
  const wellFormed = part.replace(
    /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g,
    "\uFFFD"
  );
  return encodeURIComponent(wellFormed).replace(/-/g, "%2d");
}
108
/**
 * A single trie node: one value, a link to its parent, its child nodes and
 * the list of index records attached to it.
 */
class Node {
  /** Child nodes, in insertion order. */
  children = [];
  /** Index records pushed onto this node. */
  indices = [];
  /** Parent node, or `null` for a root. */
  parent;
  /** The value this node stands for. */
  value;

  /**
   * @param parent Parent node (`null` for a root).
   * @param value Value stored on the node.
   * @param firstIndex Optional first index record; skipped when `undefined`.
   */
  constructor(parent, value, firstIndex) {
    this.parent = parent;
    this.value = value;
    if (firstIndex === undefined) {
      return;
    }
    this.indices.push(firstIndex);
  }

  /** Two nodes are equal when their values are strictly equal. */
  eq(other) {
    const { value: otherValue } = other;
    return otherValue === this.value;
  }
}
122
+ export {
123
+ TextFragmentTrie
124
+ };
package/dist/cli/bin.cjs CHANGED
@@ -85,7 +85,9 @@ var import_markup = require("../markup/markup.cjs");
85
85
  var import_parse3 = require("../markup/parse.cjs");
86
86
  var import_parse4 = require("../process/parse.cjs");
87
87
  var import_processAudiobook = require("../process/processAudiobook.cjs");
88
- var import_parse5 = require("../transcribe/parse.cjs");
88
+ var import_parse5 = require("../snapshot/parse.cjs");
89
+ var import_snapshot = require("../snapshot/snapshot.cjs");
90
+ var import_parse6 = require("../transcribe/parse.cjs");
89
91
  var import_transcribe = require("../transcribe/transcribe.cjs");
90
92
  const pipelineCommand = (0, import_core.merge)(
91
93
  (0, import_core.object)({
@@ -110,7 +112,7 @@ const pipelineCommand = (0, import_core.merge)(
110
112
  output: (0, import_core.option)("--output", (0, import_valueparser.path)({ type: "file", extensions: [".epub"] }))
111
113
  }),
112
114
  import_parse4.processParser,
113
- (0, import_core.group)("Transcription", import_parse5.transcribeParser),
115
+ (0, import_core.group)("Transcription", import_parse6.transcribeParser),
114
116
  import_parse2.granularityParser,
115
117
  import_parse2.languageParser,
116
118
  import_parse.alignParser,
@@ -118,10 +120,11 @@ const pipelineCommand = (0, import_core.merge)(
118
120
  );
119
121
  const parser = (0, import_core.or)(
120
122
  import_parse4.processCommand,
121
- import_parse5.transcribeCommand,
123
+ import_parse6.transcribeCommand,
122
124
  import_parse3.markupCommand,
123
125
  import_parse.alignCommand,
124
- pipelineCommand
126
+ pipelineCommand,
127
+ import_parse5.snapshotCommand
125
128
  );
126
129
  async function main() {
127
130
  var _stack2 = [];
@@ -223,6 +226,7 @@ async function main() {
223
226
  parsed.audiobook,
224
227
  {
225
228
  granularity: parsed.granularity,
229
+ textRef: parsed.textRef,
226
230
  primaryLocale: parsed.language,
227
231
  logger,
228
232
  ...!parsed.noProgress && parsed.logLevel === "silent" && {
@@ -304,28 +308,33 @@ async function main() {
304
308
  if (parsed.time) {
305
309
  transcribeTiming.print();
306
310
  }
307
- logger.info("Marking up EPUB...");
308
- startProgressBar();
309
- const markedup = parsed.markedup ?? (0, import_node_path.join)(os.tmpdir(), `stalign-markedup-${(0, import_node_crypto.randomUUID)()}.epub`);
310
- if (!parsed.markedup) {
311
- stack.defer(() => {
312
- (0, import_node_fs.rmSync)(markedup, { recursive: true, force: true });
313
- });
314
- }
315
- const markupTiming = await (0, import_markup.markup)(parsed.epub, markedup, {
316
- granularity: parsed.granularity,
317
- primaryLocale,
318
- logger,
319
- ...!parsed.noProgress && parsed.logLevel === "silent" && {
320
- onProgress: (progress) => {
321
- progressBar.update(Math.floor(progress * 100));
311
+ const markedup = parsed.textRef === "id-fragment" ? parsed.markedup ?? (0, import_node_path.join)(os.tmpdir(), `stalign-markedup-${(0, import_node_crypto.randomUUID)()}.epub`) : parsed.epub;
312
+ if (parsed.textRef === "id-fragment") {
313
+ logger.info("Marking up EPUB...");
314
+ startProgressBar();
315
+ const markedup2 = parsed.markedup ?? (0, import_node_path.join)(os.tmpdir(), `stalign-markedup-${(0, import_node_crypto.randomUUID)()}.epub`);
316
+ if (!parsed.markedup) {
317
+ stack.defer(() => {
318
+ (0, import_node_fs.rmSync)(markedup2, { recursive: true, force: true });
319
+ });
320
+ }
321
+ const markupTiming = await (0, import_markup.markup)(parsed.epub, markedup2, {
322
+ granularity: parsed.granularity,
323
+ primaryLocale,
324
+ logger,
325
+ ...!parsed.noProgress && parsed.logLevel === "silent" && {
326
+ onProgress: (progress) => {
327
+ progressBar.update(Math.floor(progress * 100));
328
+ }
322
329
  }
330
+ });
331
+ resetProgressBar();
332
+ logger.info(`Markup complete, marked up EPUB saved to ${markedup2}.`);
333
+ if (parsed.time) {
334
+ markupTiming.print();
323
335
  }
324
- });
325
- resetProgressBar();
326
- logger.info(`Markup complete, marked up EPUB saved to ${markedup}.`);
327
- if (parsed.time) {
328
- markupTiming.print();
336
+ } else {
337
+ logger.info("Skipping markup, text-range-type set to text-fragment");
329
338
  }
330
339
  logger.info("Aligning EPUB with audiobook...");
331
340
  startProgressBar();
@@ -336,6 +345,7 @@ async function main() {
336
345
  processedAudio,
337
346
  {
338
347
  granularity: parsed.granularity,
348
+ textRef: parsed.textRef,
339
349
  primaryLocale,
340
350
  logger,
341
351
  ...!parsed.noProgress && parsed.logLevel === "silent" && {
@@ -350,12 +360,16 @@ async function main() {
350
360
  if (parsed.time) {
351
361
  alignTiming.print();
352
362
  }
363
+ break;
353
364
  } catch (_) {
354
365
  var _error = _, _hasError = true;
355
366
  } finally {
356
367
  __callDispose(_stack, _error, _hasError);
357
368
  }
358
369
  }
370
+ case "snapshot": {
371
+ await (0, import_snapshot.snapshotAlignment)(parsed.epub, parsed.transcriptions, parsed.output);
372
+ }
359
373
  }
360
374
  } catch (_2) {
361
375
  var _error2 = _2, _hasError2 = true;
package/dist/cli/bin.js CHANGED
@@ -36,6 +36,8 @@ import { markup } from "../markup/markup.js";
36
36
  import { markupCommand } from "../markup/parse.js";
37
37
  import { processCommand, processParser } from "../process/parse.js";
38
38
  import { processAudiobook } from "../process/processAudiobook.js";
39
+ import { snapshotCommand } from "../snapshot/parse.js";
40
+ import { snapshotAlignment } from "../snapshot/snapshot.js";
39
41
  import { transcribeCommand, transcribeParser } from "../transcribe/parse.js";
40
42
  import { transcribe } from "../transcribe/transcribe.js";
41
43
  const pipelineCommand = merge(
@@ -72,7 +74,8 @@ const parser = or(
72
74
  transcribeCommand,
73
75
  markupCommand,
74
76
  alignCommand,
75
- pipelineCommand
77
+ pipelineCommand,
78
+ snapshotCommand
76
79
  );
77
80
  async function main() {
78
81
  var _stack2 = [];
@@ -174,6 +177,7 @@ async function main() {
174
177
  parsed.audiobook,
175
178
  {
176
179
  granularity: parsed.granularity,
180
+ textRef: parsed.textRef,
177
181
  primaryLocale: parsed.language,
178
182
  logger,
179
183
  ...!parsed.noProgress && parsed.logLevel === "silent" && {
@@ -255,28 +259,33 @@ async function main() {
255
259
  if (parsed.time) {
256
260
  transcribeTiming.print();
257
261
  }
258
- logger.info("Marking up EPUB...");
259
- startProgressBar();
260
- const markedup = parsed.markedup ?? join(os.tmpdir(), `stalign-markedup-${randomUUID()}.epub`);
261
- if (!parsed.markedup) {
262
- stack.defer(() => {
263
- rmSync(markedup, { recursive: true, force: true });
264
- });
265
- }
266
- const markupTiming = await markup(parsed.epub, markedup, {
267
- granularity: parsed.granularity,
268
- primaryLocale,
269
- logger,
270
- ...!parsed.noProgress && parsed.logLevel === "silent" && {
271
- onProgress: (progress) => {
272
- progressBar.update(Math.floor(progress * 100));
262
+ const markedup = parsed.textRef === "id-fragment" ? parsed.markedup ?? join(os.tmpdir(), `stalign-markedup-${randomUUID()}.epub`) : parsed.epub;
263
+ if (parsed.textRef === "id-fragment") {
264
+ logger.info("Marking up EPUB...");
265
+ startProgressBar();
266
+ const markedup2 = parsed.markedup ?? join(os.tmpdir(), `stalign-markedup-${randomUUID()}.epub`);
267
+ if (!parsed.markedup) {
268
+ stack.defer(() => {
269
+ rmSync(markedup2, { recursive: true, force: true });
270
+ });
271
+ }
272
+ const markupTiming = await markup(parsed.epub, markedup2, {
273
+ granularity: parsed.granularity,
274
+ primaryLocale,
275
+ logger,
276
+ ...!parsed.noProgress && parsed.logLevel === "silent" && {
277
+ onProgress: (progress) => {
278
+ progressBar.update(Math.floor(progress * 100));
279
+ }
273
280
  }
281
+ });
282
+ resetProgressBar();
283
+ logger.info(`Markup complete, marked up EPUB saved to ${markedup2}.`);
284
+ if (parsed.time) {
285
+ markupTiming.print();
274
286
  }
275
- });
276
- resetProgressBar();
277
- logger.info(`Markup complete, marked up EPUB saved to ${markedup}.`);
278
- if (parsed.time) {
279
- markupTiming.print();
287
+ } else {
288
+ logger.info("Skipping markup, text-range-type set to text-fragment");
280
289
  }
281
290
  logger.info("Aligning EPUB with audiobook...");
282
291
  startProgressBar();
@@ -287,6 +296,7 @@ async function main() {
287
296
  processedAudio,
288
297
  {
289
298
  granularity: parsed.granularity,
299
+ textRef: parsed.textRef,
290
300
  primaryLocale,
291
301
  logger,
292
302
  ...!parsed.noProgress && parsed.logLevel === "silent" && {
@@ -301,12 +311,16 @@ async function main() {
301
311
  if (parsed.time) {
302
312
  alignTiming.print();
303
313
  }
314
+ break;
304
315
  } catch (_) {
305
316
  var _error = _, _hasError = true;
306
317
  } finally {
307
318
  __callDispose(_stack, _error, _hasError);
308
319
  }
309
320
  }
321
+ case "snapshot": {
322
+ await snapshotAlignment(parsed.epub, parsed.transcriptions, parsed.output);
323
+ }
310
324
  }
311
325
  } catch (_2) {
312
326
  var _error2 = _2, _hasError2 = true;
package/dist/index.d.cts CHANGED
@@ -6,6 +6,7 @@ import '@storyteller-platform/ghost-story';
6
6
  import '@esfx/async-semaphore';
7
7
  import 'pino';
8
8
  import './process/AudioEncoding.cjs';
9
+ import '@storyteller-platform/ghost-story/constants';
9
10
  import '@echogarden/text-segmentation';
10
11
  import '@storyteller-platform/epub';
11
12
  import './markup/map.cjs';
package/dist/index.d.ts CHANGED
@@ -6,6 +6,7 @@ import '@storyteller-platform/ghost-story';
6
6
  import '@esfx/async-semaphore';
7
7
  import 'pino';
8
8
  import './process/AudioEncoding.js';
9
+ import '@storyteller-platform/ghost-story/constants';
9
10
  import '@echogarden/text-segmentation';
10
11
  import '@storyteller-platform/epub';
11
12
  import './markup/map.js';
@@ -0,0 +1,61 @@
1
+ "use strict";
2
// Bundler-generated CommonJS interop helpers (kept byte-identical).
// Short aliases for the Object reflection functions used below.
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
// __export(target, all): defines one enumerable getter on `target` per key of
// `all`; each value of `all` is the getter function itself.
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
// __copyProps(to, from, except, desc): for every own property name of `from`
// (object or function) that `to` does not already own and that is not
// `except`, defines a getter on `to` that reads through to `from[key]`.
// Enumerability is copied from the source descriptor; `desc` is only used as
// a scratch variable.
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
// __toCommonJS(mod): new object carrying a non-enumerable `__esModule: true`
// marker plus read-through getters for every own property of `mod`.
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
// Export table for this module: getters are registered first so the names
// resolve lazily to the `const` bindings declared further down.
+ var parse_exports = {};
20
+ __export(parse_exports, {
21
+ snapshotCommand: () => snapshotCommand,
22
+ snapshotParser: () => snapshotParser
23
+ });
24
+ module.exports = __toCommonJS(parse_exports);
25
+ var import_core = require("@optique/core");
26
+ var import_run = require("@optique/run");
27
+ var import_parse = require("../common/parse.cjs");
28
/**
 * Option group "Snapshot": `--transcriptions` (existing directory), `--epub`
 * (existing `.epub` file) and the positional `OUTPUT_PATH` argument.
 */
const snapshotParser = (() => {
  const transcriptionsPath = (0, import_run.path)({ mustExist: true, type: "directory" });
  const transcriptions = (0, import_core.option)("--transcriptions", transcriptionsPath);

  const epubPath = (0, import_run.path)({ mustExist: true, type: "file", extensions: [".epub"] });
  const epubDescription = import_core.message`Path to an EPUB file to snapshot. This EPUB must have Media Overlays and audio files corresponding to the transcription files passed to --transcriptions.`;
  const epub = (0, import_core.option)("--epub", epubPath, { description: epubDescription });

  const outputPath = (0, import_run.path)({ type: "file", metavar: "OUTPUT_PATH" });
  const outputDescription = import_core.message`Path to save the snapshot.`;
  const output = (0, import_core.argument)(outputPath, { description: outputDescription });

  return (0, import_core.object)("Snapshot", { transcriptions, epub, output });
})();
44
/**
 * The `snapshot` sub-command: the constant `action: "snapshot"` tag merged
 * with the snapshot options and the shared logging options.
 */
const snapshotCommand = (() => {
  const action = (0, import_core.constant)("snapshot");
  const actionParser = (0, import_core.object)({ action });
  const options = (0, import_core.merge)(actionParser, snapshotParser, import_parse.loggingParser);
  const description = import_core.message`Print a human-readable snapshot of the EPUB’s alignment to a text file.`;
  return (0, import_core.command)("snapshot", options, { description });
})();
57
+ // Annotate the CommonJS export names for ESM import in node:
58
+ 0 && (module.exports = {
59
+ snapshotCommand,
60
+ snapshotParser
61
+ });
@@ -0,0 +1,24 @@
1
+ import * as _optique_core from '@optique/core';
2
+
3
/**
 * Declared type of the snapshot option parser.
 *
 * The second type argument is the parsed result: three read-only string
 * fields (`transcriptions`, `epub`, `output`). The third type argument has
 * the same keys, each holding a `ValueParserResult<string>` or `undefined`
 * (presumably the parser's intermediate state — confirm against @optique/core).
 */
+ declare const snapshotParser: _optique_core.Parser<"sync", {
4
+ readonly transcriptions: string;
5
+ readonly epub: string;
6
+ readonly output: string;
7
+ }, {
8
+ readonly transcriptions: _optique_core.ValueParserResult<string> | undefined;
9
+ readonly epub: _optique_core.ValueParserResult<string> | undefined;
10
+ readonly output: _optique_core.ValueParserResult<string> | undefined;
11
+ }>;
12
/**
 * Declared type of the `snapshot` command parser.
 *
 * Its parsed result is the intersection of three object types: the literal
 * `action: "snapshot"` tag, the three string fields `transcriptions`, `epub`
 * and `output`, and the fields `noProgress`, `logLevel` and `time`.
 */
+ declare const snapshotCommand: _optique_core.Parser<"sync", {
13
+ readonly action: "snapshot";
14
+ } & {
15
+ readonly transcriptions: string;
16
+ readonly epub: string;
17
+ readonly output: string;
18
+ } & {
19
+ readonly noProgress: boolean;
20
+ readonly logLevel: "silent" | "debug" | "info" | "warn" | "error";
21
+ readonly time: boolean;
22
+ }, ["matched", string] | ["parsing", Record<string | symbol, unknown>] | undefined>;
23
+
24
+ export { snapshotCommand, snapshotParser };
@@ -0,0 +1,24 @@
1
+ import * as _optique_core from '@optique/core';
2
+
3
/**
 * Declared type of the snapshot option parser.
 *
 * The second type argument is the parsed result: three read-only string
 * fields (`transcriptions`, `epub`, `output`). The third type argument has
 * the same keys, each holding a `ValueParserResult<string>` or `undefined`
 * (presumably the parser's intermediate state — confirm against @optique/core).
 */
+ declare const snapshotParser: _optique_core.Parser<"sync", {
4
+ readonly transcriptions: string;
5
+ readonly epub: string;
6
+ readonly output: string;
7
+ }, {
8
+ readonly transcriptions: _optique_core.ValueParserResult<string> | undefined;
9
+ readonly epub: _optique_core.ValueParserResult<string> | undefined;
10
+ readonly output: _optique_core.ValueParserResult<string> | undefined;
11
+ }>;
12
/**
 * Declared type of the `snapshot` command parser.
 *
 * Its parsed result is the intersection of three object types: the literal
 * `action: "snapshot"` tag, the three string fields `transcriptions`, `epub`
 * and `output`, and the fields `noProgress`, `logLevel` and `time`.
 */
+ declare const snapshotCommand: _optique_core.Parser<"sync", {
13
+ readonly action: "snapshot";
14
+ } & {
15
+ readonly transcriptions: string;
16
+ readonly epub: string;
17
+ readonly output: string;
18
+ } & {
19
+ readonly noProgress: boolean;
20
+ readonly logLevel: "silent" | "debug" | "info" | "warn" | "error";
21
+ readonly time: boolean;
22
+ }, ["matched", string] | ["parsing", Record<string | symbol, unknown>] | undefined>;
23
+
24
+ export { snapshotCommand, snapshotParser };
@@ -0,0 +1,45 @@
1
+ import "../chunk-BIEQXUOY.js";
2
+ import {
3
+ argument,
4
+ command,
5
+ constant,
6
+ merge,
7
+ message,
8
+ object,
9
+ option
10
+ } from "@optique/core";
11
+ import { path } from "@optique/run";
12
+ import { loggingParser } from "../common/parse.js";
13
/**
 * Option group "Snapshot": `--transcriptions` (existing directory), `--epub`
 * (existing `.epub` file) and the positional `OUTPUT_PATH` argument.
 */
const snapshotParser = (() => {
  const transcriptionsPath = path({ mustExist: true, type: "directory" });
  const transcriptions = option("--transcriptions", transcriptionsPath);

  const epubPath = path({ mustExist: true, type: "file", extensions: [".epub"] });
  const epubDescription = message`Path to an EPUB file to snapshot. This EPUB must have Media Overlays and audio files corresponding to the transcription files passed to --transcriptions.`;
  const epub = option("--epub", epubPath, { description: epubDescription });

  const outputPath = path({ type: "file", metavar: "OUTPUT_PATH" });
  const outputDescription = message`Path to save the snapshot.`;
  const output = argument(outputPath, { description: outputDescription });

  return object("Snapshot", { transcriptions, epub, output });
})();
29
/**
 * The `snapshot` sub-command: the constant `action: "snapshot"` tag merged
 * with the snapshot options and the shared logging options.
 */
const snapshotCommand = (() => {
  const action = constant("snapshot");
  const actionParser = object({ action });
  const options = merge(actionParser, snapshotParser, loggingParser);
  const description = message`Print a human-readable snapshot of the EPUB’s alignment to a text file.`;
  return command("snapshot", options, { description });
})();
42
+ export {
43
+ snapshotCommand,
44
+ snapshotParser
45
+ };