@storyteller-platform/align 0.1.20 → 0.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/align/align.cjs +81 -15
- package/dist/align/align.d.cts +4 -2
- package/dist/align/align.d.ts +4 -2
- package/dist/align/align.js +82 -16
- package/dist/align/getSentenceRanges.cjs +1 -0
- package/dist/align/getSentenceRanges.d.cts +1 -0
- package/dist/align/getSentenceRanges.d.ts +1 -0
- package/dist/align/getSentenceRanges.js +1 -0
- package/dist/align/parse.cjs +6 -0
- package/dist/align/parse.d.cts +3 -0
- package/dist/align/parse.d.ts +3 -0
- package/dist/align/parse.js +9 -1
- package/dist/align/textFragments.cjs +147 -0
- package/dist/align/textFragments.d.cts +23 -0
- package/dist/align/textFragments.d.ts +23 -0
- package/dist/align/textFragments.js +124 -0
- package/dist/cli/bin.cjs +38 -24
- package/dist/cli/bin.js +35 -21
- package/dist/index.d.cts +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/snapshot/parse.cjs +61 -0
- package/dist/snapshot/parse.d.cts +24 -0
- package/dist/snapshot/parse.d.ts +24 -0
- package/dist/snapshot/parse.js +45 -0
- package/dist/snapshot/snapshot.cjs +224 -0
- package/dist/snapshot/snapshot.d.cts +6 -0
- package/dist/snapshot/snapshot.d.ts +6 -0
- package/dist/snapshot/snapshot.js +161 -0
- package/dist/transcribe/parse.cjs +2 -2
- package/dist/transcribe/parse.js +1 -1
- package/dist/transcribe/transcribe.cjs +2 -0
- package/dist/transcribe/transcribe.d.cts +2 -1
- package/dist/transcribe/transcribe.d.ts +2 -1
- package/dist/transcribe/transcribe.js +2 -0
- package/package.json +3 -3
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Cached built-ins shared by the bundler-emitted CommonJS interop helpers below.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Installs each key of `all` on `target` as an enumerable getter, using the
 * function stored under that key as the getter.
 */
var __export = (target, all) => {
  for (var name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Mirrors the own properties of `from` onto `to` as getters that read through
 * to `from`. Keys already owned by `to`, and the key named by `except`, are
 * left alone. Enumerability follows the source descriptor.
 */
var __copyProps = (to, from, except, desc) => {
  const copyable = (from && typeof from === "object") || typeof from === "function";
  if (!copyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};

/** Wraps `mod` in a fresh object flagged with a non-enumerable `__esModule: true`. */
var __toCommonJS = (mod) => {
  const shell = __defProp({}, "__esModule", { value: true });
  return __copyProps(shell, mod);
};
|
|
19
|
+
var textFragments_exports = {};
|
|
20
|
+
__export(textFragments_exports, {
|
|
21
|
+
TextFragmentTrie: () => TextFragmentTrie
|
|
22
|
+
});
|
|
23
|
+
module.exports = __toCommonJS(textFragments_exports);
|
|
24
|
+
var import_itertools = require("itertools");
|
|
25
|
+
var import_runes2 = require("runes2");
|
|
26
|
+
/**
 * Suffix trie over a list of lower-cased text spans, used to build URL
 * text-fragment directives of the form `:~:text=[prefix-,]start[,end]`.
 *
 * Every suffix of every span is inserted rune by rune. Each node records, in
 * `indices`, the `{ span, pos }` of every occurrence of its character that is
 * reached along that path, where `pos` is the rune index inside the span.
 */
class TextFragmentTrie {
  root = new Node(null, "");
  spans;

  /**
   * @param casedSpans Spans in their original casing; they are lower-cased
   *   with `locale` before being indexed.
   * @param locale Locale passed to `toLocaleLowerCase`.
   */
  constructor(casedSpans, locale = new Intl.Locale("en-Latn-US")) {
    this.spans = casedSpans.map((span) => span.toLocaleLowerCase(locale));
    for (const [i, span] of (0, import_itertools.enumerate)(this.spans)) {
      // One cursor per suffix started so far; each rune advances all of them.
      const parents = [this.root];
      for (const [j, char] of (0, import_itertools.enumerate)((0, import_runes2.runes)(span))) {
        for (const [k, parent] of (0, import_itertools.enumerate)(parents)) {
          let node = parent.children.find((child) => child.value === char);
          if (node) {
            node.indices.push({ span: i, pos: j });
          } else {
            // Only allocate when the child is actually missing.
            node = new Node(parent, char, { span: i, pos: j });
            parent.children.push(node);
          }
          parents[k] = node;
        }
        // The next rune also starts a new suffix at the root.
        parents.push(this.root);
      }
    }
  }

  /**
   * Walks the span from its first rune until the path has exactly one
   * recorded occurrence, then returns the directive for that node. If the
   * span runs out first, the directive is built with a disambiguating prefix.
   *
   * @param spanIndex Index into the spans given to the constructor.
   * @returns A string starting with `:~:text=`.
   */
  findMinimalFragment(spanIndex) {
    let node = this.root;
    while (node.children.length) {
      const candidates = node.children.filter(
        (candidate) => candidate.indices.some(({ span }) => span === spanIndex)
      );
      // The child with the earliest occurrence in this span continues the
      // span's own leading text.
      const child = (0, import_itertools.min)(
        candidates,
        (c) => c.indices.find((i) => i.span === spanIndex).pos
      );
      if (!child) {
        return this.nodeToFragment(node, spanIndex, true);
      }
      if (child.indices.length === 1) {
        return this.nodeToFragment(child, spanIndex);
      }
      node = child;
    }
    return this.nodeToFragment(node, spanIndex, true);
  }

  /**
   * Builds the directive for the path ending at `node`.
   *
   * @param node Trie node whose root-to-node path is the start text.
   * @param spanIndex Index of the span the directive should point at.
   * @param findPrefix When truthy and a previous span exists, prepend the
   *   shortest tail of the previous span that no rival occurrence shares.
   * @returns A string starting with `:~:text=`.
   */
  nodeToFragment(node, spanIndex, findPrefix) {
    const span = this.spans[spanIndex];
    let fragment = ":~:text=";
    let prefix = "";
    const prev = findPrefix ? this.spans[spanIndex - 1] : undefined;
    if (prev) {
      // Number of runes on the root-to-node path.
      let depth = 0;
      for (let cursor = node; cursor.parent; cursor = cursor.parent) {
        depth++;
      }
      const runesBeforeLast = Math.max(depth - 1, 0);
      // For each occurrence in another span, collect the text that precedes
      // it (previous span + leading part of its own span), reversed. `pos` is
      // a rune index, so the slice is done on runes, not UTF-16 units.
      let rivals = node.indices
        .filter(({ span: s }) => s !== spanIndex)
        .map(({ span: otherIndex, pos }) => {
          const leading = (0, import_runes2.runes)(this.spans[otherIndex])
            .slice(0, pos - runesBeforeLast)
            .join("");
          const preceding = (this.spans[otherIndex - 1] ?? "") + leading;
          return (0, import_runes2.runes)(preceding).toReversed();
        });
      for (const [offset, char] of (0, import_itertools.enumerate)((0, import_runes2.runes)(prev).toReversed())) {
        prefix = char + prefix;
        // Keep only rivals that still match; filtering avoids the index
        // drift of splicing while iterating a snapshot.
        rivals = rivals.filter((rival) => rival[offset] === char);
        if (rivals.length === 0) {
          break;
        }
      }
    }
    if (prefix) {
      fragment += `${encodeTextFragmentPart(prefix)}-,`;
    }
    let start = "";
    for (let cursor = node; cursor; cursor = cursor.parent) {
      start = cursor.value + start;
    }
    fragment += encodeTextFragmentPart(start);
    // NOTE(review): `start` already ends with `node.value`, so this skips one
    // extra character after the start text. Kept as-is; confirm it is intended.
    const remainingSentence = span.slice(start.length + node.value.length);
    // Grow `end` from the tail until its first occurrence in the remainder is
    // the tail itself.
    let end = "";
    let i = remainingSentence.length - 1;
    while (remainingSentence.indexOf(end) !== i + 1 && i >= node.value.length) {
      end = remainingSentence.slice(i);
      i--;
    }
    // `end` begins at i + 1. If that is the second half of a surrogate pair,
    // pull in the first half; encodeURIComponent throws on a lone surrogate.
    const head = end.charCodeAt(0);
    if (i >= 0 && head >= 0xdc00 && head <= 0xdfff) {
      const before = remainingSentence.charCodeAt(i);
      if (before >= 0xd800 && before <= 0xdbff) {
        end = remainingSentence[i] + end;
      }
    }
    if (end) {
      fragment += `,${encodeTextFragmentPart(end)}`;
    }
    return fragment;
  }
}
|
|
127
|
+
/**
 * Percent-encodes one part of a text-fragment directive.
 *
 * Runs `encodeURIComponent`, then additionally escapes `-` and `,`.
 *
 * @param part Raw text for one directive part.
 * @returns The encoded text.
 */
function encodeTextFragmentPart(part) {
  const encoded = encodeURIComponent(part);
  return encoded.replaceAll("-", "%2d").replaceAll(",", "%2c");
}
|
|
130
|
+
/**
 * One trie node: a character value, a link to its parent, its children, and
 * the `{ span, pos }` occurrences recorded for it.
 */
class Node {
  children = [];
  indices = [];

  /**
   * @param parent Parent node, or `null` for the root.
   * @param value Character stored at this node.
   * @param firstIndex Optional first occurrence to record.
   */
  constructor(parent, value, firstIndex) {
    this.parent = parent;
    this.value = value;
    if (firstIndex === undefined) {
      return;
    }
    this.indices.push(firstIndex);
  }

  /** Two nodes are equal when they store the same value. */
  eq(other) {
    return other.value === this.value;
  }
}
|
|
144
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
145
|
+
0 && (module.exports = {
|
|
146
|
+
TextFragmentTrie
|
|
147
|
+
});
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
 * Trie over text spans that produces URL text-fragment directive strings.
 */
declare class TextFragmentTrie {
    private root;
    private spans;
    /**
     * @param casedSpans Spans in their original casing.
     * @param locale Optional locale.
     */
    constructor(casedSpans: Array<string>, locale?: Intl.Locale);
    /** Returns the directive string for the span at `spanIndex`. */
    findMinimalFragment(spanIndex: number): string;
    /** Builds the directive string for the path ending at `node`. */
    nodeToFragment(node: Node, spanIndex: number, findPrefix?: boolean): string;
}
/** Trie node: a value, its parent and children, and recorded occurrences. */
declare class Node {
    parent: Node | null;
    value: string;
    children: Array<Node>;
    /** Recorded occurrences as span index / position pairs. */
    indices: Array<{
        span: number;
        pos: number;
    }>;
    constructor(parent: Node | null, value: string, firstIndex?: {
        span: number;
        pos: number;
    });
    eq(other: Node): boolean;
}
|
|
22
|
+
|
|
23
|
+
export { TextFragmentTrie };
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
 * Trie over text spans that produces URL text-fragment directive strings.
 */
declare class TextFragmentTrie {
    private root;
    private spans;
    /**
     * @param casedSpans Spans in their original casing.
     * @param locale Optional locale.
     */
    constructor(casedSpans: Array<string>, locale?: Intl.Locale);
    /** Returns the directive string for the span at `spanIndex`. */
    findMinimalFragment(spanIndex: number): string;
    /** Builds the directive string for the path ending at `node`. */
    nodeToFragment(node: Node, spanIndex: number, findPrefix?: boolean): string;
}
/** Trie node: a value, its parent and children, and recorded occurrences. */
declare class Node {
    parent: Node | null;
    value: string;
    children: Array<Node>;
    /** Recorded occurrences as span index / position pairs. */
    indices: Array<{
        span: number;
        pos: number;
    }>;
    constructor(parent: Node | null, value: string, firstIndex?: {
        span: number;
        pos: number;
    });
    eq(other: Node): boolean;
}
|
|
22
|
+
|
|
23
|
+
export { TextFragmentTrie };
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import "../chunk-BIEQXUOY.js";
|
|
2
|
+
import { enumerate, min } from "itertools";
|
|
3
|
+
import { runes } from "runes2";
|
|
4
|
+
/**
 * Suffix trie over a list of lower-cased text spans, used to build URL
 * text-fragment directives of the form `:~:text=[prefix-,]start[,end]`.
 *
 * Every suffix of every span is inserted rune by rune. Each node records, in
 * `indices`, the `{ span, pos }` of every occurrence of its character that is
 * reached along that path, where `pos` is the rune index inside the span.
 */
class TextFragmentTrie {
  root = new Node(null, "");
  spans;

  /**
   * @param casedSpans Spans in their original casing; they are lower-cased
   *   with `locale` before being indexed.
   * @param locale Locale passed to `toLocaleLowerCase`.
   */
  constructor(casedSpans, locale = new Intl.Locale("en-Latn-US")) {
    this.spans = casedSpans.map((span) => span.toLocaleLowerCase(locale));
    for (const [i, span] of enumerate(this.spans)) {
      // One cursor per suffix started so far; each rune advances all of them.
      const parents = [this.root];
      for (const [j, char] of enumerate(runes(span))) {
        for (const [k, parent] of enumerate(parents)) {
          let node = parent.children.find((child) => child.value === char);
          if (node) {
            node.indices.push({ span: i, pos: j });
          } else {
            // Only allocate when the child is actually missing.
            node = new Node(parent, char, { span: i, pos: j });
            parent.children.push(node);
          }
          parents[k] = node;
        }
        // The next rune also starts a new suffix at the root.
        parents.push(this.root);
      }
    }
  }

  /**
   * Walks the span from its first rune until the path has exactly one
   * recorded occurrence, then returns the directive for that node. If the
   * span runs out first, the directive is built with a disambiguating prefix.
   *
   * @param spanIndex Index into the spans given to the constructor.
   * @returns A string starting with `:~:text=`.
   */
  findMinimalFragment(spanIndex) {
    let node = this.root;
    while (node.children.length) {
      const candidates = node.children.filter(
        (candidate) => candidate.indices.some(({ span }) => span === spanIndex)
      );
      // The child with the earliest occurrence in this span continues the
      // span's own leading text.
      const child = min(
        candidates,
        (c) => c.indices.find((i) => i.span === spanIndex).pos
      );
      if (!child) {
        return this.nodeToFragment(node, spanIndex, true);
      }
      if (child.indices.length === 1) {
        return this.nodeToFragment(child, spanIndex);
      }
      node = child;
    }
    return this.nodeToFragment(node, spanIndex, true);
  }

  /**
   * Builds the directive for the path ending at `node`.
   *
   * @param node Trie node whose root-to-node path is the start text.
   * @param spanIndex Index of the span the directive should point at.
   * @param findPrefix When truthy and a previous span exists, prepend the
   *   shortest tail of the previous span that no rival occurrence shares.
   * @returns A string starting with `:~:text=`.
   */
  nodeToFragment(node, spanIndex, findPrefix) {
    const span = this.spans[spanIndex];
    let fragment = ":~:text=";
    let prefix = "";
    const prev = findPrefix ? this.spans[spanIndex - 1] : undefined;
    if (prev) {
      // Number of runes on the root-to-node path.
      let depth = 0;
      for (let cursor = node; cursor.parent; cursor = cursor.parent) {
        depth++;
      }
      const runesBeforeLast = Math.max(depth - 1, 0);
      // For each occurrence in another span, collect the text that precedes
      // it (previous span + leading part of its own span), reversed. `pos` is
      // a rune index, so the slice is done on runes, not UTF-16 units.
      let rivals = node.indices
        .filter(({ span: s }) => s !== spanIndex)
        .map(({ span: otherIndex, pos }) => {
          const leading = runes(this.spans[otherIndex])
            .slice(0, pos - runesBeforeLast)
            .join("");
          const preceding = (this.spans[otherIndex - 1] ?? "") + leading;
          return runes(preceding).toReversed();
        });
      for (const [offset, char] of enumerate(runes(prev).toReversed())) {
        prefix = char + prefix;
        // Keep only rivals that still match; filtering avoids the index
        // drift of splicing while iterating a snapshot.
        rivals = rivals.filter((rival) => rival[offset] === char);
        if (rivals.length === 0) {
          break;
        }
      }
    }
    if (prefix) {
      fragment += `${encodeTextFragmentPart(prefix)}-,`;
    }
    let start = "";
    for (let cursor = node; cursor; cursor = cursor.parent) {
      start = cursor.value + start;
    }
    fragment += encodeTextFragmentPart(start);
    // NOTE(review): `start` already ends with `node.value`, so this skips one
    // extra character after the start text. Kept as-is; confirm it is intended.
    const remainingSentence = span.slice(start.length + node.value.length);
    // Grow `end` from the tail until its first occurrence in the remainder is
    // the tail itself.
    let end = "";
    let i = remainingSentence.length - 1;
    while (remainingSentence.indexOf(end) !== i + 1 && i >= node.value.length) {
      end = remainingSentence.slice(i);
      i--;
    }
    // `end` begins at i + 1. If that is the second half of a surrogate pair,
    // pull in the first half; encodeURIComponent throws on a lone surrogate.
    const head = end.charCodeAt(0);
    if (i >= 0 && head >= 0xdc00 && head <= 0xdfff) {
      const before = remainingSentence.charCodeAt(i);
      if (before >= 0xd800 && before <= 0xdbff) {
        end = remainingSentence[i] + end;
      }
    }
    if (end) {
      fragment += `,${encodeTextFragmentPart(end)}`;
    }
    return fragment;
  }
}
|
|
105
|
+
/**
 * Percent-encodes one part of a text-fragment directive.
 *
 * Runs `encodeURIComponent`, then additionally escapes `-` and `,`.
 *
 * @param part Raw text for one directive part.
 * @returns The encoded text.
 */
function encodeTextFragmentPart(part) {
  const encoded = encodeURIComponent(part);
  return encoded.replaceAll("-", "%2d").replaceAll(",", "%2c");
}
|
|
108
|
+
/**
 * One trie node: a character value, a link to its parent, its children, and
 * the `{ span, pos }` occurrences recorded for it.
 */
class Node {
  children = [];
  indices = [];

  /**
   * @param parent Parent node, or `null` for the root.
   * @param value Character stored at this node.
   * @param firstIndex Optional first occurrence to record.
   */
  constructor(parent, value, firstIndex) {
    this.parent = parent;
    this.value = value;
    if (firstIndex === undefined) {
      return;
    }
    this.indices.push(firstIndex);
  }

  /** Two nodes are equal when they store the same value. */
  eq(other) {
    return other.value === this.value;
  }
}
|
|
122
|
+
export {
|
|
123
|
+
TextFragmentTrie
|
|
124
|
+
};
|
package/dist/cli/bin.cjs
CHANGED
|
@@ -85,7 +85,9 @@ var import_markup = require("../markup/markup.cjs");
|
|
|
85
85
|
var import_parse3 = require("../markup/parse.cjs");
|
|
86
86
|
var import_parse4 = require("../process/parse.cjs");
|
|
87
87
|
var import_processAudiobook = require("../process/processAudiobook.cjs");
|
|
88
|
-
var import_parse5 = require("../
|
|
88
|
+
var import_parse5 = require("../snapshot/parse.cjs");
|
|
89
|
+
var import_snapshot = require("../snapshot/snapshot.cjs");
|
|
90
|
+
var import_parse6 = require("../transcribe/parse.cjs");
|
|
89
91
|
var import_transcribe = require("../transcribe/transcribe.cjs");
|
|
90
92
|
const pipelineCommand = (0, import_core.merge)(
|
|
91
93
|
(0, import_core.object)({
|
|
@@ -110,7 +112,7 @@ const pipelineCommand = (0, import_core.merge)(
|
|
|
110
112
|
output: (0, import_core.option)("--output", (0, import_valueparser.path)({ type: "file", extensions: [".epub"] }))
|
|
111
113
|
}),
|
|
112
114
|
import_parse4.processParser,
|
|
113
|
-
(0, import_core.group)("Transcription",
|
|
115
|
+
(0, import_core.group)("Transcription", import_parse6.transcribeParser),
|
|
114
116
|
import_parse2.granularityParser,
|
|
115
117
|
import_parse2.languageParser,
|
|
116
118
|
import_parse.alignParser,
|
|
@@ -118,10 +120,11 @@ const pipelineCommand = (0, import_core.merge)(
|
|
|
118
120
|
);
|
|
119
121
|
const parser = (0, import_core.or)(
|
|
120
122
|
import_parse4.processCommand,
|
|
121
|
-
|
|
123
|
+
import_parse6.transcribeCommand,
|
|
122
124
|
import_parse3.markupCommand,
|
|
123
125
|
import_parse.alignCommand,
|
|
124
|
-
pipelineCommand
|
|
126
|
+
pipelineCommand,
|
|
127
|
+
import_parse5.snapshotCommand
|
|
125
128
|
);
|
|
126
129
|
async function main() {
|
|
127
130
|
var _stack2 = [];
|
|
@@ -223,6 +226,7 @@ async function main() {
|
|
|
223
226
|
parsed.audiobook,
|
|
224
227
|
{
|
|
225
228
|
granularity: parsed.granularity,
|
|
229
|
+
textRef: parsed.textRef,
|
|
226
230
|
primaryLocale: parsed.language,
|
|
227
231
|
logger,
|
|
228
232
|
...!parsed.noProgress && parsed.logLevel === "silent" && {
|
|
@@ -304,28 +308,33 @@ async function main() {
|
|
|
304
308
|
if (parsed.time) {
|
|
305
309
|
transcribeTiming.print();
|
|
306
310
|
}
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
311
|
+
const markedup = parsed.textRef === "id-fragment" ? parsed.markedup ?? (0, import_node_path.join)(os.tmpdir(), `stalign-markedup-${(0, import_node_crypto.randomUUID)()}.epub`) : parsed.epub;
|
|
312
|
+
if (parsed.textRef === "id-fragment") {
|
|
313
|
+
logger.info("Marking up EPUB...");
|
|
314
|
+
startProgressBar();
|
|
315
|
+
const markedup2 = parsed.markedup ?? (0, import_node_path.join)(os.tmpdir(), `stalign-markedup-${(0, import_node_crypto.randomUUID)()}.epub`);
|
|
316
|
+
if (!parsed.markedup) {
|
|
317
|
+
stack.defer(() => {
|
|
318
|
+
(0, import_node_fs.rmSync)(markedup2, { recursive: true, force: true });
|
|
319
|
+
});
|
|
320
|
+
}
|
|
321
|
+
const markupTiming = await (0, import_markup.markup)(parsed.epub, markedup2, {
|
|
322
|
+
granularity: parsed.granularity,
|
|
323
|
+
primaryLocale,
|
|
324
|
+
logger,
|
|
325
|
+
...!parsed.noProgress && parsed.logLevel === "silent" && {
|
|
326
|
+
onProgress: (progress) => {
|
|
327
|
+
progressBar.update(Math.floor(progress * 100));
|
|
328
|
+
}
|
|
322
329
|
}
|
|
330
|
+
});
|
|
331
|
+
resetProgressBar();
|
|
332
|
+
logger.info(`Markup complete, marked up EPUB saved to ${markedup2}.`);
|
|
333
|
+
if (parsed.time) {
|
|
334
|
+
markupTiming.print();
|
|
323
335
|
}
|
|
324
|
-
}
|
|
325
|
-
|
|
326
|
-
logger.info(`Markup complete, marked up EPUB saved to ${markedup}.`);
|
|
327
|
-
if (parsed.time) {
|
|
328
|
-
markupTiming.print();
|
|
336
|
+
} else {
|
|
337
|
+
logger.info("Skipping markup, text-range-type set to text-fragment");
|
|
329
338
|
}
|
|
330
339
|
logger.info("Aligning EPUB with audiobook...");
|
|
331
340
|
startProgressBar();
|
|
@@ -336,6 +345,7 @@ async function main() {
|
|
|
336
345
|
processedAudio,
|
|
337
346
|
{
|
|
338
347
|
granularity: parsed.granularity,
|
|
348
|
+
textRef: parsed.textRef,
|
|
339
349
|
primaryLocale,
|
|
340
350
|
logger,
|
|
341
351
|
...!parsed.noProgress && parsed.logLevel === "silent" && {
|
|
@@ -350,12 +360,16 @@ async function main() {
|
|
|
350
360
|
if (parsed.time) {
|
|
351
361
|
alignTiming.print();
|
|
352
362
|
}
|
|
363
|
+
break;
|
|
353
364
|
} catch (_) {
|
|
354
365
|
var _error = _, _hasError = true;
|
|
355
366
|
} finally {
|
|
356
367
|
__callDispose(_stack, _error, _hasError);
|
|
357
368
|
}
|
|
358
369
|
}
|
|
370
|
+
case "snapshot": {
|
|
371
|
+
await (0, import_snapshot.snapshotAlignment)(parsed.epub, parsed.transcriptions, parsed.output);
|
|
372
|
+
}
|
|
359
373
|
}
|
|
360
374
|
} catch (_2) {
|
|
361
375
|
var _error2 = _2, _hasError2 = true;
|
package/dist/cli/bin.js
CHANGED
|
@@ -36,6 +36,8 @@ import { markup } from "../markup/markup.js";
|
|
|
36
36
|
import { markupCommand } from "../markup/parse.js";
|
|
37
37
|
import { processCommand, processParser } from "../process/parse.js";
|
|
38
38
|
import { processAudiobook } from "../process/processAudiobook.js";
|
|
39
|
+
import { snapshotCommand } from "../snapshot/parse.js";
|
|
40
|
+
import { snapshotAlignment } from "../snapshot/snapshot.js";
|
|
39
41
|
import { transcribeCommand, transcribeParser } from "../transcribe/parse.js";
|
|
40
42
|
import { transcribe } from "../transcribe/transcribe.js";
|
|
41
43
|
const pipelineCommand = merge(
|
|
@@ -72,7 +74,8 @@ const parser = or(
|
|
|
72
74
|
transcribeCommand,
|
|
73
75
|
markupCommand,
|
|
74
76
|
alignCommand,
|
|
75
|
-
pipelineCommand
|
|
77
|
+
pipelineCommand,
|
|
78
|
+
snapshotCommand
|
|
76
79
|
);
|
|
77
80
|
async function main() {
|
|
78
81
|
var _stack2 = [];
|
|
@@ -174,6 +177,7 @@ async function main() {
|
|
|
174
177
|
parsed.audiobook,
|
|
175
178
|
{
|
|
176
179
|
granularity: parsed.granularity,
|
|
180
|
+
textRef: parsed.textRef,
|
|
177
181
|
primaryLocale: parsed.language,
|
|
178
182
|
logger,
|
|
179
183
|
...!parsed.noProgress && parsed.logLevel === "silent" && {
|
|
@@ -255,28 +259,33 @@ async function main() {
|
|
|
255
259
|
if (parsed.time) {
|
|
256
260
|
transcribeTiming.print();
|
|
257
261
|
}
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
262
|
+
const markedup = parsed.textRef === "id-fragment" ? parsed.markedup ?? join(os.tmpdir(), `stalign-markedup-${randomUUID()}.epub`) : parsed.epub;
|
|
263
|
+
if (parsed.textRef === "id-fragment") {
|
|
264
|
+
logger.info("Marking up EPUB...");
|
|
265
|
+
startProgressBar();
|
|
266
|
+
const markedup2 = parsed.markedup ?? join(os.tmpdir(), `stalign-markedup-${randomUUID()}.epub`);
|
|
267
|
+
if (!parsed.markedup) {
|
|
268
|
+
stack.defer(() => {
|
|
269
|
+
rmSync(markedup2, { recursive: true, force: true });
|
|
270
|
+
});
|
|
271
|
+
}
|
|
272
|
+
const markupTiming = await markup(parsed.epub, markedup2, {
|
|
273
|
+
granularity: parsed.granularity,
|
|
274
|
+
primaryLocale,
|
|
275
|
+
logger,
|
|
276
|
+
...!parsed.noProgress && parsed.logLevel === "silent" && {
|
|
277
|
+
onProgress: (progress) => {
|
|
278
|
+
progressBar.update(Math.floor(progress * 100));
|
|
279
|
+
}
|
|
273
280
|
}
|
|
281
|
+
});
|
|
282
|
+
resetProgressBar();
|
|
283
|
+
logger.info(`Markup complete, marked up EPUB saved to ${markedup2}.`);
|
|
284
|
+
if (parsed.time) {
|
|
285
|
+
markupTiming.print();
|
|
274
286
|
}
|
|
275
|
-
}
|
|
276
|
-
|
|
277
|
-
logger.info(`Markup complete, marked up EPUB saved to ${markedup}.`);
|
|
278
|
-
if (parsed.time) {
|
|
279
|
-
markupTiming.print();
|
|
287
|
+
} else {
|
|
288
|
+
logger.info("Skipping markup, text-range-type set to text-fragment");
|
|
280
289
|
}
|
|
281
290
|
logger.info("Aligning EPUB with audiobook...");
|
|
282
291
|
startProgressBar();
|
|
@@ -287,6 +296,7 @@ async function main() {
|
|
|
287
296
|
processedAudio,
|
|
288
297
|
{
|
|
289
298
|
granularity: parsed.granularity,
|
|
299
|
+
textRef: parsed.textRef,
|
|
290
300
|
primaryLocale,
|
|
291
301
|
logger,
|
|
292
302
|
...!parsed.noProgress && parsed.logLevel === "silent" && {
|
|
@@ -301,12 +311,16 @@ async function main() {
|
|
|
301
311
|
if (parsed.time) {
|
|
302
312
|
alignTiming.print();
|
|
303
313
|
}
|
|
314
|
+
break;
|
|
304
315
|
} catch (_) {
|
|
305
316
|
var _error = _, _hasError = true;
|
|
306
317
|
} finally {
|
|
307
318
|
__callDispose(_stack, _error, _hasError);
|
|
308
319
|
}
|
|
309
320
|
}
|
|
321
|
+
case "snapshot": {
|
|
322
|
+
await snapshotAlignment(parsed.epub, parsed.transcriptions, parsed.output);
|
|
323
|
+
}
|
|
310
324
|
}
|
|
311
325
|
} catch (_2) {
|
|
312
326
|
var _error2 = _2, _hasError2 = true;
|
package/dist/index.d.cts
CHANGED
|
@@ -6,6 +6,7 @@ import '@storyteller-platform/ghost-story';
|
|
|
6
6
|
import '@esfx/async-semaphore';
|
|
7
7
|
import 'pino';
|
|
8
8
|
import './process/AudioEncoding.cjs';
|
|
9
|
+
import '@storyteller-platform/ghost-story/constants';
|
|
9
10
|
import '@echogarden/text-segmentation';
|
|
10
11
|
import '@storyteller-platform/epub';
|
|
11
12
|
import './markup/map.cjs';
|
package/dist/index.d.ts
CHANGED
|
@@ -6,6 +6,7 @@ import '@storyteller-platform/ghost-story';
|
|
|
6
6
|
import '@esfx/async-semaphore';
|
|
7
7
|
import 'pino';
|
|
8
8
|
import './process/AudioEncoding.js';
|
|
9
|
+
import '@storyteller-platform/ghost-story/constants';
|
|
9
10
|
import '@echogarden/text-segmentation';
|
|
10
11
|
import '@storyteller-platform/epub';
|
|
11
12
|
import './markup/map.js';
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Cached built-ins shared by the bundler-emitted CommonJS interop helpers below.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Installs each key of `all` on `target` as an enumerable getter, using the
 * function stored under that key as the getter.
 */
var __export = (target, all) => {
  for (var name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Mirrors the own properties of `from` onto `to` as getters that read through
 * to `from`. Keys already owned by `to`, and the key named by `except`, are
 * left alone. Enumerability follows the source descriptor.
 */
var __copyProps = (to, from, except, desc) => {
  const copyable = (from && typeof from === "object") || typeof from === "function";
  if (!copyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};

/** Wraps `mod` in a fresh object flagged with a non-enumerable `__esModule: true`. */
var __toCommonJS = (mod) => {
  const shell = __defProp({}, "__esModule", { value: true });
  return __copyProps(shell, mod);
};
|
|
19
|
+
var parse_exports = {};
|
|
20
|
+
__export(parse_exports, {
|
|
21
|
+
snapshotCommand: () => snapshotCommand,
|
|
22
|
+
snapshotParser: () => snapshotParser
|
|
23
|
+
});
|
|
24
|
+
module.exports = __toCommonJS(parse_exports);
|
|
25
|
+
var import_core = require("@optique/core");
|
|
26
|
+
var import_run = require("@optique/run");
|
|
27
|
+
var import_parse = require("../common/parse.cjs");
|
|
28
|
+
// `--transcriptions`: path parser configured with mustExist and type "directory".
const transcriptionsOption = (0, import_core.option)(
  "--transcriptions",
  (0, import_run.path)({ mustExist: true, type: "directory" })
);

// `--epub`: path parser configured with mustExist, type "file" and the `.epub` extension.
const epubOption = (0, import_core.option)(
  "--epub",
  (0, import_run.path)({ mustExist: true, type: "file", extensions: [".epub"] }),
  {
    description: import_core.message`Path to an EPUB file to snapshot. This EPUB must have Media Overlays and audio files corresponding to the transcription files passed to --transcriptions.`
  }
);

// Positional OUTPUT_PATH argument: where the snapshot is written.
const outputArgument = (0, import_core.argument)(
  (0, import_run.path)({ type: "file", metavar: "OUTPUT_PATH" }),
  {
    description: import_core.message`Path to save the snapshot.`
  }
);

/** Options and argument accepted by the `snapshot` subcommand, grouped as "Snapshot". */
const snapshotParser = (0, import_core.object)("Snapshot", {
  transcriptions: transcriptionsOption,
  epub: epubOption,
  output: outputArgument
});

// Tags the parsed result with `action: "snapshot"`.
const snapshotAction = (0, import_core.object)({
  action: (0, import_core.constant)("snapshot")
});

/** The `snapshot` subcommand: action tag + snapshot options + shared logging options. */
const snapshotCommand = (0, import_core.command)(
  "snapshot",
  (0, import_core.merge)(snapshotAction, snapshotParser, import_parse.loggingParser),
  {
    description: import_core.message`Print a human-readable snapshot of the EPUB’s alignment to a text file.`
  }
);
|
|
57
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
58
|
+
0 && (module.exports = {
|
|
59
|
+
snapshotCommand,
|
|
60
|
+
snapshotParser
|
|
61
|
+
});
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import * as _optique_core from '@optique/core';
|
|
2
|
+
|
|
3
|
+
/**
 * Parser for the snapshot inputs: its parsed value carries the
 * `transcriptions`, `epub` and `output` strings.
 */
declare const snapshotParser: _optique_core.Parser<"sync", {
    readonly transcriptions: string;
    readonly epub: string;
    readonly output: string;
}, {
    readonly transcriptions: _optique_core.ValueParserResult<string> | undefined;
    readonly epub: _optique_core.ValueParserResult<string> | undefined;
    readonly output: _optique_core.ValueParserResult<string> | undefined;
}>;
/**
 * Parser for the `snapshot` command: the snapshot inputs combined with the
 * `action: "snapshot"` tag and the `noProgress` / `logLevel` / `time` fields.
 */
declare const snapshotCommand: _optique_core.Parser<"sync", {
    readonly action: "snapshot";
} & {
    readonly transcriptions: string;
    readonly epub: string;
    readonly output: string;
} & {
    readonly noProgress: boolean;
    readonly logLevel: "silent" | "debug" | "info" | "warn" | "error";
    readonly time: boolean;
}, ["matched", string] | ["parsing", Record<string | symbol, unknown>] | undefined>;
|
|
23
|
+
|
|
24
|
+
export { snapshotCommand, snapshotParser };
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import * as _optique_core from '@optique/core';
|
|
2
|
+
|
|
3
|
+
/**
 * Parser for the snapshot inputs: its parsed value carries the
 * `transcriptions`, `epub` and `output` strings.
 */
declare const snapshotParser: _optique_core.Parser<"sync", {
    readonly transcriptions: string;
    readonly epub: string;
    readonly output: string;
}, {
    readonly transcriptions: _optique_core.ValueParserResult<string> | undefined;
    readonly epub: _optique_core.ValueParserResult<string> | undefined;
    readonly output: _optique_core.ValueParserResult<string> | undefined;
}>;
/**
 * Parser for the `snapshot` command: the snapshot inputs combined with the
 * `action: "snapshot"` tag and the `noProgress` / `logLevel` / `time` fields.
 */
declare const snapshotCommand: _optique_core.Parser<"sync", {
    readonly action: "snapshot";
} & {
    readonly transcriptions: string;
    readonly epub: string;
    readonly output: string;
} & {
    readonly noProgress: boolean;
    readonly logLevel: "silent" | "debug" | "info" | "warn" | "error";
    readonly time: boolean;
}, ["matched", string] | ["parsing", Record<string | symbol, unknown>] | undefined>;
|
|
23
|
+
|
|
24
|
+
export { snapshotCommand, snapshotParser };
|