@storyteller-platform/align 0.1.20 → 0.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/align/align.cjs +81 -15
- package/dist/align/align.d.cts +4 -2
- package/dist/align/align.d.ts +4 -2
- package/dist/align/align.js +82 -16
- package/dist/align/getSentenceRanges.cjs +1 -0
- package/dist/align/getSentenceRanges.d.cts +1 -0
- package/dist/align/getSentenceRanges.d.ts +1 -0
- package/dist/align/getSentenceRanges.js +1 -0
- package/dist/align/parse.cjs +6 -0
- package/dist/align/parse.d.cts +3 -0
- package/dist/align/parse.d.ts +3 -0
- package/dist/align/parse.js +9 -1
- package/dist/align/textFragments.cjs +147 -0
- package/dist/align/textFragments.d.cts +23 -0
- package/dist/align/textFragments.d.ts +23 -0
- package/dist/align/textFragments.js +124 -0
- package/dist/cli/bin.cjs +38 -24
- package/dist/cli/bin.js +35 -21
- package/dist/index.d.cts +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/snapshot/parse.cjs +61 -0
- package/dist/snapshot/parse.d.cts +24 -0
- package/dist/snapshot/parse.d.ts +24 -0
- package/dist/snapshot/parse.js +45 -0
- package/dist/snapshot/snapshot.cjs +224 -0
- package/dist/snapshot/snapshot.d.cts +6 -0
- package/dist/snapshot/snapshot.d.ts +6 -0
- package/dist/snapshot/snapshot.js +161 -0
- package/dist/transcribe/parse.cjs +2 -2
- package/dist/transcribe/parse.js +1 -1
- package/dist/transcribe/transcribe.cjs +2 -0
- package/dist/transcribe/transcribe.d.cts +2 -1
- package/dist/transcribe/transcribe.d.ts +2 -1
- package/dist/transcribe/transcribe.js +2 -0
- package/package.json +3 -3
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import "../chunk-BIEQXUOY.js";
|
|
2
|
+
import {
|
|
3
|
+
argument,
|
|
4
|
+
command,
|
|
5
|
+
constant,
|
|
6
|
+
merge,
|
|
7
|
+
message,
|
|
8
|
+
object,
|
|
9
|
+
option
|
|
10
|
+
} from "@optique/core";
|
|
11
|
+
import { path } from "@optique/run";
|
|
12
|
+
import { loggingParser } from "../common/parse.js";
|
|
13
|
+
// Options and positional argument accepted by the `snapshot` subcommand:
// a directory of transcriptions, the EPUB to inspect, and the output path.
const snapshotParser = object("Snapshot", {
  transcriptions: option(
    "--transcriptions",
    path({ mustExist: true, type: "directory" }),
    {
      // Every other option/argument of this command has help text; this one
      // was missing it, so `--help` printed the flag with no explanation.
      description: message`Path to a directory of transcription JSON files, one per audio file referenced by the EPUB.`
    }
  ),
  epub: option(
    "--epub",
    path({ mustExist: true, type: "file", extensions: [".epub"] }),
    {
      description: message`Path to an EPUB file to snapshot. This EPUB must have Media Overlays and audio files corresponding to the transcription files passed to --transcriptions.`
    }
  ),
  output: argument(path({ type: "file", metavar: "OUTPUT_PATH" }), {
    description: message`Path to save the snapshot.`
  })
});
|
|
29
|
+
// Tags the parsed result with `action: "snapshot"`.
const snapshotAction = object({ action: constant("snapshot") });

// The `snapshot` subcommand: the action tag merged with the snapshot-specific
// options and the shared logging options.
const snapshotCommand = command(
  "snapshot",
  merge(snapshotAction, snapshotParser, loggingParser),
  {
    description: message`Print a human-readable snapshot of the EPUB’s alignment to a text file.`
  }
);
|
|
42
|
+
export {
|
|
43
|
+
snapshotCommand,
|
|
44
|
+
snapshotParser
|
|
45
|
+
};
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __knownSymbol = (name, symbol) => (symbol = Symbol[name]) ? symbol : Symbol.for("Symbol." + name);
|
|
7
|
+
var __typeError = (msg) => {
|
|
8
|
+
throw TypeError(msg);
|
|
9
|
+
};
|
|
10
|
+
var __export = (target, all) => {
|
|
11
|
+
for (var name in all)
|
|
12
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
13
|
+
};
|
|
14
|
+
var __copyProps = (to, from, except, desc) => {
|
|
15
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
16
|
+
for (let key of __getOwnPropNames(from))
|
|
17
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
18
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
19
|
+
}
|
|
20
|
+
return to;
|
|
21
|
+
};
|
|
22
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
23
|
+
var __using = (stack, value, async) => {
|
|
24
|
+
if (value != null) {
|
|
25
|
+
if (typeof value !== "object" && typeof value !== "function") __typeError("Object expected");
|
|
26
|
+
var dispose, inner;
|
|
27
|
+
if (async) dispose = value[__knownSymbol("asyncDispose")];
|
|
28
|
+
if (dispose === void 0) {
|
|
29
|
+
dispose = value[__knownSymbol("dispose")];
|
|
30
|
+
if (async) inner = dispose;
|
|
31
|
+
}
|
|
32
|
+
if (typeof dispose !== "function") __typeError("Object not disposable");
|
|
33
|
+
if (inner) dispose = function() {
|
|
34
|
+
try {
|
|
35
|
+
inner.call(this);
|
|
36
|
+
} catch (e) {
|
|
37
|
+
return Promise.reject(e);
|
|
38
|
+
}
|
|
39
|
+
};
|
|
40
|
+
stack.push([async, dispose, value]);
|
|
41
|
+
} else if (async) {
|
|
42
|
+
stack.push([async]);
|
|
43
|
+
}
|
|
44
|
+
return value;
|
|
45
|
+
};
|
|
46
|
+
var __callDispose = (stack, error, hasError) => {
|
|
47
|
+
var E = typeof SuppressedError === "function" ? SuppressedError : function(e, s, m, _) {
|
|
48
|
+
return _ = Error(m), _.name = "SuppressedError", _.error = e, _.suppressed = s, _;
|
|
49
|
+
};
|
|
50
|
+
var fail = (e) => error = hasError ? new E(e, error, "An error was suppressed during disposal") : (hasError = true, e);
|
|
51
|
+
var next = (it) => {
|
|
52
|
+
while (it = stack.pop()) {
|
|
53
|
+
try {
|
|
54
|
+
var result = it[1] && it[1].call(it[2]);
|
|
55
|
+
if (it[0]) return Promise.resolve(result).then(next, (e) => (fail(e), next()));
|
|
56
|
+
} catch (e) {
|
|
57
|
+
fail(e);
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
if (hasError) throw error;
|
|
61
|
+
};
|
|
62
|
+
return next();
|
|
63
|
+
};
|
|
64
|
+
var snapshot_exports = {};
|
|
65
|
+
__export(snapshot_exports, {
|
|
66
|
+
createAlignmentSnapshot: () => createAlignmentSnapshot,
|
|
67
|
+
snapshotAlignment: () => snapshotAlignment
|
|
68
|
+
});
|
|
69
|
+
module.exports = __toCommonJS(snapshot_exports);
|
|
70
|
+
var import_promises = require("node:fs/promises");
|
|
71
|
+
var import_node_path = require("node:path");
|
|
72
|
+
var import_posix = require("node:path/posix");
|
|
73
|
+
var import_epub = require("@storyteller-platform/epub");
|
|
74
|
+
var import_segmentation = require("../markup/segmentation.cjs");
|
|
75
|
+
/**
 * Writes an alignment snapshot for an EPUB to `outputPath`.
 *
 * Collects every `.json` file directly inside `transcriptionsPath`, opens the
 * EPUB at `epubPath`, renders the snapshot using id-fragment text references
 * and saves it as UTF-8 text. The EPUB is registered for disposal and is
 * disposed whether or not an error occurred; any error is rethrown by the
 * disposal helper.
 *
 * @param {string} epubPath Path of the EPUB to open.
 * @param {string} transcriptionsPath Directory that is scanned for `.json` files.
 * @param {string} outputPath File the snapshot text is written to.
 * @returns {Promise<void>}
 */
async function snapshotAlignment(epubPath, transcriptionsPath, outputPath) {
  const disposables = [];
  let failure;
  let failed = false;
  try {
    const filenames = await (0, import_promises.readdir)(transcriptionsPath);
    const transcriptionFilepaths = [];
    for (const filename of filenames) {
      if (filename.endsWith(".json")) {
        transcriptionFilepaths.push((0, import_node_path.join)(transcriptionsPath, filename));
      }
    }
    const epub = __using(disposables, await import_epub.Epub.from(epubPath));
    const snapshot = await createAlignmentSnapshot(
      epub,
      transcriptionFilepaths,
      "id-fragment"
    );
    await (0, import_promises.writeFile)(outputPath, snapshot, { encoding: "utf-8" });
  } catch (caught) {
    // Remember the error; __callDispose rethrows it after running disposers.
    failure = caught;
    failed = true;
  } finally {
    __callDispose(disposables, failure, failed);
  }
}
|
|
94
|
+
/**
 * Reads and parses a transcription JSON file, memoised per file path.
 *
 * A `wordTimeline` property, when present, is copied onto `timeline` so the
 * caller only has to look at `timeline`.
 */
async function loadSnapshotTranscription(cache, filepath) {
  let transcription = cache.get(filepath);
  if (transcription === undefined) {
    transcription = JSON.parse(
      await (0, import_promises.readFile)(filepath, { encoding: "utf-8" })
    );
    if ("wordTimeline" in transcription) {
      transcription.timeline = transcription.wordTimeline;
    }
    cache.set(filepath, transcription);
  }
  return transcription;
}

/**
 * Collects the text of timeline words for one audio clip: scanning from the
 * beginning, words are taken from the first one starting at or after
 * `clipStart` and the scan stops at the first word ending after `clipEnd`.
 */
function collectSnapshotClipWords(timeline, clipStart, clipEnd) {
  const words = [];
  let started = false;
  for (const word of timeline) {
    // Written as a negated `<=` so a NaN bound stops the scan immediately.
    if (!word || !(word.endTime <= clipEnd)) break;
    if (word.startTime >= clipStart) started = true;
    if (started) words.push(word.text);
  }
  return words;
}

/**
 * Renders a human-readable snapshot of an EPUB's Media Overlay alignment.
 *
 * For every manifest item referenced as a media overlay, the SMIL document is
 * parsed and its first `seq` is walked. Each chapter starts with a
 * `// <chapter file name>` header. For each `par` that resolves to a chapter
 * sentence, a `Text:` line is emitted, followed by an `Audio:` line holding
 * the transcription words that fall inside the clip, when a transcription
 * file with the same base name as the audio file is available.
 *
 * @param epub Opened EPUB; must provide `getManifest`, `readItemContents`
 *   and `readFileContents`.
 * @param {string[]} transcriptionFilepaths Paths of transcription JSON files.
 * @param {"id-fragment" | "text-fragment"} textRef How `text` `src` values
 *   point at sentences.
 * @returns {Promise<string>} The snapshot text.
 */
async function createAlignmentSnapshot(epub, transcriptionFilepaths, textRef) {
  let newSnapshot = "";
  // Each transcription file is read and parsed once instead of once per
  // sentence.
  const transcriptionCache = new Map();
  const manifest = await epub.getManifest();
  const mediaOverlayItems = Object.values(manifest)
    .map((item) => item.mediaOverlay)
    .filter((mediaOverlayId) => !!mediaOverlayId)
    .map((id) => manifest[id])
    // Skip overlay ids that do not resolve to a manifest item.
    .filter((item) => item !== undefined);
  for (const item of mediaOverlayItems) {
    const contents = await epub.readItemContents(item.id, "utf-8");
    const parsed = import_epub.Epub.xmlParser.parse(contents);
    const smil = import_epub.Epub.findXmlChildByName("smil", parsed);
    if (!smil) continue;
    const body = import_epub.Epub.findXmlChildByName("body", import_epub.Epub.getXmlChildren(smil));
    if (!body) continue;
    const seq = import_epub.Epub.findXmlChildByName("seq", import_epub.Epub.getXmlChildren(body));
    if (!seq) continue;
    const textref = seq[":@"]?.["@_epub:textref"];
    if (!textref) continue;
    newSnapshot += `// ${(0, import_posix.basename)(textref)}\n\n`;
    const chapterContents = await epub.readFileContents(
      textref,
      item.href,
      "utf-8"
    );
    const chapterXml = import_epub.Epub.xhtmlParser.parse(chapterContents);
    const { result: segmentation } = await (0, import_segmentation.getXhtmlSegmentation)(
      import_epub.Epub.getXhtmlBody(chapterXml),
      {
        primaryLocale: new Intl.Locale("en-US")
      }
    );
    let lastChapterSentence = -1;
    // Whitespace-only segments are not counted as sentences.
    const chapterSentences = segmentation.filter((s) => s.text.match(/\S/));
    for (const par of import_epub.Epub.getXmlChildren(seq)) {
      newSnapshot += "\n";
      const text = import_epub.Epub.findXmlChildByName("text", import_epub.Epub.getXmlChildren(par));
      if (!text) continue;
      const audio = import_epub.Epub.findXmlChildByName("audio", import_epub.Epub.getXmlChildren(par));
      if (!audio) continue;
      const textSrc = text[":@"]?.["@_src"];
      if (!textSrc) continue;
      const result = textRef === "id-fragment"
        ? getTextSentenceIndexByIdFragment(textSrc)
        : getTextSentenceIndexByTextFragment(textSrc, chapterSentences, lastChapterSentence);
      if (result === null) continue;
      const { fragment, sentenceId } = result;
      const textSentence = chapterSentences[sentenceId]?.text;
      if (!textSentence) continue;
      lastChapterSentence = sentenceId;
      if (textRef === "text-fragment") {
        newSnapshot += `${fragment}\n`;
      }
      // Strip every newline (not just the first) so each sentence stays on a
      // single `Text:` line.
      newSnapshot += `Text: ${textSentence.replace(/\n/g, "")}\n`;
      const audioSrc = audio[":@"]?.["@_src"];
      if (!audioSrc) continue;
      const audioStart = audio[":@"]?.["@_clipBegin"];
      const audioEnd = audio[":@"]?.["@_clipEnd"];
      if (!audioStart || !audioEnd) continue;
      // The last character of each clip value is dropped before parsing
      // (presumably a trailing "s" unit — confirm against the SMIL writer).
      const audioStartTime = parseFloat(audioStart.slice(0, -1));
      const audioEndTime = parseFloat(audioEnd.slice(0, -1));
      const audioFilename = (0, import_posix.basename)(audioSrc, (0, import_posix.extname)(audioSrc));
      const transcriptionFilepath = transcriptionFilepaths.find(
        (f) => (0, import_node_path.basename)(f, (0, import_node_path.extname)(f)) === audioFilename
      );
      if (!transcriptionFilepath) continue;
      const transcription = await loadSnapshotTranscription(
        transcriptionCache,
        transcriptionFilepath
      );
      const transcriptionSentence = collectSnapshotClipWords(
        transcription.timeline,
        audioStartTime,
        audioEndTime
      ).join(" ");
      newSnapshot += `Audio: ${transcriptionSentence}\n`;
    }
    newSnapshot += "\n";
  }
  return newSnapshot;
}
|
|
193
|
+
/**
 * Resolves a text-fragment reference (`...#:~:text=[prefix-,]start[,...]`) to
 * the index of the chapter sentence it points at.
 *
 * Only sentences after `lastChapterSentence` are searched. A sentence matches
 * when it starts with the fragment's start text and, if the fragment carries
 * a prefix and a previous sentence exists, that previous sentence ends with
 * the prefix. Both sides are compared lower-cased with newlines turned into
 * spaces.
 *
 * @param {string} textSrc `src` attribute value of a SMIL `text` element.
 * @param {{ text: string }[]} chapterSentences Sentences of the chapter.
 * @param {number} lastChapterSentence Index of the previously matched
 *   sentence, or -1 when nothing has been matched yet.
 * @returns {{ fragment: string, sentenceId: number } | null} The matched
 *   fragment and sentence index, or null when `textSrc` has no usable text
 *   fragment or no sentence matches.
 */
function getTextSentenceIndexByTextFragment(textSrc, chapterSentences, lastChapterSentence) {
  const textSrcMatch = textSrc.match(/#:~:text=(.+)$/);
  if (!textSrcMatch) return null;
  const [fragment, textFragment] = textSrcMatch;
  // Same normalisation for sentence text and fragment text, so that case and
  // any number of newlines cannot cause a spurious mismatch.
  const normalize = (value) => value.replace(/\n/g, " ").toLowerCase();
  const textFragmentParts = textFragment.split(",");
  const firstPart = textFragmentParts[0];
  // A first part ending in "-" (with something before it) is a prefix.
  const hasPrefix = firstPart.length > 1 && firstPart.endsWith("-");
  const encodedStart = hasPrefix ? textFragmentParts[1] : firstPart;
  // A prefix without a start part cannot identify a sentence.
  if (encodedStart === undefined) return null;
  let textFragmentPrefix = "";
  let textFragmentStart;
  try {
    if (hasPrefix) {
      textFragmentPrefix = normalize(decodeURIComponent(firstPart).slice(0, -1));
    }
    textFragmentStart = normalize(decodeURIComponent(encodedStart));
  } catch {
    // Malformed percent-encoding: treat the reference as unresolvable.
    return null;
  }
  for (let index = lastChapterSentence + 1; index < chapterSentences.length; index++) {
    const prev = chapterSentences[index - 1];
    const prefixMatches = !prev || normalize(prev.text).endsWith(textFragmentPrefix);
    if (prefixMatches && normalize(chapterSentences[index].text).startsWith(textFragmentStart)) {
      return { fragment, sentenceId: index };
    }
  }
  return null;
}
|
|
214
|
+
/**
 * Extracts the sentence index from an id-fragment reference, i.e. a `src`
 * whose fragment ends in `s<digits>`.
 *
 * @param {string} textSrc `src` attribute value of a SMIL `text` element.
 * @returns {{ fragment: string, sentenceId: number } | null} The matched
 *   fragment (from `#` to the end) and the parsed index, or null when the
 *   value does not end in such a fragment.
 */
function getTextSentenceIndexByIdFragment(textSrc) {
  const idMatch = textSrc.match(/#.*s([0-9]+)$/);
  if (idMatch === null) {
    return null;
  }
  return {
    fragment: idMatch[0],
    sentenceId: parseInt(idMatch[1], 10)
  };
}
|
|
220
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
221
|
+
0 && (module.exports = {
|
|
222
|
+
createAlignmentSnapshot,
|
|
223
|
+
snapshotAlignment
|
|
224
|
+
});
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import { Epub } from '@storyteller-platform/epub';
|
|
2
|
+
|
|
3
|
+
declare function snapshotAlignment(epubPath: string, transcriptionsPath: string, outputPath: string): Promise<void>;
|
|
4
|
+
declare function createAlignmentSnapshot(epub: Epub, transcriptionFilepaths: string[], textRef: "id-fragment" | "text-fragment"): Promise<string>;
|
|
5
|
+
|
|
6
|
+
export { createAlignmentSnapshot, snapshotAlignment };
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import { Epub } from '@storyteller-platform/epub';
|
|
2
|
+
|
|
3
|
+
declare function snapshotAlignment(epubPath: string, transcriptionsPath: string, outputPath: string): Promise<void>;
|
|
4
|
+
declare function createAlignmentSnapshot(epub: Epub, transcriptionFilepaths: string[], textRef: "id-fragment" | "text-fragment"): Promise<string>;
|
|
5
|
+
|
|
6
|
+
export { createAlignmentSnapshot, snapshotAlignment };
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
import {
|
|
2
|
+
__callDispose,
|
|
3
|
+
__using
|
|
4
|
+
} from "../chunk-BIEQXUOY.js";
|
|
5
|
+
import { readFile, readdir, writeFile } from "node:fs/promises";
|
|
6
|
+
import { basename, extname, join } from "node:path";
|
|
7
|
+
import {
|
|
8
|
+
basename as posixBasename,
|
|
9
|
+
extname as posixExtname
|
|
10
|
+
} from "node:path/posix";
|
|
11
|
+
import { Epub } from "@storyteller-platform/epub";
|
|
12
|
+
import { getXhtmlSegmentation } from "../markup/segmentation.js";
|
|
13
|
+
/**
 * Writes an alignment snapshot for an EPUB to `outputPath`.
 *
 * Collects every `.json` file directly inside `transcriptionsPath`, opens the
 * EPUB at `epubPath`, renders the snapshot using id-fragment text references
 * and saves it as UTF-8 text. The EPUB is registered for disposal and is
 * disposed whether or not an error occurred; any error is rethrown by the
 * disposal helper.
 *
 * @param {string} epubPath Path of the EPUB to open.
 * @param {string} transcriptionsPath Directory that is scanned for `.json` files.
 * @param {string} outputPath File the snapshot text is written to.
 * @returns {Promise<void>}
 */
async function snapshotAlignment(epubPath, transcriptionsPath, outputPath) {
  const disposables = [];
  let failure;
  let failed = false;
  try {
    const filenames = await readdir(transcriptionsPath);
    const transcriptionFilepaths = [];
    for (const filename of filenames) {
      if (filename.endsWith(".json")) {
        transcriptionFilepaths.push(join(transcriptionsPath, filename));
      }
    }
    const epub = __using(disposables, await Epub.from(epubPath));
    const snapshot = await createAlignmentSnapshot(
      epub,
      transcriptionFilepaths,
      "id-fragment"
    );
    await writeFile(outputPath, snapshot, { encoding: "utf-8" });
  } catch (caught) {
    // Remember the error; __callDispose rethrows it after running disposers.
    failure = caught;
    failed = true;
  } finally {
    __callDispose(disposables, failure, failed);
  }
}
|
|
32
|
+
/**
 * Reads and parses a transcription JSON file, memoised per file path.
 *
 * A `wordTimeline` property, when present, is copied onto `timeline` so the
 * caller only has to look at `timeline`.
 */
async function loadSnapshotTranscription(cache, filepath) {
  let transcription = cache.get(filepath);
  if (transcription === undefined) {
    transcription = JSON.parse(await readFile(filepath, { encoding: "utf-8" }));
    if ("wordTimeline" in transcription) {
      transcription.timeline = transcription.wordTimeline;
    }
    cache.set(filepath, transcription);
  }
  return transcription;
}

/**
 * Collects the text of timeline words for one audio clip: scanning from the
 * beginning, words are taken from the first one starting at or after
 * `clipStart` and the scan stops at the first word ending after `clipEnd`.
 */
function collectSnapshotClipWords(timeline, clipStart, clipEnd) {
  const words = [];
  let started = false;
  for (const word of timeline) {
    // Written as a negated `<=` so a NaN bound stops the scan immediately.
    if (!word || !(word.endTime <= clipEnd)) break;
    if (word.startTime >= clipStart) started = true;
    if (started) words.push(word.text);
  }
  return words;
}

/**
 * Renders a human-readable snapshot of an EPUB's Media Overlay alignment.
 *
 * For every manifest item referenced as a media overlay, the SMIL document is
 * parsed and its first `seq` is walked. Each chapter starts with a
 * `// <chapter file name>` header. For each `par` that resolves to a chapter
 * sentence, a `Text:` line is emitted, followed by an `Audio:` line holding
 * the transcription words that fall inside the clip, when a transcription
 * file with the same base name as the audio file is available.
 *
 * @param epub Opened EPUB; must provide `getManifest`, `readItemContents`
 *   and `readFileContents`.
 * @param {string[]} transcriptionFilepaths Paths of transcription JSON files.
 * @param {"id-fragment" | "text-fragment"} textRef How `text` `src` values
 *   point at sentences.
 * @returns {Promise<string>} The snapshot text.
 */
async function createAlignmentSnapshot(epub, transcriptionFilepaths, textRef) {
  let newSnapshot = "";
  // Each transcription file is read and parsed once instead of once per
  // sentence.
  const transcriptionCache = new Map();
  const manifest = await epub.getManifest();
  const mediaOverlayItems = Object.values(manifest)
    .map((item) => item.mediaOverlay)
    .filter((mediaOverlayId) => !!mediaOverlayId)
    .map((id) => manifest[id])
    // Skip overlay ids that do not resolve to a manifest item.
    .filter((item) => item !== undefined);
  for (const item of mediaOverlayItems) {
    const contents = await epub.readItemContents(item.id, "utf-8");
    const parsed = Epub.xmlParser.parse(contents);
    const smil = Epub.findXmlChildByName("smil", parsed);
    if (!smil) continue;
    const body = Epub.findXmlChildByName("body", Epub.getXmlChildren(smil));
    if (!body) continue;
    const seq = Epub.findXmlChildByName("seq", Epub.getXmlChildren(body));
    if (!seq) continue;
    const textref = seq[":@"]?.["@_epub:textref"];
    if (!textref) continue;
    newSnapshot += `// ${posixBasename(textref)}\n\n`;
    const chapterContents = await epub.readFileContents(
      textref,
      item.href,
      "utf-8"
    );
    const chapterXml = Epub.xhtmlParser.parse(chapterContents);
    const { result: segmentation } = await getXhtmlSegmentation(
      Epub.getXhtmlBody(chapterXml),
      {
        primaryLocale: new Intl.Locale("en-US")
      }
    );
    let lastChapterSentence = -1;
    // Whitespace-only segments are not counted as sentences.
    const chapterSentences = segmentation.filter((s) => s.text.match(/\S/));
    for (const par of Epub.getXmlChildren(seq)) {
      newSnapshot += "\n";
      const text = Epub.findXmlChildByName("text", Epub.getXmlChildren(par));
      if (!text) continue;
      const audio = Epub.findXmlChildByName("audio", Epub.getXmlChildren(par));
      if (!audio) continue;
      const textSrc = text[":@"]?.["@_src"];
      if (!textSrc) continue;
      const result = textRef === "id-fragment"
        ? getTextSentenceIndexByIdFragment(textSrc)
        : getTextSentenceIndexByTextFragment(textSrc, chapterSentences, lastChapterSentence);
      if (result === null) continue;
      const { fragment, sentenceId } = result;
      const textSentence = chapterSentences[sentenceId]?.text;
      if (!textSentence) continue;
      lastChapterSentence = sentenceId;
      if (textRef === "text-fragment") {
        newSnapshot += `${fragment}\n`;
      }
      // Strip every newline (not just the first) so each sentence stays on a
      // single `Text:` line.
      newSnapshot += `Text: ${textSentence.replace(/\n/g, "")}\n`;
      const audioSrc = audio[":@"]?.["@_src"];
      if (!audioSrc) continue;
      const audioStart = audio[":@"]?.["@_clipBegin"];
      const audioEnd = audio[":@"]?.["@_clipEnd"];
      if (!audioStart || !audioEnd) continue;
      // The last character of each clip value is dropped before parsing
      // (presumably a trailing "s" unit — confirm against the SMIL writer).
      const audioStartTime = parseFloat(audioStart.slice(0, -1));
      const audioEndTime = parseFloat(audioEnd.slice(0, -1));
      const audioFilename = posixBasename(audioSrc, posixExtname(audioSrc));
      const transcriptionFilepath = transcriptionFilepaths.find(
        (f) => basename(f, extname(f)) === audioFilename
      );
      if (!transcriptionFilepath) continue;
      const transcription = await loadSnapshotTranscription(
        transcriptionCache,
        transcriptionFilepath
      );
      const transcriptionSentence = collectSnapshotClipWords(
        transcription.timeline,
        audioStartTime,
        audioEndTime
      ).join(" ");
      newSnapshot += `Audio: ${transcriptionSentence}\n`;
    }
    newSnapshot += "\n";
  }
  return newSnapshot;
}
|
|
131
|
+
/**
 * Resolves a text-fragment reference (`...#:~:text=[prefix-,]start[,...]`) to
 * the index of the chapter sentence it points at.
 *
 * Only sentences after `lastChapterSentence` are searched. A sentence matches
 * when it starts with the fragment's start text and, if the fragment carries
 * a prefix and a previous sentence exists, that previous sentence ends with
 * the prefix. Both sides are compared lower-cased with newlines turned into
 * spaces.
 *
 * @param {string} textSrc `src` attribute value of a SMIL `text` element.
 * @param {{ text: string }[]} chapterSentences Sentences of the chapter.
 * @param {number} lastChapterSentence Index of the previously matched
 *   sentence, or -1 when nothing has been matched yet.
 * @returns {{ fragment: string, sentenceId: number } | null} The matched
 *   fragment and sentence index, or null when `textSrc` has no usable text
 *   fragment or no sentence matches.
 */
function getTextSentenceIndexByTextFragment(textSrc, chapterSentences, lastChapterSentence) {
  const textSrcMatch = textSrc.match(/#:~:text=(.+)$/);
  if (!textSrcMatch) return null;
  const [fragment, textFragment] = textSrcMatch;
  // Same normalisation for sentence text and fragment text, so that case and
  // any number of newlines cannot cause a spurious mismatch.
  const normalize = (value) => value.replace(/\n/g, " ").toLowerCase();
  const textFragmentParts = textFragment.split(",");
  const firstPart = textFragmentParts[0];
  // A first part ending in "-" (with something before it) is a prefix.
  const hasPrefix = firstPart.length > 1 && firstPart.endsWith("-");
  const encodedStart = hasPrefix ? textFragmentParts[1] : firstPart;
  // A prefix without a start part cannot identify a sentence.
  if (encodedStart === undefined) return null;
  let textFragmentPrefix = "";
  let textFragmentStart;
  try {
    if (hasPrefix) {
      textFragmentPrefix = normalize(decodeURIComponent(firstPart).slice(0, -1));
    }
    textFragmentStart = normalize(decodeURIComponent(encodedStart));
  } catch {
    // Malformed percent-encoding: treat the reference as unresolvable.
    return null;
  }
  for (let index = lastChapterSentence + 1; index < chapterSentences.length; index++) {
    const prev = chapterSentences[index - 1];
    const prefixMatches = !prev || normalize(prev.text).endsWith(textFragmentPrefix);
    if (prefixMatches && normalize(chapterSentences[index].text).startsWith(textFragmentStart)) {
      return { fragment, sentenceId: index };
    }
  }
  return null;
}
|
|
152
|
+
/**
 * Extracts the sentence index from an id-fragment reference, i.e. a `src`
 * whose fragment ends in `s<digits>`.
 *
 * @param {string} textSrc `src` attribute value of a SMIL `text` element.
 * @returns {{ fragment: string, sentenceId: number } | null} The matched
 *   fragment (from `#` to the end) and the parsed index, or null when the
 *   value does not end in such a fragment.
 */
function getTextSentenceIndexByIdFragment(textSrc) {
  const idMatch = textSrc.match(/#.*s([0-9]+)$/);
  if (idMatch === null) {
    return null;
  }
  return {
    fragment: idMatch[0],
    sentenceId: parseInt(idMatch[1], 10)
  };
}
|
|
158
|
+
export {
|
|
159
|
+
createAlignmentSnapshot,
|
|
160
|
+
snapshotAlignment
|
|
161
|
+
};
|
|
@@ -25,7 +25,7 @@ module.exports = __toCommonJS(parse_exports);
|
|
|
25
25
|
var import_core = require("@optique/core");
|
|
26
26
|
var import_valueparser = require("@optique/core/valueparser");
|
|
27
27
|
var import_valueparser2 = require("@optique/run/valueparser");
|
|
28
|
-
var import_ghost_story = require("@storyteller-platform/ghost-story");
|
|
28
|
+
var import_constants = require("@storyteller-platform/ghost-story/constants");
|
|
29
29
|
var import_parse = require("../common/parse.cjs");
|
|
30
30
|
const transcribeParser = (0, import_core.or)(
|
|
31
31
|
(0, import_core.object)("whisper.cpp", {
|
|
@@ -35,7 +35,7 @@ const transcribeParser = (0, import_core.or)(
|
|
|
35
35
|
(0, import_core.choice)(["whisper.cpp"], { metavar: "whisper.cpp" })
|
|
36
36
|
),
|
|
37
37
|
model: (0, import_core.withDefault)(
|
|
38
|
-
(0, import_core.option)("--model", "-m", (0, import_core.choice)(
|
|
38
|
+
(0, import_core.option)("--model", "-m", (0, import_core.choice)(import_constants.WHISPER_MODELS, { metavar: "MODEL" }), {
|
|
39
39
|
description: import_core.message`The whisper model to use`
|
|
40
40
|
}),
|
|
41
41
|
"tiny.en"
|
package/dist/transcribe/parse.js
CHANGED
|
@@ -16,7 +16,7 @@ import {
|
|
|
16
16
|
} from "@optique/core";
|
|
17
17
|
import { string, url } from "@optique/core/valueparser";
|
|
18
18
|
import { path } from "@optique/run/valueparser";
|
|
19
|
-
import { WHISPER_MODELS } from "@storyteller-platform/ghost-story";
|
|
19
|
+
import { WHISPER_MODELS } from "@storyteller-platform/ghost-story/constants";
|
|
20
20
|
import {
|
|
21
21
|
languageParser,
|
|
22
22
|
loggingParser,
|
|
@@ -140,6 +140,8 @@ async function transcribe(input, output, locale, options) {
|
|
|
140
140
|
});
|
|
141
141
|
options.logger?.info(`Found existing transcription for ${filepath}`);
|
|
142
142
|
transcriptions.push(transcriptionFilepath);
|
|
143
|
+
options.onProgress?.((transcriptions.length + 1) / filenames.length);
|
|
144
|
+
return;
|
|
143
145
|
} catch {
|
|
144
146
|
}
|
|
145
147
|
if (aborted()) throw new Error("Aborted");
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import * as _storyteller_platform_ghost_story from '@storyteller-platform/ghost-story';
|
|
2
|
-
import {
|
|
2
|
+
import { TimingAggregator } from '@storyteller-platform/ghost-story';
|
|
3
3
|
import { Logger } from 'pino';
|
|
4
|
+
import { RecognitionEngine, WhisperModel } from '@storyteller-platform/ghost-story/constants';
|
|
4
5
|
|
|
5
6
|
type WhisperCpuOverride = "blas" | "cpu" | null;
|
|
6
7
|
interface TranscribeOptions {
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import * as _storyteller_platform_ghost_story from '@storyteller-platform/ghost-story';
|
|
2
|
-
import {
|
|
2
|
+
import { TimingAggregator } from '@storyteller-platform/ghost-story';
|
|
3
3
|
import { Logger } from 'pino';
|
|
4
|
+
import { RecognitionEngine, WhisperModel } from '@storyteller-platform/ghost-story/constants';
|
|
4
5
|
|
|
5
6
|
type WhisperCpuOverride = "blas" | "cpu" | null;
|
|
6
7
|
interface TranscribeOptions {
|
|
@@ -71,6 +71,8 @@ async function transcribe(input, output, locale, options) {
|
|
|
71
71
|
});
|
|
72
72
|
options.logger?.info(`Found existing transcription for ${filepath}`);
|
|
73
73
|
transcriptions.push(transcriptionFilepath);
|
|
74
|
+
options.onProgress?.((transcriptions.length + 1) / filenames.length);
|
|
75
|
+
return;
|
|
74
76
|
} catch {
|
|
75
77
|
}
|
|
76
78
|
if (aborted()) throw new Error("Aborted");
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@storyteller-platform/align",
|
|
3
|
-
"version": "0.1.20",
|
|
3
|
+
"version": "0.1.22",
|
|
4
4
|
"description": "A library and CLI for automatically aligning audiobooks and EPUBs to produce Media Overlays",
|
|
5
5
|
"author": "Shane Friedman",
|
|
6
6
|
"license": "MIT",
|
|
@@ -60,8 +60,8 @@
|
|
|
60
60
|
"@optique/core": "^0.10.7",
|
|
61
61
|
"@optique/run": "^0.10.7",
|
|
62
62
|
"@storyteller-platform/audiobook": "^0.3.10",
|
|
63
|
-
"@storyteller-platform/epub": "^0.4.
|
|
64
|
-
"@storyteller-platform/ghost-story": "^0.1.
|
|
63
|
+
"@storyteller-platform/epub": "^0.4.9",
|
|
64
|
+
"@storyteller-platform/ghost-story": "^0.1.8",
|
|
65
65
|
"@storyteller-platform/transliteration": "^3.1.2",
|
|
66
66
|
"chalk": "^5.4.1",
|
|
67
67
|
"cli-progress": "^3.12.0",
|