@storyteller-platform/align 0.1.16 → 0.1.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +26 -0
- package/dist/align/align.cjs +6 -7
- package/dist/align/align.js +6 -7
- package/dist/cli/bin.cjs +1 -1
- package/dist/cli/bin.js +2 -2
- package/dist/common/ffmpeg.cjs +12 -12
- package/dist/common/ffmpeg.js +12 -12
- package/dist/errorAlign/backtraceGraph.cjs +5 -8
- package/dist/errorAlign/backtraceGraph.js +5 -8
- package/dist/errorAlign/beamSearch.cjs +1 -2
- package/dist/errorAlign/beamSearch.js +1 -2
- package/dist/markup/markup.cjs +3 -4
- package/dist/markup/markup.js +3 -4
- package/dist/markup/serializeDom.cjs +1 -1
- package/dist/markup/serializeDom.js +1 -1
- package/dist/process/processAudiobook.cjs +8 -12
- package/dist/process/processAudiobook.js +8 -12
- package/dist/process/ranges.cjs +3 -3
- package/dist/process/ranges.js +3 -3
- package/dist/transcribe/transcribe.cjs +9 -14
- package/dist/transcribe/transcribe.js +9 -14
- package/package.json +3 -1
- package/prebuilds/darwin-arm64/@storyteller-platform+align.node +0 -0
- package/prebuilds/linux-arm64/@storyteller-platform+align.node +0 -0
- package/prebuilds/linux-x64/@storyteller-platform+align.node +0 -0
- package/dist/align/__tests__/align.test.cjs +0 -283
- package/dist/align/__tests__/align.test.d.cts +0 -2
- package/dist/align/__tests__/align.test.d.ts +0 -2
- package/dist/align/__tests__/align.test.js +0 -219
- package/dist/align/__tests__/slugify.test.cjs +0 -64
- package/dist/align/__tests__/slugify.test.d.cts +0 -2
- package/dist/align/__tests__/slugify.test.d.ts +0 -2
- package/dist/align/__tests__/slugify.test.js +0 -41
- package/dist/errorAlign/__tests__/errorAlign.test.cjs +0 -100
- package/dist/errorAlign/__tests__/errorAlign.test.d.cts +0 -2
- package/dist/errorAlign/__tests__/errorAlign.test.d.ts +0 -2
- package/dist/errorAlign/__tests__/errorAlign.test.js +0 -77
- package/dist/errorAlign/__tests__/native.test.cjs +0 -118
- package/dist/errorAlign/__tests__/native.test.d.cts +0 -2
- package/dist/errorAlign/__tests__/native.test.d.ts +0 -2
- package/dist/errorAlign/__tests__/native.test.js +0 -107
- package/dist/markup/__tests__/markup.test.cjs +0 -491
- package/dist/markup/__tests__/markup.test.d.cts +0 -2
- package/dist/markup/__tests__/markup.test.d.ts +0 -2
- package/dist/markup/__tests__/markup.test.js +0 -468
- package/dist/markup/__tests__/parseDom.test.cjs +0 -112
- package/dist/markup/__tests__/parseDom.test.d.cts +0 -2
- package/dist/markup/__tests__/parseDom.test.d.ts +0 -2
- package/dist/markup/__tests__/parseDom.test.js +0 -89
- package/dist/markup/__tests__/serializeDom.test.cjs +0 -120
- package/dist/markup/__tests__/serializeDom.test.d.cts +0 -2
- package/dist/markup/__tests__/serializeDom.test.d.ts +0 -2
- package/dist/markup/__tests__/serializeDom.test.js +0 -97
- package/dist/markup/__tests__/transform.test.cjs +0 -122
- package/dist/markup/__tests__/transform.test.d.cts +0 -2
- package/dist/markup/__tests__/transform.test.d.ts +0 -2
- package/dist/markup/__tests__/transform.test.js +0 -99
- package/dist/process/__tests__/processAudiobook.test.cjs +0 -232
- package/dist/process/__tests__/processAudiobook.test.d.cts +0 -2
- package/dist/process/__tests__/processAudiobook.test.d.ts +0 -2
- package/dist/process/__tests__/processAudiobook.test.js +0 -209
|
@@ -1,107 +0,0 @@
|
|
|
1
|
-
import assert from "node:assert";
|
|
2
|
-
import { describe, test } from "node:test";
|
|
3
|
-
import { errorAlignBeamSearch as tsBeamSearch } from "../beamSearch.js";
|
|
4
|
-
import {
|
|
5
|
-
computeErrorAlignDistanceMatrix as tsErrorAlign,
|
|
6
|
-
computeLevenshteinDistanceMatrix as tsLevenshtein
|
|
7
|
-
} from "../editDistance.js";
|
|
8
|
-
import { SubgraphMetadata } from "../graphMetadata.js";
|
|
9
|
-
import {
|
|
10
|
-
computeErrorAlignDistanceMatrix as nativeErrorAlign,
|
|
11
|
-
computeLevenshteinDistanceMatrix as nativeLevenshtein,
|
|
12
|
-
errorAlignBeamSearch as nativeBeamSearch
|
|
13
|
-
} from "../native.js";
|
|
14
|
-
import { getAlignments } from "../pathToAlignment.js";
|
|
15
|
-
import {
|
|
16
|
-
basicNormalizer,
|
|
17
|
-
basicTokenizer,
|
|
18
|
-
ensureLengthPreservation,
|
|
19
|
-
unpackRegexMatch
|
|
20
|
-
} from "../utils.js";
|
|
21
|
-
// Parity suite: every test feeds the same input to the TypeScript
// implementation and to the native binding and requires deeply equal results.
void describe("native C++ vs TypeScript implementations", () => {
  // Distance-matrix suites, described as data. `args` is spread into both
  // implementations so each call keeps the exact arity it was written with
  // (two arguments, or three when a backtrace is requested).
  const matrixSuites = [
    {
      suite: "Levenshtein distance matrix",
      viaTs: tsLevenshtein,
      viaNative: nativeLevenshtein,
      cases: [
        { title: "string input", args: ["kitten", "sitting"] },
        {
          title: "string array input",
          args: [
            ["hello", "world"],
            ["hello", "there"]
          ]
        },
        { title: "with backtrace", args: ["kitten", "sitting", true] }
      ]
    },
    {
      suite: "error align distance matrix",
      viaTs: tsErrorAlign,
      viaNative: nativeErrorAlign,
      cases: [
        { title: "string input", args: ["test", "best"] },
        { title: "with backtrace", args: ["test", "best", true] }
      ]
    }
  ];

  for (const { suite, viaTs, viaNative, cases } of matrixSuites) {
    void describe(suite, () => {
      for (const { title, args } of cases) {
        void test(title, () => {
          const expected = viaTs(...args);
          const actual = viaNative(...args);
          assert.deepStrictEqual(actual, expected);
        });
      }
    });
  }

  void describe("beam search", () => {
    /**
     * Tokenizes and normalizes both texts with the basic tokenizer/normalizer
     * and wraps the results in a SubgraphMetadata for the beam searches.
     *
     * @param {string} ref - Reference text.
     * @param {string} hyp - Hypothesis text.
     * @returns {SubgraphMetadata} Metadata built from both texts.
     */
    function buildSubgraphMetadata(ref, hyp) {
      const tokenize = unpackRegexMatch(basicTokenizer);
      const normalize = ensureLengthPreservation(basicNormalizer);
      const refMatches = tokenize(ref);
      const hypMatches = tokenize(hyp);
      // Only the first entry of each token match is normalized.
      const normalizeFirst = ([token]) => normalize(token);
      const refNormalized = refMatches.map(normalizeFirst);
      const hypNormalized = hypMatches.map(normalizeFirst);
      return new SubgraphMetadata(
        ref,
        hyp,
        refMatches,
        hypMatches,
        refNormalized,
        hypNormalized
      );
    }

    const searchCases = [
      ["simple substitution", "hello", "jello"],
      [
        "multi-word alignment with all op types",
        "This is a substitution test deleted.",
        "Inserted this is a contribution test."
      ],
      ["identical strings", "test words", "test words"]
    ];

    for (const [title, ref, hyp] of searchCases) {
      void test(title, () => {
        const metadata = buildSubgraphMetadata(ref, hyp);
        // Both searches run before either path is converted to alignments.
        const pathFromTs = tsBeamSearch(metadata);
        const pathFromNative = nativeBeamSearch(metadata);
        const expected = getAlignments(pathFromTs);
        const actual = getAlignments(pathFromNative);
        assert.deepStrictEqual(actual, expected);
      });
    }
  });
});
|
|
@@ -1,491 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
// Short aliases for the Object built-ins used by the interop helpers below.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Defines a getter on `to` for every own string-keyed property of `from`
 * that `to` does not already own, skipping the key named by `except`.
 * Each getter reads through to `from`, and enumerability mirrors the source
 * property. `desc` is only used as scratch storage for the descriptor.
 * Returns `to`; it is returned untouched when `from` is neither an object
 * nor a function.
 */
var __copyProps = (to, from, except, desc) => {
  const isObjectLike = from && typeof from === "object";
  if (!isObjectLike && typeof from !== "function") {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};

/**
 * Wraps a required module in a fresh object that shares the module's
 * prototype and exposes its own properties through getters.
 */
var __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  const needsDefault = isNodeMode || !mod || !mod.__esModule;
  if (needsDefault) {
    __defProp(target, "default", { value: mod, enumerable: true });
  }
  return __copyProps(target, mod);
};
|
|
24
|
-
var import_node_assert = __toESM(require("node:assert"), 1);
|
|
25
|
-
var import_promises = require("node:fs/promises");
|
|
26
|
-
var import_node_path = require("node:path");
|
|
27
|
-
var import_node_test = require("node:test");
|
|
28
|
-
var import_epub = require("@storyteller-platform/epub");
|
|
29
|
-
var import_markup = require("../markup.cjs");
|
|
30
|
-
var import_segmentation = require("../segmentation.cjs");
|
|
31
|
-
/**
 * Turns a title into text usable as a single filename.
 *
 * Each of the characters / \ : * ? " < > | becomes "-", every run of
 * whitespace collapses to one space, surrounding whitespace is trimmed, and
 * finally any run of dots at the very end is dropped.
 *
 * @param {string} title - Raw title.
 * @returns {string} The sanitized name.
 */
function sanitizeFilename(title) {
  const withoutReserved = title.replace(/[/\\:*?"<>|]/g, "-");
  const singleSpaced = withoutReserved.replace(/\s+/g, " ");
  const trimmed = singleSpaced.trim();
  return trimmed.replace(/[.]+$/, "");
}
|
|
34
|
-
/**
 * Shortens `input` so that the kept text plus `suffix` encodes to at most
 * `byteLimit` UTF-8 bytes, then appends `suffix`.
 *
 * The input is NFC-normalized and consumed one code point at a time, so a
 * multi-byte character is either kept whole or dropped. The suffix is always
 * appended, even when it does not fit within the limit by itself.
 *
 * @param {string} input - Text to shorten.
 * @param {number} byteLimit - Maximum UTF-8 byte length, suffix included.
 * @param {string} [suffix=""] - Text appended after the kept prefix.
 * @returns {string} The kept prefix followed by `suffix`.
 */
function truncate(input, byteLimit, suffix = "") {
  const utf8 = new TextEncoder();
  let kept = "";
  for (const codePoint of input.normalize("NFC")) {
    const candidate = kept + codePoint;
    // Stop at the first code point that would push the total over the limit.
    if (utf8.encode(candidate + suffix).length > byteLimit) {
      return kept + suffix;
    }
    kept = candidate;
  }
  return kept + suffix;
}
|
|
46
|
-
/**
 * Builds a path segment from `name`: the name is sanitized first, then
 * truncated against a 150-byte limit with `suffix` appended.
 *
 * @param {string} name - Raw name.
 * @param {string} [suffix=""] - Suffix passed through to `truncate`.
 * @returns {string} The sanitized, truncated segment including the suffix.
 */
function getSafeFilepathSegment(name, suffix = "") {
  const sanitized = sanitizeFilename(name);
  return truncate(sanitized, 150, suffix);
}
|
|
49
|
-
/**
 * Compares rendered markup against the snapshot stored for the running test.
 *
 * The snapshot lives at src/markup/__snapshots__/<safe test name>.snapshot.
 * When the UPDATE_SNAPSHOTS environment variable is set, the snapshot is
 * (re)written from `output` and nothing is compared.
 *
 * @param {{ fullName: string }} context - Test context; `fullName` names the snapshot file.
 * @param {string} output - Rendered markup to check.
 * @returns {Promise<void>}
 * @throws {assert.AssertionError} When the output differs from the snapshot,
 *   or when the snapshot cannot be read (the underlying error is attached as
 *   `cause`).
 */
async function assertMarkupSnapshot(context, output) {
  const snapshotFilename = getSafeFilepathSegment(context.fullName, ".snapshot");
  const snapshotFilepath = (0, import_node_path.join)(
    "src",
    "markup",
    "__snapshots__",
    snapshotFilename
  );
  if (process.env["UPDATE_SNAPSHOTS"]) {
    await (0, import_promises.mkdir)((0, import_node_path.dirname)(snapshotFilepath), { recursive: true });
    await (0, import_promises.writeFile)(snapshotFilepath, output, { encoding: "utf-8" });
    return;
  }
  try {
    const existingSnapshot = await (0, import_promises.readFile)(snapshotFilepath, {
      encoding: "utf-8"
    });
    const existingLines = existingSnapshot.split("\n");
    const newLines = output.split("\n");
    // Walk the longer of the two so that extra trailing lines in the new
    // output are reported instead of being silently accepted.
    const lineCount = Math.max(existingLines.length, newLines.length);
    for (let i = 0; i < lineCount; i++) {
      if (existingLines[i] !== newLines[i]) {
        // Report a small window around the first mismatch. deepStrictEqual
        // compares contents, so the failure message describes the differing
        // lines rather than a reference-identity mismatch.
        const windowStart = Math.max(0, i - 5);
        import_node_assert.default.deepStrictEqual(
          newLines.slice(windowStart, i + 5),
          existingLines.slice(windowStart, i + 5)
        );
      }
    }
  } catch (e) {
    if (e instanceof import_node_assert.default.AssertionError) {
      throw e;
    }
    // Any other failure (for example a missing snapshot file) is surfaced as
    // an assertion failure showing the produced output; the original error is
    // kept on `cause` so the root problem is not lost.
    const failure = new import_node_assert.default.AssertionError({
      actual: output,
      expected: "",
      diff: "simple"
    });
    failure.cause = e;
    throw failure;
  }
}
|
|
89
|
-
// Snapshot tests for markupChapter: each test parses an XHTML fixture,
// segments its body, marks the chapter up and compares the rebuilt XHTML with
// the snapshot stored under the test's name.
void (0, import_node_test.describe)("markupChapter", () => {
  /**
   * Runs one XHTML fixture through parse -> segmentation -> markupChapter and
   * asserts the rebuilt document against the snapshot for test `t`.
   *
   * NOTE(review): fixture text is kept exactly as it appears in this file,
   * including its line breaks and lack of indentation — presumably the
   * snapshots are sensitive to that whitespace; confirm before reformatting.
   *
   * @param {object} t - Test context handed to the test callback.
   * @param {string} xhtml - XHTML source of the chapter.
   * @returns {Promise<void>}
   */
  async function assertChapterSnapshot(t, xhtml) {
    const input = import_epub.Epub.xhtmlParser.parse(xhtml);
    const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
      import_epub.Epub.getXhtmlBody(input),
      {}
    );
    const { markedUp: output } = (0, import_markup.markupChapter)(
      "chapter_one",
      input,
      segmentation,
      mapping
    );
    await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
  }

  void (0, import_node_test.it)("can tag sentences", async (t) => {
    await assertChapterSnapshot(
      t,
      /* xml */
      `
<?xml version="1.0" encoding="UTF-8"?>

<html>
<head>
<meta charset="utf-8" />
<title>The Project Gutenberg eBook of Moby Dick; Or the Whale, by Herman Melville</title>
</head>
<body>
<p>
Call me Ishmael. Some years ago\u2014never mind how long precisely\u2014having
little or no money in my purse, and nothing particular to interest me on
shore, I thought I would sail about a little and see the watery part of
the world. It is a way I have of driving off the spleen and regulating the
circulation. Whenever I find myself growing grim about the mouth; whenever
it is a damp, drizzly November in my soul; whenever I find myself
involuntarily pausing before coffin warehouses, and bringing up the rear
of every funeral I meet; and especially whenever my hypos get such an
upper hand of me, that it requires a strong moral principle to prevent me
from deliberately stepping into the street, and methodically knocking
people\u2019s hats off\u2014then, I account it high time to get to sea as soon
as I can.
</p>
<p>
This is my substitute for pistol and ball. With a philosophical
flourish Cato throws himself upon his sword; I quietly take to the ship.
There is nothing surprising in this. If they but knew it, almost all men
in their degree, some time or other, cherish very nearly the same feelings
towards the ocean with me.
</p>
</body>
</html>
`
    );
  });

  void (0, import_node_test.it)("can tag sentences with formatting marks", async (t) => {
    await assertChapterSnapshot(
      t,
      /* xml */
      `
<?xml version="1.0" encoding="UTF-8"?>

<html>
<head>
<meta charset="utf-8" />
<title>The Project Gutenberg eBook of Moby Dick; Or the Whale, by Herman Melville</title>
</head>
<body>
<p>
Call me <strong>Ishmael</strong>. Some years ago\u2014never mind how long precisely\u2014having
little or no money in my purse, and nothing particular to interest me on
shore, I thought I would sail about a little and see the watery part of
the world.
</p>
</body>
</html>
`
    );
  });

  void (0, import_node_test.it)("can tag sentences with formatting marks that overlap sentence boundaries", async (t) => {
    await assertChapterSnapshot(
      t,
      /* xml */
      `
<?xml version="1.0" encoding="UTF-8"?>

<html>
<head>
<meta charset="utf-8" />
<title>The Project Gutenberg eBook of Moby Dick; Or the Whale, by Herman Melville</title>
</head>
<body>
<p>
Call me <strong>Ishmael. Some years ago</strong>\u2014never mind how long precisely\u2014having
little or no money in my purse, and nothing particular to interest me on
shore, I thought I would sail about a little and see the watery part of
the world.
</p>
</body>
</html>
`
    );
  });

  void (0, import_node_test.it)("can tag sentences with nested formatting marks", async (t) => {
    await assertChapterSnapshot(
      t,
      /* xml */
      `
<?xml version="1.0" encoding="UTF-8"?>

<html>
<head>
<meta charset="utf-8" />
<title>The Project Gutenberg eBook of Moby Dick; Or the Whale, by Herman Melville</title>
</head>
<body>
<p>
<em>Call me <strong>Ishmael</strong>.</em> Some years ago\u2014never mind how long precisely\u2014having
little or no money in my purse, and nothing particular to interest me on
shore, I thought I would sail about a little and see the watery part of
the world.
</p>
</body>
</html>
`
    );
  });

  void (0, import_node_test.it)("can tag sentences with atoms", async (t) => {
    await assertChapterSnapshot(
      t,
      /* xml */
      `
<?xml version="1.0" encoding="UTF-8"?>

<html>
<head>
<meta charset="utf-8" />
<title>The Project Gutenberg eBook of Moby Dick; Or the Whale, by Herman Melville</title>
</head>
<body>
<p>
Call me Ishmael. Some<img src="#"/> years ago\u2014never mind how long precisely\u2014having
little or no money in my purse, and nothing particular to interest me on
shore, I thought I would sail about a little and see the watery part of
the world.
</p>
</body>
</html>
`
    );
  });

  void (0, import_node_test.it)("can tag sentences in nested textblocks", async (t) => {
    await assertChapterSnapshot(
      t,
      /* xml */
      `
<?xml version='1.0' encoding='utf-8'?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops"
epub:prefix="z3998: http://www.daisy.org/z3998/2012/vocab/structure/#" lang="en" xml:lang="en">

<head>
<link href="../styles/9781534431010.css" rel="stylesheet" type="text/css" />
<link href="../styles/SS_global.css" rel="stylesheet" type="text/css" />
<link rel="stylesheet" href="../../Styles/storyteller-readaloud.css" type="text/css" />
</head>

<body>
<blockquote class="blockquotelet">
<p class="blockno"><span aria-label="page 7" id="page_7" role="doc-pagebreak" /></p>
<p class="blockno">Look on my works, ye mighty, and despair!</p>
<p class="blockno1">A little joke.</p>
<p class="blockno1"> </p>
<p class="blockno1">Trust that I have accounted for all variables of irony.</p>
<p class="blockno1"> </p>
<p class="blockno1">Though I suppose if you\u2019re unfamiliar with overanthologized works of the early Strand 6
nineteenth century, the joke\u2019s on me.</p>
<p class="blockin">I hoped you\u2019d come.</p>
</blockquote>
</body>

</html>
`
    );
  });

  void (0, import_node_test.it)("can tag sentences that cross textblock boundaries", async (t) => {
    await assertChapterSnapshot(
      t,
      /* xml */
      `
<?xml version="1.0" encoding="UTF-8"?>

<html>
<head>
<meta charset="utf-8" />
<title>The Project Gutenberg eBook of Moby Dick; Or the Whale, by Herman Melville</title>
</head>
<body>
<p>
Call me Ishmael. Some years ago\u2014never mind how long precisely\u2014having
little or no money in my purse, and nothing particular to interest me on
shore,
</p>
<p>
I thought I would sail about a little and see the watery part of
the world.
</p>
</body>
</html>
`
    );
  });

  // Registered with plain `it` like its siblings: a leftover `it.only` marks
  // this as the sole test to run whenever the runner is in only-mode.
  void (0, import_node_test.it)("can handle soft page breaks", async (t) => {
    await assertChapterSnapshot(
      t,
      /* xml */
      `
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" lang="en-US" xml:lang="en-US">
<head>
<title>Chapter 1, Black Powder War</title>
<meta charset="utf-8"/>
<link href="../css/prh_resets.css" rel="stylesheet" type="text/css"/>
<link href="../css/rh_static.css" rel="stylesheet" type="text/css"/>
<link href="../css/9780345493439_style.css" rel="stylesheet" type="text/css"/>
<meta content="urn:uuid:52698e83-e600-48be-b763-c64bde1e3e0c" name="Adept.expected.resource"/>
</head>
<body>
<a id="d1-d2s6d3s2"/>
<div class="page_top_padding">
<span epub:type="pagebreak" id="page_9" role="doc-pagebreak" title="9"/>
<h1 class="para-cn-chap-pg trajan-pro-3">CHAPTER 1</h1>
<div class="para-orn">
<span class="figure figure_dingbat">
<img alt="" class="height_1em" role="presentation" src="../images/Novi_9780345493439_epub3_001_r1.jpg"/></span></div>
<p class="para-pf dropcaps3line char-dropcap-DC trajan-pro-3-dc" style="text-indent:0;">The hot wind blowing into Macao was sluggish and unrefreshing, only stirring up the rotting salt smell of the harbor, the fish-corpses and great knots of black-red seaweed, the effluvia of human and dragon wastes. Even so the sailors were sitting crowded along the rails of the <i class="char-i">Allegiance</i> for a breath of the moving air, leaning against one another to get a little room. A little scuffling broke out amongst them from time to time, a dull exchange of shoving back and forth, but these quarrels died almost at once in the punishing heat.</p>
<p class="para-p">Temeraire lay disconsolately upon the dragondeck, gazing towards the white haze of the open ocean, the aviators on duty lying half-asleep in his great shadow. Laurence himself had sacrificed dignity so far as to take off his coat, as he was sitting in the crook of Temeraire\u2019s foreleg and so concealed from view.</p>
<p class="para-p">\u201CI am sure I could pull the ship out of the harbor,\u201D Temeraire said, not for the first time in the past week; and sighed when this amiable plan was again refused: in a calm he might indeed have been able to tow even the enormous dragon transport, but against a direct headwind he could only exhaust himself to no purpose.</p>
<span epub:type="pagebreak" id="page_10" role="doc-pagebreak" title="10"/>
<p class="para-p">\u201CEven in a calm you could scarcely pull her any great distance,\u201D Laurence added consolingly. \u201CA few miles may be of some use out in the open ocean, but at present we may as well stay in harbor, and be a little more comfortable; we would make very little speed even if we could get her out.\u201D</p>
<p class="para-p">\u201CIt seems a great pity to me that we must always be waiting on the wind, when everything else is ready and we are also,\u201D Temeraire said. \u201CI would so like to be home <i class="char-i">soon:</i> there is so very much to be done.\u201D His tail thumped hollowly upon the boards, for emphasis.</p>
<p class="para-p">\u201CI beg you will not raise your hopes too high,\u201D Laurence said, himself a little hopelessly: urging Temeraire to restraint had so far not produced any effect, and he did not expect a different event now. \u201CYou must be prepared to endure some delays; at home as much as here.\u201D</p>
</div>
</body>
</html>`
    );
  });

  void (0, import_node_test.it)("can handle boolean-like text values", async (t) => {
    await assertChapterSnapshot(t, `
<?xml version="1.0" encoding="UTF-8" standalone="no"?><html xmlns="http://www.w3.org/1999/xhtml" xmlns:ops="http://www.idpf.org/2007/ops" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<head>
</head>
<body>
<p>true</p>
</body>
</html>
`);
  });

  void (0, import_node_test.it)("can handle number-like text values", async (t) => {
    await assertChapterSnapshot(t, `
<?xml version="1.0" encoding="UTF-8" standalone="no"?><html xmlns="http://www.w3.org/1999/xhtml" xmlns:ops="http://www.idpf.org/2007/ops" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<head>
</head>
<body>
<p>5.000</p>
</body>
</html>
`);
  });

  void (0, import_node_test.it)("can handle null-like text values", async (t) => {
    await assertChapterSnapshot(t, `
<?xml version="1.0" encoding="UTF-8" standalone="no"?><html xmlns="http://www.w3.org/1999/xhtml" xmlns:ops="http://www.idpf.org/2007/ops" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<head>
</head>
<body>
<p>null</p>
</body>
</html>
`);
  });

  // NOTE(review): the test name refers to nbsp entities, but the two middle
  // paragraphs below show only a whitespace character between the <p> tags —
  // confirm the original entity text was not lost when this file was rendered.
  void (0, import_node_test.it)("can preserve nbsp entities", async (t) => {
    await assertChapterSnapshot(t, `
<?xml version="1.0" encoding="UTF-8"?><html xmlns="http://www.w3.org/1999/xhtml" xmlns:ops="http://www.idpf.org/2007/ops" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<head>
</head>
<body>
<p>First paragraph.</p>
<p> </p>
<p> </p>
<p>Second paragraph.</p>
</body>
</html>
`);
  });
});
|