@storyteller-platform/align 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. package/LICENSE.txt +21 -0
  2. package/README.md +3 -0
  3. package/dist/align/align.cjs +525 -0
  4. package/dist/align/align.d.cts +58 -0
  5. package/dist/align/align.d.ts +58 -0
  6. package/dist/align/align.js +458 -0
  7. package/dist/align/fuzzy.cjs +164 -0
  8. package/dist/align/fuzzy.d.cts +6 -0
  9. package/dist/align/fuzzy.d.ts +6 -0
  10. package/dist/align/fuzzy.js +141 -0
  11. package/dist/align/getSentenceRanges.cjs +304 -0
  12. package/dist/align/getSentenceRanges.d.cts +31 -0
  13. package/dist/align/getSentenceRanges.d.ts +31 -0
  14. package/dist/align/getSentenceRanges.js +277 -0
  15. package/dist/align/parse.cjs +63 -0
  16. package/dist/align/parse.d.cts +30 -0
  17. package/dist/align/parse.d.ts +30 -0
  18. package/dist/align/parse.js +51 -0
  19. package/dist/chunk-BIEQXUOY.js +50 -0
  20. package/dist/cli/bin.cjs +368 -0
  21. package/dist/cli/bin.d.cts +1 -0
  22. package/dist/cli/bin.d.ts +1 -0
  23. package/dist/cli/bin.js +319 -0
  24. package/dist/common/ffmpeg.cjs +232 -0
  25. package/dist/common/ffmpeg.d.cts +33 -0
  26. package/dist/common/ffmpeg.d.ts +33 -0
  27. package/dist/common/ffmpeg.js +196 -0
  28. package/dist/common/logging.cjs +45 -0
  29. package/dist/common/logging.d.cts +5 -0
  30. package/dist/common/logging.d.ts +5 -0
  31. package/dist/common/logging.js +12 -0
  32. package/dist/common/parse.cjs +73 -0
  33. package/dist/common/parse.d.cts +28 -0
  34. package/dist/common/parse.d.ts +28 -0
  35. package/dist/common/parse.js +56 -0
  36. package/dist/common/shell.cjs +30 -0
  37. package/dist/common/shell.d.cts +3 -0
  38. package/dist/common/shell.d.ts +3 -0
  39. package/dist/common/shell.js +7 -0
  40. package/dist/index.cjs +37 -0
  41. package/dist/index.d.cts +12 -0
  42. package/dist/index.d.ts +12 -0
  43. package/dist/index.js +11 -0
  44. package/dist/markup/__tests__/markup.test.cjs +464 -0
  45. package/dist/markup/__tests__/markup.test.d.cts +2 -0
  46. package/dist/markup/__tests__/markup.test.d.ts +2 -0
  47. package/dist/markup/__tests__/markup.test.js +441 -0
  48. package/dist/markup/markup.cjs +316 -0
  49. package/dist/markup/markup.d.cts +24 -0
  50. package/dist/markup/markup.d.ts +24 -0
  51. package/dist/markup/markup.js +254 -0
  52. package/dist/markup/parse.cjs +55 -0
  53. package/dist/markup/parse.d.cts +17 -0
  54. package/dist/markup/parse.d.ts +17 -0
  55. package/dist/markup/parse.js +43 -0
  56. package/dist/markup/segmentation.cjs +87 -0
  57. package/dist/markup/segmentation.d.cts +8 -0
  58. package/dist/markup/segmentation.d.ts +8 -0
  59. package/dist/markup/segmentation.js +67 -0
  60. package/dist/markup/semantics.cjs +79 -0
  61. package/dist/markup/semantics.d.cts +6 -0
  62. package/dist/markup/semantics.d.ts +6 -0
  63. package/dist/markup/semantics.js +53 -0
  64. package/dist/process/AudioEncoding.cjs +16 -0
  65. package/dist/process/AudioEncoding.d.cts +8 -0
  66. package/dist/process/AudioEncoding.d.ts +8 -0
  67. package/dist/process/AudioEncoding.js +0 -0
  68. package/dist/process/__tests__/processAudiobook.test.cjs +232 -0
  69. package/dist/process/__tests__/processAudiobook.test.d.cts +2 -0
  70. package/dist/process/__tests__/processAudiobook.test.d.ts +2 -0
  71. package/dist/process/__tests__/processAudiobook.test.js +209 -0
  72. package/dist/process/mime.cjs +43 -0
  73. package/dist/process/mime.d.cts +3 -0
  74. package/dist/process/mime.d.ts +3 -0
  75. package/dist/process/mime.js +24 -0
  76. package/dist/process/parse.cjs +84 -0
  77. package/dist/process/parse.d.cts +28 -0
  78. package/dist/process/parse.d.ts +28 -0
  79. package/dist/process/parse.js +73 -0
  80. package/dist/process/processAudiobook.cjs +220 -0
  81. package/dist/process/processAudiobook.d.cts +24 -0
  82. package/dist/process/processAudiobook.d.ts +24 -0
  83. package/dist/process/processAudiobook.js +166 -0
  84. package/dist/process/ranges.cjs +203 -0
  85. package/dist/process/ranges.d.cts +15 -0
  86. package/dist/process/ranges.d.ts +15 -0
  87. package/dist/process/ranges.js +137 -0
  88. package/dist/transcribe/parse.cjs +149 -0
  89. package/dist/transcribe/parse.d.cts +114 -0
  90. package/dist/transcribe/parse.d.ts +114 -0
  91. package/dist/transcribe/parse.js +143 -0
  92. package/dist/transcribe/transcribe.cjs +400 -0
  93. package/dist/transcribe/transcribe.d.cts +41 -0
  94. package/dist/transcribe/transcribe.d.ts +41 -0
  95. package/dist/transcribe/transcribe.js +330 -0
  96. package/package.json +96 -0
@@ -0,0 +1,2 @@
1
+
2
+ export { }
@@ -0,0 +1,2 @@
1
+
2
+ export { }
@@ -0,0 +1,441 @@
1
+ import assert from "node:assert";
2
+ import { describe, it } from "node:test";
3
+ import { Epub } from "@storyteller-platform/epub";
4
+ import { appendTextNode, markupChapter } from "../markup.js";
5
+ import { getXhtmlSegmentation } from "../segmentation.js";
6
// Unit tests for appendTextNode (imported from ../markup.js). Every case calls it with
// the string "chapter_one", a target array, the text "test", a list of marks, an empty
// Set and (in two cases) a sixth argument of 0, then checks the contents of the target
// array, which the call is expected to have modified in place.
// NOTE(review): the role of the Set argument is not visible from these tests — confirm
// against markup.js.
+ void describe("appendTextNode", () => {
7
// No marks and no sixth argument: the array ends up holding a single bare "#text" node.
+ void it("can append text nodes to empty parents", () => {
8
+ const input = [];
9
+ appendTextNode("chapter_one", input, "test", [], /* @__PURE__ */ new Set());
10
+ assert.deepStrictEqual(input, [{ "#text": "test" }]);
11
+ });
12
// One mark (elementName "a" with an "@_href" attribute): the text node is expected to be
// wrapped in an `a` entry whose attributes are stored under the ":@" key.
+ void it("can append text nodes with marks", () => {
13
+ const input = [];
14
+ appendTextNode(
15
+ "chapter_one",
16
+ input,
17
+ "test",
18
+ [{ elementName: "a", attributes: { "@_href": "#" } }],
19
+ /* @__PURE__ */ new Set()
20
+ );
21
+ assert.deepStrictEqual(input, [
22
+ { a: [{ "#text": "test" }], ":@": { "@_href": "#" } }
23
+ ]);
24
+ });
25
// Sixth argument 0 (presumably the sentence index — confirm against markup.js): the text
// node is expected inside a span whose "@_id" is "chapter_one-s0".
+ void it("can wrap text nodes with sentence spans", () => {
26
+ const input = [];
27
+ appendTextNode("chapter_one", input, "test", [], /* @__PURE__ */ new Set(), 0);
28
+ assert.deepStrictEqual(input, [
29
+ {
30
+ span: [{ "#text": "test" }],
31
+ ":@": { "@_id": "chapter_one-s0" }
32
+ }
33
+ ]);
34
+ });
35
// The array already contains a span with id "chapter_one-s0"; appending again with the
// same arguments is expected to add the text node inside that span instead of creating
// a second span.
+ void it("can join text nodes with the same sentence ids", () => {
36
+ const input = [
37
+ {
38
+ span: [{ "#text": "test" }],
39
+ ":@": { "@_id": "chapter_one-s0" }
40
+ }
41
+ ];
42
+ appendTextNode("chapter_one", input, "test", [], /* @__PURE__ */ new Set(), 0);
43
+ assert.deepStrictEqual(input, [
44
+ {
45
+ span: [{ "#text": "test" }, { "#text": "test" }],
46
+ ":@": { "@_id": "chapter_one-s0" }
47
+ }
48
+ ]);
49
+ });
50
+ });
51
// Snapshot tests for markupChapter (imported from ../markup.js). Every case follows the
// same steps: parse an XHTML fixture with Epub.xhtmlParser, pass the document body to
// getXhtmlSegmentation with an empty options object, call markupChapter with the id
// "chapter_one", the parsed document and the segmentation, then snapshot the rebuilt
// XHTML (Epub.xhtmlBuilder.build) split into lines. Only the fixture differs per case.
+ void describe("markupChapter", () => {
52
// Fixture: two <p> paragraphs of multi-sentence prose with no inline markup.
+ void it("can tag sentences", async (t) => {
53
+ const input = Epub.xhtmlParser.parse(
54
+ /* xml */
55
+ `
56
+ <?xml version="1.0" encoding="UTF-8"?>
57
+
58
+ <html>
59
+ <head>
60
+ <meta charset="utf-8" />
61
+ <title>The Project Gutenberg eBook of Moby Dick; Or the Whale, by Herman Melville</title>
62
+ </head>
63
+ <body>
64
+ <p>
65
+ Call me Ishmael. Some years ago\u2014never mind how long precisely\u2014having
66
+ little or no money in my purse, and nothing particular to interest me on
67
+ shore, I thought I would sail about a little and see the watery part of
68
+ the world. It is a way I have of driving off the spleen and regulating the
69
+ circulation. Whenever I find myself growing grim about the mouth; whenever
70
+ it is a damp, drizzly November in my soul; whenever I find myself
71
+ involuntarily pausing before coffin warehouses, and bringing up the rear
72
+ of every funeral I meet; and especially whenever my hypos get such an
73
+ upper hand of me, that it requires a strong moral principle to prevent me
74
+ from deliberately stepping into the street, and methodically knocking
75
+ people\u2019s hats off\u2014then, I account it high time to get to sea as soon
76
+ as I can.
77
+ </p>
78
+ <p>
79
+ This is my substitute for pistol and ball. With a philosophical
80
+ flourish Cato throws himself upon his sword; I quietly take to the ship.
81
+ There is nothing surprising in this. If they but knew it, almost all men
82
+ in their degree, some time or other, cherish very nearly the same feelings
83
+ towards the ocean with me.
84
+ </p>
85
+ </body>
86
+ </html>
87
+ `
88
+ );
89
+ const segmentation = await getXhtmlSegmentation(
90
+ Epub.getXhtmlBody(input),
91
+ {}
92
+ );
93
+ const { markedUp: output } = markupChapter(
94
+ "chapter_one",
95
+ input,
96
+ segmentation
97
+ );
98
+ t.assert.snapshot(Epub.xhtmlBuilder.build(output).split("\n"));
99
+ });
100
// Fixture: a <strong> element wraps the single word "Ishmael" inside the opening text.
+ void it("can tag sentences with formatting marks", async (t) => {
101
+ const input = Epub.xhtmlParser.parse(
102
+ /* xml */
103
+ `
104
+ <?xml version="1.0" encoding="UTF-8"?>
105
+
106
+ <html>
107
+ <head>
108
+ <meta charset="utf-8" />
109
+ <title>The Project Gutenberg eBook of Moby Dick; Or the Whale, by Herman Melville</title>
110
+ </head>
111
+ <body>
112
+ <p>
113
+ Call me <strong>Ishmael</strong>. Some years ago\u2014never mind how long precisely\u2014having
114
+ little or no money in my purse, and nothing particular to interest me on
115
+ shore, I thought I would sail about a little and see the watery part of
116
+ the world.
117
+ </p>
118
+ </body>
119
+ </html>
120
+ `
121
+ );
122
+ const segmentation = await getXhtmlSegmentation(
123
+ Epub.getXhtmlBody(input),
124
+ {}
125
+ );
126
+ const { markedUp: output } = markupChapter(
127
+ "chapter_one",
128
+ input,
129
+ segmentation
130
+ );
131
+ t.assert.snapshot(Epub.xhtmlBuilder.build(output).split("\n"));
132
+ });
133
// Fixture: the <strong> element spans the full stop between "Ishmael." and
// "Some years ago", so the mark covers text on both sides of it.
+ void it("can tag sentences with formatting marks that overlap sentence boundaries", async (t) => {
134
+ const input = Epub.xhtmlParser.parse(
135
+ /* xml */
136
+ `
137
+ <?xml version="1.0" encoding="UTF-8"?>
138
+
139
+ <html>
140
+ <head>
141
+ <meta charset="utf-8" />
142
+ <title>The Project Gutenberg eBook of Moby Dick; Or the Whale, by Herman Melville</title>
143
+ </head>
144
+ <body>
145
+ <p>
146
+ Call me <strong>Ishmael. Some years ago</strong>\u2014never mind how long precisely\u2014having
147
+ little or no money in my purse, and nothing particular to interest me on
148
+ shore, I thought I would sail about a little and see the watery part of
149
+ the world.
150
+ </p>
151
+ </body>
152
+ </html>
153
+ `
154
+ );
155
+ const segmentation = await getXhtmlSegmentation(
156
+ Epub.getXhtmlBody(input),
157
+ {}
158
+ );
159
+ const { markedUp: output } = markupChapter(
160
+ "chapter_one",
161
+ input,
162
+ segmentation
163
+ );
164
+ t.assert.snapshot(Epub.xhtmlBuilder.build(output).split("\n"));
165
+ });
166
// Fixture: a <strong> element nested inside an <em> that wraps "Call me Ishmael.".
+ void it("can tag sentences with nested formatting marks", async (t) => {
167
+ const input = Epub.xhtmlParser.parse(
168
+ /* xml */
169
+ `
170
+ <?xml version="1.0" encoding="UTF-8"?>
171
+
172
+ <html>
173
+ <head>
174
+ <meta charset="utf-8" />
175
+ <title>The Project Gutenberg eBook of Moby Dick; Or the Whale, by Herman Melville</title>
176
+ </head>
177
+ <body>
178
+ <p>
179
+ <em>Call me <strong>Ishmael</strong>.</em> Some years ago\u2014never mind how long precisely\u2014having
180
+ little or no money in my purse, and nothing particular to interest me on
181
+ shore, I thought I would sail about a little and see the watery part of
182
+ the world.
183
+ </p>
184
+ </body>
185
+ </html>
186
+ `
187
+ );
188
+ const segmentation = await getXhtmlSegmentation(
189
+ Epub.getXhtmlBody(input),
190
+ {}
191
+ );
192
+ const { markedUp: output } = markupChapter(
193
+ "chapter_one",
194
+ input,
195
+ segmentation
196
+ );
197
+ t.assert.snapshot(Epub.xhtmlBuilder.build(output).split("\n"));
198
+ });
199
// Fixture: a self-closing <img> sits in the middle of the text, directly after "Some".
+ void it("can tag sentences with atoms", async (t) => {
200
+ const input = Epub.xhtmlParser.parse(
201
+ /* xml */
202
+ `
203
+ <?xml version="1.0" encoding="UTF-8"?>
204
+
205
+ <html>
206
+ <head>
207
+ <meta charset="utf-8" />
208
+ <title>The Project Gutenberg eBook of Moby Dick; Or the Whale, by Herman Melville</title>
209
+ </head>
210
+ <body>
211
+ <p>
212
+ Call me Ishmael. Some<img src="#"/> years ago\u2014never mind how long precisely\u2014having
213
+ little or no money in my purse, and nothing particular to interest me on
214
+ shore, I thought I would sail about a little and see the watery part of
215
+ the world.
216
+ </p>
217
+ </body>
218
+ </html>
219
+ `
220
+ );
221
+ const segmentation = await getXhtmlSegmentation(
222
+ Epub.getXhtmlBody(input),
223
+ {}
224
+ );
225
+ const { markedUp: output } = markupChapter(
226
+ "chapter_one",
227
+ input,
228
+ segmentation
229
+ );
230
+ t.assert.snapshot(Epub.xhtmlBuilder.build(output).split("\n"));
231
+ });
232
// Fixture: several <p> elements nested inside a <blockquote>; the first <p> holds only
// a self-closing page-break <span>.
+ void it("can tag sentences in nested textblocks", async (t) => {
233
+ const input = Epub.xhtmlParser.parse(
234
+ /* xml */
235
+ `
236
+ <?xml version='1.0' encoding='utf-8'?>
237
+ <!DOCTYPE html>
238
+ <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops"
239
+ epub:prefix="z3998: http://www.daisy.org/z3998/2012/vocab/structure/#" lang="en" xml:lang="en">
240
+
241
+ <head>
242
+ <link href="../styles/9781534431010.css" rel="stylesheet" type="text/css" />
243
+ <link href="../styles/SS_global.css" rel="stylesheet" type="text/css" />
244
+ <link rel="stylesheet" href="../../Styles/storyteller-readaloud.css" type="text/css" />
245
+ </head>
246
+
247
+ <body>
248
+ <blockquote class="blockquotelet">
249
+ <p class="blockno"><span aria-label="page 7" id="page_7" role="doc-pagebreak" /></p>
250
+ <p class="blockno">Look on my works, ye mighty, and despair!</p>
251
+ <p class="blockno1">A little joke.</p>
252
+ <p class="blockno1"> </p>
253
+ <p class="blockno1">Trust that I have accounted for all variables of irony.</p>
254
+ <p class="blockno1"> </p>
255
+ <p class="blockno1">Though I suppose if you\u2019re unfamiliar with overanthologized works of the early Strand 6
256
+ nineteenth century, the joke\u2019s on me.</p>
257
+ <p class="blockin">I hoped you\u2019d come.</p>
258
+ </blockquote>
259
+ </body>
260
+
261
+ </html>
262
+ `
263
+ );
264
+ const segmentation = await getXhtmlSegmentation(
265
+ Epub.getXhtmlBody(input),
266
+ {}
267
+ );
268
+ const { markedUp: output } = markupChapter(
269
+ "chapter_one",
270
+ input,
271
+ segmentation
272
+ );
273
+ t.assert.snapshot(Epub.xhtmlBuilder.build(output).split("\n"));
274
+ });
275
// Fixture: the text that starts at "Some years ago" is split across two <p> elements;
// the first <p> ends at "shore," and the second continues with "I thought".
+ void it("can tag sentences that cross textblock boundaries", async (t) => {
276
+ const input = Epub.xhtmlParser.parse(
277
+ /* xml */
278
+ `
279
+ <?xml version="1.0" encoding="UTF-8"?>
280
+
281
+ <html>
282
+ <head>
283
+ <meta charset="utf-8" />
284
+ <title>The Project Gutenberg eBook of Moby Dick; Or the Whale, by Herman Melville</title>
285
+ </head>
286
+ <body>
287
+ <p>
288
+ Call me Ishmael. Some years ago\u2014never mind how long precisely\u2014having
289
+ little or no money in my purse, and nothing particular to interest me on
290
+ shore,
291
+ </p>
292
+ <p>
293
+ I thought I would sail about a little and see the watery part of
294
+ the world.
295
+ </p>
296
+ </body>
297
+ </html>
298
+ `
299
+ );
300
+ const segmentation = await getXhtmlSegmentation(
301
+ Epub.getXhtmlBody(input),
302
+ {}
303
+ );
304
+ const { markedUp: output } = markupChapter(
305
+ "chapter_one",
306
+ input,
307
+ segmentation
308
+ );
309
+ t.assert.snapshot(Epub.xhtmlBuilder.build(output).split("\n"));
310
+ });
311
// Fixture: self-closing doc-pagebreak <span> elements sit between block elements inside
// a <div> (before the <h1> and between two of the <p> paragraphs).
+ void it("can handle soft page breaks", async (t) => {
312
+ const input = Epub.xhtmlParser.parse(
313
+ /* xml */
314
+ `
315
+ <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" lang="en-US" xml:lang="en-US">
316
+ <head>
317
+ <title>Chapter 1, Black Powder War</title>
318
+ <meta charset="utf-8"/>
319
+ <link href="../css/prh_resets.css" rel="stylesheet" type="text/css"/>
320
+ <link href="../css/rh_static.css" rel="stylesheet" type="text/css"/>
321
+ <link href="../css/9780345493439_style.css" rel="stylesheet" type="text/css"/>
322
+ <meta content="urn:uuid:52698e83-e600-48be-b763-c64bde1e3e0c" name="Adept.expected.resource"/>
323
+ </head>
324
+ <body>
325
+ <a id="d1-d2s6d3s2"/>
326
+ <div class="page_top_padding">
327
+ <span epub:type="pagebreak" id="page_9" role="doc-pagebreak" title="9"/>
328
+ <h1 class="para-cn-chap-pg trajan-pro-3">CHAPTER 1</h1>
329
+ <div class="para-orn">
330
+ <span class="figure figure_dingbat">
331
+ <img alt="" class="height_1em" role="presentation" src="../images/Novi_9780345493439_epub3_001_r1.jpg"/></span></div>
332
+ <p class="para-pf dropcaps3line char-dropcap-DC trajan-pro-3-dc" style="text-indent:0;">The hot wind blowing into Macao was sluggish and unrefreshing, only stirring up the rotting salt smell of the harbor, the fish-corpses and great knots of black-red seaweed, the effluvia of human and dragon wastes. Even so the sailors were sitting crowded along the rails of the <i class="char-i">Allegiance</i> for a breath of the moving air, leaning against one another to get a little room. A little scuffling broke out amongst them from time to time, a dull exchange of shoving back and forth, but these quarrels died almost at once in the punishing heat.</p>
333
+ <p class="para-p">Temeraire lay disconsolately upon the dragondeck, gazing towards the white haze of the open ocean, the aviators on duty lying half-asleep in his great shadow. Laurence himself had sacrificed dignity so far as to take off his coat, as he was sitting in the crook of Temeraire\u2019s foreleg and so concealed from view.</p>
334
+ <p class="para-p">\u201CI am sure I could pull the ship out of the harbor,\u201D Temeraire said, not for the first time in the past week; and sighed when this amiable plan was again refused: in a calm he might indeed have been able to tow even the enormous dragon transport, but against a direct headwind he could only exhaust himself to no purpose.</p>
335
+ <span epub:type="pagebreak" id="page_10" role="doc-pagebreak" title="10"/>
336
+ <p class="para-p">\u201CEven in a calm you could scarcely pull her any great distance,\u201D Laurence added consolingly. \u201CA few miles may be of some use out in the open ocean, but at present we may as well stay in harbor, and be a little more comfortable; we would make very little speed even if we could get her out.\u201D</p>
337
+ <p class="para-p">\u201CIt seems a great pity to me that we must always be waiting on the wind, when everything else is ready and we are also,\u201D Temeraire said. \u201CI would so like to be home <i class="char-i">soon:</i> there is so very much to be done.\u201D His tail thumped hollowly upon the boards, for emphasis.</p>
338
+ <p class="para-p">\u201CI beg you will not raise your hopes too high,\u201D Laurence said, himself a little hopelessly: urging Temeraire to restraint had so far not produced any effect, and he did not expect a different event now. \u201CYou must be prepared to endure some delays; at home as much as here.\u201D</p>
339
+ </div>
340
+ </body>
341
+ </html>`
342
+ );
343
+ const segmentation = await getXhtmlSegmentation(
344
+ Epub.getXhtmlBody(input),
345
+ {}
346
+ );
347
+ const { markedUp: output } = markupChapter(
348
+ "chapter_one",
349
+ input,
350
+ segmentation
351
+ );
352
+ t.assert.snapshot(Epub.xhtmlBuilder.build(output).split("\n"));
353
+ });
354
// Fixture: the only body text is the literal string "true".
// NOTE(review): presumably guards against the XML parser coercing the text into a
// boolean — confirm against the parser options.
+ void it("can handle boolean-like text values", async (t) => {
355
+ const input = Epub.xhtmlParser.parse(`
356
+ <?xml version="1.0" encoding="UTF-8" standalone="no"?><html xmlns="http://www.w3.org/1999/xhtml" xmlns:ops="http://www.idpf.org/2007/ops" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
357
+ <head>
358
+ </head>
359
+ <body>
360
+ <p>true</p>
361
+ </body>
362
+ </html>
363
+ `);
364
+ const segmentation = await getXhtmlSegmentation(
365
+ Epub.getXhtmlBody(input),
366
+ {}
367
+ );
368
+ const { markedUp: output } = markupChapter(
369
+ "chapter_one",
370
+ input,
371
+ segmentation
372
+ );
373
+ t.assert.snapshot(Epub.xhtmlBuilder.build(output).split("\n"));
374
+ });
375
// Fixture: the only body text is "5.000", a string that would lose its trailing zeros
// if it were converted to a number and back.
+ void it("can handle number-like text values", async (t) => {
376
+ const input = Epub.xhtmlParser.parse(`
377
+ <?xml version="1.0" encoding="UTF-8" standalone="no"?><html xmlns="http://www.w3.org/1999/xhtml" xmlns:ops="http://www.idpf.org/2007/ops" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
378
+ <head>
379
+ </head>
380
+ <body>
381
+ <p>5.000</p>
382
+ </body>
383
+ </html>
384
+ `);
385
+ const segmentation = await getXhtmlSegmentation(
386
+ Epub.getXhtmlBody(input),
387
+ {}
388
+ );
389
+ const { markedUp: output } = markupChapter(
390
+ "chapter_one",
391
+ input,
392
+ segmentation
393
+ );
394
+ t.assert.snapshot(Epub.xhtmlBuilder.build(output).split("\n"));
395
+ });
396
// Fixture: the only body text is the literal string "null".
+ void it("can handle null-like text values", async (t) => {
397
+ const input = Epub.xhtmlParser.parse(`
398
+ <?xml version="1.0" encoding="UTF-8" standalone="no"?><html xmlns="http://www.w3.org/1999/xhtml" xmlns:ops="http://www.idpf.org/2007/ops" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
399
+ <head>
400
+ </head>
401
+ <body>
402
+ <p>null</p>
403
+ </body>
404
+ </html>
405
+ `);
406
+ const segmentation = await getXhtmlSegmentation(
407
+ Epub.getXhtmlBody(input),
408
+ {}
409
+ );
410
+ const { markedUp: output } = markupChapter(
411
+ "chapter_one",
412
+ input,
413
+ segmentation
414
+ );
415
+ t.assert.snapshot(Epub.xhtmlBuilder.build(output).split("\n"));
416
+ });
417
// Fixture: two <p> elements containing only an &nbsp; entity sit between two ordinary
// text paragraphs.
+ void it("can preserve nbsp entities", async (t) => {
418
+ const input = Epub.xhtmlParser.parse(`
419
+ <?xml version="1.0" encoding="UTF-8"?><html xmlns="http://www.w3.org/1999/xhtml" xmlns:ops="http://www.idpf.org/2007/ops" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
420
+ <head>
421
+ </head>
422
+ <body>
423
+ <p>First paragraph.</p>
424
+ <p>&nbsp;</p>
425
+ <p>&nbsp;</p>
426
+ <p>Second paragraph.</p>
427
+ </body>
428
+ </html>
429
+ `);
430
+ const segmentation = await getXhtmlSegmentation(
431
+ Epub.getXhtmlBody(input),
432
+ {}
433
+ );
434
+ const { markedUp: output } = markupChapter(
435
+ "chapter_one",
436
+ input,
437
+ segmentation
438
+ );
439
+ t.assert.snapshot(Epub.xhtmlBuilder.build(output).split("\n"));
440
+ });
441
+ });