@storyteller-platform/align 0.1.25 → 0.1.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/align/align.cjs +21 -9
- package/dist/align/align.js +22 -11
- package/dist/align/getSentenceRanges.cjs +0 -58
- package/dist/align/getSentenceRanges.d.cts +1 -2
- package/dist/align/getSentenceRanges.d.ts +1 -2
- package/dist/align/getSentenceRanges.js +0 -57
- package/dist/align/interpolateSentenceRanges.cjs +124 -0
- package/dist/align/interpolateSentenceRanges.d.cts +23 -0
- package/dist/align/interpolateSentenceRanges.d.ts +23 -0
- package/dist/align/interpolateSentenceRanges.js +101 -0
- package/dist/align/search.cjs +18 -7
- package/dist/align/search.js +18 -7
- package/dist/index.d.cts +1 -2
- package/dist/index.d.ts +1 -2
- package/dist/markup/markup.cjs +21 -14
- package/dist/markup/markup.d.cts +2 -4
- package/dist/markup/markup.d.ts +2 -4
- package/dist/markup/markup.js +28 -16
- package/dist/markup/model.cjs +138 -5
- package/dist/markup/model.d.cts +2 -57
- package/dist/markup/model.d.ts +2 -57
- package/dist/markup/model.js +136 -5
- package/dist/markup/parseDom.cjs +80 -25
- package/dist/markup/parseDom.d.cts +4 -4
- package/dist/markup/parseDom.d.ts +4 -4
- package/dist/markup/parseDom.js +87 -24
- package/dist/markup/resolvedPos.cjs +85 -0
- package/dist/markup/resolvedPos.d.cts +2 -0
- package/dist/markup/resolvedPos.d.ts +2 -0
- package/dist/markup/resolvedPos.js +62 -0
- package/dist/markup/segmentation.cjs +4 -8
- package/dist/markup/segmentation.d.cts +3 -8
- package/dist/markup/segmentation.d.ts +3 -8
- package/dist/markup/segmentation.js +3 -7
- package/dist/markup/serializeDom.d.cts +1 -1
- package/dist/markup/serializeDom.d.ts +1 -1
- package/dist/markup/transform.cjs +59 -2
- package/dist/markup/transform.d.cts +8 -2
- package/dist/markup/transform.d.ts +8 -2
- package/dist/markup/transform.js +58 -1
- package/dist/model-Bv3yPEdd.d.cts +96 -0
- package/dist/model-Bv3yPEdd.d.ts +96 -0
- package/dist/snapshot/snapshot.cjs +8 -6
- package/dist/snapshot/snapshot.js +9 -7
- package/package.json +1 -1
|
@@ -1,12 +1,7 @@
|
|
|
1
1
|
import { SegmentationResult } from '@echogarden/text-segmentation';
|
|
2
|
-
import { ParsedXml } from '@storyteller-platform/epub';
|
|
3
|
-
import { Mapping } from './map.js';
|
|
4
2
|
|
|
5
|
-
declare function getXhtmlSegmentation(xml: ParsedXml, options: {
|
|
3
|
+
declare function segmentChapter(text: string, options: {
|
|
6
4
|
primaryLocale?: Intl.Locale | null | undefined;
|
|
7
|
-
}): Promise<{
|
|
8
|
-
result: SegmentationResult["sentences"];
|
|
9
|
-
mapping: Mapping;
|
|
10
|
-
}>;
|
|
5
|
+
}): Promise<SegmentationResult["sentences"]>;
|
|
11
6
|
|
|
12
|
-
export { getXhtmlSegmentation };
|
|
7
|
+
export { segmentChapter };
|
|
@@ -2,19 +2,15 @@ import "../chunk-BIEQXUOY.js";
|
|
|
2
2
|
import {
|
|
3
3
|
segmentText
|
|
4
4
|
} from "@echogarden/text-segmentation";
|
|
5
|
-
import { parseDom } from "./parseDom.js";
|
|
6
|
-
import { liftText } from "./transform.js";
|
|
7
|
-
async function getXhtmlSegmentation(xml, options) {
|
|
8
|
-
const root = parseDom(xml);
|
|
9
|
-
const { result: text, mapping } = liftText(root);
|
|
5
|
+
async function segmentChapter(text, options) {
|
|
10
6
|
const result = await segmentText(text, {
|
|
11
7
|
...options.primaryLocale && {
|
|
12
8
|
language: options.primaryLocale.language
|
|
13
9
|
},
|
|
14
10
|
enableEastAsianPostprocessing: true
|
|
15
11
|
});
|
|
16
|
-
return { result: result.sentences, mapping };
|
|
12
|
+
return result.sentences;
|
|
17
13
|
}
|
|
18
14
|
export {
|
|
19
|
-
getXhtmlSegmentation
|
|
15
|
+
segmentChapter
|
|
20
16
|
};
|
|
@@ -19,11 +19,14 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
19
19
|
var transform_exports = {};
|
|
20
20
|
__export(transform_exports, {
|
|
21
21
|
addMark: () => addMark,
|
|
22
|
-
liftText: () => liftText
|
|
22
|
+
inlineFootnotes: () => inlineFootnotes,
|
|
23
|
+
liftText: () => liftText,
|
|
24
|
+
replaceFootnotes: () => replaceFootnotes
|
|
23
25
|
});
|
|
24
26
|
module.exports = __toCommonJS(transform_exports);
|
|
25
27
|
var import_map = require("./map.cjs");
|
|
26
28
|
var import_model = require("./model.cjs");
|
|
29
|
+
var import_parseDom = require("./parseDom.cjs");
|
|
27
30
|
function addMark(root, from, to, mark) {
|
|
28
31
|
const result = root.split(from).split(to);
|
|
29
32
|
let pos = 0;
|
|
@@ -55,6 +58,11 @@ function liftText(root) {
|
|
|
55
58
|
let textLength = 0;
|
|
56
59
|
let lastTextEnd = 0;
|
|
57
60
|
(0, import_model.descendants)(root, (node, pos, parent, index) => {
|
|
61
|
+
if (node instanceof import_model.FootnoteNode) {
|
|
62
|
+
if (!text.endsWith("\n")) {
|
|
63
|
+
text += "\n";
|
|
64
|
+
}
|
|
65
|
+
}
|
|
58
66
|
if (node.isBlock) {
|
|
59
67
|
return !!node.textContent.match(/\S/);
|
|
60
68
|
}
|
|
@@ -90,8 +98,57 @@ function liftText(root) {
|
|
|
90
98
|
});
|
|
91
99
|
return { result: text, mapping };
|
|
92
100
|
}
|
|
101
|
+
function inlineFootnotes(root) {
|
|
102
|
+
const footnotePairs = (0, import_parseDom.findFootnotePairs)(root);
|
|
103
|
+
const mapping = new import_map.Mapping();
|
|
104
|
+
let transformed = root;
|
|
105
|
+
for (const [noterefPos, footnotePos] of footnotePairs.entries()) {
|
|
106
|
+
const noteref = root.resolve(noterefPos).nodeAfter;
|
|
107
|
+
const footnote = root.resolve(footnotePos).nodeAfter;
|
|
108
|
+
if (!noteref || !(footnote instanceof import_model.Node)) continue;
|
|
109
|
+
transformed = transformed.replace(mapping.map(noterefPos), footnote);
|
|
110
|
+
mapping.appendMap(
|
|
111
|
+
new import_map.StepMap([
|
|
112
|
+
mapping.map(noterefPos),
|
|
113
|
+
noteref.nodeSize,
|
|
114
|
+
footnote.nodeSize
|
|
115
|
+
])
|
|
116
|
+
);
|
|
117
|
+
transformed = transformed.replace(
|
|
118
|
+
mapping.map(footnotePos),
|
|
119
|
+
new import_model.Node(footnote.tagName)
|
|
120
|
+
);
|
|
121
|
+
mapping.appendMap(
|
|
122
|
+
new import_map.StepMap([mapping.map(footnotePos), footnote.nodeSize, 1])
|
|
123
|
+
);
|
|
124
|
+
}
|
|
125
|
+
return { root: transformed, footnotePairs, mapping };
|
|
126
|
+
}
|
|
127
|
+
function replaceFootnotes(original, root, footnotePairs, mapping) {
|
|
128
|
+
let transformed = root;
|
|
129
|
+
for (const [noterefPos, footnotePos] of footnotePairs.entries()) {
|
|
130
|
+
const noteref = original.resolve(noterefPos).nodeAfter;
|
|
131
|
+
const footnote = transformed.resolve(mapping.map(noterefPos)).nodeAfter;
|
|
132
|
+
if (!(noteref instanceof import_model.Node) || !(footnote instanceof import_model.Node)) continue;
|
|
133
|
+
transformed = transformed.replace(mapping.map(noterefPos), noteref);
|
|
134
|
+
mapping.appendMap(
|
|
135
|
+
new import_map.StepMap([
|
|
136
|
+
mapping.map(noterefPos),
|
|
137
|
+
footnote.nodeSize,
|
|
138
|
+
noteref.nodeSize
|
|
139
|
+
])
|
|
140
|
+
);
|
|
141
|
+
transformed = transformed.replace(mapping.map(footnotePos), footnote);
|
|
142
|
+
mapping.appendMap(
|
|
143
|
+
new import_map.StepMap([mapping.map(footnotePos), 1, footnote.nodeSize])
|
|
144
|
+
);
|
|
145
|
+
}
|
|
146
|
+
return transformed;
|
|
147
|
+
}
|
|
93
148
|
// Annotate the CommonJS export names for ESM import in node:
|
|
94
149
|
0 && (module.exports = {
|
|
95
150
|
addMark,
|
|
96
|
-
liftText
|
|
151
|
+
inlineFootnotes,
|
|
152
|
+
liftText,
|
|
153
|
+
replaceFootnotes
|
|
97
154
|
});
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { Mapping } from './map.cjs';
|
|
2
|
-
import { Root, Mark } from './model.cjs';
|
|
2
|
+
import { R as Root, M as Mark } from '../model-Bv3yPEdd.cjs';
|
|
3
3
|
import '@storyteller-platform/epub';
|
|
4
4
|
|
|
5
5
|
declare function addMark(root: Root, from: number, to: number, mark: Mark): Root;
|
|
@@ -7,5 +7,11 @@ declare function liftText(root: Root): {
|
|
|
7
7
|
result: string;
|
|
8
8
|
mapping: Mapping;
|
|
9
9
|
};
|
|
10
|
+
declare function inlineFootnotes(root: Root): {
|
|
11
|
+
root: Root;
|
|
12
|
+
footnotePairs: Map<number, number>;
|
|
13
|
+
mapping: Mapping;
|
|
14
|
+
};
|
|
15
|
+
declare function replaceFootnotes(original: Root, root: Root, footnotePairs: Map<number, number>, mapping: Mapping): Root;
|
|
10
16
|
|
|
11
|
-
export { addMark, liftText };
|
|
17
|
+
export { addMark, inlineFootnotes, liftText, replaceFootnotes };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { Mapping } from './map.js';
|
|
2
|
-
import { Root, Mark } from './model.js';
|
|
2
|
+
import { R as Root, M as Mark } from '../model-Bv3yPEdd.js';
|
|
3
3
|
import '@storyteller-platform/epub';
|
|
4
4
|
|
|
5
5
|
declare function addMark(root: Root, from: number, to: number, mark: Mark): Root;
|
|
@@ -7,5 +7,11 @@ declare function liftText(root: Root): {
|
|
|
7
7
|
result: string;
|
|
8
8
|
mapping: Mapping;
|
|
9
9
|
};
|
|
10
|
+
declare function inlineFootnotes(root: Root): {
|
|
11
|
+
root: Root;
|
|
12
|
+
footnotePairs: Map<number, number>;
|
|
13
|
+
mapping: Mapping;
|
|
14
|
+
};
|
|
15
|
+
declare function replaceFootnotes(original: Root, root: Root, footnotePairs: Map<number, number>, mapping: Mapping): Root;
|
|
10
16
|
|
|
11
|
-
export { addMark, liftText };
|
|
17
|
+
export { addMark, inlineFootnotes, liftText, replaceFootnotes };
|
package/dist/markup/transform.js
CHANGED
|
@@ -1,9 +1,12 @@
|
|
|
1
1
|
import "../chunk-BIEQXUOY.js";
|
|
2
2
|
import { Mapping, StepMap } from "./map.js";
|
|
3
3
|
import {
|
|
4
|
+
FootnoteNode,
|
|
5
|
+
Node,
|
|
4
6
|
TextNode,
|
|
5
7
|
descendants
|
|
6
8
|
} from "./model.js";
|
|
9
|
+
import { findFootnotePairs } from "./parseDom.js";
|
|
7
10
|
function addMark(root, from, to, mark) {
|
|
8
11
|
const result = root.split(from).split(to);
|
|
9
12
|
let pos = 0;
|
|
@@ -35,6 +38,11 @@ function liftText(root) {
|
|
|
35
38
|
let textLength = 0;
|
|
36
39
|
let lastTextEnd = 0;
|
|
37
40
|
descendants(root, (node, pos, parent, index) => {
|
|
41
|
+
if (node instanceof FootnoteNode) {
|
|
42
|
+
if (!text.endsWith("\n")) {
|
|
43
|
+
text += "\n";
|
|
44
|
+
}
|
|
45
|
+
}
|
|
38
46
|
if (node.isBlock) {
|
|
39
47
|
return !!node.textContent.match(/\S/);
|
|
40
48
|
}
|
|
@@ -70,7 +78,56 @@ function liftText(root) {
|
|
|
70
78
|
});
|
|
71
79
|
return { result: text, mapping };
|
|
72
80
|
}
|
|
81
|
+
function inlineFootnotes(root) {
|
|
82
|
+
const footnotePairs = findFootnotePairs(root);
|
|
83
|
+
const mapping = new Mapping();
|
|
84
|
+
let transformed = root;
|
|
85
|
+
for (const [noterefPos, footnotePos] of footnotePairs.entries()) {
|
|
86
|
+
const noteref = root.resolve(noterefPos).nodeAfter;
|
|
87
|
+
const footnote = root.resolve(footnotePos).nodeAfter;
|
|
88
|
+
if (!noteref || !(footnote instanceof Node)) continue;
|
|
89
|
+
transformed = transformed.replace(mapping.map(noterefPos), footnote);
|
|
90
|
+
mapping.appendMap(
|
|
91
|
+
new StepMap([
|
|
92
|
+
mapping.map(noterefPos),
|
|
93
|
+
noteref.nodeSize,
|
|
94
|
+
footnote.nodeSize
|
|
95
|
+
])
|
|
96
|
+
);
|
|
97
|
+
transformed = transformed.replace(
|
|
98
|
+
mapping.map(footnotePos),
|
|
99
|
+
new Node(footnote.tagName)
|
|
100
|
+
);
|
|
101
|
+
mapping.appendMap(
|
|
102
|
+
new StepMap([mapping.map(footnotePos), footnote.nodeSize, 1])
|
|
103
|
+
);
|
|
104
|
+
}
|
|
105
|
+
return { root: transformed, footnotePairs, mapping };
|
|
106
|
+
}
|
|
107
|
+
function replaceFootnotes(original, root, footnotePairs, mapping) {
|
|
108
|
+
let transformed = root;
|
|
109
|
+
for (const [noterefPos, footnotePos] of footnotePairs.entries()) {
|
|
110
|
+
const noteref = original.resolve(noterefPos).nodeAfter;
|
|
111
|
+
const footnote = transformed.resolve(mapping.map(noterefPos)).nodeAfter;
|
|
112
|
+
if (!(noteref instanceof Node) || !(footnote instanceof Node)) continue;
|
|
113
|
+
transformed = transformed.replace(mapping.map(noterefPos), noteref);
|
|
114
|
+
mapping.appendMap(
|
|
115
|
+
new StepMap([
|
|
116
|
+
mapping.map(noterefPos),
|
|
117
|
+
footnote.nodeSize,
|
|
118
|
+
noteref.nodeSize
|
|
119
|
+
])
|
|
120
|
+
);
|
|
121
|
+
transformed = transformed.replace(mapping.map(footnotePos), footnote);
|
|
122
|
+
mapping.appendMap(
|
|
123
|
+
new StepMap([mapping.map(footnotePos), 1, footnote.nodeSize])
|
|
124
|
+
);
|
|
125
|
+
}
|
|
126
|
+
return transformed;
|
|
127
|
+
}
|
|
73
128
|
export {
|
|
74
129
|
addMark,
|
|
75
|
-
liftText
|
|
130
|
+
inlineFootnotes,
|
|
131
|
+
liftText,
|
|
132
|
+
replaceFootnotes
|
|
76
133
|
};
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import { ElementName } from '@storyteller-platform/epub';
|
|
2
|
+
|
|
3
|
+
declare class ResolvedPos {
|
|
4
|
+
readonly pos: number;
|
|
5
|
+
readonly path: (Node | Root | number)[];
|
|
6
|
+
readonly parentOffset: number;
|
|
7
|
+
depth: number;
|
|
8
|
+
private constructor();
|
|
9
|
+
static resolve(doc: Root, pos: number): ResolvedPos;
|
|
10
|
+
get parent(): Root | Node;
|
|
11
|
+
node(depth?: number | null): Root | Node | TextNode;
|
|
12
|
+
index(depth?: number | null): number;
|
|
13
|
+
get nodeAfter(): Node | TextNode | null;
|
|
14
|
+
before(depth?: number | null): number;
|
|
15
|
+
private resolveDepth;
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
declare class Root {
|
|
19
|
+
children: (Node | TextNode)[];
|
|
20
|
+
constructor(children: (Node | TextNode)[]);
|
|
21
|
+
isInline: boolean;
|
|
22
|
+
isBlock: boolean;
|
|
23
|
+
get border(): number;
|
|
24
|
+
get textContent(): string;
|
|
25
|
+
get nodeSize(): number;
|
|
26
|
+
get contentSize(): number;
|
|
27
|
+
split(at: number): Root;
|
|
28
|
+
copy(opts?: {
|
|
29
|
+
children?: (Node | TextNode)[];
|
|
30
|
+
}): Root;
|
|
31
|
+
findIndex(pos: number): {
|
|
32
|
+
index: number;
|
|
33
|
+
offset: number;
|
|
34
|
+
};
|
|
35
|
+
replace(at: number, withNode: Node): Root;
|
|
36
|
+
cut(pos: number): Node | TextNode | null;
|
|
37
|
+
resolve(pos: number): ResolvedPos;
|
|
38
|
+
}
|
|
39
|
+
declare class Node {
|
|
40
|
+
tagName: ElementName;
|
|
41
|
+
attrs: Record<string, string>;
|
|
42
|
+
children: (Node | TextNode)[];
|
|
43
|
+
marks: Mark[];
|
|
44
|
+
constructor(tagName: ElementName, attrs?: Record<string, string>, children?: (Node | TextNode)[], marks?: Mark[]);
|
|
45
|
+
get isLeaf(): boolean;
|
|
46
|
+
get isInline(): boolean;
|
|
47
|
+
get isBlock(): boolean;
|
|
48
|
+
get border(): 0 | 1;
|
|
49
|
+
get nodeSize(): number;
|
|
50
|
+
get contentSize(): number;
|
|
51
|
+
get textContent(): string;
|
|
52
|
+
split(at: number): Node;
|
|
53
|
+
static instance(): typeof Node;
|
|
54
|
+
private static create;
|
|
55
|
+
copy(opts?: {
|
|
56
|
+
attrs?: Record<string, string>;
|
|
57
|
+
children?: (Node | TextNode)[];
|
|
58
|
+
marks?: Mark[];
|
|
59
|
+
}): Node;
|
|
60
|
+
replace(at: number, withNode: Node): Node;
|
|
61
|
+
cut(pos: number): Node | TextNode | null;
|
|
62
|
+
findIndex(pos: number): {
|
|
63
|
+
index: number;
|
|
64
|
+
offset: number;
|
|
65
|
+
};
|
|
66
|
+
}
|
|
67
|
+
declare class NoterefNode extends Node {
|
|
68
|
+
}
|
|
69
|
+
declare class FootnoteNode extends Node {
|
|
70
|
+
}
|
|
71
|
+
declare class Mark {
|
|
72
|
+
tagName: ElementName;
|
|
73
|
+
attrs: Record<string, string>;
|
|
74
|
+
constructor(tagName: ElementName, attrs?: Record<string, string>);
|
|
75
|
+
eq(other: Mark | undefined): boolean;
|
|
76
|
+
}
|
|
77
|
+
declare class TextNode {
|
|
78
|
+
text: string;
|
|
79
|
+
marks: Mark[];
|
|
80
|
+
constructor(text: string, marks?: Mark[]);
|
|
81
|
+
isLeaf: boolean;
|
|
82
|
+
isInline: boolean;
|
|
83
|
+
isBlock: boolean;
|
|
84
|
+
border: number;
|
|
85
|
+
get nodeSize(): number;
|
|
86
|
+
get contentSize(): number;
|
|
87
|
+
get textContent(): string;
|
|
88
|
+
split(at: number): TextNode[];
|
|
89
|
+
copy(opts?: {
|
|
90
|
+
marks?: Mark[];
|
|
91
|
+
}): TextNode;
|
|
92
|
+
cut(pos: number): TextNode;
|
|
93
|
+
}
|
|
94
|
+
declare function descendants(node: Root | Node, cb: (node: Node | TextNode, pos: number, parent: Node | Root, index: number) => boolean, pos?: number): void;
|
|
95
|
+
|
|
96
|
+
export { FootnoteNode as F, Mark as M, Node as N, Root as R, TextNode as T, ResolvedPos as a, NoterefNode as b, descendants as d };
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import { ElementName } from '@storyteller-platform/epub';
|
|
2
|
+
|
|
3
|
+
declare class ResolvedPos {
|
|
4
|
+
readonly pos: number;
|
|
5
|
+
readonly path: (Node | Root | number)[];
|
|
6
|
+
readonly parentOffset: number;
|
|
7
|
+
depth: number;
|
|
8
|
+
private constructor();
|
|
9
|
+
static resolve(doc: Root, pos: number): ResolvedPos;
|
|
10
|
+
get parent(): Root | Node;
|
|
11
|
+
node(depth?: number | null): Root | Node | TextNode;
|
|
12
|
+
index(depth?: number | null): number;
|
|
13
|
+
get nodeAfter(): Node | TextNode | null;
|
|
14
|
+
before(depth?: number | null): number;
|
|
15
|
+
private resolveDepth;
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
declare class Root {
|
|
19
|
+
children: (Node | TextNode)[];
|
|
20
|
+
constructor(children: (Node | TextNode)[]);
|
|
21
|
+
isInline: boolean;
|
|
22
|
+
isBlock: boolean;
|
|
23
|
+
get border(): number;
|
|
24
|
+
get textContent(): string;
|
|
25
|
+
get nodeSize(): number;
|
|
26
|
+
get contentSize(): number;
|
|
27
|
+
split(at: number): Root;
|
|
28
|
+
copy(opts?: {
|
|
29
|
+
children?: (Node | TextNode)[];
|
|
30
|
+
}): Root;
|
|
31
|
+
findIndex(pos: number): {
|
|
32
|
+
index: number;
|
|
33
|
+
offset: number;
|
|
34
|
+
};
|
|
35
|
+
replace(at: number, withNode: Node): Root;
|
|
36
|
+
cut(pos: number): Node | TextNode | null;
|
|
37
|
+
resolve(pos: number): ResolvedPos;
|
|
38
|
+
}
|
|
39
|
+
declare class Node {
|
|
40
|
+
tagName: ElementName;
|
|
41
|
+
attrs: Record<string, string>;
|
|
42
|
+
children: (Node | TextNode)[];
|
|
43
|
+
marks: Mark[];
|
|
44
|
+
constructor(tagName: ElementName, attrs?: Record<string, string>, children?: (Node | TextNode)[], marks?: Mark[]);
|
|
45
|
+
get isLeaf(): boolean;
|
|
46
|
+
get isInline(): boolean;
|
|
47
|
+
get isBlock(): boolean;
|
|
48
|
+
get border(): 0 | 1;
|
|
49
|
+
get nodeSize(): number;
|
|
50
|
+
get contentSize(): number;
|
|
51
|
+
get textContent(): string;
|
|
52
|
+
split(at: number): Node;
|
|
53
|
+
static instance(): typeof Node;
|
|
54
|
+
private static create;
|
|
55
|
+
copy(opts?: {
|
|
56
|
+
attrs?: Record<string, string>;
|
|
57
|
+
children?: (Node | TextNode)[];
|
|
58
|
+
marks?: Mark[];
|
|
59
|
+
}): Node;
|
|
60
|
+
replace(at: number, withNode: Node): Node;
|
|
61
|
+
cut(pos: number): Node | TextNode | null;
|
|
62
|
+
findIndex(pos: number): {
|
|
63
|
+
index: number;
|
|
64
|
+
offset: number;
|
|
65
|
+
};
|
|
66
|
+
}
|
|
67
|
+
declare class NoterefNode extends Node {
|
|
68
|
+
}
|
|
69
|
+
declare class FootnoteNode extends Node {
|
|
70
|
+
}
|
|
71
|
+
declare class Mark {
|
|
72
|
+
tagName: ElementName;
|
|
73
|
+
attrs: Record<string, string>;
|
|
74
|
+
constructor(tagName: ElementName, attrs?: Record<string, string>);
|
|
75
|
+
eq(other: Mark | undefined): boolean;
|
|
76
|
+
}
|
|
77
|
+
declare class TextNode {
|
|
78
|
+
text: string;
|
|
79
|
+
marks: Mark[];
|
|
80
|
+
constructor(text: string, marks?: Mark[]);
|
|
81
|
+
isLeaf: boolean;
|
|
82
|
+
isInline: boolean;
|
|
83
|
+
isBlock: boolean;
|
|
84
|
+
border: number;
|
|
85
|
+
get nodeSize(): number;
|
|
86
|
+
get contentSize(): number;
|
|
87
|
+
get textContent(): string;
|
|
88
|
+
split(at: number): TextNode[];
|
|
89
|
+
copy(opts?: {
|
|
90
|
+
marks?: Mark[];
|
|
91
|
+
}): TextNode;
|
|
92
|
+
cut(pos: number): TextNode;
|
|
93
|
+
}
|
|
94
|
+
declare function descendants(node: Root | Node, cb: (node: Node | TextNode, pos: number, parent: Node | Root, index: number) => boolean, pos?: number): void;
|
|
95
|
+
|
|
96
|
+
export { FootnoteNode as F, Mark as M, Node as N, Root as R, TextNode as T, ResolvedPos as a, NoterefNode as b, descendants as d };
|
|
@@ -71,7 +71,9 @@ var import_promises = require("node:fs/promises");
|
|
|
71
71
|
var import_node_path = require("node:path");
|
|
72
72
|
var import_posix = require("node:path/posix");
|
|
73
73
|
var import_epub = require("@storyteller-platform/epub");
|
|
74
|
+
var import_parseDom = require("../markup/parseDom.cjs");
|
|
74
75
|
var import_segmentation = require("../markup/segmentation.cjs");
|
|
76
|
+
var import_transform = require("../markup/transform.cjs");
|
|
75
77
|
async function snapshotAlignment(epubPath, transcriptionsPath, outputPath) {
|
|
76
78
|
var _stack = [];
|
|
77
79
|
try {
|
|
@@ -117,12 +119,12 @@ async function createAlignmentSnapshot(epub, transcriptionFilepaths, textRef) {
|
|
|
117
119
|
"utf-8"
|
|
118
120
|
);
|
|
119
121
|
const chapterXml = import_epub.Epub.xhtmlParser.parse(chapterContents);
|
|
120
|
-
const
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
);
|
|
122
|
+
const original = (0, import_parseDom.parseDom)(import_epub.Epub.getXhtmlBody(chapterXml));
|
|
123
|
+
const inlined = (0, import_transform.inlineFootnotes)(original);
|
|
124
|
+
const lifted = (0, import_transform.liftText)(inlined.root);
|
|
125
|
+
const segmentation = await (0, import_segmentation.segmentChapter)(lifted.result, {
|
|
126
|
+
primaryLocale: new Intl.Locale("en-US")
|
|
127
|
+
});
|
|
126
128
|
let lastChapterSentence = -1;
|
|
127
129
|
const chapterSentences = segmentation.filter((s) => s.text.match(/\S/));
|
|
128
130
|
for (const par of import_epub.Epub.getXmlChildren(seq)) {
|
|
@@ -9,7 +9,9 @@ import {
|
|
|
9
9
|
extname as posixExtname
|
|
10
10
|
} from "node:path/posix";
|
|
11
11
|
import { Epub } from "@storyteller-platform/epub";
|
|
12
|
-
import { getXhtmlSegmentation } from "../markup/segmentation.js";
|
|
12
|
+
import { parseDom } from "../markup/parseDom.js";
|
|
13
|
+
import { segmentChapter } from "../markup/segmentation.js";
|
|
14
|
+
import { inlineFootnotes, liftText } from "../markup/transform.js";
|
|
13
15
|
async function snapshotAlignment(epubPath, transcriptionsPath, outputPath) {
|
|
14
16
|
var _stack = [];
|
|
15
17
|
try {
|
|
@@ -55,12 +57,12 @@ async function createAlignmentSnapshot(epub, transcriptionFilepaths, textRef) {
|
|
|
55
57
|
"utf-8"
|
|
56
58
|
);
|
|
57
59
|
const chapterXml = Epub.xhtmlParser.parse(chapterContents);
|
|
58
|
-
const
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
);
|
|
60
|
+
const original = parseDom(Epub.getXhtmlBody(chapterXml));
|
|
61
|
+
const inlined = inlineFootnotes(original);
|
|
62
|
+
const lifted = liftText(inlined.root);
|
|
63
|
+
const segmentation = await segmentChapter(lifted.result, {
|
|
64
|
+
primaryLocale: new Intl.Locale("en-US")
|
|
65
|
+
});
|
|
64
66
|
let lastChapterSentence = -1;
|
|
65
67
|
const chapterSentences = segmentation.filter((s) => s.text.match(/\S/));
|
|
66
68
|
for (const par of Epub.getXmlChildren(seq)) {
|
package/package.json
CHANGED