@storyteller-platform/align 0.1.8 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/align/__tests__/align.test.cjs +6 -5
- package/dist/align/__tests__/align.test.js +6 -5
- package/dist/align/align.cjs +133 -81
- package/dist/align/align.d.cts +1 -0
- package/dist/align/align.d.ts +1 -0
- package/dist/align/align.js +133 -81
- package/dist/align/getSentenceRanges.cjs +78 -149
- package/dist/align/getSentenceRanges.d.cts +1 -1
- package/dist/align/getSentenceRanges.d.ts +1 -1
- package/dist/align/getSentenceRanges.js +78 -149
- package/dist/align/slugify.cjs +2 -0
- package/dist/align/slugify.js +2 -0
- package/dist/errorAlign/__tests__/errorAlign.test.cjs +100 -0
- package/dist/errorAlign/__tests__/errorAlign.test.d.cts +2 -0
- package/dist/errorAlign/__tests__/errorAlign.test.d.ts +2 -0
- package/dist/errorAlign/__tests__/errorAlign.test.js +77 -0
- package/dist/errorAlign/__tests__/native.test.cjs +118 -0
- package/dist/errorAlign/__tests__/native.test.d.cts +2 -0
- package/dist/errorAlign/__tests__/native.test.d.ts +2 -0
- package/dist/errorAlign/__tests__/native.test.js +107 -0
- package/dist/errorAlign/backtraceGraph.cjs +298 -0
- package/dist/errorAlign/backtraceGraph.d.cts +103 -0
- package/dist/errorAlign/backtraceGraph.d.ts +103 -0
- package/dist/errorAlign/backtraceGraph.js +270 -0
- package/dist/errorAlign/beamSearch.cjs +302 -0
- package/dist/errorAlign/beamSearch.d.cts +53 -0
- package/dist/errorAlign/beamSearch.d.ts +53 -0
- package/dist/errorAlign/beamSearch.js +268 -0
- package/dist/errorAlign/core.cjs +33 -0
- package/dist/errorAlign/core.d.cts +5 -0
- package/dist/errorAlign/core.d.ts +5 -0
- package/dist/errorAlign/core.js +11 -0
- package/dist/errorAlign/editDistance.cjs +115 -0
- package/dist/errorAlign/editDistance.d.cts +46 -0
- package/dist/errorAlign/editDistance.d.ts +46 -0
- package/dist/errorAlign/editDistance.js +90 -0
- package/dist/errorAlign/errorAlign.cjs +159 -0
- package/dist/errorAlign/errorAlign.d.cts +15 -0
- package/dist/errorAlign/errorAlign.d.ts +15 -0
- package/dist/errorAlign/errorAlign.js +145 -0
- package/dist/errorAlign/graphMetadata.cjs +97 -0
- package/dist/errorAlign/graphMetadata.d.cts +44 -0
- package/dist/errorAlign/graphMetadata.d.ts +44 -0
- package/dist/errorAlign/graphMetadata.js +64 -0
- package/dist/errorAlign/hash.cjs +173 -0
- package/dist/errorAlign/hash.d.cts +28 -0
- package/dist/errorAlign/hash.d.ts +28 -0
- package/dist/errorAlign/hash.js +150 -0
- package/dist/errorAlign/native.cjs +60 -0
- package/dist/errorAlign/native.d.cts +18 -0
- package/dist/errorAlign/native.d.ts +18 -0
- package/dist/errorAlign/native.js +24 -0
- package/dist/errorAlign/node-gyp-build.d.cjs +1 -0
- package/dist/errorAlign/node-gyp-build.d.d.cts +3 -0
- package/dist/errorAlign/node-gyp-build.d.d.ts +3 -0
- package/dist/errorAlign/node-gyp-build.d.js +0 -0
- package/dist/errorAlign/pathToAlignment.cjs +122 -0
- package/dist/errorAlign/pathToAlignment.d.cts +11 -0
- package/dist/errorAlign/pathToAlignment.d.ts +11 -0
- package/dist/errorAlign/pathToAlignment.js +89 -0
- package/dist/errorAlign/utils.cjs +301 -0
- package/dist/errorAlign/utils.d.cts +107 -0
- package/dist/errorAlign/utils.d.ts +107 -0
- package/dist/errorAlign/utils.js +248 -0
- package/dist/index.d.cts +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/markup/__tests__/markup.test.cjs +108 -81
- package/dist/markup/__tests__/markup.test.js +109 -82
- package/dist/markup/__tests__/parseDom.test.cjs +112 -0
- package/dist/markup/__tests__/parseDom.test.d.cts +2 -0
- package/dist/markup/__tests__/parseDom.test.d.ts +2 -0
- package/dist/markup/__tests__/parseDom.test.js +89 -0
- package/dist/markup/__tests__/serializeDom.test.cjs +120 -0
- package/dist/markup/__tests__/serializeDom.test.d.cts +2 -0
- package/dist/markup/__tests__/serializeDom.test.d.ts +2 -0
- package/dist/markup/__tests__/serializeDom.test.js +97 -0
- package/dist/markup/__tests__/transform.test.cjs +122 -0
- package/dist/markup/__tests__/transform.test.d.cts +2 -0
- package/dist/markup/__tests__/transform.test.d.ts +2 -0
- package/dist/markup/__tests__/transform.test.js +99 -0
- package/dist/markup/map.cjs +261 -0
- package/dist/markup/map.d.cts +50 -0
- package/dist/markup/map.d.ts +50 -0
- package/dist/markup/map.js +236 -0
- package/dist/markup/markup.cjs +23 -201
- package/dist/markup/markup.d.cts +5 -9
- package/dist/markup/markup.d.ts +5 -9
- package/dist/markup/markup.js +24 -203
- package/dist/markup/model.cjs +172 -0
- package/dist/markup/model.d.cts +57 -0
- package/dist/markup/model.d.ts +57 -0
- package/dist/markup/model.js +145 -0
- package/dist/markup/parseDom.cjs +59 -0
- package/dist/markup/parseDom.d.cts +7 -0
- package/dist/markup/parseDom.d.ts +7 -0
- package/dist/markup/parseDom.js +35 -0
- package/dist/markup/segmentation.cjs +11 -57
- package/dist/markup/segmentation.d.cts +6 -2
- package/dist/markup/segmentation.d.ts +6 -2
- package/dist/markup/segmentation.js +11 -58
- package/dist/markup/serializeDom.cjs +87 -0
- package/dist/markup/serializeDom.d.cts +7 -0
- package/dist/markup/serializeDom.d.ts +7 -0
- package/dist/markup/serializeDom.js +63 -0
- package/dist/markup/transform.cjs +92 -0
- package/dist/markup/transform.d.cts +11 -0
- package/dist/markup/transform.d.ts +11 -0
- package/dist/markup/transform.js +71 -0
- package/dist/types/node-gyp-build.d.cjs +1 -0
- package/dist/types/node-gyp-build.d.d.cts +3 -0
- package/dist/types/node-gyp-build.d.d.ts +3 -0
- package/dist/types/node-gyp-build.d.js +0 -0
- package/package.json +11 -4
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __export = (target, all) => {
|
|
7
|
+
for (var name in all)
|
|
8
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
+
};
|
|
10
|
+
var __copyProps = (to, from, except, desc) => {
|
|
11
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
12
|
+
for (let key of __getOwnPropNames(from))
|
|
13
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
14
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
15
|
+
}
|
|
16
|
+
return to;
|
|
17
|
+
};
|
|
18
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
|
+
var model_exports = {};
|
|
20
|
+
__export(model_exports, {
|
|
21
|
+
Mark: () => Mark,
|
|
22
|
+
Node: () => Node,
|
|
23
|
+
Root: () => Root,
|
|
24
|
+
TextNode: () => TextNode,
|
|
25
|
+
descendants: () => descendants
|
|
26
|
+
});
|
|
27
|
+
module.exports = __toCommonJS(model_exports);
|
|
28
|
+
var import_itertools = require("itertools");
|
|
29
|
+
var import_semantics = require("./semantics.cjs");
|
|
30
|
+
/**
 * Top of the document tree: a container for the top-level children.
 *
 * A root is always block-level and contributes no border of its own, so
 * positions inside it are simply the running sum of its children's sizes.
 */
class Root {
  isInline = false;
  isBlock = true;

  /**
   * @param children Top-level nodes of the document.
   */
  constructor(children) {
    this.children = children;
  }

  /** The root has no opening/closing boundary, so this is always 0. */
  get border() {
    return 0;
  }

  /** Concatenation of every child's `textContent`, in order. */
  get textContent() {
    let text = "";
    for (const node of this.children) {
      text += node.textContent;
    }
    return text;
  }

  /**
   * Returns a copy of this root in which the child that strictly contains
   * position `at` has been split at that position.
   *
   * Children that do not strictly contain `at` (including those that merely
   * start or end there) are carried over untouched.
   *
   * @param at Position measured from the start of the root.
   */
  split(at) {
    const parts = [];
    let start = this.border;
    for (const node of this.children) {
      const end = start + node.nodeSize;
      const cutsInside = start < at && at < end;
      if (!cutsInside) {
        parts.push(node);
      } else if (node instanceof TextNode) {
        // A text node splits into several pieces; splice them all in.
        parts.push(...node.split(at - start));
      } else {
        // An element splits "in place" and returns a single replacement.
        parts.push(node.split(at - start));
      }
      start = end;
    }
    return this.copy({ children: parts });
  }

  /**
   * Creates a new root, optionally replacing the children.
   *
   * @param opts `children` overrides the current children when not nullish.
   */
  copy(opts = {}) {
    const children = opts.children ?? this.children;
    return new Root(children);
  }
}
|
|
61
|
+
/**
 * An element of the document tree, identified by its tag name and carrying
 * attributes, child nodes and the marks that apply to it.
 */
class Node {
  /**
   * @param tagName  Element name.
   * @param attrs    Attribute map (defaults to an empty object).
   * @param children Child nodes (defaults to none).
   * @param marks    Marks applied to this node (defaults to none).
   */
  constructor(tagName, attrs = {}, children = [], marks = []) {
    Object.assign(this, { tagName, attrs, children, marks });
  }

  /** True when the node has no children. */
  get isLeaf() {
    return this.children.length ? false : true;
  }

  /** A node is inline exactly when it is not a block. */
  get isInline() {
    const block = this.isBlock;
    return !block;
  }

  /** True when the tag name is listed in `BLOCKS`. */
  get isBlock() {
    const { BLOCKS } = import_semantics;
    return BLOCKS.includes(this.tagName);
  }

  /** Size of the opening (and closing) boundary: 0 for leaves, 1 otherwise. */
  get border() {
    if (this.isLeaf) return 0;
    return 1;
  }

  /**
   * Total size of the node: both borders plus the summed size of the
   * children. A content size of 0 counts as 1, so a leaf has size 1.
   */
  get nodeSize() {
    const edge = this.border;
    let inner = 0;
    for (const child of this.children) {
      inner += child.nodeSize;
    }
    return edge + (inner || 1) + edge;
  }

  /** Concatenation of every child's `textContent`, in order. */
  get textContent() {
    let text = "";
    for (const child of this.children) {
      text += child.textContent;
    }
    return text;
  }

  /**
   * Splits the descendant that strictly contains position `at`.
   *
   * Returns `this` unchanged when `at` sits right after the opening border
   * or right before the closing border; otherwise returns a copy whose
   * children reflect the split.
   *
   * @param at Position measured from the start of this node (border included).
   */
  split(at) {
    const edge = this.border;
    if (at === edge || at === this.nodeSize - edge) return this;

    const parts = [];
    let start = edge;
    for (const child of this.children) {
      const end = start + child.nodeSize;
      if (start < at && at < end) {
        // Text nodes yield several pieces, elements a single replacement.
        const pieces =
          child instanceof TextNode
            ? child.split(at - start)
            : [child.split(at - start)];
        parts.push(...pieces);
      } else {
        parts.push(child);
      }
      start = end;
    }
    return this.copy({ children: parts });
  }

  /**
   * Creates a new node with the same tag name, optionally overriding
   * `attrs`, `children` and/or `marks` (nullish overrides are ignored).
   */
  copy(opts = {}) {
    const { tagName, attrs, children, marks } = this;
    return new Node(
      tagName,
      opts.attrs ?? attrs,
      opts.children ?? children,
      opts.marks ?? marks
    );
  }
}
|
|
114
|
+
/**
 * An inline annotation described by a tag name and an attribute map.
 */
class Mark {
  /**
   * @param tagName Tag name of the mark.
   * @param attrs   Attribute map (defaults to an empty object).
   */
  constructor(tagName, attrs = {}) {
    this.tagName = tagName;
    this.attrs = attrs;
  }

  /**
   * Structural equality: two marks are equal when they have the same number
   * of attributes, every attribute of this mark has the identical value on
   * the other, and the tag names match.
   *
   * @param other Mark to compare with; a falsy value is never equal.
   */
  eq(other) {
    if (!other) return false;

    const ownEntries = Object.entries(this.attrs);
    if (ownEntries.length !== Object.keys(other.attrs).length) {
      return false;
    }

    const sameAttrs = ownEntries.every(
      ([key, value]) => other.attrs[key] === value
    );
    return sameAttrs && this.tagName === other.tagName;
  }
}
|
|
129
|
+
/**
 * A run of text in the document tree together with the marks applied to it.
 *
 * Text nodes are inline leaves with no border; their size is the length of
 * the text.
 */
class TextNode {
  /**
   * @param text  The text of this node.
   * @param marks Marks applied to the text (defaults to none).
   */
  constructor(text, marks = []) {
    this.text = text;
    this.marks = marks;
  }

  isLeaf = true;
  isInline = true;
  isBlock = false;
  border = 0;

  /** Size of the node: the length of its text. */
  get nodeSize() {
    return this.text.length;
  }

  /** The text itself. */
  get textContent() {
    return this.text;
  }

  /**
   * Splits the text at offset `at` into two text nodes sharing this node's
   * marks.
   *
   * When `at` does not fall strictly inside the text (at or before the
   * start, at or past the end) there is nothing to split and the node
   * itself is returned as the only element. Previously only the exact
   * boundaries were guarded, so a negative offset was interpreted by
   * `slice` as counting from the end and an overshooting offset produced an
   * empty trailing node.
   *
   * @param at Offset into the text.
   * @returns One node (no split) or two nodes (left and right part).
   */
  split(at) {
    if (at <= 0 || at >= this.text.length) return [this];
    return [
      new TextNode(this.text.slice(0, at), this.marks),
      new TextNode(this.text.slice(at), this.marks)
    ];
  }

  /**
   * Creates a new text node with the same text, optionally replacing the
   * marks (a nullish override keeps the current marks).
   */
  copy(opts = {}) {
    return new TextNode(this.text, opts.marks ?? this.marks);
  }
}
|
|
156
|
+
/**
 * Walks the tree below `root` depth-first, in document order.
 *
 * For each child the callback receives the child, the position at which it
 * starts, its parent and its index within the parent. When the callback
 * returns a truthy value and the child is not a leaf, the walk continues
 * into that child, starting just past its opening border.
 *
 * @param root Node whose children are visited.
 * @param cb   Visitor; return a truthy value to descend into the child.
 * @param pos  Position of the first child (defaults to 0).
 */
function descendants(root, cb, pos = 0) {
  let cursor = pos;
  const indexedChildren = (0, import_itertools.enumerate)(root.children);
  for (const [index, node] of indexedChildren) {
    const wantsChildren = cb(node, cursor, root, index);
    if (wantsChildren && !node.isLeaf) {
      descendants(node, cb, cursor + node.border);
    }
    // Advance past the whole child regardless of whether we descended.
    cursor += node.nodeSize;
  }
}
|
|
165
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
166
|
+
0 && (module.exports = {
|
|
167
|
+
Mark,
|
|
168
|
+
Node,
|
|
169
|
+
Root,
|
|
170
|
+
TextNode,
|
|
171
|
+
descendants
|
|
172
|
+
});
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import { ElementName } from '@storyteller-platform/epub';
|
|
2
|
+
|
|
3
|
+
/**
 * Top of the document tree: a container for the top-level children.
 */
declare class Root {
    /** Top-level nodes of the document. */
    children: Array<Node | TextNode>;
    constructor(children: Array<Node | TextNode>);
    isInline: boolean;
    isBlock: boolean;
    /** Size of the root's own boundary; the implementation returns 0. */
    get border(): number;
    /** Concatenated text content of all children. */
    get textContent(): string;
    /** Returns a copy in which the child strictly containing `at` is split. */
    split(at: number): Root;
    /** Returns a new root, optionally with replaced children. */
    copy(opts?: { children?: Array<Node | TextNode> }): Root;
}
|
|
15
|
+
/**
 * An element of the document tree with attributes, children and marks.
 */
declare class Node {
    tagName: ElementName;
    attrs: Record<string, string>;
    children: Array<Node | TextNode>;
    marks: Array<Mark>;
    constructor(
        tagName: ElementName,
        attrs?: Record<string, string>,
        children?: Array<Node | TextNode>,
        marks?: Array<Mark>
    );
    /** True when the node has no children. */
    get isLeaf(): boolean;
    /** True when the node is not a block. */
    get isInline(): boolean;
    /** True when the tag name is one of the block element names. */
    get isBlock(): boolean;
    /** Boundary size: 0 for leaves, 1 otherwise. */
    get border(): 0 | 1;
    /** Total size including both borders. */
    get nodeSize(): number;
    /** Concatenated text content of all children. */
    get textContent(): string;
    /** Splits the descendant strictly containing `at`; may return `this`. */
    split(at: number): Node;
    /** Returns a new node with the same tag name and optional overrides. */
    copy(opts?: {
        attrs?: Record<string, string>;
        children?: Array<Node | TextNode>;
        marks?: Array<Mark>;
    }): Node;
}
|
|
34
|
+
/**
 * An inline annotation described by a tag name and an attribute map.
 */
declare class Mark {
    tagName: ElementName;
    attrs: Record<string, string>;
    constructor(tagName: ElementName, attrs?: Record<string, string>);
    /**
     * Structural equality on tag name and attributes; `undefined` is never
     * equal.
     */
    eq(other: Mark | undefined): boolean;
}
|
|
40
|
+
/**
 * A run of text in the document tree together with the marks applied to it.
 */
declare class TextNode {
    text: string;
    marks: Array<Mark>;
    constructor(text: string, marks?: Array<Mark>);
    isLeaf: boolean;
    isInline: boolean;
    isBlock: boolean;
    border: number;
    /** Size of the node: the length of its text. */
    get nodeSize(): number;
    /** The text itself. */
    get textContent(): string;
    /** Splits the text at offset `at`; the pieces share this node's marks. */
    split(at: number): Array<TextNode>;
    /** Returns a new text node with the same text and optional new marks. */
    copy(opts?: { marks?: Array<Mark> }): TextNode;
}
|
|
55
|
+
/**
 * Walks the tree below `root` depth-first, calling `cb` for every child with
 * the child, its start position, its parent and its index in the parent.
 * The walk descends into a non-leaf child only when `cb` returns true.
 *
 * @param root Node whose children are visited.
 * @param cb   Visitor; return true to descend into the visited node.
 * @param pos  Position of the first child (the implementation defaults to 0).
 */
declare function descendants(root: Root | Node, cb: (node: Node | TextNode, pos: number, parent: Node | Root, index: number) => boolean, pos?: number): void;
|
|
56
|
+
|
|
57
|
+
export { Mark, Node, Root, TextNode, descendants };
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import { ElementName } from '@storyteller-platform/epub';
|
|
2
|
+
|
|
3
|
+
/**
 * Top of the document tree: a container for the top-level children.
 */
declare class Root {
    /** Top-level nodes of the document. */
    children: Array<Node | TextNode>;
    constructor(children: Array<Node | TextNode>);
    isInline: boolean;
    isBlock: boolean;
    /** Size of the root's own boundary; the implementation returns 0. */
    get border(): number;
    /** Concatenated text content of all children. */
    get textContent(): string;
    /** Returns a copy in which the child strictly containing `at` is split. */
    split(at: number): Root;
    /** Returns a new root, optionally with replaced children. */
    copy(opts?: { children?: Array<Node | TextNode> }): Root;
}
|
|
15
|
+
/**
 * An element of the document tree with attributes, children and marks.
 */
declare class Node {
    tagName: ElementName;
    attrs: Record<string, string>;
    children: Array<Node | TextNode>;
    marks: Array<Mark>;
    constructor(
        tagName: ElementName,
        attrs?: Record<string, string>,
        children?: Array<Node | TextNode>,
        marks?: Array<Mark>
    );
    /** True when the node has no children. */
    get isLeaf(): boolean;
    /** True when the node is not a block. */
    get isInline(): boolean;
    /** True when the tag name is one of the block element names. */
    get isBlock(): boolean;
    /** Boundary size: 0 for leaves, 1 otherwise. */
    get border(): 0 | 1;
    /** Total size including both borders. */
    get nodeSize(): number;
    /** Concatenated text content of all children. */
    get textContent(): string;
    /** Splits the descendant strictly containing `at`; may return `this`. */
    split(at: number): Node;
    /** Returns a new node with the same tag name and optional overrides. */
    copy(opts?: {
        attrs?: Record<string, string>;
        children?: Array<Node | TextNode>;
        marks?: Array<Mark>;
    }): Node;
}
|
|
34
|
+
/**
 * An inline annotation described by a tag name and an attribute map.
 */
declare class Mark {
    tagName: ElementName;
    attrs: Record<string, string>;
    constructor(tagName: ElementName, attrs?: Record<string, string>);
    /**
     * Structural equality on tag name and attributes; `undefined` is never
     * equal.
     */
    eq(other: Mark | undefined): boolean;
}
|
|
40
|
+
/**
 * A run of text in the document tree together with the marks applied to it.
 */
declare class TextNode {
    text: string;
    marks: Array<Mark>;
    constructor(text: string, marks?: Array<Mark>);
    isLeaf: boolean;
    isInline: boolean;
    isBlock: boolean;
    border: number;
    /** Size of the node: the length of its text. */
    get nodeSize(): number;
    /** The text itself. */
    get textContent(): string;
    /** Splits the text at offset `at`; the pieces share this node's marks. */
    split(at: number): Array<TextNode>;
    /** Returns a new text node with the same text and optional new marks. */
    copy(opts?: { marks?: Array<Mark> }): TextNode;
}
|
|
55
|
+
/**
 * Walks the tree below `root` depth-first, calling `cb` for every child with
 * the child, its start position, its parent and its index in the parent.
 * The walk descends into a non-leaf child only when `cb` returns true.
 *
 * @param root Node whose children are visited.
 * @param cb   Visitor; return true to descend into the visited node.
 * @param pos  Position of the first child (the implementation defaults to 0).
 */
declare function descendants(root: Root | Node, cb: (node: Node | TextNode, pos: number, parent: Node | Root, index: number) => boolean, pos?: number): void;
|
|
56
|
+
|
|
57
|
+
export { Mark, Node, Root, TextNode, descendants };
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
import "../chunk-BIEQXUOY.js";
|
|
2
|
+
import { enumerate } from "itertools";
|
|
3
|
+
import { BLOCKS } from "./semantics.js";
|
|
4
|
+
/**
 * Top of the document tree: a container for the top-level children.
 *
 * A root is always block-level and contributes no border of its own, so
 * positions inside it are simply the running sum of its children's sizes.
 */
class Root {
  isInline = false;
  isBlock = true;

  /**
   * @param children Top-level nodes of the document.
   */
  constructor(children) {
    this.children = children;
  }

  /** The root has no opening/closing boundary, so this is always 0. */
  get border() {
    return 0;
  }

  /** Concatenation of every child's `textContent`, in order. */
  get textContent() {
    let text = "";
    for (const node of this.children) {
      text += node.textContent;
    }
    return text;
  }

  /**
   * Returns a copy of this root in which the child that strictly contains
   * position `at` has been split at that position.
   *
   * Children that do not strictly contain `at` (including those that merely
   * start or end there) are carried over untouched.
   *
   * @param at Position measured from the start of the root.
   */
  split(at) {
    const parts = [];
    let start = this.border;
    for (const node of this.children) {
      const end = start + node.nodeSize;
      const cutsInside = start < at && at < end;
      if (!cutsInside) {
        parts.push(node);
      } else if (node instanceof TextNode) {
        // A text node splits into several pieces; splice them all in.
        parts.push(...node.split(at - start));
      } else {
        // An element splits "in place" and returns a single replacement.
        parts.push(node.split(at - start));
      }
      start = end;
    }
    return this.copy({ children: parts });
  }

  /**
   * Creates a new root, optionally replacing the children.
   *
   * @param opts `children` overrides the current children when not nullish.
   */
  copy(opts = {}) {
    const children = opts.children ?? this.children;
    return new Root(children);
  }
}
|
|
35
|
+
/**
 * An element of the document tree, identified by its tag name and carrying
 * attributes, child nodes and the marks that apply to it.
 */
class Node {
  /**
   * @param tagName  Element name.
   * @param attrs    Attribute map (defaults to an empty object).
   * @param children Child nodes (defaults to none).
   * @param marks    Marks applied to this node (defaults to none).
   */
  constructor(tagName, attrs = {}, children = [], marks = []) {
    Object.assign(this, { tagName, attrs, children, marks });
  }

  /** True when the node has no children. */
  get isLeaf() {
    return this.children.length ? false : true;
  }

  /** A node is inline exactly when it is not a block. */
  get isInline() {
    const block = this.isBlock;
    return !block;
  }

  /** True when the tag name is listed in `BLOCKS`. */
  get isBlock() {
    const blockNames = BLOCKS;
    return blockNames.includes(this.tagName);
  }

  /** Size of the opening (and closing) boundary: 0 for leaves, 1 otherwise. */
  get border() {
    if (this.isLeaf) return 0;
    return 1;
  }

  /**
   * Total size of the node: both borders plus the summed size of the
   * children. A content size of 0 counts as 1, so a leaf has size 1.
   */
  get nodeSize() {
    const edge = this.border;
    let inner = 0;
    for (const child of this.children) {
      inner += child.nodeSize;
    }
    return edge + (inner || 1) + edge;
  }

  /** Concatenation of every child's `textContent`, in order. */
  get textContent() {
    let text = "";
    for (const child of this.children) {
      text += child.textContent;
    }
    return text;
  }

  /**
   * Splits the descendant that strictly contains position `at`.
   *
   * Returns `this` unchanged when `at` sits right after the opening border
   * or right before the closing border; otherwise returns a copy whose
   * children reflect the split.
   *
   * @param at Position measured from the start of this node (border included).
   */
  split(at) {
    const edge = this.border;
    if (at === edge || at === this.nodeSize - edge) return this;

    const parts = [];
    let start = edge;
    for (const child of this.children) {
      const end = start + child.nodeSize;
      if (start < at && at < end) {
        // Text nodes yield several pieces, elements a single replacement.
        const pieces =
          child instanceof TextNode
            ? child.split(at - start)
            : [child.split(at - start)];
        parts.push(...pieces);
      } else {
        parts.push(child);
      }
      start = end;
    }
    return this.copy({ children: parts });
  }

  /**
   * Creates a new node with the same tag name, optionally overriding
   * `attrs`, `children` and/or `marks` (nullish overrides are ignored).
   */
  copy(opts = {}) {
    const { tagName, attrs, children, marks } = this;
    return new Node(
      tagName,
      opts.attrs ?? attrs,
      opts.children ?? children,
      opts.marks ?? marks
    );
  }
}
|
|
88
|
+
/**
 * An inline annotation described by a tag name and an attribute map.
 */
class Mark {
  /**
   * @param tagName Tag name of the mark.
   * @param attrs   Attribute map (defaults to an empty object).
   */
  constructor(tagName, attrs = {}) {
    this.tagName = tagName;
    this.attrs = attrs;
  }

  /**
   * Structural equality: two marks are equal when they have the same number
   * of attributes, every attribute of this mark has the identical value on
   * the other, and the tag names match.
   *
   * @param other Mark to compare with; a falsy value is never equal.
   */
  eq(other) {
    if (!other) return false;

    const ownEntries = Object.entries(this.attrs);
    if (ownEntries.length !== Object.keys(other.attrs).length) {
      return false;
    }

    const sameAttrs = ownEntries.every(
      ([key, value]) => other.attrs[key] === value
    );
    return sameAttrs && this.tagName === other.tagName;
  }
}
|
|
103
|
+
/**
 * A run of text in the document tree together with the marks applied to it.
 *
 * Text nodes are inline leaves with no border; their size is the length of
 * the text.
 */
class TextNode {
  /**
   * @param text  The text of this node.
   * @param marks Marks applied to the text (defaults to none).
   */
  constructor(text, marks = []) {
    this.text = text;
    this.marks = marks;
  }

  isLeaf = true;
  isInline = true;
  isBlock = false;
  border = 0;

  /** Size of the node: the length of its text. */
  get nodeSize() {
    return this.text.length;
  }

  /** The text itself. */
  get textContent() {
    return this.text;
  }

  /**
   * Splits the text at offset `at` into two text nodes sharing this node's
   * marks.
   *
   * When `at` does not fall strictly inside the text (at or before the
   * start, at or past the end) there is nothing to split and the node
   * itself is returned as the only element. Previously only the exact
   * boundaries were guarded, so a negative offset was interpreted by
   * `slice` as counting from the end and an overshooting offset produced an
   * empty trailing node.
   *
   * @param at Offset into the text.
   * @returns One node (no split) or two nodes (left and right part).
   */
  split(at) {
    if (at <= 0 || at >= this.text.length) return [this];
    return [
      new TextNode(this.text.slice(0, at), this.marks),
      new TextNode(this.text.slice(at), this.marks)
    ];
  }

  /**
   * Creates a new text node with the same text, optionally replacing the
   * marks (a nullish override keeps the current marks).
   */
  copy(opts = {}) {
    return new TextNode(this.text, opts.marks ?? this.marks);
  }
}
|
|
130
|
+
/**
 * Walks the tree below `root` depth-first, in document order.
 *
 * For each child the callback receives the child, the position at which it
 * starts, its parent and its index within the parent. When the callback
 * returns a truthy value and the child is not a leaf, the walk continues
 * into that child, starting just past its opening border.
 *
 * @param root Node whose children are visited.
 * @param cb   Visitor; return a truthy value to descend into the child.
 * @param pos  Position of the first child (defaults to 0).
 */
function descendants(root, cb, pos = 0) {
  let cursor = pos;
  const indexedChildren = enumerate(root.children);
  for (const [index, node] of indexedChildren) {
    const wantsChildren = cb(node, cursor, root, index);
    if (wantsChildren && !node.isLeaf) {
      descendants(node, cb, cursor + node.border);
    }
    // Advance past the whole child regardless of whether we descended.
    cursor += node.nodeSize;
  }
}
|
|
139
|
+
export {
|
|
140
|
+
Mark,
|
|
141
|
+
Node,
|
|
142
|
+
Root,
|
|
143
|
+
TextNode,
|
|
144
|
+
descendants
|
|
145
|
+
};
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __export = (target, all) => {
|
|
7
|
+
for (var name in all)
|
|
8
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
+
};
|
|
10
|
+
var __copyProps = (to, from, except, desc) => {
|
|
11
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
12
|
+
for (let key of __getOwnPropNames(from))
|
|
13
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
14
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
15
|
+
}
|
|
16
|
+
return to;
|
|
17
|
+
};
|
|
18
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
|
+
var parseDom_exports = {};
|
|
20
|
+
__export(parseDom_exports, {
|
|
21
|
+
parseDom: () => parseDom,
|
|
22
|
+
parseDomNode: () => parseDomNode
|
|
23
|
+
});
|
|
24
|
+
module.exports = __toCommonJS(parseDom_exports);
|
|
25
|
+
var import_epub = require("@storyteller-platform/epub");
|
|
26
|
+
var import_model = require("./model.cjs");
|
|
27
|
+
var import_semantics = require("./semantics.cjs");
|
|
28
|
+
/**
 * Converts a parsed XML document into the markup model.
 *
 * Each top-level XML node is converted with `parseDomNode`; because that
 * function may return either a single node or a list of nodes, the results
 * are flattened one level before being wrapped in a `Root`.
 *
 * @param xml Array of parsed XML nodes.
 * @returns A `Root` holding the converted nodes.
 */
function parseDom(xml) {
  // Wrap the call so the array index is never passed as the `marks` argument.
  const converted = xml.map((xmlNode) => parseDomNode(xmlNode));
  return new import_model.Root(converted.flat());
}
|
|
32
|
+
/**
 * Converts one parsed XML node into markup-model nodes.
 *
 * - A text node becomes a `TextNode` carrying `marks`.
 * - An element whose name is in `BLOCKS` becomes a `Node`; its children are
 *   converted without inheriting any marks.
 * - Any other element without children becomes a childless `Node`.
 * - Any other element with children is dissolved: it is turned into a `Mark`
 *   that is appended to `marks` for each of its children, and the converted
 *   children are returned as a flat list.
 *
 * @param xmlNode Parsed XML node to convert.
 * @param marks   Marks inherited from enclosing non-block elements, if any.
 * @returns A single node, or a list of nodes for a dissolved element.
 */
function parseDomNode(xmlNode, marks) {
  const { Epub } = import_epub;

  if (Epub.isXmlTextNode(xmlNode)) {
    const text = xmlNode["#text"];
    return new import_model.TextNode(text, marks);
  }

  const tagName = Epub.getXmlElementName(xmlNode);

  if (import_semantics.BLOCKS.includes(tagName)) {
    const attrs = Epub.getXmlAttributes(xmlNode);
    const blockChildren = Epub.getXmlChildren(xmlNode).flatMap((child) =>
      parseDomNode(child)
    );
    return new import_model.Node(tagName, attrs, blockChildren, marks);
  }

  const isEmpty = !Epub.getXmlChildren(xmlNode).length;
  if (isEmpty) {
    return new import_model.Node(tagName, Epub.getXmlAttributes(xmlNode), [], marks);
  }

  const inherited = marks ?? [];
  return Epub.getXmlChildren(xmlNode).flatMap((child) => {
    // Each child receives its own Mark instance for the dissolved element.
    const ownMark = new import_model.Mark(tagName, Epub.getXmlAttributes(xmlNode));
    return parseDomNode(child, [...inherited, ownMark]);
  });
}
|
|
55
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
56
|
+
0 && (module.exports = {
|
|
57
|
+
parseDom,
|
|
58
|
+
parseDomNode
|
|
59
|
+
});
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { ParsedXml, XmlNode } from '@storyteller-platform/epub';
|
|
2
|
+
import { Root, Mark, Node, TextNode } from './model.cjs';
|
|
3
|
+
|
|
4
|
+
/**
 * Converts a parsed XML document into a markup-model `Root` by converting
 * every top-level XML node and flattening the results one level.
 */
declare function parseDom(xml: ParsedXml): Root;
|
|
5
|
+
/**
 * Converts one parsed XML node into markup-model nodes.
 *
 * Returns a single `TextNode` or `Node`, or a list of nodes when the XML
 * element is a non-block element with children (such an element is turned
 * into a `Mark` on its converted children instead of a node of its own).
 *
 * @param xmlNode Parsed XML node to convert.
 * @param marks   Marks inherited from enclosing non-block elements, if any.
 */
declare function parseDomNode(xmlNode: XmlNode, marks?: Mark[]): Node | TextNode | (Node | TextNode)[];
|
|
6
|
+
|
|
7
|
+
export { parseDom, parseDomNode };
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { ParsedXml, XmlNode } from '@storyteller-platform/epub';
|
|
2
|
+
import { Root, Mark, Node, TextNode } from './model.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Converts a parsed XML document into a markup-model `Root` by converting
 * every top-level XML node and flattening the results one level.
 */
declare function parseDom(xml: ParsedXml): Root;
|
|
5
|
+
/**
 * Converts one parsed XML node into markup-model nodes.
 *
 * Returns a single `TextNode` or `Node`, or a list of nodes when the XML
 * element is a non-block element with children (such an element is turned
 * into a `Mark` on its converted children instead of a node of its own).
 *
 * @param xmlNode Parsed XML node to convert.
 * @param marks   Marks inherited from enclosing non-block elements, if any.
 */
declare function parseDomNode(xmlNode: XmlNode, marks?: Mark[]): Node | TextNode | (Node | TextNode)[];
|
|
6
|
+
|
|
7
|
+
export { parseDom, parseDomNode };
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import "../chunk-BIEQXUOY.js";
|
|
2
|
+
import { Epub } from "@storyteller-platform/epub";
|
|
3
|
+
import { Mark, Node, Root, TextNode } from "./model.js";
|
|
4
|
+
import { BLOCKS } from "./semantics.js";
|
|
5
|
+
/**
 * Converts a parsed XML document into the markup model.
 *
 * Each top-level XML node is converted with `parseDomNode`; because that
 * function may return either a single node or a list of nodes, the results
 * are flattened one level before being wrapped in a `Root`.
 *
 * @param xml Array of parsed XML nodes.
 * @returns A `Root` holding the converted nodes.
 */
function parseDom(xml) {
  // Wrap the call so the array index is never passed as the `marks` argument.
  const converted = xml.map((xmlNode) => parseDomNode(xmlNode));
  return new Root(converted.flat());
}
|
|
9
|
+
/**
 * Converts one parsed XML node into markup-model nodes.
 *
 * - A text node becomes a `TextNode` carrying `marks`.
 * - An element whose name is in `BLOCKS` becomes a `Node`; its children are
 *   converted without inheriting any marks.
 * - Any other element without children becomes a childless `Node`.
 * - Any other element with children is dissolved: it is turned into a `Mark`
 *   that is appended to `marks` for each of its children, and the converted
 *   children are returned as a flat list.
 *
 * @param xmlNode Parsed XML node to convert.
 * @param marks   Marks inherited from enclosing non-block elements, if any.
 * @returns A single node, or a list of nodes for a dissolved element.
 */
function parseDomNode(xmlNode, marks) {
  if (Epub.isXmlTextNode(xmlNode)) {
    const text = xmlNode["#text"];
    return new TextNode(text, marks);
  }

  const tagName = Epub.getXmlElementName(xmlNode);

  if (BLOCKS.includes(tagName)) {
    const attrs = Epub.getXmlAttributes(xmlNode);
    const blockChildren = Epub.getXmlChildren(xmlNode).flatMap((child) =>
      parseDomNode(child)
    );
    return new Node(tagName, attrs, blockChildren, marks);
  }

  const isEmpty = !Epub.getXmlChildren(xmlNode).length;
  if (isEmpty) {
    return new Node(tagName, Epub.getXmlAttributes(xmlNode), [], marks);
  }

  const inherited = marks ?? [];
  return Epub.getXmlChildren(xmlNode).flatMap((child) => {
    // Each child receives its own Mark instance for the dissolved element.
    const ownMark = new Mark(tagName, Epub.getXmlAttributes(xmlNode));
    return parseDomNode(child, [...inherited, ownMark]);
  });
}
|
|
32
|
+
export {
|
|
33
|
+
parseDom,
|
|
34
|
+
parseDomNode
|
|
35
|
+
};
|
|
@@ -22,64 +22,18 @@ __export(segmentation_exports, {
|
|
|
22
22
|
});
|
|
23
23
|
module.exports = __toCommonJS(segmentation_exports);
|
|
24
24
|
var import_text_segmentation = require("@echogarden/text-segmentation");
|
|
25
|
-
var
|
|
26
|
-
var
|
|
25
|
+
var import_parseDom = require("./parseDom.cjs");
|
|
26
|
+
var import_transform = require("./transform.cjs");
|
|
27
27
|
async function getXhtmlSegmentation(xml, options) {
|
|
28
|
-
const
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
continue;
|
|
38
|
-
}
|
|
39
|
-
const childName = import_epub.Epub.getXmlElementName(child);
|
|
40
|
-
if (!import_semantics.BLOCKS.includes(childName)) {
|
|
41
|
-
stagedText += import_epub.Epub.getXhtmlTextContent(import_epub.Epub.getXmlChildren(child));
|
|
42
|
-
continue;
|
|
43
|
-
}
|
|
44
|
-
mergeSegmentations(
|
|
45
|
-
result,
|
|
46
|
-
await (0, import_text_segmentation.segmentText)(collapseWhitespace(stagedText), {
|
|
47
|
-
...options.primaryLocale && {
|
|
48
|
-
language: options.primaryLocale.language
|
|
49
|
-
},
|
|
50
|
-
enableEastAsianPostprocessing: true
|
|
51
|
-
})
|
|
52
|
-
);
|
|
53
|
-
stagedText = "";
|
|
54
|
-
mergeSegmentations(
|
|
55
|
-
result,
|
|
56
|
-
await getXhtmlSegmentation(import_epub.Epub.getXmlChildren(child), options)
|
|
57
|
-
);
|
|
58
|
-
}
|
|
59
|
-
mergeSegmentations(
|
|
60
|
-
result,
|
|
61
|
-
await (0, import_text_segmentation.segmentText)(collapseWhitespace(stagedText), {
|
|
62
|
-
...options.primaryLocale && {
|
|
63
|
-
language: options.primaryLocale.language
|
|
64
|
-
},
|
|
65
|
-
enableEastAsianPostprocessing: true
|
|
66
|
-
})
|
|
67
|
-
);
|
|
68
|
-
return result;
|
|
69
|
-
}
|
|
70
|
-
function collapseWhitespace(text) {
|
|
71
|
-
return text.replace(/^\s*/, "").replace(/\s*$/, "").replaceAll(/\s+/g, " ");
|
|
72
|
-
}
|
|
73
|
-
function mergeSegmentations(first, second) {
|
|
74
|
-
for (const wordEntry of second.words.entries) {
|
|
75
|
-
first.words.addWord(
|
|
76
|
-
wordEntry.text,
|
|
77
|
-
wordEntry.startOffset,
|
|
78
|
-
wordEntry.isPunctuation
|
|
79
|
-
);
|
|
80
|
-
}
|
|
81
|
-
first.sentences.push(...second.sentences);
|
|
82
|
-
first.segmentSentenceRanges.push(...second.segmentSentenceRanges);
|
|
28
|
+
const root = (0, import_parseDom.parseDom)(xml);
|
|
29
|
+
const { result: text, mapping } = (0, import_transform.liftText)(root);
|
|
30
|
+
const result = await (0, import_text_segmentation.segmentText)(text, {
|
|
31
|
+
...options.primaryLocale && {
|
|
32
|
+
language: options.primaryLocale.language
|
|
33
|
+
},
|
|
34
|
+
enableEastAsianPostprocessing: true
|
|
35
|
+
});
|
|
36
|
+
return { result: result.sentences, mapping };
|
|
83
37
|
}
|
|
84
38
|
// Annotate the CommonJS export names for ESM import in node:
|
|
85
39
|
0 && (module.exports = {
|
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
import { SegmentationResult } from '@echogarden/text-segmentation';
|
|
2
2
|
import { ParsedXml } from '@storyteller-platform/epub';
|
|
3
|
+
import { Mapping } from './map.cjs';
|
|
3
4
|
|
|
4
5
|
declare function getXhtmlSegmentation(xml: ParsedXml, options: {
|
|
5
|
-
primaryLocale?: Intl.Locale | null;
|
|
6
|
-
}): Promise<
|
|
6
|
+
primaryLocale?: Intl.Locale | null | undefined;
|
|
7
|
+
}): Promise<{
|
|
8
|
+
result: SegmentationResult["sentences"];
|
|
9
|
+
mapping: Mapping;
|
|
10
|
+
}>;
|
|
7
11
|
|
|
8
12
|
export { getXhtmlSegmentation };
|
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
import { SegmentationResult } from '@echogarden/text-segmentation';
|
|
2
2
|
import { ParsedXml } from '@storyteller-platform/epub';
|
|
3
|
+
import { Mapping } from './map.js';
|
|
3
4
|
|
|
4
5
|
declare function getXhtmlSegmentation(xml: ParsedXml, options: {
|
|
5
|
-
primaryLocale?: Intl.Locale | null;
|
|
6
|
-
}): Promise<
|
|
6
|
+
primaryLocale?: Intl.Locale | null | undefined;
|
|
7
|
+
}): Promise<{
|
|
8
|
+
result: SegmentationResult["sentences"];
|
|
9
|
+
mapping: Mapping;
|
|
10
|
+
}>;
|
|
7
11
|
|
|
8
12
|
export { getXhtmlSegmentation };
|