@storyteller-platform/align 0.1.8 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/align/__tests__/align.test.cjs +6 -5
- package/dist/align/__tests__/align.test.js +6 -5
- package/dist/align/align.cjs +133 -81
- package/dist/align/align.d.cts +1 -0
- package/dist/align/align.d.ts +1 -0
- package/dist/align/align.js +133 -81
- package/dist/align/getSentenceRanges.cjs +78 -149
- package/dist/align/getSentenceRanges.d.cts +1 -1
- package/dist/align/getSentenceRanges.d.ts +1 -1
- package/dist/align/getSentenceRanges.js +78 -149
- package/dist/align/slugify.cjs +2 -0
- package/dist/align/slugify.js +2 -0
- package/dist/errorAlign/__tests__/errorAlign.test.cjs +100 -0
- package/dist/errorAlign/__tests__/errorAlign.test.d.cts +2 -0
- package/dist/errorAlign/__tests__/errorAlign.test.d.ts +2 -0
- package/dist/errorAlign/__tests__/errorAlign.test.js +77 -0
- package/dist/errorAlign/__tests__/native.test.cjs +118 -0
- package/dist/errorAlign/__tests__/native.test.d.cts +2 -0
- package/dist/errorAlign/__tests__/native.test.d.ts +2 -0
- package/dist/errorAlign/__tests__/native.test.js +107 -0
- package/dist/errorAlign/backtraceGraph.cjs +298 -0
- package/dist/errorAlign/backtraceGraph.d.cts +103 -0
- package/dist/errorAlign/backtraceGraph.d.ts +103 -0
- package/dist/errorAlign/backtraceGraph.js +270 -0
- package/dist/errorAlign/beamSearch.cjs +302 -0
- package/dist/errorAlign/beamSearch.d.cts +53 -0
- package/dist/errorAlign/beamSearch.d.ts +53 -0
- package/dist/errorAlign/beamSearch.js +268 -0
- package/dist/errorAlign/core.cjs +33 -0
- package/dist/errorAlign/core.d.cts +5 -0
- package/dist/errorAlign/core.d.ts +5 -0
- package/dist/errorAlign/core.js +11 -0
- package/dist/errorAlign/editDistance.cjs +115 -0
- package/dist/errorAlign/editDistance.d.cts +46 -0
- package/dist/errorAlign/editDistance.d.ts +46 -0
- package/dist/errorAlign/editDistance.js +90 -0
- package/dist/errorAlign/errorAlign.cjs +159 -0
- package/dist/errorAlign/errorAlign.d.cts +15 -0
- package/dist/errorAlign/errorAlign.d.ts +15 -0
- package/dist/errorAlign/errorAlign.js +145 -0
- package/dist/errorAlign/graphMetadata.cjs +97 -0
- package/dist/errorAlign/graphMetadata.d.cts +44 -0
- package/dist/errorAlign/graphMetadata.d.ts +44 -0
- package/dist/errorAlign/graphMetadata.js +64 -0
- package/dist/errorAlign/hash.cjs +173 -0
- package/dist/errorAlign/hash.d.cts +28 -0
- package/dist/errorAlign/hash.d.ts +28 -0
- package/dist/errorAlign/hash.js +150 -0
- package/dist/errorAlign/native.cjs +60 -0
- package/dist/errorAlign/native.d.cts +18 -0
- package/dist/errorAlign/native.d.ts +18 -0
- package/dist/errorAlign/native.js +24 -0
- package/dist/errorAlign/node-gyp-build.d.cjs +1 -0
- package/dist/errorAlign/node-gyp-build.d.d.cts +3 -0
- package/dist/errorAlign/node-gyp-build.d.d.ts +3 -0
- package/dist/errorAlign/node-gyp-build.d.js +0 -0
- package/dist/errorAlign/pathToAlignment.cjs +122 -0
- package/dist/errorAlign/pathToAlignment.d.cts +11 -0
- package/dist/errorAlign/pathToAlignment.d.ts +11 -0
- package/dist/errorAlign/pathToAlignment.js +89 -0
- package/dist/errorAlign/utils.cjs +301 -0
- package/dist/errorAlign/utils.d.cts +107 -0
- package/dist/errorAlign/utils.d.ts +107 -0
- package/dist/errorAlign/utils.js +248 -0
- package/dist/index.d.cts +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/markup/__tests__/markup.test.cjs +108 -81
- package/dist/markup/__tests__/markup.test.js +109 -82
- package/dist/markup/__tests__/parseDom.test.cjs +112 -0
- package/dist/markup/__tests__/parseDom.test.d.cts +2 -0
- package/dist/markup/__tests__/parseDom.test.d.ts +2 -0
- package/dist/markup/__tests__/parseDom.test.js +89 -0
- package/dist/markup/__tests__/serializeDom.test.cjs +120 -0
- package/dist/markup/__tests__/serializeDom.test.d.cts +2 -0
- package/dist/markup/__tests__/serializeDom.test.d.ts +2 -0
- package/dist/markup/__tests__/serializeDom.test.js +97 -0
- package/dist/markup/__tests__/transform.test.cjs +122 -0
- package/dist/markup/__tests__/transform.test.d.cts +2 -0
- package/dist/markup/__tests__/transform.test.d.ts +2 -0
- package/dist/markup/__tests__/transform.test.js +99 -0
- package/dist/markup/map.cjs +261 -0
- package/dist/markup/map.d.cts +50 -0
- package/dist/markup/map.d.ts +50 -0
- package/dist/markup/map.js +236 -0
- package/dist/markup/markup.cjs +23 -201
- package/dist/markup/markup.d.cts +5 -9
- package/dist/markup/markup.d.ts +5 -9
- package/dist/markup/markup.js +24 -203
- package/dist/markup/model.cjs +172 -0
- package/dist/markup/model.d.cts +57 -0
- package/dist/markup/model.d.ts +57 -0
- package/dist/markup/model.js +145 -0
- package/dist/markup/parseDom.cjs +59 -0
- package/dist/markup/parseDom.d.cts +7 -0
- package/dist/markup/parseDom.d.ts +7 -0
- package/dist/markup/parseDom.js +35 -0
- package/dist/markup/segmentation.cjs +11 -57
- package/dist/markup/segmentation.d.cts +6 -2
- package/dist/markup/segmentation.d.ts +6 -2
- package/dist/markup/segmentation.js +11 -58
- package/dist/markup/serializeDom.cjs +87 -0
- package/dist/markup/serializeDom.d.cts +7 -0
- package/dist/markup/serializeDom.d.ts +7 -0
- package/dist/markup/serializeDom.js +63 -0
- package/dist/markup/transform.cjs +92 -0
- package/dist/markup/transform.d.cts +11 -0
- package/dist/markup/transform.d.ts +11 -0
- package/dist/markup/transform.js +71 -0
- package/dist/types/node-gyp-build.d.cjs +1 -0
- package/dist/types/node-gyp-build.d.d.cts +3 -0
- package/dist/types/node-gyp-build.d.d.ts +3 -0
- package/dist/types/node-gyp-build.d.js +0 -0
- package/package.json +11 -4
|
@@ -1,66 +1,19 @@
|
|
|
1
1
|
import "../chunk-BIEQXUOY.js";
|
|
2
2
|
import {
|
|
3
|
-
WordSequence,
|
|
4
3
|
segmentText
|
|
5
4
|
} from "@echogarden/text-segmentation";
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
5
|
+
import { parseDom } from "./parseDom.js";
|
|
6
|
+
import { liftText } from "./transform.js";
|
|
8
7
|
async function getXhtmlSegmentation(xml, options) {
|
|
9
|
-
const
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
continue;
|
|
19
|
-
}
|
|
20
|
-
const childName = Epub.getXmlElementName(child);
|
|
21
|
-
if (!BLOCKS.includes(childName)) {
|
|
22
|
-
stagedText += Epub.getXhtmlTextContent(Epub.getXmlChildren(child));
|
|
23
|
-
continue;
|
|
24
|
-
}
|
|
25
|
-
mergeSegmentations(
|
|
26
|
-
result,
|
|
27
|
-
await segmentText(collapseWhitespace(stagedText), {
|
|
28
|
-
...options.primaryLocale && {
|
|
29
|
-
language: options.primaryLocale.language
|
|
30
|
-
},
|
|
31
|
-
enableEastAsianPostprocessing: true
|
|
32
|
-
})
|
|
33
|
-
);
|
|
34
|
-
stagedText = "";
|
|
35
|
-
mergeSegmentations(
|
|
36
|
-
result,
|
|
37
|
-
await getXhtmlSegmentation(Epub.getXmlChildren(child), options)
|
|
38
|
-
);
|
|
39
|
-
}
|
|
40
|
-
mergeSegmentations(
|
|
41
|
-
result,
|
|
42
|
-
await segmentText(collapseWhitespace(stagedText), {
|
|
43
|
-
...options.primaryLocale && {
|
|
44
|
-
language: options.primaryLocale.language
|
|
45
|
-
},
|
|
46
|
-
enableEastAsianPostprocessing: true
|
|
47
|
-
})
|
|
48
|
-
);
|
|
49
|
-
return result;
|
|
50
|
-
}
|
|
51
|
-
function collapseWhitespace(text) {
|
|
52
|
-
return text.replace(/^\s*/, "").replace(/\s*$/, "").replaceAll(/\s+/g, " ");
|
|
53
|
-
}
|
|
54
|
-
function mergeSegmentations(first, second) {
|
|
55
|
-
for (const wordEntry of second.words.entries) {
|
|
56
|
-
first.words.addWord(
|
|
57
|
-
wordEntry.text,
|
|
58
|
-
wordEntry.startOffset,
|
|
59
|
-
wordEntry.isPunctuation
|
|
60
|
-
);
|
|
61
|
-
}
|
|
62
|
-
first.sentences.push(...second.sentences);
|
|
63
|
-
first.segmentSentenceRanges.push(...second.segmentSentenceRanges);
|
|
8
|
+
const root = parseDom(xml);
|
|
9
|
+
const { result: text, mapping } = liftText(root);
|
|
10
|
+
const result = await segmentText(text, {
|
|
11
|
+
...options.primaryLocale && {
|
|
12
|
+
language: options.primaryLocale.language
|
|
13
|
+
},
|
|
14
|
+
enableEastAsianPostprocessing: true
|
|
15
|
+
});
|
|
16
|
+
return { result: result.sentences, mapping };
|
|
64
17
|
}
|
|
65
18
|
export {
|
|
66
19
|
getXhtmlSegmentation
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __export = (target, all) => {
|
|
7
|
+
for (var name in all)
|
|
8
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
+
};
|
|
10
|
+
var __copyProps = (to, from, except, desc) => {
|
|
11
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
12
|
+
for (let key of __getOwnPropNames(from))
|
|
13
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
14
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
15
|
+
}
|
|
16
|
+
return to;
|
|
17
|
+
};
|
|
18
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
|
+
var serializeDom_exports = {};
|
|
20
|
+
__export(serializeDom_exports, {
|
|
21
|
+
serializeDom: () => serializeDom,
|
|
22
|
+
serializeDomNode: () => serializeDomNode
|
|
23
|
+
});
|
|
24
|
+
module.exports = __toCommonJS(serializeDom_exports);
|
|
25
|
+
var import_epub = require("@storyteller-platform/epub");
|
|
26
|
+
var import_model = require("./model.cjs");
|
|
27
|
+
function serializeDom(doc) {
|
|
28
|
+
return doc.children.map((child) => serializeDomNode(child));
|
|
29
|
+
}
|
|
30
|
+
function serializeDomNode(node) {
|
|
31
|
+
if (node instanceof import_model.TextNode) {
|
|
32
|
+
return import_epub.Epub.createXmlTextNode(node.text);
|
|
33
|
+
}
|
|
34
|
+
return import_epub.Epub.createXmlElement(
|
|
35
|
+
node.tagName,
|
|
36
|
+
node.attrs,
|
|
37
|
+
serializeDomNodes(node.children)
|
|
38
|
+
);
|
|
39
|
+
}
|
|
40
|
+
function serializeDomNodes(nodes) {
|
|
41
|
+
const partitioned = nodes.reduce((acc, child) => {
|
|
42
|
+
const lastPartition = acc.at(-1);
|
|
43
|
+
if (!lastPartition) {
|
|
44
|
+
return [[child]];
|
|
45
|
+
}
|
|
46
|
+
const lastChild = lastPartition.at(-1);
|
|
47
|
+
if (!lastChild) {
|
|
48
|
+
return [...acc.slice(0, acc.length), [child]];
|
|
49
|
+
}
|
|
50
|
+
const childFirstMark = child.marks[0];
|
|
51
|
+
const lastChildFirstMark = lastChild.marks[0];
|
|
52
|
+
if (childFirstMark === lastChildFirstMark || (childFirstMark == null ? void 0 : childFirstMark.eq(lastChildFirstMark))) {
|
|
53
|
+
return [
|
|
54
|
+
...acc.slice(0, acc.length - 1),
|
|
55
|
+
[...lastPartition.slice(0, lastPartition.length), child]
|
|
56
|
+
];
|
|
57
|
+
}
|
|
58
|
+
return [...acc, [child]];
|
|
59
|
+
}, []);
|
|
60
|
+
const xmlChildren = [];
|
|
61
|
+
for (const partition of partitioned) {
|
|
62
|
+
xmlChildren.push(...serializePartition(partition));
|
|
63
|
+
}
|
|
64
|
+
return xmlChildren;
|
|
65
|
+
}
|
|
66
|
+
function serializePartition(nodes) {
|
|
67
|
+
const firstChild = nodes[0];
|
|
68
|
+
if (!firstChild) return [];
|
|
69
|
+
const firstMark = firstChild.marks[0];
|
|
70
|
+
if (!firstMark) {
|
|
71
|
+
return nodes.map((child) => serializeDomNode(child));
|
|
72
|
+
}
|
|
73
|
+
return [
|
|
74
|
+
import_epub.Epub.createXmlElement(
|
|
75
|
+
firstMark.tagName,
|
|
76
|
+
firstMark.attrs,
|
|
77
|
+
serializeDomNodes(
|
|
78
|
+
nodes.map((node) => node.copy({ marks: node.marks.slice(1) }))
|
|
79
|
+
)
|
|
80
|
+
)
|
|
81
|
+
];
|
|
82
|
+
}
|
|
83
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
84
|
+
0 && (module.exports = {
|
|
85
|
+
serializeDom,
|
|
86
|
+
serializeDomNode
|
|
87
|
+
});
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { ParsedXml, XmlNode } from '@storyteller-platform/epub';
|
|
2
|
+
import { Root, Node, TextNode } from './model.cjs';
|
|
3
|
+
|
|
4
|
+
declare function serializeDom(doc: Root): ParsedXml;
|
|
5
|
+
declare function serializeDomNode(node: Node | TextNode): XmlNode;
|
|
6
|
+
|
|
7
|
+
export { serializeDom, serializeDomNode };
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { ParsedXml, XmlNode } from '@storyteller-platform/epub';
|
|
2
|
+
import { Root, Node, TextNode } from './model.js';
|
|
3
|
+
|
|
4
|
+
declare function serializeDom(doc: Root): ParsedXml;
|
|
5
|
+
declare function serializeDomNode(node: Node | TextNode): XmlNode;
|
|
6
|
+
|
|
7
|
+
export { serializeDom, serializeDomNode };
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import "../chunk-BIEQXUOY.js";
|
|
2
|
+
import { Epub } from "@storyteller-platform/epub";
|
|
3
|
+
import { TextNode } from "./model.js";
|
|
4
|
+
function serializeDom(doc) {
|
|
5
|
+
return doc.children.map((child) => serializeDomNode(child));
|
|
6
|
+
}
|
|
7
|
+
function serializeDomNode(node) {
|
|
8
|
+
if (node instanceof TextNode) {
|
|
9
|
+
return Epub.createXmlTextNode(node.text);
|
|
10
|
+
}
|
|
11
|
+
return Epub.createXmlElement(
|
|
12
|
+
node.tagName,
|
|
13
|
+
node.attrs,
|
|
14
|
+
serializeDomNodes(node.children)
|
|
15
|
+
);
|
|
16
|
+
}
|
|
17
|
+
function serializeDomNodes(nodes) {
|
|
18
|
+
const partitioned = nodes.reduce((acc, child) => {
|
|
19
|
+
const lastPartition = acc.at(-1);
|
|
20
|
+
if (!lastPartition) {
|
|
21
|
+
return [[child]];
|
|
22
|
+
}
|
|
23
|
+
const lastChild = lastPartition.at(-1);
|
|
24
|
+
if (!lastChild) {
|
|
25
|
+
return [...acc.slice(0, acc.length), [child]];
|
|
26
|
+
}
|
|
27
|
+
const childFirstMark = child.marks[0];
|
|
28
|
+
const lastChildFirstMark = lastChild.marks[0];
|
|
29
|
+
if (childFirstMark === lastChildFirstMark || (childFirstMark == null ? void 0 : childFirstMark.eq(lastChildFirstMark))) {
|
|
30
|
+
return [
|
|
31
|
+
...acc.slice(0, acc.length - 1),
|
|
32
|
+
[...lastPartition.slice(0, lastPartition.length), child]
|
|
33
|
+
];
|
|
34
|
+
}
|
|
35
|
+
return [...acc, [child]];
|
|
36
|
+
}, []);
|
|
37
|
+
const xmlChildren = [];
|
|
38
|
+
for (const partition of partitioned) {
|
|
39
|
+
xmlChildren.push(...serializePartition(partition));
|
|
40
|
+
}
|
|
41
|
+
return xmlChildren;
|
|
42
|
+
}
|
|
43
|
+
function serializePartition(nodes) {
|
|
44
|
+
const firstChild = nodes[0];
|
|
45
|
+
if (!firstChild) return [];
|
|
46
|
+
const firstMark = firstChild.marks[0];
|
|
47
|
+
if (!firstMark) {
|
|
48
|
+
return nodes.map((child) => serializeDomNode(child));
|
|
49
|
+
}
|
|
50
|
+
return [
|
|
51
|
+
Epub.createXmlElement(
|
|
52
|
+
firstMark.tagName,
|
|
53
|
+
firstMark.attrs,
|
|
54
|
+
serializeDomNodes(
|
|
55
|
+
nodes.map((node) => node.copy({ marks: node.marks.slice(1) }))
|
|
56
|
+
)
|
|
57
|
+
)
|
|
58
|
+
];
|
|
59
|
+
}
|
|
60
|
+
export {
|
|
61
|
+
serializeDom,
|
|
62
|
+
serializeDomNode
|
|
63
|
+
};
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __export = (target, all) => {
|
|
7
|
+
for (var name in all)
|
|
8
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
+
};
|
|
10
|
+
var __copyProps = (to, from, except, desc) => {
|
|
11
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
12
|
+
for (let key of __getOwnPropNames(from))
|
|
13
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
14
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
15
|
+
}
|
|
16
|
+
return to;
|
|
17
|
+
};
|
|
18
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
|
+
var transform_exports = {};
|
|
20
|
+
__export(transform_exports, {
|
|
21
|
+
addMark: () => addMark,
|
|
22
|
+
liftText: () => liftText
|
|
23
|
+
});
|
|
24
|
+
module.exports = __toCommonJS(transform_exports);
|
|
25
|
+
var import_map = require("./map.cjs");
|
|
26
|
+
var import_model = require("./model.cjs");
|
|
27
|
+
function addMark(root, from, to, mark) {
|
|
28
|
+
const result = root.split(from).split(to);
|
|
29
|
+
let pos = 0;
|
|
30
|
+
const children = [];
|
|
31
|
+
for (const child of result.children) {
|
|
32
|
+
children.push(addMarkToNode(child, pos, from, to, mark));
|
|
33
|
+
pos += child.nodeSize;
|
|
34
|
+
}
|
|
35
|
+
return result.copy({ children });
|
|
36
|
+
}
|
|
37
|
+
function addMarkToNode(node, pos, from, to, mark) {
|
|
38
|
+
if (from >= pos + node.nodeSize || to <= pos) {
|
|
39
|
+
return node;
|
|
40
|
+
}
|
|
41
|
+
if (node.isLeaf) {
|
|
42
|
+
return node.copy({ marks: [mark, ...node.marks] });
|
|
43
|
+
}
|
|
44
|
+
let childPos = node.border;
|
|
45
|
+
const children = [];
|
|
46
|
+
for (const child of node.children) {
|
|
47
|
+
children.push(addMarkToNode(child, pos + childPos, from, to, mark));
|
|
48
|
+
childPos += child.nodeSize;
|
|
49
|
+
}
|
|
50
|
+
return node.copy({ children });
|
|
51
|
+
}
|
|
52
|
+
function liftText(root) {
|
|
53
|
+
const mapping = new import_map.Mapping();
|
|
54
|
+
let text = "";
|
|
55
|
+
let textLength = 0;
|
|
56
|
+
let lastTextEnd = 0;
|
|
57
|
+
(0, import_model.descendants)(root, (node, pos, parent, index) => {
|
|
58
|
+
if (node.isBlock) {
|
|
59
|
+
return !!node.textContent.match(/\S/);
|
|
60
|
+
}
|
|
61
|
+
if (!(node instanceof import_model.TextNode)) return true;
|
|
62
|
+
if (mapping.map(pos) - mapping.map(lastTextEnd)) {
|
|
63
|
+
mapping.appendMap(
|
|
64
|
+
new import_map.StepMap([
|
|
65
|
+
mapping.map(lastTextEnd),
|
|
66
|
+
mapping.map(pos) - mapping.map(lastTextEnd),
|
|
67
|
+
0
|
|
68
|
+
])
|
|
69
|
+
);
|
|
70
|
+
}
|
|
71
|
+
lastTextEnd = pos + node.nodeSize;
|
|
72
|
+
let result = node.text.replaceAll(/\n/g, " ");
|
|
73
|
+
const hasBlockSiblings = parent.children.some((child) => child.isBlock);
|
|
74
|
+
if (hasBlockSiblings && !result.match(/\S/)) {
|
|
75
|
+
mapping.appendMap(new import_map.StepMap([textLength, result.length, 0]));
|
|
76
|
+
result = "";
|
|
77
|
+
}
|
|
78
|
+
if (parent.isBlock && index === parent.children.length - 1 && !(text + result).endsWith("\n")) {
|
|
79
|
+
result += "\n";
|
|
80
|
+
textLength--;
|
|
81
|
+
}
|
|
82
|
+
text += result;
|
|
83
|
+
textLength += result.length;
|
|
84
|
+
return true;
|
|
85
|
+
});
|
|
86
|
+
return { result: text, mapping };
|
|
87
|
+
}
|
|
88
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
89
|
+
0 && (module.exports = {
|
|
90
|
+
addMark,
|
|
91
|
+
liftText
|
|
92
|
+
});
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { Mapping } from './map.cjs';
|
|
2
|
+
import { Root, Mark } from './model.cjs';
|
|
3
|
+
import '@storyteller-platform/epub';
|
|
4
|
+
|
|
5
|
+
declare function addMark(root: Root, from: number, to: number, mark: Mark): Root;
|
|
6
|
+
declare function liftText(root: Root): {
|
|
7
|
+
result: string;
|
|
8
|
+
mapping: Mapping;
|
|
9
|
+
};
|
|
10
|
+
|
|
11
|
+
export { addMark, liftText };
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { Mapping } from './map.js';
|
|
2
|
+
import { Root, Mark } from './model.js';
|
|
3
|
+
import '@storyteller-platform/epub';
|
|
4
|
+
|
|
5
|
+
declare function addMark(root: Root, from: number, to: number, mark: Mark): Root;
|
|
6
|
+
declare function liftText(root: Root): {
|
|
7
|
+
result: string;
|
|
8
|
+
mapping: Mapping;
|
|
9
|
+
};
|
|
10
|
+
|
|
11
|
+
export { addMark, liftText };
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import "../chunk-BIEQXUOY.js";
|
|
2
|
+
import { Mapping, StepMap } from "./map.js";
|
|
3
|
+
import {
|
|
4
|
+
TextNode,
|
|
5
|
+
descendants
|
|
6
|
+
} from "./model.js";
|
|
7
|
+
function addMark(root, from, to, mark) {
|
|
8
|
+
const result = root.split(from).split(to);
|
|
9
|
+
let pos = 0;
|
|
10
|
+
const children = [];
|
|
11
|
+
for (const child of result.children) {
|
|
12
|
+
children.push(addMarkToNode(child, pos, from, to, mark));
|
|
13
|
+
pos += child.nodeSize;
|
|
14
|
+
}
|
|
15
|
+
return result.copy({ children });
|
|
16
|
+
}
|
|
17
|
+
function addMarkToNode(node, pos, from, to, mark) {
|
|
18
|
+
if (from >= pos + node.nodeSize || to <= pos) {
|
|
19
|
+
return node;
|
|
20
|
+
}
|
|
21
|
+
if (node.isLeaf) {
|
|
22
|
+
return node.copy({ marks: [mark, ...node.marks] });
|
|
23
|
+
}
|
|
24
|
+
let childPos = node.border;
|
|
25
|
+
const children = [];
|
|
26
|
+
for (const child of node.children) {
|
|
27
|
+
children.push(addMarkToNode(child, pos + childPos, from, to, mark));
|
|
28
|
+
childPos += child.nodeSize;
|
|
29
|
+
}
|
|
30
|
+
return node.copy({ children });
|
|
31
|
+
}
|
|
32
|
+
function liftText(root) {
|
|
33
|
+
const mapping = new Mapping();
|
|
34
|
+
let text = "";
|
|
35
|
+
let textLength = 0;
|
|
36
|
+
let lastTextEnd = 0;
|
|
37
|
+
descendants(root, (node, pos, parent, index) => {
|
|
38
|
+
if (node.isBlock) {
|
|
39
|
+
return !!node.textContent.match(/\S/);
|
|
40
|
+
}
|
|
41
|
+
if (!(node instanceof TextNode)) return true;
|
|
42
|
+
if (mapping.map(pos) - mapping.map(lastTextEnd)) {
|
|
43
|
+
mapping.appendMap(
|
|
44
|
+
new StepMap([
|
|
45
|
+
mapping.map(lastTextEnd),
|
|
46
|
+
mapping.map(pos) - mapping.map(lastTextEnd),
|
|
47
|
+
0
|
|
48
|
+
])
|
|
49
|
+
);
|
|
50
|
+
}
|
|
51
|
+
lastTextEnd = pos + node.nodeSize;
|
|
52
|
+
let result = node.text.replaceAll(/\n/g, " ");
|
|
53
|
+
const hasBlockSiblings = parent.children.some((child) => child.isBlock);
|
|
54
|
+
if (hasBlockSiblings && !result.match(/\S/)) {
|
|
55
|
+
mapping.appendMap(new StepMap([textLength, result.length, 0]));
|
|
56
|
+
result = "";
|
|
57
|
+
}
|
|
58
|
+
if (parent.isBlock && index === parent.children.length - 1 && !(text + result).endsWith("\n")) {
|
|
59
|
+
result += "\n";
|
|
60
|
+
textLength--;
|
|
61
|
+
}
|
|
62
|
+
text += result;
|
|
63
|
+
textLength += result.length;
|
|
64
|
+
return true;
|
|
65
|
+
});
|
|
66
|
+
return { result: text, mapping };
|
|
67
|
+
}
|
|
68
|
+
export {
|
|
69
|
+
addMark,
|
|
70
|
+
liftText
|
|
71
|
+
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"use strict";
|
|
File without changes
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@storyteller-platform/align",
|
|
3
|
-
"version": "0.1.8",
|
|
3
|
+
"version": "0.1.10",
|
|
4
4
|
"description": "A library and CLI for automatically aligning audiobooks and EPUBs to produce Media Overlays",
|
|
5
5
|
"author": "Shane Friedman",
|
|
6
6
|
"license": "MIT",
|
|
@@ -46,6 +46,7 @@
|
|
|
46
46
|
"scripts": {
|
|
47
47
|
"compile": "NODE_OPTIONS=--experimental-import-meta-resolve ./scripts/bundle.js && node --build-sea sea-config.json",
|
|
48
48
|
"build": "tsup",
|
|
49
|
+
"install": "node-gyp-build",
|
|
49
50
|
"prepack": "yarn build",
|
|
50
51
|
"test": "yarn tsx --test"
|
|
51
52
|
},
|
|
@@ -56,17 +57,20 @@
|
|
|
56
57
|
"@esfx/async-semaphore": "^1.0.0",
|
|
57
58
|
"@optique/core": "^0.10.7",
|
|
58
59
|
"@optique/run": "^0.10.7",
|
|
59
|
-
"@storyteller-platform/audiobook": "^0.3.
|
|
60
|
-
"@storyteller-platform/epub": "^0.4.
|
|
61
|
-
"@storyteller-platform/ghost-story": "^0.1.
|
|
60
|
+
"@storyteller-platform/audiobook": "^0.3.9",
|
|
61
|
+
"@storyteller-platform/epub": "^0.4.8",
|
|
62
|
+
"@storyteller-platform/ghost-story": "^0.1.5",
|
|
62
63
|
"@storyteller-platform/transliteration": "^3.1.0",
|
|
63
64
|
"chalk": "^5.4.1",
|
|
64
65
|
"cli-progress": "^3.12.0",
|
|
65
66
|
"esbuild": "^0.27.3",
|
|
67
|
+
"itertools": "^2.6.0",
|
|
66
68
|
"locale-currency": "^1.0.0",
|
|
67
69
|
"memoize": "^10.2.0",
|
|
70
|
+
"node-gyp-build": "^4.8.4",
|
|
68
71
|
"pino": "^10.3.1",
|
|
69
72
|
"pino-pretty": "^13.1.3",
|
|
73
|
+
"runes2": "^1.1.4",
|
|
70
74
|
"to-words": "^5.3.0",
|
|
71
75
|
"zod": "^3.24.0"
|
|
72
76
|
},
|
|
@@ -77,6 +81,9 @@
|
|
|
77
81
|
"@types/cli-progress": "^3",
|
|
78
82
|
"@types/node": "^24.0.0",
|
|
79
83
|
"eslint": "^8.0.0",
|
|
84
|
+
"node-addon-api": "^8.3.1",
|
|
85
|
+
"node-gyp": "^11.2.0",
|
|
86
|
+
"prebuildify": "^6.0.1",
|
|
80
87
|
"tsup": "^8.5.0",
|
|
81
88
|
"tsx": "^4.19.2",
|
|
82
89
|
"typescript": "~5.8.3"
|