@storyteller-platform/align 0.1.8 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. package/dist/align/__tests__/align.test.cjs +6 -5
  2. package/dist/align/__tests__/align.test.js +6 -5
  3. package/dist/align/align.cjs +133 -81
  4. package/dist/align/align.d.cts +1 -0
  5. package/dist/align/align.d.ts +1 -0
  6. package/dist/align/align.js +133 -81
  7. package/dist/align/getSentenceRanges.cjs +78 -149
  8. package/dist/align/getSentenceRanges.d.cts +1 -1
  9. package/dist/align/getSentenceRanges.d.ts +1 -1
  10. package/dist/align/getSentenceRanges.js +78 -149
  11. package/dist/align/slugify.cjs +2 -0
  12. package/dist/align/slugify.js +2 -0
  13. package/dist/errorAlign/__tests__/errorAlign.test.cjs +100 -0
  14. package/dist/errorAlign/__tests__/errorAlign.test.d.cts +2 -0
  15. package/dist/errorAlign/__tests__/errorAlign.test.d.ts +2 -0
  16. package/dist/errorAlign/__tests__/errorAlign.test.js +77 -0
  17. package/dist/errorAlign/__tests__/native.test.cjs +118 -0
  18. package/dist/errorAlign/__tests__/native.test.d.cts +2 -0
  19. package/dist/errorAlign/__tests__/native.test.d.ts +2 -0
  20. package/dist/errorAlign/__tests__/native.test.js +107 -0
  21. package/dist/errorAlign/backtraceGraph.cjs +298 -0
  22. package/dist/errorAlign/backtraceGraph.d.cts +103 -0
  23. package/dist/errorAlign/backtraceGraph.d.ts +103 -0
  24. package/dist/errorAlign/backtraceGraph.js +270 -0
  25. package/dist/errorAlign/beamSearch.cjs +302 -0
  26. package/dist/errorAlign/beamSearch.d.cts +53 -0
  27. package/dist/errorAlign/beamSearch.d.ts +53 -0
  28. package/dist/errorAlign/beamSearch.js +268 -0
  29. package/dist/errorAlign/core.cjs +33 -0
  30. package/dist/errorAlign/core.d.cts +5 -0
  31. package/dist/errorAlign/core.d.ts +5 -0
  32. package/dist/errorAlign/core.js +11 -0
  33. package/dist/errorAlign/editDistance.cjs +115 -0
  34. package/dist/errorAlign/editDistance.d.cts +46 -0
  35. package/dist/errorAlign/editDistance.d.ts +46 -0
  36. package/dist/errorAlign/editDistance.js +90 -0
  37. package/dist/errorAlign/errorAlign.cjs +159 -0
  38. package/dist/errorAlign/errorAlign.d.cts +15 -0
  39. package/dist/errorAlign/errorAlign.d.ts +15 -0
  40. package/dist/errorAlign/errorAlign.js +145 -0
  41. package/dist/errorAlign/graphMetadata.cjs +97 -0
  42. package/dist/errorAlign/graphMetadata.d.cts +44 -0
  43. package/dist/errorAlign/graphMetadata.d.ts +44 -0
  44. package/dist/errorAlign/graphMetadata.js +64 -0
  45. package/dist/errorAlign/hash.cjs +173 -0
  46. package/dist/errorAlign/hash.d.cts +28 -0
  47. package/dist/errorAlign/hash.d.ts +28 -0
  48. package/dist/errorAlign/hash.js +150 -0
  49. package/dist/errorAlign/native.cjs +60 -0
  50. package/dist/errorAlign/native.d.cts +18 -0
  51. package/dist/errorAlign/native.d.ts +18 -0
  52. package/dist/errorAlign/native.js +24 -0
  53. package/dist/errorAlign/node-gyp-build.d.cjs +1 -0
  54. package/dist/errorAlign/node-gyp-build.d.d.cts +3 -0
  55. package/dist/errorAlign/node-gyp-build.d.d.ts +3 -0
  56. package/dist/errorAlign/node-gyp-build.d.js +0 -0
  57. package/dist/errorAlign/pathToAlignment.cjs +122 -0
  58. package/dist/errorAlign/pathToAlignment.d.cts +11 -0
  59. package/dist/errorAlign/pathToAlignment.d.ts +11 -0
  60. package/dist/errorAlign/pathToAlignment.js +89 -0
  61. package/dist/errorAlign/utils.cjs +301 -0
  62. package/dist/errorAlign/utils.d.cts +107 -0
  63. package/dist/errorAlign/utils.d.ts +107 -0
  64. package/dist/errorAlign/utils.js +248 -0
  65. package/dist/index.d.cts +1 -0
  66. package/dist/index.d.ts +1 -0
  67. package/dist/markup/__tests__/markup.test.cjs +108 -81
  68. package/dist/markup/__tests__/markup.test.js +109 -82
  69. package/dist/markup/__tests__/parseDom.test.cjs +112 -0
  70. package/dist/markup/__tests__/parseDom.test.d.cts +2 -0
  71. package/dist/markup/__tests__/parseDom.test.d.ts +2 -0
  72. package/dist/markup/__tests__/parseDom.test.js +89 -0
  73. package/dist/markup/__tests__/serializeDom.test.cjs +120 -0
  74. package/dist/markup/__tests__/serializeDom.test.d.cts +2 -0
  75. package/dist/markup/__tests__/serializeDom.test.d.ts +2 -0
  76. package/dist/markup/__tests__/serializeDom.test.js +97 -0
  77. package/dist/markup/__tests__/transform.test.cjs +122 -0
  78. package/dist/markup/__tests__/transform.test.d.cts +2 -0
  79. package/dist/markup/__tests__/transform.test.d.ts +2 -0
  80. package/dist/markup/__tests__/transform.test.js +99 -0
  81. package/dist/markup/map.cjs +261 -0
  82. package/dist/markup/map.d.cts +50 -0
  83. package/dist/markup/map.d.ts +50 -0
  84. package/dist/markup/map.js +236 -0
  85. package/dist/markup/markup.cjs +23 -201
  86. package/dist/markup/markup.d.cts +5 -9
  87. package/dist/markup/markup.d.ts +5 -9
  88. package/dist/markup/markup.js +24 -203
  89. package/dist/markup/model.cjs +172 -0
  90. package/dist/markup/model.d.cts +57 -0
  91. package/dist/markup/model.d.ts +57 -0
  92. package/dist/markup/model.js +145 -0
  93. package/dist/markup/parseDom.cjs +59 -0
  94. package/dist/markup/parseDom.d.cts +7 -0
  95. package/dist/markup/parseDom.d.ts +7 -0
  96. package/dist/markup/parseDom.js +35 -0
  97. package/dist/markup/segmentation.cjs +11 -57
  98. package/dist/markup/segmentation.d.cts +6 -2
  99. package/dist/markup/segmentation.d.ts +6 -2
  100. package/dist/markup/segmentation.js +11 -58
  101. package/dist/markup/serializeDom.cjs +87 -0
  102. package/dist/markup/serializeDom.d.cts +7 -0
  103. package/dist/markup/serializeDom.d.ts +7 -0
  104. package/dist/markup/serializeDom.js +63 -0
  105. package/dist/markup/transform.cjs +92 -0
  106. package/dist/markup/transform.d.cts +11 -0
  107. package/dist/markup/transform.d.ts +11 -0
  108. package/dist/markup/transform.js +71 -0
  109. package/dist/types/node-gyp-build.d.cjs +1 -0
  110. package/dist/types/node-gyp-build.d.d.cts +3 -0
  111. package/dist/types/node-gyp-build.d.d.ts +3 -0
  112. package/dist/types/node-gyp-build.d.js +0 -0
  113. package/package.json +11 -4
package/dist/markup/model.cjs
@@ -0,0 +1,172 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+ var model_exports = {};
20
+ __export(model_exports, {
21
+ Mark: () => Mark,
22
+ Node: () => Node,
23
+ Root: () => Root,
24
+ TextNode: () => TextNode,
25
+ descendants: () => descendants
26
+ });
27
+ module.exports = __toCommonJS(model_exports);
28
+ var import_itertools = require("itertools");
29
+ var import_semantics = require("./semantics.cjs");
30
+ class Root {
31
+ constructor(children) {
32
+ this.children = children;
33
+ }
34
+ isInline = false;
35
+ isBlock = true;
36
+ get border() {
37
+ return 0;
38
+ }
39
+ get textContent() {
40
+ return this.children.reduce((acc, child) => acc + child.textContent, "");
41
+ }
42
+ split(at) {
43
+ const children = [];
44
+ let pos = this.border;
45
+ for (const child of this.children) {
46
+ if (at > pos && at < pos + child.nodeSize) {
47
+ children.push(
48
+ ...child instanceof TextNode ? child.split(at - pos) : [child.split(at - pos)]
49
+ );
50
+ } else {
51
+ children.push(child);
52
+ }
53
+ pos += child.nodeSize;
54
+ }
55
+ return this.copy({ children });
56
+ }
57
+ copy(opts = {}) {
58
+ return new Root(opts.children ?? this.children);
59
+ }
60
+ }
61
+ class Node {
62
+ constructor(tagName, attrs = {}, children = [], marks = []) {
63
+ this.tagName = tagName;
64
+ this.attrs = attrs;
65
+ this.children = children;
66
+ this.marks = marks;
67
+ }
68
+ get isLeaf() {
69
+ return !this.children.length;
70
+ }
71
+ get isInline() {
72
+ return !this.isBlock;
73
+ }
74
+ get isBlock() {
75
+ return import_semantics.BLOCKS.includes(this.tagName);
76
+ }
77
+ get border() {
78
+ return this.isLeaf ? 0 : 1;
79
+ }
80
+ get nodeSize() {
81
+ return this.border + (this.children.reduce((acc, child) => acc + child.nodeSize, 0) || 1) + this.border;
82
+ }
83
+ get textContent() {
84
+ return this.children.reduce((acc, child) => acc + child.textContent, "");
85
+ }
86
+ split(at) {
87
+ if (at === this.border) return this;
88
+ if (at === this.nodeSize - this.border) return this;
89
+ const children = [];
90
+ let pos = this.border;
91
+ for (const child of this.children) {
92
+ if (at > pos && at < pos + child.nodeSize) {
93
+ if (child instanceof TextNode) {
94
+ children.push(...child.split(at - pos));
95
+ } else {
96
+ children.push(child.split(at - pos));
97
+ }
98
+ } else {
99
+ children.push(child);
100
+ }
101
+ pos += child.nodeSize;
102
+ }
103
+ return this.copy({ children });
104
+ }
105
+ copy(opts = {}) {
106
+ return new Node(
107
+ this.tagName,
108
+ opts.attrs ?? this.attrs,
109
+ opts.children ?? this.children,
110
+ opts.marks ?? this.marks
111
+ );
112
+ }
113
+ }
114
+ class Mark {
115
+ constructor(tagName, attrs = {}) {
116
+ this.tagName = tagName;
117
+ this.attrs = attrs;
118
+ }
119
+ eq(other) {
120
+ if (!other) return false;
121
+ if (Object.keys(this.attrs).length !== Object.keys(other.attrs).length)
122
+ return false;
123
+ for (const [key, value] of Object.entries(this.attrs)) {
124
+ if (other.attrs[key] !== value) return false;
125
+ }
126
+ return this.tagName === other.tagName;
127
+ }
128
+ }
129
+ class TextNode {
130
+ constructor(text, marks = []) {
131
+ this.text = text;
132
+ this.marks = marks;
133
+ }
134
+ isLeaf = true;
135
+ isInline = true;
136
+ isBlock = false;
137
+ border = 0;
138
+ get nodeSize() {
139
+ return this.text.length;
140
+ }
141
+ get textContent() {
142
+ return this.text;
143
+ }
144
+ split(at) {
145
+ if (at === 0) return [this];
146
+ if (at === this.text.length) return [this];
147
+ return [
148
+ new TextNode(this.text.slice(0, at), this.marks),
149
+ new TextNode(this.text.slice(at), this.marks)
150
+ ];
151
+ }
152
+ copy(opts = {}) {
153
+ return new TextNode(this.text, opts.marks ?? this.marks);
154
+ }
155
+ }
156
+ function descendants(root, cb, pos = 0) {
157
+ for (const [i, child] of (0, import_itertools.enumerate)(root.children)) {
158
+ const descend = cb(child, pos, root, i);
159
+ if (descend && !child.isLeaf) {
160
+ descendants(child, cb, pos + child.border);
161
+ }
162
+ pos += child.nodeSize;
163
+ }
164
+ }
165
+ // Annotate the CommonJS export names for ESM import in node:
166
+ 0 && (module.exports = {
167
+ Mark,
168
+ Node,
169
+ Root,
170
+ TextNode,
171
+ descendants
172
+ });
package/dist/markup/model.d.cts
@@ -0,0 +1,57 @@
1
+ import { ElementName } from '@storyteller-platform/epub';
2
+
3
+ declare class Root {
4
+ children: (Node | TextNode)[];
5
+ constructor(children: (Node | TextNode)[]);
6
+ isInline: boolean;
7
+ isBlock: boolean;
8
+ get border(): number;
9
+ get textContent(): string;
10
+ split(at: number): Root;
11
+ copy(opts?: {
12
+ children?: (Node | TextNode)[];
13
+ }): Root;
14
+ }
15
+ declare class Node {
16
+ tagName: ElementName;
17
+ attrs: Record<string, string>;
18
+ children: (Node | TextNode)[];
19
+ marks: Mark[];
20
+ constructor(tagName: ElementName, attrs?: Record<string, string>, children?: (Node | TextNode)[], marks?: Mark[]);
21
+ get isLeaf(): boolean;
22
+ get isInline(): boolean;
23
+ get isBlock(): boolean;
24
+ get border(): 0 | 1;
25
+ get nodeSize(): number;
26
+ get textContent(): string;
27
+ split(at: number): Node;
28
+ copy(opts?: {
29
+ attrs?: Record<string, string>;
30
+ children?: (Node | TextNode)[];
31
+ marks?: Mark[];
32
+ }): Node;
33
+ }
34
+ declare class Mark {
35
+ tagName: ElementName;
36
+ attrs: Record<string, string>;
37
+ constructor(tagName: ElementName, attrs?: Record<string, string>);
38
+ eq(other: Mark | undefined): boolean;
39
+ }
40
+ declare class TextNode {
41
+ text: string;
42
+ marks: Mark[];
43
+ constructor(text: string, marks?: Mark[]);
44
+ isLeaf: boolean;
45
+ isInline: boolean;
46
+ isBlock: boolean;
47
+ border: number;
48
+ get nodeSize(): number;
49
+ get textContent(): string;
50
+ split(at: number): TextNode[];
51
+ copy(opts?: {
52
+ marks?: Mark[];
53
+ }): TextNode;
54
+ }
55
+ declare function descendants(root: Root | Node, cb: (node: Node | TextNode, pos: number, parent: Node | Root, index: number) => boolean, pos?: number): void;
56
+
57
+ export { Mark, Node, Root, TextNode, descendants };
package/dist/markup/model.d.ts
@@ -0,0 +1,57 @@
1
+ import { ElementName } from '@storyteller-platform/epub';
2
+
3
+ declare class Root {
4
+ children: (Node | TextNode)[];
5
+ constructor(children: (Node | TextNode)[]);
6
+ isInline: boolean;
7
+ isBlock: boolean;
8
+ get border(): number;
9
+ get textContent(): string;
10
+ split(at: number): Root;
11
+ copy(opts?: {
12
+ children?: (Node | TextNode)[];
13
+ }): Root;
14
+ }
15
+ declare class Node {
16
+ tagName: ElementName;
17
+ attrs: Record<string, string>;
18
+ children: (Node | TextNode)[];
19
+ marks: Mark[];
20
+ constructor(tagName: ElementName, attrs?: Record<string, string>, children?: (Node | TextNode)[], marks?: Mark[]);
21
+ get isLeaf(): boolean;
22
+ get isInline(): boolean;
23
+ get isBlock(): boolean;
24
+ get border(): 0 | 1;
25
+ get nodeSize(): number;
26
+ get textContent(): string;
27
+ split(at: number): Node;
28
+ copy(opts?: {
29
+ attrs?: Record<string, string>;
30
+ children?: (Node | TextNode)[];
31
+ marks?: Mark[];
32
+ }): Node;
33
+ }
34
+ declare class Mark {
35
+ tagName: ElementName;
36
+ attrs: Record<string, string>;
37
+ constructor(tagName: ElementName, attrs?: Record<string, string>);
38
+ eq(other: Mark | undefined): boolean;
39
+ }
40
+ declare class TextNode {
41
+ text: string;
42
+ marks: Mark[];
43
+ constructor(text: string, marks?: Mark[]);
44
+ isLeaf: boolean;
45
+ isInline: boolean;
46
+ isBlock: boolean;
47
+ border: number;
48
+ get nodeSize(): number;
49
+ get textContent(): string;
50
+ split(at: number): TextNode[];
51
+ copy(opts?: {
52
+ marks?: Mark[];
53
+ }): TextNode;
54
+ }
55
+ declare function descendants(root: Root | Node, cb: (node: Node | TextNode, pos: number, parent: Node | Root, index: number) => boolean, pos?: number): void;
56
+
57
+ export { Mark, Node, Root, TextNode, descendants };
package/dist/markup/model.js
@@ -0,0 +1,145 @@
1
+ import "../chunk-BIEQXUOY.js";
2
+ import { enumerate } from "itertools";
3
+ import { BLOCKS } from "./semantics.js";
4
+ class Root {
5
+ constructor(children) {
6
+ this.children = children;
7
+ }
8
+ isInline = false;
9
+ isBlock = true;
10
+ get border() {
11
+ return 0;
12
+ }
13
+ get textContent() {
14
+ return this.children.reduce((acc, child) => acc + child.textContent, "");
15
+ }
16
+ split(at) {
17
+ const children = [];
18
+ let pos = this.border;
19
+ for (const child of this.children) {
20
+ if (at > pos && at < pos + child.nodeSize) {
21
+ children.push(
22
+ ...child instanceof TextNode ? child.split(at - pos) : [child.split(at - pos)]
23
+ );
24
+ } else {
25
+ children.push(child);
26
+ }
27
+ pos += child.nodeSize;
28
+ }
29
+ return this.copy({ children });
30
+ }
31
+ copy(opts = {}) {
32
+ return new Root(opts.children ?? this.children);
33
+ }
34
+ }
35
+ class Node {
36
+ constructor(tagName, attrs = {}, children = [], marks = []) {
37
+ this.tagName = tagName;
38
+ this.attrs = attrs;
39
+ this.children = children;
40
+ this.marks = marks;
41
+ }
42
+ get isLeaf() {
43
+ return !this.children.length;
44
+ }
45
+ get isInline() {
46
+ return !this.isBlock;
47
+ }
48
+ get isBlock() {
49
+ return BLOCKS.includes(this.tagName);
50
+ }
51
+ get border() {
52
+ return this.isLeaf ? 0 : 1;
53
+ }
54
+ get nodeSize() {
55
+ return this.border + (this.children.reduce((acc, child) => acc + child.nodeSize, 0) || 1) + this.border;
56
+ }
57
+ get textContent() {
58
+ return this.children.reduce((acc, child) => acc + child.textContent, "");
59
+ }
60
+ split(at) {
61
+ if (at === this.border) return this;
62
+ if (at === this.nodeSize - this.border) return this;
63
+ const children = [];
64
+ let pos = this.border;
65
+ for (const child of this.children) {
66
+ if (at > pos && at < pos + child.nodeSize) {
67
+ if (child instanceof TextNode) {
68
+ children.push(...child.split(at - pos));
69
+ } else {
70
+ children.push(child.split(at - pos));
71
+ }
72
+ } else {
73
+ children.push(child);
74
+ }
75
+ pos += child.nodeSize;
76
+ }
77
+ return this.copy({ children });
78
+ }
79
+ copy(opts = {}) {
80
+ return new Node(
81
+ this.tagName,
82
+ opts.attrs ?? this.attrs,
83
+ opts.children ?? this.children,
84
+ opts.marks ?? this.marks
85
+ );
86
+ }
87
+ }
88
+ class Mark {
89
+ constructor(tagName, attrs = {}) {
90
+ this.tagName = tagName;
91
+ this.attrs = attrs;
92
+ }
93
+ eq(other) {
94
+ if (!other) return false;
95
+ if (Object.keys(this.attrs).length !== Object.keys(other.attrs).length)
96
+ return false;
97
+ for (const [key, value] of Object.entries(this.attrs)) {
98
+ if (other.attrs[key] !== value) return false;
99
+ }
100
+ return this.tagName === other.tagName;
101
+ }
102
+ }
103
+ class TextNode {
104
+ constructor(text, marks = []) {
105
+ this.text = text;
106
+ this.marks = marks;
107
+ }
108
+ isLeaf = true;
109
+ isInline = true;
110
+ isBlock = false;
111
+ border = 0;
112
+ get nodeSize() {
113
+ return this.text.length;
114
+ }
115
+ get textContent() {
116
+ return this.text;
117
+ }
118
+ split(at) {
119
+ if (at === 0) return [this];
120
+ if (at === this.text.length) return [this];
121
+ return [
122
+ new TextNode(this.text.slice(0, at), this.marks),
123
+ new TextNode(this.text.slice(at), this.marks)
124
+ ];
125
+ }
126
+ copy(opts = {}) {
127
+ return new TextNode(this.text, opts.marks ?? this.marks);
128
+ }
129
+ }
130
+ function descendants(root, cb, pos = 0) {
131
+ for (const [i, child] of enumerate(root.children)) {
132
+ const descend = cb(child, pos, root, i);
133
+ if (descend && !child.isLeaf) {
134
+ descendants(child, cb, pos + child.border);
135
+ }
136
+ pos += child.nodeSize;
137
+ }
138
+ }
139
+ export {
140
+ Mark,
141
+ Node,
142
+ Root,
143
+ TextNode,
144
+ descendants
145
+ };
package/dist/markup/parseDom.cjs
@@ -0,0 +1,59 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+ var parseDom_exports = {};
20
+ __export(parseDom_exports, {
21
+ parseDom: () => parseDom,
22
+ parseDomNode: () => parseDomNode
23
+ });
24
+ module.exports = __toCommonJS(parseDom_exports);
25
+ var import_epub = require("@storyteller-platform/epub");
26
+ var import_model = require("./model.cjs");
27
+ var import_semantics = require("./semantics.cjs");
28
+ function parseDom(xml) {
29
+ const children = xml.flatMap((node) => parseDomNode(node));
30
+ return new import_model.Root(children);
31
+ }
32
+ function parseDomNode(xmlNode, marks) {
33
+ if (import_epub.Epub.isXmlTextNode(xmlNode)) {
34
+ return new import_model.TextNode(xmlNode["#text"], marks);
35
+ }
36
+ const tagName = import_epub.Epub.getXmlElementName(xmlNode);
37
+ if (import_semantics.BLOCKS.includes(tagName)) {
38
+ return new import_model.Node(
39
+ tagName,
40
+ import_epub.Epub.getXmlAttributes(xmlNode),
41
+ import_epub.Epub.getXmlChildren(xmlNode).flatMap((child) => parseDomNode(child)),
42
+ marks
43
+ );
44
+ }
45
+ if (!import_epub.Epub.getXmlChildren(xmlNode).length) {
46
+ return new import_model.Node(tagName, import_epub.Epub.getXmlAttributes(xmlNode), [], marks);
47
+ }
48
+ return import_epub.Epub.getXmlChildren(xmlNode).flatMap(
49
+ (child) => parseDomNode(child, [
50
+ ...marks ?? [],
51
+ new import_model.Mark(tagName, import_epub.Epub.getXmlAttributes(xmlNode))
52
+ ])
53
+ );
54
+ }
55
+ // Annotate the CommonJS export names for ESM import in node:
56
+ 0 && (module.exports = {
57
+ parseDom,
58
+ parseDomNode
59
+ });
package/dist/markup/parseDom.d.cts
@@ -0,0 +1,7 @@
1
+ import { ParsedXml, XmlNode } from '@storyteller-platform/epub';
2
+ import { Root, Mark, Node, TextNode } from './model.cjs';
3
+
4
+ declare function parseDom(xml: ParsedXml): Root;
5
+ declare function parseDomNode(xmlNode: XmlNode, marks?: Mark[]): Node | TextNode | (Node | TextNode)[];
6
+
7
+ export { parseDom, parseDomNode };
package/dist/markup/parseDom.d.ts
@@ -0,0 +1,7 @@
1
+ import { ParsedXml, XmlNode } from '@storyteller-platform/epub';
2
+ import { Root, Mark, Node, TextNode } from './model.js';
3
+
4
+ declare function parseDom(xml: ParsedXml): Root;
5
+ declare function parseDomNode(xmlNode: XmlNode, marks?: Mark[]): Node | TextNode | (Node | TextNode)[];
6
+
7
+ export { parseDom, parseDomNode };
package/dist/markup/parseDom.js
@@ -0,0 +1,35 @@
1
+ import "../chunk-BIEQXUOY.js";
2
+ import { Epub } from "@storyteller-platform/epub";
3
+ import { Mark, Node, Root, TextNode } from "./model.js";
4
+ import { BLOCKS } from "./semantics.js";
5
+ function parseDom(xml) {
6
+ const children = xml.flatMap((node) => parseDomNode(node));
7
+ return new Root(children);
8
+ }
9
+ function parseDomNode(xmlNode, marks) {
10
+ if (Epub.isXmlTextNode(xmlNode)) {
11
+ return new TextNode(xmlNode["#text"], marks);
12
+ }
13
+ const tagName = Epub.getXmlElementName(xmlNode);
14
+ if (BLOCKS.includes(tagName)) {
15
+ return new Node(
16
+ tagName,
17
+ Epub.getXmlAttributes(xmlNode),
18
+ Epub.getXmlChildren(xmlNode).flatMap((child) => parseDomNode(child)),
19
+ marks
20
+ );
21
+ }
22
+ if (!Epub.getXmlChildren(xmlNode).length) {
23
+ return new Node(tagName, Epub.getXmlAttributes(xmlNode), [], marks);
24
+ }
25
+ return Epub.getXmlChildren(xmlNode).flatMap(
26
+ (child) => parseDomNode(child, [
27
+ ...marks ?? [],
28
+ new Mark(tagName, Epub.getXmlAttributes(xmlNode))
29
+ ])
30
+ );
31
+ }
32
+ export {
33
+ parseDom,
34
+ parseDomNode
35
+ };
package/dist/markup/segmentation.cjs
@@ -22,64 +22,18 @@ __export(segmentation_exports, {
22
22
  });
23
23
  module.exports = __toCommonJS(segmentation_exports);
24
24
  var import_text_segmentation = require("@echogarden/text-segmentation");
25
- var import_epub = require("@storyteller-platform/epub");
26
- var import_semantics = require("./semantics.cjs");
25
+ var import_parseDom = require("./parseDom.cjs");
26
+ var import_transform = require("./transform.cjs");
27
27
  async function getXhtmlSegmentation(xml, options) {
28
- const result = {
29
- words: new import_text_segmentation.WordSequence(),
30
- sentences: [],
31
- segmentSentenceRanges: []
32
- };
33
- let stagedText = "";
34
- for (const child of xml) {
35
- if (import_epub.Epub.isXmlTextNode(child)) {
36
- stagedText += child["#text"];
37
- continue;
38
- }
39
- const childName = import_epub.Epub.getXmlElementName(child);
40
- if (!import_semantics.BLOCKS.includes(childName)) {
41
- stagedText += import_epub.Epub.getXhtmlTextContent(import_epub.Epub.getXmlChildren(child));
42
- continue;
43
- }
44
- mergeSegmentations(
45
- result,
46
- await (0, import_text_segmentation.segmentText)(collapseWhitespace(stagedText), {
47
- ...options.primaryLocale && {
48
- language: options.primaryLocale.language
49
- },
50
- enableEastAsianPostprocessing: true
51
- })
52
- );
53
- stagedText = "";
54
- mergeSegmentations(
55
- result,
56
- await getXhtmlSegmentation(import_epub.Epub.getXmlChildren(child), options)
57
- );
58
- }
59
- mergeSegmentations(
60
- result,
61
- await (0, import_text_segmentation.segmentText)(collapseWhitespace(stagedText), {
62
- ...options.primaryLocale && {
63
- language: options.primaryLocale.language
64
- },
65
- enableEastAsianPostprocessing: true
66
- })
67
- );
68
- return result;
69
- }
70
- function collapseWhitespace(text) {
71
- return text.replace(/^\s*/, "").replace(/\s*$/, "").replaceAll(/\s+/g, " ");
72
- }
73
- function mergeSegmentations(first, second) {
74
- for (const wordEntry of second.words.entries) {
75
- first.words.addWord(
76
- wordEntry.text,
77
- wordEntry.startOffset,
78
- wordEntry.isPunctuation
79
- );
80
- }
81
- first.sentences.push(...second.sentences);
82
- first.segmentSentenceRanges.push(...second.segmentSentenceRanges);
28
+ const root = (0, import_parseDom.parseDom)(xml);
29
+ const { result: text, mapping } = (0, import_transform.liftText)(root);
30
+ const result = await (0, import_text_segmentation.segmentText)(text, {
31
+ ...options.primaryLocale && {
32
+ language: options.primaryLocale.language
33
+ },
34
+ enableEastAsianPostprocessing: true
35
+ });
36
+ return { result: result.sentences, mapping };
83
37
  }
84
38
  // Annotate the CommonJS export names for ESM import in node:
85
39
  0 && (module.exports = {
package/dist/markup/segmentation.d.cts
@@ -1,8 +1,12 @@
1
1
  import { SegmentationResult } from '@echogarden/text-segmentation';
2
2
  import { ParsedXml } from '@storyteller-platform/epub';
3
+ import { Mapping } from './map.cjs';
3
4
 
4
5
  declare function getXhtmlSegmentation(xml: ParsedXml, options: {
5
- primaryLocale?: Intl.Locale | null;
6
- }): Promise<SegmentationResult>;
6
+ primaryLocale?: Intl.Locale | null | undefined;
7
+ }): Promise<{
8
+ result: SegmentationResult["sentences"];
9
+ mapping: Mapping;
10
+ }>;
7
11
 
8
12
  export { getXhtmlSegmentation };
package/dist/markup/segmentation.d.ts
@@ -1,8 +1,12 @@
1
1
  import { SegmentationResult } from '@echogarden/text-segmentation';
2
2
  import { ParsedXml } from '@storyteller-platform/epub';
3
+ import { Mapping } from './map.js';
3
4
 
4
5
  declare function getXhtmlSegmentation(xml: ParsedXml, options: {
5
- primaryLocale?: Intl.Locale | null;
6
- }): Promise<SegmentationResult>;
6
+ primaryLocale?: Intl.Locale | null | undefined;
7
+ }): Promise<{
8
+ result: SegmentationResult["sentences"];
9
+ mapping: Mapping;
10
+ }>;
7
11
 
8
12
  export { getXhtmlSegmentation };