document-ir 0.0.11 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
1
+ import { ArrayCollapseTransformer } from "./index.js";
2
+ import { BubbleNode, CardNode, ColumnsNode, DefinitionListNode, FigureImageNode, HighTechAlertNode, ImageNode, Node, NoteNode, QuoteNode, RedactedNode, StickerNode, VideoNode } from "./types.js";
3
/**
 * Type declarations for DocumentThinningTransformer, an ArrayCollapseTransformer
 * subclass that overrides the handlers for sticker, bubble, high-tech alert,
 * columns, quote, image, figure image, video, definition list, redacted, note
 * and card nodes.
 *
 * Every handler is protected and resolves to a replacement Node, or to null.
 */
+ export declare class DocumentThinningTransformer extends ArrayCollapseTransformer {
4
+ protected sticker(node: StickerNode): Promise<Node | null>;
5
+ protected bubble(node: BubbleNode): Promise<Node | null>;
6
+ protected highTechAlert(node: HighTechAlertNode): Promise<Node | null>;
7
+ protected columns(node: ColumnsNode): Promise<Node | null>;
8
+ protected quote(node: QuoteNode): Promise<Node | null>;
9
+ protected image(node: ImageNode): Promise<Node | null>;
10
+ protected figureImage(node: FigureImageNode): Promise<Node | null>;
11
+ protected video(node: VideoNode): Promise<Node | null>;
12
+ protected definitionList(node: DefinitionListNode): Promise<Node | null>;
13
+ protected redacted(_node: RedactedNode): Promise<Node | null>;
14
+ protected note(node: NoteNode): Promise<Node | null>;
15
+ protected card(node: CardNode): Promise<Node | null>;
16
+ }
@@ -0,0 +1,246 @@
1
+ import { ArrayCollapseTransformer } from "./index.js";
2
/**
 * Transformer that replaces rich node types (stickers, bubbles, alerts,
 * columns, quotes, images, videos, definition lists, notes, cards) with plain
 * "paragraph" / "array" nodes built from their transformed children, and drops
 * redacted nodes entirely.
 *
 * Each handler resolves to the replacement node, or to null when the node is
 * removed.
 */
export class DocumentThinningTransformer extends ArrayCollapseTransformer {
    /** Sticker -> paragraph of its transformed children; null when it has no content. */
    async sticker(node) {
        if (node.content.length == 0) {
            return null;
        }
        const content = await this.chooseChildren(node.content);
        if (!content) {
            return null;
        }
        return {
            type: "paragraph",
            content,
        };
    }
    /** Bubble -> paragraph of its transformed children; null when it has no content. */
    async bubble(node) {
        if (node.content.length == 0) {
            return null;
        }
        const content = await this.chooseChildren(node.content);
        if (!content) {
            return null;
        }
        return {
            type: "paragraph",
            content,
        };
    }
    /** High-tech alert -> array of its transformed children; null when it has no content. */
    async highTechAlert(node) {
        if (node.content.length == 0) {
            return null;
        }
        const content = await this.chooseChildren(node.content);
        if (!content) {
            return null;
        }
        return {
            type: "array",
            content,
        };
    }
    /** Columns -> one array holding the transformed nodes of every column, in order. */
    async columns(node) {
        const flattened = node.columns.flat();
        if (flattened.length == 0) {
            return null;
        }
        const content = await this.chooseChildren(flattened);
        if (!content) {
            return null;
        }
        return {
            type: "array",
            content,
        };
    }
    /** Quote -> array of its transformed children; null when it has no content. */
    async quote(node) {
        if (node.content.length == 0) {
            return null;
        }
        const content = await this.chooseChildren(node.content);
        if (!content) {
            return null;
        }
        return {
            type: "array",
            content,
        };
    }
    /** Image -> paragraph with the text "inline image: " followed by the alt text. */
    image(node) {
        return Promise.resolve({
            type: "paragraph",
            content: [{
                    type: "text",
                    text: "inline image: ",
                }, {
                    type: "text",
                    text: node.alt,
                }],
        });
    }
    /**
     * Figure image -> the same placeholder paragraph as `image`, followed by the
     * transformed figure content (wrapped together in an array) when present.
     */
    async figureImage(node) {
        const image = {
            type: "paragraph",
            content: [{
                    type: "text",
                    text: "inline image: ",
                }, {
                    type: "text",
                    text: node.alt,
                }],
        };
        if (!node.content) {
            return image;
        }
        const content = await this.chooseChildren(node.content);
        if (!content) {
            // Same falsy-result guard the sibling handlers use; avoids
            // spreading a non-iterable value below.
            return image;
        }
        return {
            type: "array",
            content: [
                image,
                ...content,
            ],
        };
    }
    /**
     * Video -> paragraph with the text "inline video: " and the alt text,
     * followed by the transformed content (wrapped in an array) when present.
     */
    async video(node) {
        const video = {
            type: "paragraph",
            content: [{
                    type: "text",
                    text: "inline video: ",
                }, {
                    type: "text",
                    text: node.alt,
                }],
        };
        if (!node.content) {
            return video;
        }
        const content = await this.chooseChildren(node.content);
        if (!content) {
            // Same falsy-result guard the sibling handlers use.
            return video;
        }
        return {
            type: "array",
            content: [
                video,
                ...content,
            ],
        };
    }
    /**
     * Definition list -> array with one paragraph per entry.
     *
     * The paragraph holds the title, a single-space text node and the
     * abbreviation. If the entry's first content node is not a paragraph, a
     * space and the transformed content are appended to that same paragraph;
     * otherwise the transformed content follows the paragraph as siblings.
     */
    async definitionList(node) {
        const content = [];
        for (const d of node.content) {
            const defContent = [];
            const title = await this.chooseChildren(d.title);
            if (title) {
                for (const n of title) {
                    defContent.push(n);
                }
            }
            defContent.push({ type: "text", text: " " });
            const abbreviation = await this.chooseChildren(d.abbreviation);
            if (abbreviation) {
                for (const n of abbreviation) {
                    defContent.push(n);
                }
            }
            if (d.content.length > 0 && d.content[0].type != "paragraph") {
                defContent.push({ type: "text", text: " " });
                const def = await this.chooseChildren(d.content);
                if (def) {
                    for (const n of def) {
                        defContent.push(n);
                    }
                }
            }
            content.push({
                type: "paragraph",
                content: defContent,
            });
            if (d.content.length > 0 && d.content[0].type == "paragraph") {
                const def = await this.chooseChildren(d.content);
                if (def) {
                    for (const n of def) {
                        content.push(n);
                    }
                }
            }
        }
        return {
            type: "array",
            content,
        };
    }
    /** Redacted nodes are always removed. */
    // deno-lint-ignore require-await
    async redacted(_node) {
        return null;
    }
    /** Note -> paragraph starting with the text "Note: " followed by the transformed children. */
    async note(node) {
        if (node.content.length == 0) {
            return null;
        }
        const content = await this.chooseChildren(node.content);
        if (!content) {
            return null;
        }
        return {
            type: "paragraph",
            content: [
                { type: "text", text: "Note: " },
                ...content,
            ],
        };
    }
    /**
     * Card -> array made of, in order: a paragraph with the header title, the
     * transformed card content, each transformed media item, and one paragraph
     * with the attribution (title, then the date separated by a space).
     */
    async card(node) {
        const content = [];
        if (node.header) {
            const title = await this.chooseChildren(node.header.title);
            if (title && title.length > 0) {
                content.push({
                    type: "paragraph",
                    content: title,
                });
            }
        }
        if (node.content) {
            const card = await this.chooseChildren(node.content.content);
            if (card) {
                for (const c of card) {
                    content.push(c);
                }
            }
        }
        if (node.media) {
            for (const media of node.media.content) {
                const m = await this.choose(media);
                if (m) {
                    content.push(m);
                }
            }
        }
        if (node.attribution) {
            // Inline attribution nodes are gathered here and emitted as ONE
            // paragraph. Previously they were pushed straight into `content`,
            // which left this list empty and the paragraph below unreachable.
            const attribution = [];
            if (node.attribution.title) {
                const title = await this.chooseChildren(node.attribution.title);
                if (title) {
                    for (const n of title) {
                        attribution.push(n);
                    }
                }
            }
            if (node.attribution.date) {
                if (attribution.length > 0) {
                    // Separate the date from the attribution title.
                    attribution.push({ type: "text", text: " " });
                }
                attribution.push({ type: "text", text: `${node.attribution.date}` });
            }
            if (attribution.length > 0) {
                content.push({
                    type: "paragraph",
                    content: attribution,
                });
            }
        }
        return {
            type: "array",
            content: content,
        };
    }
}
@@ -0,0 +1,6 @@
1
+ import { IdentityTransformer } from "./index.js";
2
+ import { DocumentNode } from "./types.js";
3
/**
 * Type declarations for WordCounterTransformer, an IdentityTransformer subclass
 * whose `transform` takes a DocumentNode and resolves to a DocumentNode.
 */
+ export declare class WordCounterTransformer extends IdentityTransformer {
4
+ constructor();
5
+ transform(node: DocumentNode): Promise<DocumentNode>;
6
+ }
@@ -0,0 +1,83 @@
1
+ import { DocumentThinningTransformer } from "./DocumentThinningTransformer.js";
2
+ import { WordCounterVisitor } from "./WordCounterVisitor.js";
3
+ import { IdentityTransformer, TextVisitor } from "./index.js";
4
/**
 * Recursively converts a collected section ({ header, headerId, nodes,
 * children }) into a word-count entry.
 *
 * `words` is the count for the section's own nodes; `totalWords` adds the
 * `totalWords` of every converted child.
 */
function convertHierarchy(parent) {
    // Count the words of the nodes that belong directly to this section.
    const counter = new WordCounterVisitor();
    for (const own of parent.nodes) {
        counter.visit(own);
    }
    const words = counter.getCount();
    // Convert the sub-sections, then fold their totals into this one.
    const children = parent.children.map((section) => convertHierarchy(section));
    const totalWords = children.reduce((sum, entry) => sum + entry.totalWords, words);
    return {
        headerText: parent.header,
        headerId: parent.headerId,
        words,
        totalWords,
        children,
    };
}
25
/**
 * Transformer that leaves the document content as it is and attaches a
 * `hierarchy` property: a tree of sections (one per header) with word counts.
 */
export class WordCounterTransformer extends IdentityTransformer {
    constructor() {
        super();
    }
    /**
     * Builds the section tree from a thinned copy of the document and returns
     * a shallow copy of `node` with `hierarchy` set.
     */
    async transform(node) {
        // Work on a JSON round-tripped copy so thinning never sees the caller's object.
        const isolated = JSON.parse(JSON.stringify(node));
        const thinned = await new DocumentThinningTransformer().transform(isolated);
        const root = {
            header: node.title,
            headerId: "title",
            nodes: [],
            children: [],
            depth: 1,
        };
        // Open sections, outermost first; index 0 is always the root.
        const stack = [root];
        let depth = 1;
        for (const child of thinned.content) {
            if (child.type != "header") {
                // Ordinary content belongs to the innermost open section.
                stack[stack.length - 1].nodes.push(child);
                continue;
            }
            if (child.level == 1) {
                // never pop the root
                continue;
            }
            if (child.level <= depth) {
                // Close sections at the same or a deeper level than the new header.
                for (let i = stack.length - 1; i > 0; i--) {
                    if (stack[i].depth >= child.level) {
                        stack.pop();
                    }
                }
            }
            const titleReader = new TextVisitor();
            titleReader.visit(child);
            const section = {
                header: titleReader.getText(),
                depth: child.level,
                children: [],
                nodes: [],
            };
            if (child.htmlId) {
                section.headerId = child.htmlId;
            }
            stack[stack.length - 1].children.push(section);
            stack.push(section);
            depth = child.level;
        }
        // The transformer does not actually walk through the document tree
        // We just append a newly calculated hierarchy object
        return {
            ...node,
            hierarchy: convertHierarchy(root),
        };
    }
}
@@ -0,0 +1,13 @@
1
+ import { NodeVisitor } from "./index.js";
2
+ import { DocumentNode, TextNode } from "./types.js";
3
/**
 * Type declarations for WordCounterVisitor, a NodeVisitor subclass with
 * private `count` and `texts` state, hooks for block boundaries, text nodes
 * and the document node, and a public `getCount()` returning a number.
 */
+ export declare class WordCounterVisitor extends NodeVisitor {
4
+ private count;
5
+ private texts;
6
+ constructor();
7
+ private countText;
8
+ protected beforeBlock(): void;
9
+ protected afterBlock(): void;
10
+ protected text(node: TextNode): void;
11
+ protected document(node: DocumentNode): void;
12
+ getCount(): number;
13
+ }
@@ -0,0 +1,43 @@
1
+ import { NodeVisitor } from "./index.js";
2
/**
 * Visitor that counts words.
 *
 * Text node contents are buffered in `texts`; at every block boundary, after
 * the document has been visited, and when the count is read, the buffer is
 * joined, its words are added to `count`, and the buffer is cleared.
 */
export class WordCounterVisitor extends NodeVisitor {
    constructor() {
        super();
        Object.defineProperty(this, "count", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "texts", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        this.count = 0;
        this.texts = [];
    }
    /**
     * Flushes the buffered text into the running word count.
     *
     * A word is a maximal run of non-whitespace characters. Splitting on a
     * single " " (the previous approach) counted empty tokens produced by
     * leading, trailing or repeated spaces, and never split on tabs or
     * newlines.
     */
    countText() {
        if (this.texts.length > 0) {
            // match() returns null when the buffer holds no word at all.
            const words = this.texts.join("").match(/\S+/g);
            if (words) {
                this.count += words.length;
            }
            this.texts = [];
        }
    }
    /** Flushes pending text before a block starts. */
    beforeBlock() {
        this.countText();
    }
    /** Flushes pending text after a block ends. */
    afterBlock() {
        this.countText();
    }
    /** Buffers the text of a text node. */
    text(node) {
        this.texts.push(node.text);
    }
    /** Visits the document through the base class, then flushes pending text. */
    document(node) {
        super.document(node);
        this.countText();
    }
    /** Returns the number of words counted so far, flushing pending text first. */
    getCount() {
        this.countText();
        return this.count;
    }
}
package/esm/index.d.ts CHANGED
@@ -6,3 +6,6 @@ export { WhitespaceTransformer } from "./WhitespaceTransformer.js";
6
6
  export { WhitespaceStretchingTransformer } from "./WhitespaceStretchingTransformer.js";
7
7
  export { NodeVisitor } from "./NodeVisitor.js";
8
8
  export { TextVisitor } from "./TextVisitor.js";
9
+ export { WordCounterTransformer } from "./WordCountTransformer.js";
10
+ export { WordCounterVisitor } from "./WordCounterVisitor.js";
11
+ export { DocumentThinningTransformer } from "./DocumentThinningTransformer.js";
package/esm/index.js CHANGED
@@ -6,3 +6,6 @@ export { WhitespaceTransformer } from "./WhitespaceTransformer.js";
6
6
  export { WhitespaceStretchingTransformer } from "./WhitespaceStretchingTransformer.js";
7
7
  export { NodeVisitor } from "./NodeVisitor.js";
8
8
  export { TextVisitor } from "./TextVisitor.js";
9
+ export { WordCounterTransformer } from "./WordCountTransformer.js";
10
+ export { WordCounterVisitor } from "./WordCounterVisitor.js";
11
+ export { DocumentThinningTransformer } from "./DocumentThinningTransformer.js";
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "module": "./esm/index.js",
3
3
  "main": "./script/index.js",
4
4
  "name": "document-ir",
5
- "version": "0.0.11",
5
+ "version": "0.0.12",
6
6
  "description": "Intermediate representation and transformers for documents",
7
7
  "license": "MIT",
8
8
  "repository": {
@@ -0,0 +1,16 @@
1
+ import { ArrayCollapseTransformer } from "./index.js";
2
+ import { BubbleNode, CardNode, ColumnsNode, DefinitionListNode, FigureImageNode, HighTechAlertNode, ImageNode, Node, NoteNode, QuoteNode, RedactedNode, StickerNode, VideoNode } from "./types.js";
3
/**
 * Type declarations for DocumentThinningTransformer, an ArrayCollapseTransformer
 * subclass that overrides the handlers for sticker, bubble, high-tech alert,
 * columns, quote, image, figure image, video, definition list, redacted, note
 * and card nodes.
 *
 * Every handler is protected and resolves to a replacement Node, or to null.
 */
+ export declare class DocumentThinningTransformer extends ArrayCollapseTransformer {
4
+ protected sticker(node: StickerNode): Promise<Node | null>;
5
+ protected bubble(node: BubbleNode): Promise<Node | null>;
6
+ protected highTechAlert(node: HighTechAlertNode): Promise<Node | null>;
7
+ protected columns(node: ColumnsNode): Promise<Node | null>;
8
+ protected quote(node: QuoteNode): Promise<Node | null>;
9
+ protected image(node: ImageNode): Promise<Node | null>;
10
+ protected figureImage(node: FigureImageNode): Promise<Node | null>;
11
+ protected video(node: VideoNode): Promise<Node | null>;
12
+ protected definitionList(node: DefinitionListNode): Promise<Node | null>;
13
+ protected redacted(_node: RedactedNode): Promise<Node | null>;
14
+ protected note(node: NoteNode): Promise<Node | null>;
15
+ protected card(node: CardNode): Promise<Node | null>;
16
+ }
@@ -0,0 +1,250 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.DocumentThinningTransformer = void 0;
4
+ const index_js_1 = require("./index.js");
5
/**
 * Transformer that replaces rich node types (stickers, bubbles, alerts,
 * columns, quotes, images, videos, definition lists, notes, cards) with plain
 * "paragraph" / "array" nodes built from their transformed children, and drops
 * redacted nodes entirely.
 *
 * Each handler resolves to the replacement node, or to null when the node is
 * removed.
 */
class DocumentThinningTransformer extends index_js_1.ArrayCollapseTransformer {
    /** Sticker -> paragraph of its transformed children; null when it has no content. */
    async sticker(node) {
        if (node.content.length == 0) {
            return null;
        }
        const content = await this.chooseChildren(node.content);
        if (!content) {
            return null;
        }
        return {
            type: "paragraph",
            content,
        };
    }
    /** Bubble -> paragraph of its transformed children; null when it has no content. */
    async bubble(node) {
        if (node.content.length == 0) {
            return null;
        }
        const content = await this.chooseChildren(node.content);
        if (!content) {
            return null;
        }
        return {
            type: "paragraph",
            content,
        };
    }
    /** High-tech alert -> array of its transformed children; null when it has no content. */
    async highTechAlert(node) {
        if (node.content.length == 0) {
            return null;
        }
        const content = await this.chooseChildren(node.content);
        if (!content) {
            return null;
        }
        return {
            type: "array",
            content,
        };
    }
    /** Columns -> one array holding the transformed nodes of every column, in order. */
    async columns(node) {
        const flattened = node.columns.flat();
        if (flattened.length == 0) {
            return null;
        }
        const content = await this.chooseChildren(flattened);
        if (!content) {
            return null;
        }
        return {
            type: "array",
            content,
        };
    }
    /** Quote -> array of its transformed children; null when it has no content. */
    async quote(node) {
        if (node.content.length == 0) {
            return null;
        }
        const content = await this.chooseChildren(node.content);
        if (!content) {
            return null;
        }
        return {
            type: "array",
            content,
        };
    }
    /** Image -> paragraph with the text "inline image: " followed by the alt text. */
    image(node) {
        return Promise.resolve({
            type: "paragraph",
            content: [{
                    type: "text",
                    text: "inline image: ",
                }, {
                    type: "text",
                    text: node.alt,
                }],
        });
    }
    /**
     * Figure image -> the same placeholder paragraph as `image`, followed by the
     * transformed figure content (wrapped together in an array) when present.
     */
    async figureImage(node) {
        const image = {
            type: "paragraph",
            content: [{
                    type: "text",
                    text: "inline image: ",
                }, {
                    type: "text",
                    text: node.alt,
                }],
        };
        if (!node.content) {
            return image;
        }
        const content = await this.chooseChildren(node.content);
        if (!content) {
            // Same falsy-result guard the sibling handlers use; avoids
            // spreading a non-iterable value below.
            return image;
        }
        return {
            type: "array",
            content: [
                image,
                ...content,
            ],
        };
    }
    /**
     * Video -> paragraph with the text "inline video: " and the alt text,
     * followed by the transformed content (wrapped in an array) when present.
     */
    async video(node) {
        const video = {
            type: "paragraph",
            content: [{
                    type: "text",
                    text: "inline video: ",
                }, {
                    type: "text",
                    text: node.alt,
                }],
        };
        if (!node.content) {
            return video;
        }
        const content = await this.chooseChildren(node.content);
        if (!content) {
            // Same falsy-result guard the sibling handlers use.
            return video;
        }
        return {
            type: "array",
            content: [
                video,
                ...content,
            ],
        };
    }
    /**
     * Definition list -> array with one paragraph per entry.
     *
     * The paragraph holds the title, a single-space text node and the
     * abbreviation. If the entry's first content node is not a paragraph, a
     * space and the transformed content are appended to that same paragraph;
     * otherwise the transformed content follows the paragraph as siblings.
     */
    async definitionList(node) {
        const content = [];
        for (const d of node.content) {
            const defContent = [];
            const title = await this.chooseChildren(d.title);
            if (title) {
                for (const n of title) {
                    defContent.push(n);
                }
            }
            defContent.push({ type: "text", text: " " });
            const abbreviation = await this.chooseChildren(d.abbreviation);
            if (abbreviation) {
                for (const n of abbreviation) {
                    defContent.push(n);
                }
            }
            if (d.content.length > 0 && d.content[0].type != "paragraph") {
                defContent.push({ type: "text", text: " " });
                const def = await this.chooseChildren(d.content);
                if (def) {
                    for (const n of def) {
                        defContent.push(n);
                    }
                }
            }
            content.push({
                type: "paragraph",
                content: defContent,
            });
            if (d.content.length > 0 && d.content[0].type == "paragraph") {
                const def = await this.chooseChildren(d.content);
                if (def) {
                    for (const n of def) {
                        content.push(n);
                    }
                }
            }
        }
        return {
            type: "array",
            content,
        };
    }
    /** Redacted nodes are always removed. */
    // deno-lint-ignore require-await
    async redacted(_node) {
        return null;
    }
    /** Note -> paragraph starting with the text "Note: " followed by the transformed children. */
    async note(node) {
        if (node.content.length == 0) {
            return null;
        }
        const content = await this.chooseChildren(node.content);
        if (!content) {
            return null;
        }
        return {
            type: "paragraph",
            content: [
                { type: "text", text: "Note: " },
                ...content,
            ],
        };
    }
    /**
     * Card -> array made of, in order: a paragraph with the header title, the
     * transformed card content, each transformed media item, and one paragraph
     * with the attribution (title, then the date separated by a space).
     */
    async card(node) {
        const content = [];
        if (node.header) {
            const title = await this.chooseChildren(node.header.title);
            if (title && title.length > 0) {
                content.push({
                    type: "paragraph",
                    content: title,
                });
            }
        }
        if (node.content) {
            const card = await this.chooseChildren(node.content.content);
            if (card) {
                for (const c of card) {
                    content.push(c);
                }
            }
        }
        if (node.media) {
            for (const media of node.media.content) {
                const m = await this.choose(media);
                if (m) {
                    content.push(m);
                }
            }
        }
        if (node.attribution) {
            // Inline attribution nodes are gathered here and emitted as ONE
            // paragraph. Previously they were pushed straight into `content`,
            // which left this list empty and the paragraph below unreachable.
            const attribution = [];
            if (node.attribution.title) {
                const title = await this.chooseChildren(node.attribution.title);
                if (title) {
                    for (const n of title) {
                        attribution.push(n);
                    }
                }
            }
            if (node.attribution.date) {
                if (attribution.length > 0) {
                    // Separate the date from the attribution title.
                    attribution.push({ type: "text", text: " " });
                }
                attribution.push({ type: "text", text: `${node.attribution.date}` });
            }
            if (attribution.length > 0) {
                content.push({
                    type: "paragraph",
                    content: attribution,
                });
            }
        }
        return {
            type: "array",
            content: content,
        };
    }
}
250
+ exports.DocumentThinningTransformer = DocumentThinningTransformer;
@@ -0,0 +1,6 @@
1
+ import { IdentityTransformer } from "./index.js";
2
+ import { DocumentNode } from "./types.js";
3
/**
 * Type declarations for WordCounterTransformer, an IdentityTransformer subclass
 * whose `transform` takes a DocumentNode and resolves to a DocumentNode.
 */
+ export declare class WordCounterTransformer extends IdentityTransformer {
4
+ constructor();
5
+ transform(node: DocumentNode): Promise<DocumentNode>;
6
+ }
@@ -0,0 +1,87 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.WordCounterTransformer = void 0;
4
+ const DocumentThinningTransformer_js_1 = require("./DocumentThinningTransformer.js");
5
+ const WordCounterVisitor_js_1 = require("./WordCounterVisitor.js");
6
+ const index_js_1 = require("./index.js");
7
/**
 * Recursively converts a collected section ({ header, headerId, nodes,
 * children }) into a word-count entry.
 *
 * `words` is the count for the section's own nodes; `totalWords` adds the
 * `totalWords` of every converted child.
 */
function convertHierarchy(parent) {
    // Count the words of the nodes that belong directly to this section.
    const counter = new WordCounterVisitor_js_1.WordCounterVisitor();
    for (const own of parent.nodes) {
        counter.visit(own);
    }
    const words = counter.getCount();
    // Convert the sub-sections, then fold their totals into this one.
    const children = parent.children.map((section) => convertHierarchy(section));
    const totalWords = children.reduce((sum, entry) => sum + entry.totalWords, words);
    return {
        headerText: parent.header,
        headerId: parent.headerId,
        words,
        totalWords,
        children,
    };
}
28
/**
 * Transformer that leaves the document content as it is and attaches a
 * `hierarchy` property: a tree of sections (one per header) with word counts.
 */
class WordCounterTransformer extends index_js_1.IdentityTransformer {
    constructor() {
        super();
    }
    /**
     * Builds the section tree from a thinned copy of the document and returns
     * a shallow copy of `node` with `hierarchy` set.
     */
    async transform(node) {
        // Work on a JSON round-tripped copy so thinning never sees the caller's object.
        const isolated = JSON.parse(JSON.stringify(node));
        const thinned = await new DocumentThinningTransformer_js_1.DocumentThinningTransformer().transform(isolated);
        const root = {
            header: node.title,
            headerId: "title",
            nodes: [],
            children: [],
            depth: 1,
        };
        // Open sections, outermost first; index 0 is always the root.
        const stack = [root];
        let depth = 1;
        for (const child of thinned.content) {
            if (child.type != "header") {
                // Ordinary content belongs to the innermost open section.
                stack[stack.length - 1].nodes.push(child);
                continue;
            }
            if (child.level == 1) {
                // never pop the root
                continue;
            }
            if (child.level <= depth) {
                // Close sections at the same or a deeper level than the new header.
                for (let i = stack.length - 1; i > 0; i--) {
                    if (stack[i].depth >= child.level) {
                        stack.pop();
                    }
                }
            }
            const titleReader = new index_js_1.TextVisitor();
            titleReader.visit(child);
            const section = {
                header: titleReader.getText(),
                depth: child.level,
                children: [],
                nodes: [],
            };
            if (child.htmlId) {
                section.headerId = child.htmlId;
            }
            stack[stack.length - 1].children.push(section);
            stack.push(section);
            depth = child.level;
        }
        // The transformer does not actually walk through the document tree
        // We just append a newly calculated hierarchy object
        return {
            ...node,
            hierarchy: convertHierarchy(root),
        };
    }
}
87
+ exports.WordCounterTransformer = WordCounterTransformer;
@@ -0,0 +1,13 @@
1
+ import { NodeVisitor } from "./index.js";
2
+ import { DocumentNode, TextNode } from "./types.js";
3
/**
 * Type declarations for WordCounterVisitor, a NodeVisitor subclass with
 * private `count` and `texts` state, hooks for block boundaries, text nodes
 * and the document node, and a public `getCount()` returning a number.
 */
+ export declare class WordCounterVisitor extends NodeVisitor {
4
+ private count;
5
+ private texts;
6
+ constructor();
7
+ private countText;
8
+ protected beforeBlock(): void;
9
+ protected afterBlock(): void;
10
+ protected text(node: TextNode): void;
11
+ protected document(node: DocumentNode): void;
12
+ getCount(): number;
13
+ }
@@ -0,0 +1,47 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.WordCounterVisitor = void 0;
4
+ const index_js_1 = require("./index.js");
5
/**
 * Visitor that counts words.
 *
 * Text node contents are buffered in `texts`; at every block boundary, after
 * the document has been visited, and when the count is read, the buffer is
 * joined, its words are added to `count`, and the buffer is cleared.
 */
class WordCounterVisitor extends index_js_1.NodeVisitor {
    constructor() {
        super();
        Object.defineProperty(this, "count", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "texts", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        this.count = 0;
        this.texts = [];
    }
    /**
     * Flushes the buffered text into the running word count.
     *
     * A word is a maximal run of non-whitespace characters. Splitting on a
     * single " " (the previous approach) counted empty tokens produced by
     * leading, trailing or repeated spaces, and never split on tabs or
     * newlines.
     */
    countText() {
        if (this.texts.length > 0) {
            // match() returns null when the buffer holds no word at all.
            const words = this.texts.join("").match(/\S+/g);
            if (words) {
                this.count += words.length;
            }
            this.texts = [];
        }
    }
    /** Flushes pending text before a block starts. */
    beforeBlock() {
        this.countText();
    }
    /** Flushes pending text after a block ends. */
    afterBlock() {
        this.countText();
    }
    /** Buffers the text of a text node. */
    text(node) {
        this.texts.push(node.text);
    }
    /** Visits the document through the base class, then flushes pending text. */
    document(node) {
        super.document(node);
        this.countText();
    }
    /** Returns the number of words counted so far, flushing pending text first. */
    getCount() {
        this.countText();
        return this.count;
    }
}
47
+ exports.WordCounterVisitor = WordCounterVisitor;
package/script/index.d.ts CHANGED
@@ -6,3 +6,6 @@ export { WhitespaceTransformer } from "./WhitespaceTransformer.js";
6
6
  export { WhitespaceStretchingTransformer } from "./WhitespaceStretchingTransformer.js";
7
7
  export { NodeVisitor } from "./NodeVisitor.js";
8
8
  export { TextVisitor } from "./TextVisitor.js";
9
+ export { WordCounterTransformer } from "./WordCountTransformer.js";
10
+ export { WordCounterVisitor } from "./WordCounterVisitor.js";
11
+ export { DocumentThinningTransformer } from "./DocumentThinningTransformer.js";
package/script/index.js CHANGED
@@ -14,7 +14,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
14
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
15
  };
16
16
  Object.defineProperty(exports, "__esModule", { value: true });
17
- exports.TextVisitor = exports.NodeVisitor = exports.WhitespaceStretchingTransformer = exports.WhitespaceTransformer = exports.TextCollapseTransformer = exports.IdentityTransformer = exports.ArrayCollapseTransformer = void 0;
17
+ exports.DocumentThinningTransformer = exports.WordCounterVisitor = exports.WordCounterTransformer = exports.TextVisitor = exports.NodeVisitor = exports.WhitespaceStretchingTransformer = exports.WhitespaceTransformer = exports.TextCollapseTransformer = exports.IdentityTransformer = exports.ArrayCollapseTransformer = void 0;
18
18
  __exportStar(require("./types.js"), exports);
19
19
  var ArrayCollapseTransformer_js_1 = require("./ArrayCollapseTransformer.js");
20
20
  Object.defineProperty(exports, "ArrayCollapseTransformer", { enumerable: true, get: function () { return ArrayCollapseTransformer_js_1.ArrayCollapseTransformer; } });
@@ -30,3 +30,9 @@ var NodeVisitor_js_1 = require("./NodeVisitor.js");
30
30
  Object.defineProperty(exports, "NodeVisitor", { enumerable: true, get: function () { return NodeVisitor_js_1.NodeVisitor; } });
31
31
  var TextVisitor_js_1 = require("./TextVisitor.js");
32
32
  Object.defineProperty(exports, "TextVisitor", { enumerable: true, get: function () { return TextVisitor_js_1.TextVisitor; } });
33
+ var WordCountTransformer_js_1 = require("./WordCountTransformer.js");
34
+ Object.defineProperty(exports, "WordCounterTransformer", { enumerable: true, get: function () { return WordCountTransformer_js_1.WordCounterTransformer; } });
35
+ var WordCounterVisitor_js_1 = require("./WordCounterVisitor.js");
36
+ Object.defineProperty(exports, "WordCounterVisitor", { enumerable: true, get: function () { return WordCounterVisitor_js_1.WordCounterVisitor; } });
37
+ var DocumentThinningTransformer_js_1 = require("./DocumentThinningTransformer.js");
38
+ Object.defineProperty(exports, "DocumentThinningTransformer", { enumerable: true, get: function () { return DocumentThinningTransformer_js_1.DocumentThinningTransformer; } });