document-ir 0.0.10 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
1
+ import { ArrayCollapseTransformer } from "./index.js";
2
+ import { BubbleNode, CardNode, ColumnsNode, DefinitionListNode, FigureImageNode, HighTechAlertNode, ImageNode, Node, NoteNode, QuoteNode, RedactedNode, StickerNode, VideoNode } from "./types.js";
3
/**
 * Transformer that replaces rich, presentation-oriented nodes with plain
 * paragraphs, arrays and text nodes.
 */
export declare class DocumentThinningTransformer extends ArrayCollapseTransformer {
    /** Sticker content becomes a paragraph; resolves to null when the sticker is empty. */
    protected sticker(node: StickerNode): Promise<Node | null>;
    /** Bubble content becomes a paragraph; resolves to null when the bubble is empty. */
    protected bubble(node: BubbleNode): Promise<Node | null>;
    /** Alert content becomes an array node; resolves to null when the alert is empty. */
    protected highTechAlert(node: HighTechAlertNode): Promise<Node | null>;
    /** All columns are flattened into one array node; resolves to null when there is nothing in them. */
    protected columns(node: ColumnsNode): Promise<Node | null>;
    /** Quote content becomes an array node; resolves to null when the quote is empty. */
    protected quote(node: QuoteNode): Promise<Node | null>;
    /** The image becomes a paragraph reading "inline image: " followed by its alt text. */
    protected image(node: ImageNode): Promise<Node | null>;
    /** Like `image`, with the figure's own content (when present) appended after the placeholder. */
    protected figureImage(node: FigureImageNode): Promise<Node | null>;
    /** The video becomes a paragraph reading "inline video: " followed by its alt text, plus any content. */
    protected video(node: VideoNode): Promise<Node | null>;
    /** Every definition becomes a paragraph (title, abbreviation, inline body); paragraph bodies follow it. */
    protected definitionList(node: DefinitionListNode): Promise<Node | null>;
    /** Redacted nodes are dropped: always resolves to null. */
    protected redacted(_node: RedactedNode): Promise<Node | null>;
    /** Note content becomes a paragraph prefixed with "Note: "; resolves to null when the note is empty. */
    protected note(node: NoteNode): Promise<Node | null>;
    /** Header title, body content, media and attribution of the card are gathered into one array node. */
    protected card(node: CardNode): Promise<Node | null>;
}
@@ -0,0 +1,246 @@
1
+ import { ArrayCollapseTransformer } from "./index.js";
2
/**
 * Builds the paragraph that stands in for a media node: a fixed label text
 * node followed by a text node carrying the media's alt text.
 */
function describeMedia(label, alt) {
    return {
        type: "paragraph",
        content: [
            { type: "text", text: label },
            { type: "text", text: alt },
        ],
    };
}
/** Appends every item of `items` to `target`; a missing `items` is ignored. */
function appendAll(target, items) {
    if (items) {
        for (const item of items) {
            target.push(item);
        }
    }
}
/**
 * Transforms `children` through the transformer and wraps the result in a
 * node of the given `type`. Resolves to null when there are no children or
 * when the transformer hands nothing back.
 */
async function wrapChildren(transformer, children, type) {
    if (children.length == 0) {
        return null;
    }
    const content = await transformer.chooseChildren(children);
    if (!content) {
        return null;
    }
    return { type, content };
}
/**
 * Returns the media placeholder alone when there is no caption, otherwise an
 * array node holding the placeholder followed by the transformed caption.
 */
async function mediaWithCaption(transformer, placeholder, caption) {
    if (!caption) {
        return placeholder;
    }
    const content = await transformer.chooseChildren(caption);
    return {
        type: "array",
        // Guard like the sibling methods do: spreading a missing result would throw.
        content: [placeholder, ...(content || [])],
    };
}
/**
 * Transformer that replaces rich, presentation-oriented nodes with plain
 * paragraphs, arrays and text nodes.
 */
export class DocumentThinningTransformer extends ArrayCollapseTransformer {
    /** Sticker content becomes a paragraph; null when empty. */
    async sticker(node) {
        return wrapChildren(this, node.content, "paragraph");
    }
    /** Bubble content becomes a paragraph; null when empty. */
    async bubble(node) {
        return wrapChildren(this, node.content, "paragraph");
    }
    /** Alert content becomes an array node; null when empty. */
    async highTechAlert(node) {
        return wrapChildren(this, node.content, "array");
    }
    /** All columns are flattened into a single array node; null when empty. */
    async columns(node) {
        return wrapChildren(this, node.columns.flat(), "array");
    }
    /** Quote content becomes an array node; null when empty. */
    async quote(node) {
        return wrapChildren(this, node.content, "array");
    }
    /** The image becomes a paragraph: "inline image: " followed by the alt text. */
    image(node) {
        return Promise.resolve(describeMedia("inline image: ", node.alt));
    }
    /** Image placeholder paragraph, followed by the figure's content when it has any. */
    async figureImage(node) {
        return mediaWithCaption(this, describeMedia("inline image: ", node.alt), node.content);
    }
    /** Video placeholder paragraph, followed by the video's content when it has any. */
    async video(node) {
        return mediaWithCaption(this, describeMedia("inline video: ", node.alt), node.content);
    }
    /**
     * Emits one paragraph per definition holding title, a space and the
     * abbreviation. A body that does not start with a paragraph is appended
     * inline to that paragraph; a body that starts with a paragraph is
     * emitted after it as separate nodes.
     */
    async definitionList(node) {
        const content = [];
        for (const d of node.content) {
            const defContent = [];
            appendAll(defContent, await this.chooseChildren(d.title));
            defContent.push({ type: "text", text: " " });
            appendAll(defContent, await this.chooseChildren(d.abbreviation));
            const hasBody = d.content.length > 0;
            const bodyStartsWithParagraph = hasBody && d.content[0].type == "paragraph";
            if (hasBody && !bodyStartsWithParagraph) {
                defContent.push({ type: "text", text: " " });
                appendAll(defContent, await this.chooseChildren(d.content));
            }
            content.push({
                type: "paragraph",
                content: defContent,
            });
            if (bodyStartsWithParagraph) {
                appendAll(content, await this.chooseChildren(d.content));
            }
        }
        return {
            type: "array",
            content,
        };
    }
    /** Redacted nodes are dropped entirely. */
    // deno-lint-ignore require-await
    async redacted(_node) {
        return null;
    }
    /** Note content becomes a paragraph prefixed with "Note: "; null when empty. */
    async note(node) {
        if (node.content.length == 0) {
            return null;
        }
        const content = await this.chooseChildren(node.content);
        if (!content) {
            return null;
        }
        return {
            type: "paragraph",
            content: [{ type: "text", text: "Note: " }, ...content],
        };
    }
    /**
     * Flattens a card into an array node: header title paragraph, body
     * content, media, then a single attribution paragraph (title and date
     * separated by a space).
     */
    async card(node) {
        const content = [];
        if (node.header) {
            const title = await this.chooseChildren(node.header.title);
            if (title && title.length > 0) {
                content.push({
                    type: "paragraph",
                    content: title,
                });
            }
        }
        if (node.content) {
            appendAll(content, await this.chooseChildren(node.content.content));
        }
        if (node.media) {
            for (const media of node.media.content) {
                const m = await this.choose(media);
                if (m) {
                    content.push(m);
                }
            }
        }
        if (node.attribution) {
            // Attribution pieces are inline nodes, so they are collected here
            // and emitted as one paragraph instead of being pushed directly
            // into the block-level content list.
            const attribution = [];
            if (node.attribution.title) {
                appendAll(attribution, await this.chooseChildren(node.attribution.title));
            }
            if (node.attribution.date) {
                if (attribution.length > 0) {
                    attribution.push({ type: "text", text: " " });
                }
                attribution.push({ type: "text", text: `${node.attribution.date}` });
            }
            if (attribution.length > 0) {
                content.push({
                    type: "paragraph",
                    content: attribution,
                });
            }
        }
        return {
            type: "array",
            content: content,
        };
    }
}
@@ -207,11 +207,15 @@ export class IdentityTransformer {
207
207
  await this.beforeBlock();
208
208
  const content = await this.chooseChildren(node.content);
209
209
  await this.afterBlock();
210
- return {
210
+ const result = {
211
211
  type: "header",
212
212
  content,
213
213
  level: node.level || 2,
214
214
  };
215
+ if (node.htmlId) {
216
+ result.htmlId = node.htmlId;
217
+ }
218
+ return result;
215
219
  }
216
220
  async highTechAlert(node) {
217
221
  await this.beforeBlock();
@@ -0,0 +1,6 @@
1
+ import { IdentityTransformer } from "./index.js";
2
+ import { DocumentNode } from "./types.js";
3
/**
 * Transformer that annotates a document with a word-count hierarchy built
 * from its headers.
 */
export declare class WordCounterTransformer extends IdentityTransformer {
    constructor();
    /**
     * Resolves to a shallow copy of `node` carrying a freshly computed
     * `hierarchy` property; the input document itself is not modified.
     */
    transform(node: DocumentNode): Promise<DocumentNode>;
}
@@ -0,0 +1,83 @@
1
+ import { DocumentThinningTransformer } from "./DocumentThinningTransformer.js";
2
+ import { WordCounterVisitor } from "./WordCounterVisitor.js";
3
+ import { IdentityTransformer, TextVisitor } from "./index.js";
4
/**
 * Converts a collected section (header, own nodes, child sections) into a
 * hierarchy entry. `words` counts only the section's own nodes, while
 * `totalWords` additionally includes the totals of every child section.
 */
function convertHierarchy(parent) {
    const counter = new WordCounterVisitor();
    for (const ownNode of parent.nodes) {
        counter.visit(ownNode);
    }
    const words = counter.getCount();
    const children = parent.children.map((child) => convertHierarchy(child));
    const totalWords = children.reduce((sum, entry) => sum + entry.totalWords, words);
    return {
        headerText: parent.header,
        headerId: parent.headerId,
        words,
        totalWords,
        children,
    };
}
25
/**
 * Transformer that annotates a document with a word-count hierarchy built
 * from its headers.
 */
export class WordCounterTransformer extends IdentityTransformer {
    constructor() {
        super();
    }
    /**
     * Thins a JSON copy of the document, groups its top-level nodes into
     * sections delimited by headers, and returns a shallow copy of `node`
     * with the resulting word counts stored under `hierarchy`.
     */
    async transform(node) {
        // Thin a JSON round-tripped copy so the caller's document is left alone.
        const isolated = JSON.parse(JSON.stringify(node));
        const thinned = await new DocumentThinningTransformer().transform(isolated);
        const root = {
            header: node.title,
            headerId: "title",
            nodes: [],
            children: [],
            depth: 1,
        };
        // Open sections from outermost (the root) to innermost.
        const stack = [root];
        let depth = 1;
        for (const block of thinned.content) {
            if (block.type != "header") {
                // Plain content belongs to the innermost open section.
                stack[stack.length - 1].nodes.push(block);
                continue;
            }
            if (block.level == 1) {
                // never pop the root
                continue;
            }
            if (block.level <= depth) {
                // Close every open section at the same or a deeper level.
                for (let i = stack.length - 1; i > 0; i--) {
                    if (stack[i].depth >= block.level) {
                        stack.pop();
                    }
                }
            }
            const titleReader = new TextVisitor();
            titleReader.visit(block);
            const section = {
                header: titleReader.getText(),
                depth: block.level,
                children: [],
                nodes: [],
            };
            if (block.htmlId) {
                section.headerId = block.htmlId;
            }
            stack[stack.length - 1].children.push(section);
            stack.push(section);
            depth = block.level;
        }
        // The transformer does not walk the document tree itself; it only
        // attaches the newly calculated hierarchy object.
        return {
            ...node,
            hierarchy: convertHierarchy(root),
        };
    }
}
@@ -0,0 +1,13 @@
1
+ import { NodeVisitor } from "./index.js";
2
+ import { DocumentNode, TextNode } from "./types.js";
3
/**
 * Visitor that counts the words of the text nodes it visits.
 */
export declare class WordCounterVisitor extends NodeVisitor {
    /** Running word total. */
    private count;
    /** Text fragments collected since the last flush. */
    private texts;
    constructor();
    /** Adds the words of the buffered fragments to the total and clears the buffer. */
    private countText;
    /** Flushes the buffered text when a block starts. */
    protected beforeBlock(): void;
    /** Flushes the buffered text when a block ends. */
    protected afterBlock(): void;
    /** Buffers the text of the visited node. */
    protected text(node: TextNode): void;
    /** Visits the document through the base class, then flushes the buffered text. */
    protected document(node: DocumentNode): void;
    /** Flushes the buffered text and returns the total number of words counted so far. */
    getCount(): number;
}
@@ -0,0 +1,43 @@
1
+ import { NodeVisitor } from "./index.js";
2
/**
 * Visitor that counts the words of the text nodes it visits. Text fragments
 * are buffered and joined without a separator, then counted whenever a block
 * starts or ends, so a word split over adjacent inline nodes counts once.
 */
export class WordCounterVisitor extends NodeVisitor {
    constructor() {
        super();
        // Running word total.
        Object.defineProperty(this, "count", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: 0
        });
        // Text fragments collected since the last flush.
        Object.defineProperty(this, "texts", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: []
        });
    }
    /**
     * Adds the number of words in the buffered text to the total and clears
     * the buffer. A word is a maximal run of non-whitespace characters, so
     * leading, trailing and repeated whitespace never yield phantom words.
     */
    countText() {
        if (this.texts.length > 0) {
            const words = this.texts.join("").match(/\S+/g);
            this.count += words ? words.length : 0;
            this.texts = [];
        }
    }
    /** Flushes the buffered text when a block starts. */
    beforeBlock() {
        this.countText();
    }
    /** Flushes the buffered text when a block ends. */
    afterBlock() {
        this.countText();
    }
    /** Buffers the text of the visited node. */
    text(node) {
        this.texts.push(node.text);
    }
    /** Visits the document through the base class, then flushes the buffered text. */
    document(node) {
        super.document(node);
        this.countText();
    }
    /** Flushes the buffered text and returns the total number of words counted so far. */
    getCount() {
        this.countText();
        return this.count;
    }
}
package/esm/index.d.ts CHANGED
@@ -6,3 +6,6 @@ export { WhitespaceTransformer } from "./WhitespaceTransformer.js";
6
6
  export { WhitespaceStretchingTransformer } from "./WhitespaceStretchingTransformer.js";
7
7
  export { NodeVisitor } from "./NodeVisitor.js";
8
8
  export { TextVisitor } from "./TextVisitor.js";
9
+ export { WordCounterTransformer } from "./WordCountTransformer.js";
10
+ export { WordCounterVisitor } from "./WordCounterVisitor.js";
11
+ export { DocumentThinningTransformer } from "./DocumentThinningTransformer.js";
package/esm/index.js CHANGED
@@ -6,3 +6,6 @@ export { WhitespaceTransformer } from "./WhitespaceTransformer.js";
6
6
  export { WhitespaceStretchingTransformer } from "./WhitespaceStretchingTransformer.js";
7
7
  export { NodeVisitor } from "./NodeVisitor.js";
8
8
  export { TextVisitor } from "./TextVisitor.js";
9
+ export { WordCounterTransformer } from "./WordCountTransformer.js";
10
+ export { WordCounterVisitor } from "./WordCounterVisitor.js";
11
+ export { DocumentThinningTransformer } from "./DocumentThinningTransformer.js";
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "module": "./esm/index.js",
3
3
  "main": "./script/index.js",
4
4
  "name": "document-ir",
5
- "version": "0.0.10",
5
+ "version": "0.0.12",
6
6
  "description": "Intermediate representation and transformers for documents",
7
7
  "license": "MIT",
8
8
  "repository": {
@@ -0,0 +1,16 @@
1
+ import { ArrayCollapseTransformer } from "./index.js";
2
+ import { BubbleNode, CardNode, ColumnsNode, DefinitionListNode, FigureImageNode, HighTechAlertNode, ImageNode, Node, NoteNode, QuoteNode, RedactedNode, StickerNode, VideoNode } from "./types.js";
3
/**
 * Transformer that replaces rich, presentation-oriented nodes with plain
 * paragraphs, arrays and text nodes.
 */
export declare class DocumentThinningTransformer extends ArrayCollapseTransformer {
    /** Sticker content becomes a paragraph; resolves to null when the sticker is empty. */
    protected sticker(node: StickerNode): Promise<Node | null>;
    /** Bubble content becomes a paragraph; resolves to null when the bubble is empty. */
    protected bubble(node: BubbleNode): Promise<Node | null>;
    /** Alert content becomes an array node; resolves to null when the alert is empty. */
    protected highTechAlert(node: HighTechAlertNode): Promise<Node | null>;
    /** All columns are flattened into one array node; resolves to null when there is nothing in them. */
    protected columns(node: ColumnsNode): Promise<Node | null>;
    /** Quote content becomes an array node; resolves to null when the quote is empty. */
    protected quote(node: QuoteNode): Promise<Node | null>;
    /** The image becomes a paragraph reading "inline image: " followed by its alt text. */
    protected image(node: ImageNode): Promise<Node | null>;
    /** Like `image`, with the figure's own content (when present) appended after the placeholder. */
    protected figureImage(node: FigureImageNode): Promise<Node | null>;
    /** The video becomes a paragraph reading "inline video: " followed by its alt text, plus any content. */
    protected video(node: VideoNode): Promise<Node | null>;
    /** Every definition becomes a paragraph (title, abbreviation, inline body); paragraph bodies follow it. */
    protected definitionList(node: DefinitionListNode): Promise<Node | null>;
    /** Redacted nodes are dropped: always resolves to null. */
    protected redacted(_node: RedactedNode): Promise<Node | null>;
    /** Note content becomes a paragraph prefixed with "Note: "; resolves to null when the note is empty. */
    protected note(node: NoteNode): Promise<Node | null>;
    /** Header title, body content, media and attribution of the card are gathered into one array node. */
    protected card(node: CardNode): Promise<Node | null>;
}
@@ -0,0 +1,250 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.DocumentThinningTransformer = void 0;
4
+ const index_js_1 = require("./index.js");
5
/**
 * Builds the paragraph that stands in for a media node: a fixed label text
 * node followed by a text node carrying the media's alt text.
 */
function describeMedia(label, alt) {
    return {
        type: "paragraph",
        content: [
            { type: "text", text: label },
            { type: "text", text: alt },
        ],
    };
}
/** Appends every item of `items` to `target`; a missing `items` is ignored. */
function appendAll(target, items) {
    if (items) {
        for (const item of items) {
            target.push(item);
        }
    }
}
/**
 * Transforms `children` through the transformer and wraps the result in a
 * node of the given `type`. Resolves to null when there are no children or
 * when the transformer hands nothing back.
 */
async function wrapChildren(transformer, children, type) {
    if (children.length == 0) {
        return null;
    }
    const content = await transformer.chooseChildren(children);
    if (!content) {
        return null;
    }
    return { type, content };
}
/**
 * Returns the media placeholder alone when there is no caption, otherwise an
 * array node holding the placeholder followed by the transformed caption.
 */
async function mediaWithCaption(transformer, placeholder, caption) {
    if (!caption) {
        return placeholder;
    }
    const content = await transformer.chooseChildren(caption);
    return {
        type: "array",
        // Guard like the sibling methods do: spreading a missing result would throw.
        content: [placeholder, ...(content || [])],
    };
}
/**
 * Transformer that replaces rich, presentation-oriented nodes with plain
 * paragraphs, arrays and text nodes.
 */
class DocumentThinningTransformer extends index_js_1.ArrayCollapseTransformer {
    /** Sticker content becomes a paragraph; null when empty. */
    async sticker(node) {
        return wrapChildren(this, node.content, "paragraph");
    }
    /** Bubble content becomes a paragraph; null when empty. */
    async bubble(node) {
        return wrapChildren(this, node.content, "paragraph");
    }
    /** Alert content becomes an array node; null when empty. */
    async highTechAlert(node) {
        return wrapChildren(this, node.content, "array");
    }
    /** All columns are flattened into a single array node; null when empty. */
    async columns(node) {
        return wrapChildren(this, node.columns.flat(), "array");
    }
    /** Quote content becomes an array node; null when empty. */
    async quote(node) {
        return wrapChildren(this, node.content, "array");
    }
    /** The image becomes a paragraph: "inline image: " followed by the alt text. */
    image(node) {
        return Promise.resolve(describeMedia("inline image: ", node.alt));
    }
    /** Image placeholder paragraph, followed by the figure's content when it has any. */
    async figureImage(node) {
        return mediaWithCaption(this, describeMedia("inline image: ", node.alt), node.content);
    }
    /** Video placeholder paragraph, followed by the video's content when it has any. */
    async video(node) {
        return mediaWithCaption(this, describeMedia("inline video: ", node.alt), node.content);
    }
    /**
     * Emits one paragraph per definition holding title, a space and the
     * abbreviation. A body that does not start with a paragraph is appended
     * inline to that paragraph; a body that starts with a paragraph is
     * emitted after it as separate nodes.
     */
    async definitionList(node) {
        const content = [];
        for (const d of node.content) {
            const defContent = [];
            appendAll(defContent, await this.chooseChildren(d.title));
            defContent.push({ type: "text", text: " " });
            appendAll(defContent, await this.chooseChildren(d.abbreviation));
            const hasBody = d.content.length > 0;
            const bodyStartsWithParagraph = hasBody && d.content[0].type == "paragraph";
            if (hasBody && !bodyStartsWithParagraph) {
                defContent.push({ type: "text", text: " " });
                appendAll(defContent, await this.chooseChildren(d.content));
            }
            content.push({
                type: "paragraph",
                content: defContent,
            });
            if (bodyStartsWithParagraph) {
                appendAll(content, await this.chooseChildren(d.content));
            }
        }
        return {
            type: "array",
            content,
        };
    }
    /** Redacted nodes are dropped entirely. */
    // deno-lint-ignore require-await
    async redacted(_node) {
        return null;
    }
    /** Note content becomes a paragraph prefixed with "Note: "; null when empty. */
    async note(node) {
        if (node.content.length == 0) {
            return null;
        }
        const content = await this.chooseChildren(node.content);
        if (!content) {
            return null;
        }
        return {
            type: "paragraph",
            content: [{ type: "text", text: "Note: " }, ...content],
        };
    }
    /**
     * Flattens a card into an array node: header title paragraph, body
     * content, media, then a single attribution paragraph (title and date
     * separated by a space).
     */
    async card(node) {
        const content = [];
        if (node.header) {
            const title = await this.chooseChildren(node.header.title);
            if (title && title.length > 0) {
                content.push({
                    type: "paragraph",
                    content: title,
                });
            }
        }
        if (node.content) {
            appendAll(content, await this.chooseChildren(node.content.content));
        }
        if (node.media) {
            for (const media of node.media.content) {
                const m = await this.choose(media);
                if (m) {
                    content.push(m);
                }
            }
        }
        if (node.attribution) {
            // Attribution pieces are inline nodes, so they are collected here
            // and emitted as one paragraph instead of being pushed directly
            // into the block-level content list.
            const attribution = [];
            if (node.attribution.title) {
                appendAll(attribution, await this.chooseChildren(node.attribution.title));
            }
            if (node.attribution.date) {
                if (attribution.length > 0) {
                    attribution.push({ type: "text", text: " " });
                }
                attribution.push({ type: "text", text: `${node.attribution.date}` });
            }
            if (attribution.length > 0) {
                content.push({
                    type: "paragraph",
                    content: attribution,
                });
            }
        }
        return {
            type: "array",
            content: content,
        };
    }
}
250
+ exports.DocumentThinningTransformer = DocumentThinningTransformer;
@@ -210,11 +210,15 @@ class IdentityTransformer {
210
210
  await this.beforeBlock();
211
211
  const content = await this.chooseChildren(node.content);
212
212
  await this.afterBlock();
213
- return {
213
+ const result = {
214
214
  type: "header",
215
215
  content,
216
216
  level: node.level || 2,
217
217
  };
218
+ if (node.htmlId) {
219
+ result.htmlId = node.htmlId;
220
+ }
221
+ return result;
218
222
  }
219
223
  async highTechAlert(node) {
220
224
  await this.beforeBlock();
@@ -0,0 +1,6 @@
1
+ import { IdentityTransformer } from "./index.js";
2
+ import { DocumentNode } from "./types.js";
3
/**
 * Transformer that annotates a document with a word-count hierarchy built
 * from its headers.
 */
export declare class WordCounterTransformer extends IdentityTransformer {
    constructor();
    /**
     * Resolves to a shallow copy of `node` carrying a freshly computed
     * `hierarchy` property; the input document itself is not modified.
     */
    transform(node: DocumentNode): Promise<DocumentNode>;
}
@@ -0,0 +1,87 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.WordCounterTransformer = void 0;
4
+ const DocumentThinningTransformer_js_1 = require("./DocumentThinningTransformer.js");
5
+ const WordCounterVisitor_js_1 = require("./WordCounterVisitor.js");
6
+ const index_js_1 = require("./index.js");
7
/**
 * Converts a collected section (header, own nodes, child sections) into a
 * hierarchy entry. `words` counts only the section's own nodes, while
 * `totalWords` additionally includes the totals of every child section.
 */
function convertHierarchy(parent) {
    const counter = new WordCounterVisitor_js_1.WordCounterVisitor();
    for (const ownNode of parent.nodes) {
        counter.visit(ownNode);
    }
    const words = counter.getCount();
    const children = parent.children.map((child) => convertHierarchy(child));
    const totalWords = children.reduce((sum, entry) => sum + entry.totalWords, words);
    return {
        headerText: parent.header,
        headerId: parent.headerId,
        words,
        totalWords,
        children,
    };
}
28
/**
 * Transformer that annotates a document with a word-count hierarchy built
 * from its headers.
 */
class WordCounterTransformer extends index_js_1.IdentityTransformer {
    constructor() {
        super();
    }
    /**
     * Thins a JSON copy of the document, groups its top-level nodes into
     * sections delimited by headers, and returns a shallow copy of `node`
     * with the resulting word counts stored under `hierarchy`.
     */
    async transform(node) {
        // Thin a JSON round-tripped copy so the caller's document is left alone.
        const isolated = JSON.parse(JSON.stringify(node));
        const thinned = await new DocumentThinningTransformer_js_1.DocumentThinningTransformer().transform(isolated);
        const root = {
            header: node.title,
            headerId: "title",
            nodes: [],
            children: [],
            depth: 1,
        };
        // Open sections from outermost (the root) to innermost.
        const stack = [root];
        let depth = 1;
        for (const block of thinned.content) {
            if (block.type != "header") {
                // Plain content belongs to the innermost open section.
                stack[stack.length - 1].nodes.push(block);
                continue;
            }
            if (block.level == 1) {
                // never pop the root
                continue;
            }
            if (block.level <= depth) {
                // Close every open section at the same or a deeper level.
                for (let i = stack.length - 1; i > 0; i--) {
                    if (stack[i].depth >= block.level) {
                        stack.pop();
                    }
                }
            }
            const titleReader = new index_js_1.TextVisitor();
            titleReader.visit(block);
            const section = {
                header: titleReader.getText(),
                depth: block.level,
                children: [],
                nodes: [],
            };
            if (block.htmlId) {
                section.headerId = block.htmlId;
            }
            stack[stack.length - 1].children.push(section);
            stack.push(section);
            depth = block.level;
        }
        // The transformer does not walk the document tree itself; it only
        // attaches the newly calculated hierarchy object.
        return {
            ...node,
            hierarchy: convertHierarchy(root),
        };
    }
}
87
+ exports.WordCounterTransformer = WordCounterTransformer;
@@ -0,0 +1,13 @@
1
+ import { NodeVisitor } from "./index.js";
2
+ import { DocumentNode, TextNode } from "./types.js";
3
/**
 * Visitor that counts the words of the text nodes it visits.
 */
export declare class WordCounterVisitor extends NodeVisitor {
    /** Running word total. */
    private count;
    /** Text fragments collected since the last flush. */
    private texts;
    constructor();
    /** Adds the words of the buffered fragments to the total and clears the buffer. */
    private countText;
    /** Flushes the buffered text when a block starts. */
    protected beforeBlock(): void;
    /** Flushes the buffered text when a block ends. */
    protected afterBlock(): void;
    /** Buffers the text of the visited node. */
    protected text(node: TextNode): void;
    /** Visits the document through the base class, then flushes the buffered text. */
    protected document(node: DocumentNode): void;
    /** Flushes the buffered text and returns the total number of words counted so far. */
    getCount(): number;
}
@@ -0,0 +1,47 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.WordCounterVisitor = void 0;
4
+ const index_js_1 = require("./index.js");
5
/**
 * Visitor that counts the words of the text nodes it visits. Text fragments
 * are buffered and joined without a separator, then counted whenever a block
 * starts or ends, so a word split over adjacent inline nodes counts once.
 */
class WordCounterVisitor extends index_js_1.NodeVisitor {
    constructor() {
        super();
        // Running word total.
        Object.defineProperty(this, "count", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: 0
        });
        // Text fragments collected since the last flush.
        Object.defineProperty(this, "texts", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: []
        });
    }
    /**
     * Adds the number of words in the buffered text to the total and clears
     * the buffer. A word is a maximal run of non-whitespace characters, so
     * leading, trailing and repeated whitespace never yield phantom words.
     */
    countText() {
        if (this.texts.length > 0) {
            const words = this.texts.join("").match(/\S+/g);
            this.count += words ? words.length : 0;
            this.texts = [];
        }
    }
    /** Flushes the buffered text when a block starts. */
    beforeBlock() {
        this.countText();
    }
    /** Flushes the buffered text when a block ends. */
    afterBlock() {
        this.countText();
    }
    /** Buffers the text of the visited node. */
    text(node) {
        this.texts.push(node.text);
    }
    /** Visits the document through the base class, then flushes the buffered text. */
    document(node) {
        super.document(node);
        this.countText();
    }
    /** Flushes the buffered text and returns the total number of words counted so far. */
    getCount() {
        this.countText();
        return this.count;
    }
}
47
+ exports.WordCounterVisitor = WordCounterVisitor;
package/script/index.d.ts CHANGED
@@ -6,3 +6,6 @@ export { WhitespaceTransformer } from "./WhitespaceTransformer.js";
6
6
  export { WhitespaceStretchingTransformer } from "./WhitespaceStretchingTransformer.js";
7
7
  export { NodeVisitor } from "./NodeVisitor.js";
8
8
  export { TextVisitor } from "./TextVisitor.js";
9
+ export { WordCounterTransformer } from "./WordCountTransformer.js";
10
+ export { WordCounterVisitor } from "./WordCounterVisitor.js";
11
+ export { DocumentThinningTransformer } from "./DocumentThinningTransformer.js";
package/script/index.js CHANGED
@@ -14,7 +14,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
14
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
15
  };
16
16
  Object.defineProperty(exports, "__esModule", { value: true });
17
- exports.TextVisitor = exports.NodeVisitor = exports.WhitespaceStretchingTransformer = exports.WhitespaceTransformer = exports.TextCollapseTransformer = exports.IdentityTransformer = exports.ArrayCollapseTransformer = void 0;
17
+ exports.DocumentThinningTransformer = exports.WordCounterVisitor = exports.WordCounterTransformer = exports.TextVisitor = exports.NodeVisitor = exports.WhitespaceStretchingTransformer = exports.WhitespaceTransformer = exports.TextCollapseTransformer = exports.IdentityTransformer = exports.ArrayCollapseTransformer = void 0;
18
18
  __exportStar(require("./types.js"), exports);
19
19
  var ArrayCollapseTransformer_js_1 = require("./ArrayCollapseTransformer.js");
20
20
  Object.defineProperty(exports, "ArrayCollapseTransformer", { enumerable: true, get: function () { return ArrayCollapseTransformer_js_1.ArrayCollapseTransformer; } });
@@ -30,3 +30,9 @@ var NodeVisitor_js_1 = require("./NodeVisitor.js");
30
30
  Object.defineProperty(exports, "NodeVisitor", { enumerable: true, get: function () { return NodeVisitor_js_1.NodeVisitor; } });
31
31
  var TextVisitor_js_1 = require("./TextVisitor.js");
32
32
  Object.defineProperty(exports, "TextVisitor", { enumerable: true, get: function () { return TextVisitor_js_1.TextVisitor; } });
33
+ var WordCountTransformer_js_1 = require("./WordCountTransformer.js");
34
+ Object.defineProperty(exports, "WordCounterTransformer", { enumerable: true, get: function () { return WordCountTransformer_js_1.WordCounterTransformer; } });
35
+ var WordCounterVisitor_js_1 = require("./WordCounterVisitor.js");
36
+ Object.defineProperty(exports, "WordCounterVisitor", { enumerable: true, get: function () { return WordCounterVisitor_js_1.WordCounterVisitor; } });
37
+ var DocumentThinningTransformer_js_1 = require("./DocumentThinningTransformer.js");
38
+ Object.defineProperty(exports, "DocumentThinningTransformer", { enumerable: true, get: function () { return DocumentThinningTransformer_js_1.DocumentThinningTransformer; } });