document-ir 0.0.10 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/DocumentThinningTransformer.d.ts +16 -0
- package/esm/DocumentThinningTransformer.js +246 -0
- package/esm/IdentityTransformer.js +5 -1
- package/esm/WordCountTransformer.d.ts +6 -0
- package/esm/WordCountTransformer.js +83 -0
- package/esm/WordCounterVisitor.d.ts +13 -0
- package/esm/WordCounterVisitor.js +43 -0
- package/esm/index.d.ts +3 -0
- package/esm/index.js +3 -0
- package/package.json +1 -1
- package/script/DocumentThinningTransformer.d.ts +16 -0
- package/script/DocumentThinningTransformer.js +250 -0
- package/script/IdentityTransformer.js +5 -1
- package/script/WordCountTransformer.d.ts +6 -0
- package/script/WordCountTransformer.js +87 -0
- package/script/WordCounterVisitor.d.ts +13 -0
- package/script/WordCounterVisitor.js +47 -0
- package/script/index.d.ts +3 -0
- package/script/index.js +7 -1
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { ArrayCollapseTransformer } from "./index.js";
|
|
2
|
+
import { BubbleNode, CardNode, ColumnsNode, DefinitionListNode, FigureImageNode, HighTechAlertNode, ImageNode, Node, NoteNode, QuoteNode, RedactedNode, StickerNode, VideoNode } from "./types.js";
|
|
3
|
+
/**
 * Transformer that replaces rich node types with plain `paragraph` / `array`
 * nodes (or drops them by resolving to `null`).
 */
export declare class DocumentThinningTransformer extends ArrayCollapseTransformer {
    /** Resolves to a `paragraph` holding the sticker's children, or `null` when it has none. */
    protected sticker(node: StickerNode): Promise<Node | null>;
    /** Resolves to a `paragraph` holding the bubble's children, or `null` when it has none. */
    protected bubble(node: BubbleNode): Promise<Node | null>;
    /** Resolves to an `array` holding the alert's children, or `null` when it has none. */
    protected highTechAlert(node: HighTechAlertNode): Promise<Node | null>;
    /** Flattens all columns into a single `array` node, or resolves to `null` when every column is empty. */
    protected columns(node: ColumnsNode): Promise<Node | null>;
    /** Resolves to an `array` holding the quote's children, or `null` when it has none. */
    protected quote(node: QuoteNode): Promise<Node | null>;
    /** Resolves to a `paragraph` with the text "inline image: " followed by the image's alt text. */
    protected image(node: ImageNode): Promise<Node | null>;
    /** Resolves to the same placeholder paragraph as `image`, followed by the figure's children when it has `content`. */
    protected figureImage(node: FigureImageNode): Promise<Node | null>;
    /** Resolves to a `paragraph` with the text "inline video: " and the alt text, followed by the video's children when it has `content`. */
    protected video(node: VideoNode): Promise<Node | null>;
    /** Resolves to an `array` with one paragraph per definition (title, abbreviation and, where inline, the definition body). */
    protected definitionList(node: DefinitionListNode): Promise<Node | null>;
    /** Always resolves to `null`; redacted nodes are dropped. */
    protected redacted(_node: RedactedNode): Promise<Node | null>;
    /** Resolves to a `paragraph` prefixed with the text "Note: ", or `null` when the note has no children. */
    protected note(node: NoteNode): Promise<Node | null>;
    /** Resolves to an `array` built from the card's header title, content, media and attribution. */
    protected card(node: CardNode): Promise<Node | null>;
}
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
import { ArrayCollapseTransformer } from "./index.js";
|
|
2
|
+
/**
 * Builds the two-part text paragraph used as a stand-in for a media node.
 *
 * @param label Fixed prefix, e.g. "inline image: ".
 * @param alt Alternative text taken from the media node.
 * @returns A paragraph node holding the label and the alt text as two text nodes.
 */
function mediaPlaceholder(label, alt) {
    return {
        type: "paragraph",
        content: [
            { type: "text", text: label },
            { type: "text", text: alt },
        ],
    };
}
/**
 * Passes `children` through the transformer's inherited `chooseChildren` and
 * wraps the result in a node of the requested type.
 *
 * @param transformer The transformer whose `chooseChildren` is used.
 * @param children Child nodes of the node being thinned.
 * @param type Node type of the wrapper ("paragraph" or "array").
 * @param prefix Optional nodes placed before the chosen children.
 * @returns The wrapper node, or null when `children` is empty or
 *          `chooseChildren` yields a falsy value.
 */
async function wrapChildren(transformer, children, type, prefix = []) {
    if (children.length === 0) {
        return null;
    }
    const chosen = await transformer.chooseChildren(children);
    if (!chosen) {
        return null;
    }
    return {
        type,
        content: prefix.length > 0 ? [...prefix, ...chosen] : chosen,
    };
}
/**
 * Transformer that replaces rich node types with plain `paragraph` / `array`
 * nodes, or drops them by resolving to null.
 */
export class DocumentThinningTransformer extends ArrayCollapseTransformer {
    /** Sticker -> paragraph of its children (null when empty). */
    sticker(node) {
        return wrapChildren(this, node.content, "paragraph");
    }
    /** Bubble -> paragraph of its children (null when empty). */
    bubble(node) {
        return wrapChildren(this, node.content, "paragraph");
    }
    /** High-tech alert -> array of its children (null when empty). */
    highTechAlert(node) {
        return wrapChildren(this, node.content, "array");
    }
    /** Columns -> one array containing the children of every column, in order. */
    columns(node) {
        return wrapChildren(this, node.columns.flat(), "array");
    }
    /** Quote -> array of its children (null when empty). */
    quote(node) {
        return wrapChildren(this, node.content, "array");
    }
    /** Image -> placeholder paragraph "inline image: <alt>". */
    image(node) {
        return Promise.resolve(mediaPlaceholder("inline image: ", node.alt));
    }
    /** Figure image -> placeholder paragraph, followed by the figure's children when present. */
    async figureImage(node) {
        const image = mediaPlaceholder("inline image: ", node.alt);
        if (!node.content) {
            return image;
        }
        const content = await this.chooseChildren(node.content);
        return {
            type: "array",
            // Other methods treat a falsy chooseChildren result as "nothing";
            // spreading it unguarded would throw.
            content: content ? [image, ...content] : [image],
        };
    }
    /** Video -> placeholder paragraph "inline video: <alt>", followed by its children when present. */
    async video(node) {
        const video = mediaPlaceholder("inline video: ", node.alt);
        if (!node.content) {
            return video;
        }
        const content = await this.chooseChildren(node.content);
        return {
            type: "array",
            content: content ? [video, ...content] : [video],
        };
    }
    /**
     * Definition list -> array with one paragraph per definition containing the
     * title, a space and the abbreviation. A definition body whose first node is
     * not a paragraph is appended to that paragraph after another space; a body
     * that starts with a paragraph is emitted after it as sibling nodes.
     */
    async definitionList(node) {
        const content = [];
        for (const d of node.content) {
            const defContent = [];
            const title = await this.chooseChildren(d.title);
            if (title) {
                defContent.push(...title);
            }
            defContent.push({ type: "text", text: " " });
            const abbreviation = await this.chooseChildren(d.abbreviation);
            if (abbreviation) {
                defContent.push(...abbreviation);
            }
            const hasBody = d.content.length > 0;
            const bodyIsBlock = hasBody && d.content[0].type === "paragraph";
            const def = hasBody ? await this.chooseChildren(d.content) : null;
            if (hasBody && !bodyIsBlock) {
                defContent.push({ type: "text", text: " " });
                if (def) {
                    defContent.push(...def);
                }
            }
            content.push({
                type: "paragraph",
                content: defContent,
            });
            if (bodyIsBlock && def) {
                content.push(...def);
            }
        }
        return {
            type: "array",
            content,
        };
    }
    /** Redacted nodes are dropped entirely. */
    // deno-lint-ignore require-await
    async redacted(_node) {
        return null;
    }
    /** Note -> paragraph prefixed with the text "Note: " (null when empty). */
    note(node) {
        return wrapChildren(this, node.content, "paragraph", [
            { type: "text", text: "Note: " },
        ]);
    }
    /**
     * Card -> array made of, in order: the header title as a paragraph, the
     * card content, each media item passed through the inherited `choose`, and
     * the attribution (title and date) as a single paragraph.
     */
    async card(node) {
        const content = [];
        if (node.header) {
            const title = await this.chooseChildren(node.header.title);
            if (title && title.length > 0) {
                content.push({
                    type: "paragraph",
                    content: title,
                });
            }
        }
        if (node.content) {
            const body = await this.chooseChildren(node.content.content);
            if (body) {
                content.push(...body);
            }
        }
        if (node.media) {
            for (const media of node.media.content) {
                const m = await this.choose(media);
                if (m) {
                    content.push(m);
                }
            }
        }
        if (node.attribution) {
            // Collect the inline attribution nodes here so they are emitted as
            // one paragraph instead of loose text nodes in the block-level array.
            const attribution = [];
            if (node.attribution.title) {
                const title = await this.chooseChildren(node.attribution.title);
                if (title) {
                    attribution.push(...title);
                }
            }
            if (node.attribution.date) {
                if (attribution.length > 0) {
                    attribution.push({ type: "text", text: " " });
                }
                attribution.push({ type: "text", text: `${node.attribution.date}` });
            }
            if (attribution.length > 0) {
                content.push({
                    type: "paragraph",
                    content: attribution,
                });
            }
        }
        return {
            type: "array",
            content,
        };
    }
}
|
|
@@ -207,11 +207,15 @@ export class IdentityTransformer {
|
|
|
207
207
|
await this.beforeBlock();
|
|
208
208
|
const content = await this.chooseChildren(node.content);
|
|
209
209
|
await this.afterBlock();
|
|
210
|
-
|
|
210
|
+
const result = {
|
|
211
211
|
type: "header",
|
|
212
212
|
content,
|
|
213
213
|
level: node.level || 2,
|
|
214
214
|
};
|
|
215
|
+
if (node.htmlId) {
|
|
216
|
+
result.htmlId = node.htmlId;
|
|
217
|
+
}
|
|
218
|
+
return result;
|
|
215
219
|
}
|
|
216
220
|
async highTechAlert(node) {
|
|
217
221
|
await this.beforeBlock();
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import { DocumentThinningTransformer } from "./DocumentThinningTransformer.js";
|
|
2
|
+
import { WordCounterVisitor } from "./WordCounterVisitor.js";
|
|
3
|
+
import { IdentityTransformer, TextVisitor } from "./index.js";
|
|
4
|
+
/**
 * Recursively converts a header-tree entry into a hierarchy record.
 *
 * `words` is the count reported by a WordCounterVisitor after visiting the
 * entry's own nodes; `totalWords` adds the `totalWords` of every child.
 *
 * @param parent Entry with `header`, `headerId`, `nodes` and `children`.
 * @returns `{ headerText, headerId, words, totalWords, children }`.
 */
function convertHierarchy(parent) {
    const counter = new WordCounterVisitor();
    for (const ownNode of parent.nodes) {
        counter.visit(ownNode);
    }
    const words = counter.getCount();
    const children = parent.children.map((child) => convertHierarchy(child));
    const totalWords = children.reduce((sum, child) => sum + child.totalWords, words);
    return {
        headerText: parent.header,
        headerId: parent.headerId,
        words,
        totalWords,
        children,
    };
}
|
|
25
|
+
/**
 * Transformer that returns a shallow copy of the document with an added
 * `hierarchy` property: a tree of headers with per-section word counts.
 */
export class WordCounterTransformer extends IdentityTransformer {
    constructor() {
        super();
    }
    /**
     * @param node The document; its `title` labels the root of the hierarchy.
     * @returns A shallow copy of `node` with `hierarchy` set.
     */
    async transform(node) {
        // Work on a JSON round-tripped copy so thinning never touches the input.
        const isolated = JSON.parse(JSON.stringify(node));
        const thinned = await new DocumentThinningTransformer().transform(isolated);
        const root = {
            header: node.title,
            headerId: "title",
            nodes: [],
            children: [],
            depth: 1,
        };
        // Path of open sections, root first; index 0 is never removed.
        const stack = [root];
        let depth = 1;
        for (const block of thinned.content) {
            if (block.type != "header") {
                // Non-header content belongs to the innermost open section.
                stack[stack.length - 1].nodes.push(block);
                continue;
            }
            if (block.level == 1) {
                // Level-1 headers are skipped so the root stays in place.
                continue;
            }
            if (block.level <= depth) {
                // Close sections at the same or a deeper level than this header.
                for (let i = stack.length - 1; i > 0; i -= 1) {
                    if (stack[i].depth >= block.level) {
                        stack.pop();
                    }
                }
            }
            const headerText = new TextVisitor();
            headerText.visit(block);
            const section = {
                header: headerText.getText(),
                depth: block.level,
                children: [],
                nodes: [],
            };
            if (block.htmlId) {
                section.headerId = block.htmlId;
            }
            stack[stack.length - 1].children.push(section);
            stack.push(section);
            depth = block.level;
        }
        // The document tree itself is returned untouched; only the computed
        // hierarchy is attached to the copy.
        return {
            ...node,
            hierarchy: convertHierarchy(root),
        };
    }
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { NodeVisitor } from "./index.js";
|
|
2
|
+
import { DocumentNode, TextNode } from "./types.js";
|
|
3
|
+
/** Visitor that accumulates a word count from the text nodes it visits. */
export declare class WordCounterVisitor extends NodeVisitor {
    /** Running word total. */
    private count;
    /** Text fragments buffered since the last flush. */
    private texts;
    constructor();
    /** Folds the buffered text fragments into the running total and clears the buffer. */
    private countText;
    /** Flushes the buffered text into the total. */
    protected beforeBlock(): void;
    /** Flushes the buffered text into the total. */
    protected afterBlock(): void;
    /** Buffers the text of a text node. */
    protected text(node: TextNode): void;
    /** Runs the base `document` handling, then flushes the buffered text. */
    protected document(node: DocumentNode): void;
    /** Flushes any buffered text and returns the total word count. */
    getCount(): number;
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import { NodeVisitor } from "./index.js";
|
|
2
|
+
/**
 * Visitor that accumulates a word count from the text nodes it visits.
 *
 * Text fragments are buffered and joined without a separator before counting,
 * so a word split across adjacent text nodes is counted once. The buffer is
 * flushed in `beforeBlock`, `afterBlock`, after `document`, and in `getCount`.
 */
export class WordCounterVisitor extends NodeVisitor {
    constructor() {
        super();
        // Own data properties, matching the descriptor the compiler emits for
        // class fields.
        Object.defineProperty(this, "count", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: 0
        });
        Object.defineProperty(this, "texts", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: []
        });
    }
    /**
     * Adds the number of words in the buffered text to the total and clears
     * the buffer. Words are runs of non-whitespace characters; empty pieces
     * produced by leading, trailing or repeated whitespace are not counted.
     */
    countText() {
        if (this.texts.length === 0) {
            return;
        }
        const joined = this.texts.join("");
        this.texts = [];
        this.count += joined.split(/\s+/).filter((word) => word.length > 0).length;
    }
    /** Flushes buffered text so it is not merged with the next block's text. */
    beforeBlock() {
        this.countText();
    }
    /** Flushes the text buffered while the block was visited. */
    afterBlock() {
        this.countText();
    }
    /** Buffers the text of a text node. */
    text(node) {
        this.texts.push(node.text);
    }
    /** Runs the base `document` handling, then flushes any remaining text. */
    document(node) {
        super.document(node);
        this.countText();
    }
    /** Flushes any buffered text and returns the total word count. */
    getCount() {
        this.countText();
        return this.count;
    }
}
|
package/esm/index.d.ts
CHANGED
|
@@ -6,3 +6,6 @@ export { WhitespaceTransformer } from "./WhitespaceTransformer.js";
|
|
|
6
6
|
export { WhitespaceStretchingTransformer } from "./WhitespaceStretchingTransformer.js";
|
|
7
7
|
export { NodeVisitor } from "./NodeVisitor.js";
|
|
8
8
|
export { TextVisitor } from "./TextVisitor.js";
|
|
9
|
+
export { WordCounterTransformer } from "./WordCountTransformer.js";
|
|
10
|
+
export { WordCounterVisitor } from "./WordCounterVisitor.js";
|
|
11
|
+
export { DocumentThinningTransformer } from "./DocumentThinningTransformer.js";
|
package/esm/index.js
CHANGED
|
@@ -6,3 +6,6 @@ export { WhitespaceTransformer } from "./WhitespaceTransformer.js";
|
|
|
6
6
|
export { WhitespaceStretchingTransformer } from "./WhitespaceStretchingTransformer.js";
|
|
7
7
|
export { NodeVisitor } from "./NodeVisitor.js";
|
|
8
8
|
export { TextVisitor } from "./TextVisitor.js";
|
|
9
|
+
export { WordCounterTransformer } from "./WordCountTransformer.js";
|
|
10
|
+
export { WordCounterVisitor } from "./WordCounterVisitor.js";
|
|
11
|
+
export { DocumentThinningTransformer } from "./DocumentThinningTransformer.js";
|
package/package.json
CHANGED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { ArrayCollapseTransformer } from "./index.js";
|
|
2
|
+
import { BubbleNode, CardNode, ColumnsNode, DefinitionListNode, FigureImageNode, HighTechAlertNode, ImageNode, Node, NoteNode, QuoteNode, RedactedNode, StickerNode, VideoNode } from "./types.js";
|
|
3
|
+
/**
 * Transformer that replaces rich node types with plain `paragraph` / `array`
 * nodes (or drops them by resolving to `null`).
 */
export declare class DocumentThinningTransformer extends ArrayCollapseTransformer {
    /** Resolves to a `paragraph` holding the sticker's children, or `null` when it has none. */
    protected sticker(node: StickerNode): Promise<Node | null>;
    /** Resolves to a `paragraph` holding the bubble's children, or `null` when it has none. */
    protected bubble(node: BubbleNode): Promise<Node | null>;
    /** Resolves to an `array` holding the alert's children, or `null` when it has none. */
    protected highTechAlert(node: HighTechAlertNode): Promise<Node | null>;
    /** Flattens all columns into a single `array` node, or resolves to `null` when every column is empty. */
    protected columns(node: ColumnsNode): Promise<Node | null>;
    /** Resolves to an `array` holding the quote's children, or `null` when it has none. */
    protected quote(node: QuoteNode): Promise<Node | null>;
    /** Resolves to a `paragraph` with the text "inline image: " followed by the image's alt text. */
    protected image(node: ImageNode): Promise<Node | null>;
    /** Resolves to the same placeholder paragraph as `image`, followed by the figure's children when it has `content`. */
    protected figureImage(node: FigureImageNode): Promise<Node | null>;
    /** Resolves to a `paragraph` with the text "inline video: " and the alt text, followed by the video's children when it has `content`. */
    protected video(node: VideoNode): Promise<Node | null>;
    /** Resolves to an `array` with one paragraph per definition (title, abbreviation and, where inline, the definition body). */
    protected definitionList(node: DefinitionListNode): Promise<Node | null>;
    /** Always resolves to `null`; redacted nodes are dropped. */
    protected redacted(_node: RedactedNode): Promise<Node | null>;
    /** Resolves to a `paragraph` prefixed with the text "Note: ", or `null` when the note has no children. */
    protected note(node: NoteNode): Promise<Node | null>;
    /** Resolves to an `array` built from the card's header title, content, media and attribution. */
    protected card(node: CardNode): Promise<Node | null>;
}
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.DocumentThinningTransformer = void 0;
|
|
4
|
+
const index_js_1 = require("./index.js");
|
|
5
|
+
/**
 * Builds the two-part text paragraph used as a stand-in for a media node.
 *
 * @param label Fixed prefix, e.g. "inline image: ".
 * @param alt Alternative text taken from the media node.
 * @returns A paragraph node holding the label and the alt text as two text nodes.
 */
function mediaPlaceholder(label, alt) {
    return {
        type: "paragraph",
        content: [
            { type: "text", text: label },
            { type: "text", text: alt },
        ],
    };
}
/**
 * Passes `children` through the transformer's inherited `chooseChildren` and
 * wraps the result in a node of the requested type.
 *
 * @param transformer The transformer whose `chooseChildren` is used.
 * @param children Child nodes of the node being thinned.
 * @param type Node type of the wrapper ("paragraph" or "array").
 * @param prefix Optional nodes placed before the chosen children.
 * @returns The wrapper node, or null when `children` is empty or
 *          `chooseChildren` yields a falsy value.
 */
async function wrapChildren(transformer, children, type, prefix = []) {
    if (children.length === 0) {
        return null;
    }
    const chosen = await transformer.chooseChildren(children);
    if (!chosen) {
        return null;
    }
    return {
        type,
        content: prefix.length > 0 ? [...prefix, ...chosen] : chosen,
    };
}
/**
 * Transformer that replaces rich node types with plain `paragraph` / `array`
 * nodes, or drops them by resolving to null.
 */
class DocumentThinningTransformer extends index_js_1.ArrayCollapseTransformer {
    /** Sticker -> paragraph of its children (null when empty). */
    sticker(node) {
        return wrapChildren(this, node.content, "paragraph");
    }
    /** Bubble -> paragraph of its children (null when empty). */
    bubble(node) {
        return wrapChildren(this, node.content, "paragraph");
    }
    /** High-tech alert -> array of its children (null when empty). */
    highTechAlert(node) {
        return wrapChildren(this, node.content, "array");
    }
    /** Columns -> one array containing the children of every column, in order. */
    columns(node) {
        return wrapChildren(this, node.columns.flat(), "array");
    }
    /** Quote -> array of its children (null when empty). */
    quote(node) {
        return wrapChildren(this, node.content, "array");
    }
    /** Image -> placeholder paragraph "inline image: <alt>". */
    image(node) {
        return Promise.resolve(mediaPlaceholder("inline image: ", node.alt));
    }
    /** Figure image -> placeholder paragraph, followed by the figure's children when present. */
    async figureImage(node) {
        const image = mediaPlaceholder("inline image: ", node.alt);
        if (!node.content) {
            return image;
        }
        const content = await this.chooseChildren(node.content);
        return {
            type: "array",
            // Other methods treat a falsy chooseChildren result as "nothing";
            // spreading it unguarded would throw.
            content: content ? [image, ...content] : [image],
        };
    }
    /** Video -> placeholder paragraph "inline video: <alt>", followed by its children when present. */
    async video(node) {
        const video = mediaPlaceholder("inline video: ", node.alt);
        if (!node.content) {
            return video;
        }
        const content = await this.chooseChildren(node.content);
        return {
            type: "array",
            content: content ? [video, ...content] : [video],
        };
    }
    /**
     * Definition list -> array with one paragraph per definition containing the
     * title, a space and the abbreviation. A definition body whose first node is
     * not a paragraph is appended to that paragraph after another space; a body
     * that starts with a paragraph is emitted after it as sibling nodes.
     */
    async definitionList(node) {
        const content = [];
        for (const d of node.content) {
            const defContent = [];
            const title = await this.chooseChildren(d.title);
            if (title) {
                defContent.push(...title);
            }
            defContent.push({ type: "text", text: " " });
            const abbreviation = await this.chooseChildren(d.abbreviation);
            if (abbreviation) {
                defContent.push(...abbreviation);
            }
            const hasBody = d.content.length > 0;
            const bodyIsBlock = hasBody && d.content[0].type === "paragraph";
            const def = hasBody ? await this.chooseChildren(d.content) : null;
            if (hasBody && !bodyIsBlock) {
                defContent.push({ type: "text", text: " " });
                if (def) {
                    defContent.push(...def);
                }
            }
            content.push({
                type: "paragraph",
                content: defContent,
            });
            if (bodyIsBlock && def) {
                content.push(...def);
            }
        }
        return {
            type: "array",
            content,
        };
    }
    /** Redacted nodes are dropped entirely. */
    // deno-lint-ignore require-await
    async redacted(_node) {
        return null;
    }
    /** Note -> paragraph prefixed with the text "Note: " (null when empty). */
    note(node) {
        return wrapChildren(this, node.content, "paragraph", [
            { type: "text", text: "Note: " },
        ]);
    }
    /**
     * Card -> array made of, in order: the header title as a paragraph, the
     * card content, each media item passed through the inherited `choose`, and
     * the attribution (title and date) as a single paragraph.
     */
    async card(node) {
        const content = [];
        if (node.header) {
            const title = await this.chooseChildren(node.header.title);
            if (title && title.length > 0) {
                content.push({
                    type: "paragraph",
                    content: title,
                });
            }
        }
        if (node.content) {
            const body = await this.chooseChildren(node.content.content);
            if (body) {
                content.push(...body);
            }
        }
        if (node.media) {
            for (const media of node.media.content) {
                const m = await this.choose(media);
                if (m) {
                    content.push(m);
                }
            }
        }
        if (node.attribution) {
            // Collect the inline attribution nodes here so they are emitted as
            // one paragraph instead of loose text nodes in the block-level array.
            const attribution = [];
            if (node.attribution.title) {
                const title = await this.chooseChildren(node.attribution.title);
                if (title) {
                    attribution.push(...title);
                }
            }
            if (node.attribution.date) {
                if (attribution.length > 0) {
                    attribution.push({ type: "text", text: " " });
                }
                attribution.push({ type: "text", text: `${node.attribution.date}` });
            }
            if (attribution.length > 0) {
                content.push({
                    type: "paragraph",
                    content: attribution,
                });
            }
        }
        return {
            type: "array",
            content,
        };
    }
}
|
|
250
|
+
exports.DocumentThinningTransformer = DocumentThinningTransformer;
|
|
@@ -210,11 +210,15 @@ class IdentityTransformer {
|
|
|
210
210
|
await this.beforeBlock();
|
|
211
211
|
const content = await this.chooseChildren(node.content);
|
|
212
212
|
await this.afterBlock();
|
|
213
|
-
|
|
213
|
+
const result = {
|
|
214
214
|
type: "header",
|
|
215
215
|
content,
|
|
216
216
|
level: node.level || 2,
|
|
217
217
|
};
|
|
218
|
+
if (node.htmlId) {
|
|
219
|
+
result.htmlId = node.htmlId;
|
|
220
|
+
}
|
|
221
|
+
return result;
|
|
218
222
|
}
|
|
219
223
|
async highTechAlert(node) {
|
|
220
224
|
await this.beforeBlock();
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.WordCounterTransformer = void 0;
|
|
4
|
+
const DocumentThinningTransformer_js_1 = require("./DocumentThinningTransformer.js");
|
|
5
|
+
const WordCounterVisitor_js_1 = require("./WordCounterVisitor.js");
|
|
6
|
+
const index_js_1 = require("./index.js");
|
|
7
|
+
/**
 * Recursively converts a header-tree entry into a hierarchy record.
 *
 * `words` is the count reported by a WordCounterVisitor after visiting the
 * entry's own nodes; `totalWords` adds the `totalWords` of every child.
 *
 * @param parent Entry with `header`, `headerId`, `nodes` and `children`.
 * @returns `{ headerText, headerId, words, totalWords, children }`.
 */
function convertHierarchy(parent) {
    const counter = new WordCounterVisitor_js_1.WordCounterVisitor();
    for (const ownNode of parent.nodes) {
        counter.visit(ownNode);
    }
    const words = counter.getCount();
    const children = parent.children.map((child) => convertHierarchy(child));
    const totalWords = children.reduce((sum, child) => sum + child.totalWords, words);
    return {
        headerText: parent.header,
        headerId: parent.headerId,
        words,
        totalWords,
        children,
    };
}
|
|
28
|
+
/**
 * Transformer that returns a shallow copy of the document with an added
 * `hierarchy` property: a tree of headers with per-section word counts.
 */
class WordCounterTransformer extends index_js_1.IdentityTransformer {
    constructor() {
        super();
    }
    /**
     * @param node The document; its `title` labels the root of the hierarchy.
     * @returns A shallow copy of `node` with `hierarchy` set.
     */
    async transform(node) {
        // Work on a JSON round-tripped copy so thinning never touches the input.
        const isolated = JSON.parse(JSON.stringify(node));
        const thinned = await new DocumentThinningTransformer_js_1.DocumentThinningTransformer().transform(isolated);
        const root = {
            header: node.title,
            headerId: "title",
            nodes: [],
            children: [],
            depth: 1,
        };
        // Path of open sections, root first; index 0 is never removed.
        const stack = [root];
        let depth = 1;
        for (const block of thinned.content) {
            if (block.type != "header") {
                // Non-header content belongs to the innermost open section.
                stack[stack.length - 1].nodes.push(block);
                continue;
            }
            if (block.level == 1) {
                // Level-1 headers are skipped so the root stays in place.
                continue;
            }
            if (block.level <= depth) {
                // Close sections at the same or a deeper level than this header.
                for (let i = stack.length - 1; i > 0; i -= 1) {
                    if (stack[i].depth >= block.level) {
                        stack.pop();
                    }
                }
            }
            const headerText = new index_js_1.TextVisitor();
            headerText.visit(block);
            const section = {
                header: headerText.getText(),
                depth: block.level,
                children: [],
                nodes: [],
            };
            if (block.htmlId) {
                section.headerId = block.htmlId;
            }
            stack[stack.length - 1].children.push(section);
            stack.push(section);
            depth = block.level;
        }
        // The document tree itself is returned untouched; only the computed
        // hierarchy is attached to the copy.
        return {
            ...node,
            hierarchy: convertHierarchy(root),
        };
    }
}
|
|
87
|
+
exports.WordCounterTransformer = WordCounterTransformer;
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { NodeVisitor } from "./index.js";
|
|
2
|
+
import { DocumentNode, TextNode } from "./types.js";
|
|
3
|
+
/** Visitor that accumulates a word count from the text nodes it visits. */
export declare class WordCounterVisitor extends NodeVisitor {
    /** Running word total. */
    private count;
    /** Text fragments buffered since the last flush. */
    private texts;
    constructor();
    /** Folds the buffered text fragments into the running total and clears the buffer. */
    private countText;
    /** Flushes the buffered text into the total. */
    protected beforeBlock(): void;
    /** Flushes the buffered text into the total. */
    protected afterBlock(): void;
    /** Buffers the text of a text node. */
    protected text(node: TextNode): void;
    /** Runs the base `document` handling, then flushes the buffered text. */
    protected document(node: DocumentNode): void;
    /** Flushes any buffered text and returns the total word count. */
    getCount(): number;
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.WordCounterVisitor = void 0;
|
|
4
|
+
const index_js_1 = require("./index.js");
|
|
5
|
+
/**
 * Visitor that accumulates a word count from the text nodes it visits.
 *
 * Text fragments are buffered and joined without a separator before counting,
 * so a word split across adjacent text nodes is counted once. The buffer is
 * flushed in `beforeBlock`, `afterBlock`, after `document`, and in `getCount`.
 */
class WordCounterVisitor extends index_js_1.NodeVisitor {
    constructor() {
        super();
        // Own data properties, matching the descriptor the compiler emits for
        // class fields.
        Object.defineProperty(this, "count", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: 0
        });
        Object.defineProperty(this, "texts", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: []
        });
    }
    /**
     * Adds the number of words in the buffered text to the total and clears
     * the buffer. Words are runs of non-whitespace characters; empty pieces
     * produced by leading, trailing or repeated whitespace are not counted.
     */
    countText() {
        if (this.texts.length === 0) {
            return;
        }
        const joined = this.texts.join("");
        this.texts = [];
        this.count += joined.split(/\s+/).filter((word) => word.length > 0).length;
    }
    /** Flushes buffered text so it is not merged with the next block's text. */
    beforeBlock() {
        this.countText();
    }
    /** Flushes the text buffered while the block was visited. */
    afterBlock() {
        this.countText();
    }
    /** Buffers the text of a text node. */
    text(node) {
        this.texts.push(node.text);
    }
    /** Runs the base `document` handling, then flushes any remaining text. */
    document(node) {
        super.document(node);
        this.countText();
    }
    /** Flushes any buffered text and returns the total word count. */
    getCount() {
        this.countText();
        return this.count;
    }
}
|
|
47
|
+
exports.WordCounterVisitor = WordCounterVisitor;
|
package/script/index.d.ts
CHANGED
|
@@ -6,3 +6,6 @@ export { WhitespaceTransformer } from "./WhitespaceTransformer.js";
|
|
|
6
6
|
export { WhitespaceStretchingTransformer } from "./WhitespaceStretchingTransformer.js";
|
|
7
7
|
export { NodeVisitor } from "./NodeVisitor.js";
|
|
8
8
|
export { TextVisitor } from "./TextVisitor.js";
|
|
9
|
+
export { WordCounterTransformer } from "./WordCountTransformer.js";
|
|
10
|
+
export { WordCounterVisitor } from "./WordCounterVisitor.js";
|
|
11
|
+
export { DocumentThinningTransformer } from "./DocumentThinningTransformer.js";
|
package/script/index.js
CHANGED
|
@@ -14,7 +14,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
|
14
14
|
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
15
|
};
|
|
16
16
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
-
exports.TextVisitor = exports.NodeVisitor = exports.WhitespaceStretchingTransformer = exports.WhitespaceTransformer = exports.TextCollapseTransformer = exports.IdentityTransformer = exports.ArrayCollapseTransformer = void 0;
|
|
17
|
+
exports.DocumentThinningTransformer = exports.WordCounterVisitor = exports.WordCounterTransformer = exports.TextVisitor = exports.NodeVisitor = exports.WhitespaceStretchingTransformer = exports.WhitespaceTransformer = exports.TextCollapseTransformer = exports.IdentityTransformer = exports.ArrayCollapseTransformer = void 0;
|
|
18
18
|
__exportStar(require("./types.js"), exports);
|
|
19
19
|
var ArrayCollapseTransformer_js_1 = require("./ArrayCollapseTransformer.js");
|
|
20
20
|
Object.defineProperty(exports, "ArrayCollapseTransformer", { enumerable: true, get: function () { return ArrayCollapseTransformer_js_1.ArrayCollapseTransformer; } });
|
|
@@ -30,3 +30,9 @@ var NodeVisitor_js_1 = require("./NodeVisitor.js");
|
|
|
30
30
|
Object.defineProperty(exports, "NodeVisitor", { enumerable: true, get: function () { return NodeVisitor_js_1.NodeVisitor; } });
|
|
31
31
|
var TextVisitor_js_1 = require("./TextVisitor.js");
|
|
32
32
|
Object.defineProperty(exports, "TextVisitor", { enumerable: true, get: function () { return TextVisitor_js_1.TextVisitor; } });
|
|
33
|
+
var WordCountTransformer_js_1 = require("./WordCountTransformer.js");
|
|
34
|
+
Object.defineProperty(exports, "WordCounterTransformer", { enumerable: true, get: function () { return WordCountTransformer_js_1.WordCounterTransformer; } });
|
|
35
|
+
var WordCounterVisitor_js_1 = require("./WordCounterVisitor.js");
|
|
36
|
+
Object.defineProperty(exports, "WordCounterVisitor", { enumerable: true, get: function () { return WordCounterVisitor_js_1.WordCounterVisitor; } });
|
|
37
|
+
var DocumentThinningTransformer_js_1 = require("./DocumentThinningTransformer.js");
|
|
38
|
+
Object.defineProperty(exports, "DocumentThinningTransformer", { enumerable: true, get: function () { return DocumentThinningTransformer_js_1.DocumentThinningTransformer; } });
|