document-ir 0.0.11 → 0.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
1
+ import { ArrayCollapseTransformer } from "./index.js";
2
+ import { BubbleNode, CardNode, ColumnsNode, DefinitionListNode, FigureImageNode, HighTechAlertNode, ImageNode, Node, NoteNode, QuoteNode, RedactedNode, StickerNode, VideoNode } from "./types.js";
3
+ export declare class DocumentThinningTransformer extends ArrayCollapseTransformer {
4
+ protected sticker(node: StickerNode): Promise<Node | null>;
5
+ protected bubble(node: BubbleNode): Promise<Node | null>;
6
+ protected highTechAlert(node: HighTechAlertNode): Promise<Node | null>;
7
+ protected columns(node: ColumnsNode): Promise<Node | null>;
8
+ protected quote(node: QuoteNode): Promise<Node | null>;
9
+ protected image(node: ImageNode): Promise<Node | null>;
10
+ protected figureImage(node: FigureImageNode): Promise<Node | null>;
11
+ protected video(node: VideoNode): Promise<Node | null>;
12
+ protected definitionList(node: DefinitionListNode): Promise<Node | null>;
13
+ protected redacted(_node: RedactedNode): Promise<Node | null>;
14
+ protected note(node: NoteNode): Promise<Node | null>;
15
+ protected card(node: CardNode): Promise<Node | null>;
16
+ }
@@ -0,0 +1,246 @@
1
+ import { ArrayCollapseTransformer } from "./index.js";
2
+ export class DocumentThinningTransformer extends ArrayCollapseTransformer {
3
+ async sticker(node) {
4
+ if (node.content.length == 0) {
5
+ return null;
6
+ }
7
+ const content = await this.chooseChildren(node.content);
8
+ if (!content) {
9
+ return null;
10
+ }
11
+ return {
12
+ type: "paragraph",
13
+ content,
14
+ };
15
+ }
16
+ async bubble(node) {
17
+ if (node.content.length == 0) {
18
+ return null;
19
+ }
20
+ const content = await this.chooseChildren(node.content);
21
+ if (!content) {
22
+ return null;
23
+ }
24
+ return {
25
+ type: "paragraph",
26
+ content,
27
+ };
28
+ }
29
+ async highTechAlert(node) {
30
+ if (node.content.length == 0) {
31
+ return null;
32
+ }
33
+ const content = await this.chooseChildren(node.content);
34
+ if (!content) {
35
+ return null;
36
+ }
37
+ return {
38
+ type: "array",
39
+ content,
40
+ };
41
+ }
42
+ async columns(node) {
43
+ const flattened = node.columns.flat();
44
+ if (flattened.length == 0) {
45
+ return null;
46
+ }
47
+ const content = await this.chooseChildren(flattened);
48
+ if (!content) {
49
+ return null;
50
+ }
51
+ return {
52
+ type: "array",
53
+ content,
54
+ };
55
+ }
56
+ async quote(node) {
57
+ if (node.content.length == 0) {
58
+ return null;
59
+ }
60
+ const content = await this.chooseChildren(node.content);
61
+ if (!content) {
62
+ return null;
63
+ }
64
+ return {
65
+ type: "array",
66
+ content,
67
+ };
68
+ }
69
+ image(node) {
70
+ return Promise.resolve({
71
+ type: "paragraph",
72
+ content: [{
73
+ type: "text",
74
+ text: "inline image: ",
75
+ }, {
76
+ type: "text",
77
+ text: node.alt,
78
+ }],
79
+ });
80
+ }
81
+ async figureImage(node) {
82
+ const image = {
83
+ type: "paragraph",
84
+ content: [{
85
+ type: "text",
86
+ text: "inline image: ",
87
+ }, {
88
+ type: "text",
89
+ text: node.alt,
90
+ }],
91
+ };
92
+ if (node.content) {
93
+ const content = await this.chooseChildren(node.content);
94
+ return {
95
+ type: "array",
96
+ content: [
97
+ image,
98
+ ...content,
99
+ ],
100
+ };
101
+ }
102
+ else {
103
+ return image;
104
+ }
105
+ }
106
+ async video(node) {
107
+ const video = {
108
+ type: "paragraph",
109
+ content: [{
110
+ type: "text",
111
+ text: "inline video: ",
112
+ }, {
113
+ type: "text",
114
+ text: node.alt,
115
+ }],
116
+ };
117
+ if (node.content) {
118
+ const content = await this.chooseChildren(node.content);
119
+ return {
120
+ type: "array",
121
+ content: [
122
+ video,
123
+ ...content,
124
+ ],
125
+ };
126
+ }
127
+ else {
128
+ return video;
129
+ }
130
+ }
131
+ async definitionList(node) {
132
+ const content = [];
133
+ for (const d of node.content) {
134
+ const defContent = [];
135
+ const title = await this.chooseChildren(d.title);
136
+ if (title) {
137
+ for (const n of title) {
138
+ defContent.push(n);
139
+ }
140
+ }
141
+ defContent.push({ type: "text", text: " " });
142
+ const abbreviation = await this.chooseChildren(d.abbreviation);
143
+ if (abbreviation) {
144
+ for (const n of abbreviation) {
145
+ defContent.push(n);
146
+ }
147
+ }
148
+ if (d.content.length > 0 && d.content[0].type != "paragraph") {
149
+ defContent.push({ type: "text", text: " " });
150
+ const def = await this.chooseChildren(d.content);
151
+ if (def) {
152
+ for (const n of def) {
153
+ defContent.push(n);
154
+ }
155
+ }
156
+ }
157
+ content.push({
158
+ type: "paragraph",
159
+ content: defContent,
160
+ });
161
+ if (d.content.length > 0 && d.content[0].type == "paragraph") {
162
+ const def = await this.chooseChildren(d.content);
163
+ if (def) {
164
+ for (const n of def) {
165
+ content.push(n);
166
+ }
167
+ }
168
+ }
169
+ }
170
+ return {
171
+ type: "array",
172
+ content,
173
+ };
174
+ }
175
+ // deno-lint-ignore require-await
176
+ async redacted(_node) {
177
+ return null;
178
+ }
179
+ async note(node) {
180
+ if (node.content.length == 0) {
181
+ return null;
182
+ }
183
+ const content = await this.chooseChildren(node.content);
184
+ if (!content) {
185
+ return null;
186
+ }
187
+ return {
188
+ type: "paragraph",
189
+ content: [
190
+ { type: "text", text: "Note: " },
191
+ ...content,
192
+ ],
193
+ };
194
+ }
195
+ async card(node) {
196
+ const content = [];
197
+ if (node.header) {
198
+ const title = await this.chooseChildren(node.header.title);
199
+ if (title.length > 0) {
200
+ content.push({
201
+ type: "paragraph",
202
+ content: title,
203
+ });
204
+ }
205
+ }
206
+ if (node.content) {
207
+ const card = await this.chooseChildren(node.content.content);
208
+ for (const c of card) {
209
+ content.push(c);
210
+ }
211
+ }
212
+ if (node.media) {
213
+ for (const media of node.media.content) {
214
+ const m = await this.choose(media);
215
+ if (m) {
216
+ content.push(m);
217
+ }
218
+ }
219
+ }
220
+ if (node.attribution) {
221
+ const attribution = [];
222
+ if (node.attribution.title) {
223
+ const title = await this.chooseChildren(node.attribution.title);
224
+ for (const n of title) {
225
+ content.push(n);
226
+ }
227
+ }
228
+ if (node.attribution.date) {
229
+ if (content.length > 0) {
230
+ content.push({ type: "text", text: " " });
231
+ }
232
+ content.push({ type: "text", text: `${node.attribution.date}` });
233
+ }
234
+ if (attribution.length > 0) {
235
+ content.push({
236
+ type: "paragraph",
237
+ content: attribution,
238
+ });
239
+ }
240
+ }
241
+ return {
242
+ type: "array",
243
+ content: content,
244
+ };
245
+ }
246
+ }
@@ -623,26 +623,32 @@ export class IdentityTransformer {
623
623
  }
624
624
  async date(node) {
625
625
  await this.beforeInline();
626
+ const content = node.content && await this.chooseChildren(node.content);
626
627
  await this.afterInline();
627
628
  return {
628
629
  type: "date",
629
- isoDate: `${node.isoDate}`,
630
+ isoDate: node.isoDate,
631
+ content,
630
632
  };
631
633
  }
632
634
  async time(node) {
633
635
  await this.beforeInline();
636
+ const content = node.content && await this.chooseChildren(node.content);
634
637
  await this.afterInline();
635
638
  return {
636
639
  type: "time",
637
- isoTime: `${node.isoTime}`,
640
+ isoTime: node.isoTime,
641
+ content,
638
642
  };
639
643
  }
640
644
  async datetime(node) {
641
645
  await this.beforeInline();
646
+ const content = node.content && await this.chooseChildren(node.content);
642
647
  await this.afterInline();
643
648
  return {
644
649
  type: "datetime",
645
- iso8601: `${node.iso8601}`,
650
+ iso8601: node.iso8601,
651
+ content,
646
652
  };
647
653
  }
648
654
  async subText(node) {
@@ -247,22 +247,19 @@ export class NodeVisitor {
247
247
  }
248
248
  }
249
249
  date(node) {
250
- this.text({
251
- type: "text",
252
- text: node.isoDate,
253
- });
250
+ this.beforeInline();
251
+ this.chooseChildren(node.content);
252
+ this.afterInline();
254
253
  }
255
254
  time(node) {
256
- this.text({
257
- type: "text",
258
- text: node.isoTime,
259
- });
255
+ this.beforeInline();
256
+ this.chooseChildren(node.content);
257
+ this.afterInline();
260
258
  }
261
259
  datetime(node) {
262
- this.text({
263
- type: "text",
264
- text: node.iso8601,
265
- });
260
+ this.beforeInline();
261
+ this.chooseChildren(node.content);
262
+ this.afterInline();
266
263
  }
267
264
  subText(node) {
268
265
  this.beforeInline();
@@ -0,0 +1,6 @@
1
+ import { IdentityTransformer } from "./index.js";
2
+ import { DocumentNode } from "./types.js";
3
+ export declare class WordCounterTransformer extends IdentityTransformer {
4
+ constructor();
5
+ transform(node: DocumentNode): Promise<DocumentNode>;
6
+ }
@@ -0,0 +1,83 @@
1
+ import { DocumentThinningTransformer } from "./DocumentThinningTransformer.js";
2
+ import { WordCounterVisitor } from "./WordCounterVisitor.js";
3
+ import { IdentityTransformer, TextVisitor } from "./index.js";
4
+ function convertHierarchy(parent) {
5
+ const docHierarchy = {
6
+ headerText: parent.header,
7
+ headerId: parent.headerId,
8
+ words: 0,
9
+ totalWords: 0,
10
+ children: [],
11
+ };
12
+ const visitor = new WordCounterVisitor();
13
+ for (const node of parent.nodes) {
14
+ visitor.visit(node);
15
+ }
16
+ docHierarchy.words = visitor.getCount();
17
+ docHierarchy.totalWords = docHierarchy.words;
18
+ for (const child of parent.children) {
19
+ const childHierarchy = convertHierarchy(child);
20
+ docHierarchy.children.push(childHierarchy);
21
+ docHierarchy.totalWords += childHierarchy.totalWords;
22
+ }
23
+ return docHierarchy;
24
+ }
25
+ export class WordCounterTransformer extends IdentityTransformer {
26
+ constructor() {
27
+ super();
28
+ }
29
+ async transform(node) {
30
+ // Isolate it
31
+ const jsonNode = JSON.parse(JSON.stringify(node));
32
+ const thinned = await new DocumentThinningTransformer().transform(jsonNode);
33
+ const stack = [];
34
+ const root = {
35
+ header: node.title,
36
+ headerId: "title",
37
+ nodes: [],
38
+ children: [],
39
+ depth: 1,
40
+ };
41
+ stack.push(root);
42
+ let depth = 1;
43
+ for (const node of thinned.content) {
44
+ if (node.type == "header") {
45
+ if (node.level == 1) {
46
+ // never pop the root
47
+ continue;
48
+ }
49
+ else if (node.level <= depth) {
50
+ for (let i = stack.length - 1; i > 0; i--) {
51
+ if (stack[i].depth >= node.level) {
52
+ stack.pop();
53
+ }
54
+ }
55
+ }
56
+ const visitor = new TextVisitor();
57
+ visitor.visit(node);
58
+ const h = {
59
+ header: visitor.getText(),
60
+ depth: node.level,
61
+ children: [],
62
+ nodes: [],
63
+ };
64
+ if (node.htmlId) {
65
+ h.headerId = node.htmlId;
66
+ }
67
+ stack[stack.length - 1].children.push(h);
68
+ stack.push(h);
69
+ depth = node.level;
70
+ }
71
+ else {
72
+ stack[stack.length - 1].nodes.push(node);
73
+ }
74
+ }
75
+ // The transformer does not actually walk through the document tree
76
+ // We just append a newly calculated hierarchy object
77
+ const doc = {
78
+ ...node,
79
+ };
80
+ doc.hierarchy = convertHierarchy(root);
81
+ return doc;
82
+ }
83
+ }
@@ -0,0 +1,13 @@
1
+ import { NodeVisitor } from "./index.js";
2
+ import { DocumentNode, TextNode } from "./types.js";
3
+ export declare class WordCounterVisitor extends NodeVisitor {
4
+ private count;
5
+ private texts;
6
+ constructor();
7
+ private countText;
8
+ protected beforeBlock(): void;
9
+ protected afterBlock(): void;
10
+ protected text(node: TextNode): void;
11
+ protected document(node: DocumentNode): void;
12
+ getCount(): number;
13
+ }
@@ -0,0 +1,43 @@
1
+ import { NodeVisitor } from "./index.js";
2
+ export class WordCounterVisitor extends NodeVisitor {
3
+ constructor() {
4
+ super();
5
+ Object.defineProperty(this, "count", {
6
+ enumerable: true,
7
+ configurable: true,
8
+ writable: true,
9
+ value: void 0
10
+ });
11
+ Object.defineProperty(this, "texts", {
12
+ enumerable: true,
13
+ configurable: true,
14
+ writable: true,
15
+ value: void 0
16
+ });
17
+ this.count = 0;
18
+ this.texts = [];
19
+ }
20
+ countText() {
21
+ if (this.texts.length > 0) {
22
+ this.count += this.texts.join("").split(" ").length;
23
+ this.texts = [];
24
+ }
25
+ }
26
+ beforeBlock() {
27
+ this.countText();
28
+ }
29
+ afterBlock() {
30
+ this.countText();
31
+ }
32
+ text(node) {
33
+ this.texts.push(node.text);
34
+ }
35
+ document(node) {
36
+ super.document(node);
37
+ this.countText();
38
+ }
39
+ getCount() {
40
+ this.countText();
41
+ return this.count;
42
+ }
43
+ }
package/esm/index.d.ts CHANGED
@@ -6,3 +6,6 @@ export { WhitespaceTransformer } from "./WhitespaceTransformer.js";
6
6
  export { WhitespaceStretchingTransformer } from "./WhitespaceStretchingTransformer.js";
7
7
  export { NodeVisitor } from "./NodeVisitor.js";
8
8
  export { TextVisitor } from "./TextVisitor.js";
9
+ export { WordCounterTransformer } from "./WordCountTransformer.js";
10
+ export { WordCounterVisitor } from "./WordCounterVisitor.js";
11
+ export { DocumentThinningTransformer } from "./DocumentThinningTransformer.js";
package/esm/index.js CHANGED
@@ -6,3 +6,6 @@ export { WhitespaceTransformer } from "./WhitespaceTransformer.js";
6
6
  export { WhitespaceStretchingTransformer } from "./WhitespaceStretchingTransformer.js";
7
7
  export { NodeVisitor } from "./NodeVisitor.js";
8
8
  export { TextVisitor } from "./TextVisitor.js";
9
+ export { WordCounterTransformer } from "./WordCountTransformer.js";
10
+ export { WordCounterVisitor } from "./WordCounterVisitor.js";
11
+ export { DocumentThinningTransformer } from "./DocumentThinningTransformer.js";
package/esm/types.d.ts CHANGED
@@ -301,14 +301,17 @@ export interface SubTextNode {
301
301
  export interface DateNode {
302
302
  type: "date";
303
303
  isoDate: string;
304
+ content: Node[];
304
305
  }
305
306
  export interface TimeNode {
306
307
  type: "time";
307
308
  isoTime: string;
309
+ content: Node[];
308
310
  }
309
311
  export interface DateTimeNode {
310
312
  type: "datetime";
311
313
  iso8601: string;
314
+ content: Node[];
312
315
  }
313
316
  export type Node = ArrayNode | BlockNode | BlockQuoteNode | BoldNode | BreakNode | BubbleNode | CardNode | CenterNode | CodeNode | ColumnsNode | DefinitionNode | DefinitionListNode | DefinitionReferenceNode | EmbedNode | EmojiNode | FigureNode | FigureCaptionNode | FigureImageNode | FormattedTextNode | HeaderNode | HighTechAlertNode | HorizontalRuleNode | ImageNode | ItalicNode | LinkNode | ListNode | NoteNode | ParagraphNode | QuoteNode | RedactedNode | RegionNode | ScriptNode | SecretNode | SmallerNode | StickerNode | StrikeThroughNode | TextNode | TableNode | SocialNode | UnderlineNode | VideoNode | DateNode | TimeNode | DateTimeNode | SuperTextNode | SubTextNode | WarningNode;
314
317
  export interface DocumentMeta {
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "module": "./esm/index.js",
3
3
  "main": "./script/index.js",
4
4
  "name": "document-ir",
5
- "version": "0.0.11",
5
+ "version": "0.0.13",
6
6
  "description": "Intermediate representation and transformers for documents",
7
7
  "license": "MIT",
8
8
  "repository": {
@@ -0,0 +1,16 @@
1
+ import { ArrayCollapseTransformer } from "./index.js";
2
+ import { BubbleNode, CardNode, ColumnsNode, DefinitionListNode, FigureImageNode, HighTechAlertNode, ImageNode, Node, NoteNode, QuoteNode, RedactedNode, StickerNode, VideoNode } from "./types.js";
3
+ export declare class DocumentThinningTransformer extends ArrayCollapseTransformer {
4
+ protected sticker(node: StickerNode): Promise<Node | null>;
5
+ protected bubble(node: BubbleNode): Promise<Node | null>;
6
+ protected highTechAlert(node: HighTechAlertNode): Promise<Node | null>;
7
+ protected columns(node: ColumnsNode): Promise<Node | null>;
8
+ protected quote(node: QuoteNode): Promise<Node | null>;
9
+ protected image(node: ImageNode): Promise<Node | null>;
10
+ protected figureImage(node: FigureImageNode): Promise<Node | null>;
11
+ protected video(node: VideoNode): Promise<Node | null>;
12
+ protected definitionList(node: DefinitionListNode): Promise<Node | null>;
13
+ protected redacted(_node: RedactedNode): Promise<Node | null>;
14
+ protected note(node: NoteNode): Promise<Node | null>;
15
+ protected card(node: CardNode): Promise<Node | null>;
16
+ }
@@ -0,0 +1,250 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.DocumentThinningTransformer = void 0;
4
+ const index_js_1 = require("./index.js");
5
+ class DocumentThinningTransformer extends index_js_1.ArrayCollapseTransformer {
6
+ async sticker(node) {
7
+ if (node.content.length == 0) {
8
+ return null;
9
+ }
10
+ const content = await this.chooseChildren(node.content);
11
+ if (!content) {
12
+ return null;
13
+ }
14
+ return {
15
+ type: "paragraph",
16
+ content,
17
+ };
18
+ }
19
+ async bubble(node) {
20
+ if (node.content.length == 0) {
21
+ return null;
22
+ }
23
+ const content = await this.chooseChildren(node.content);
24
+ if (!content) {
25
+ return null;
26
+ }
27
+ return {
28
+ type: "paragraph",
29
+ content,
30
+ };
31
+ }
32
+ async highTechAlert(node) {
33
+ if (node.content.length == 0) {
34
+ return null;
35
+ }
36
+ const content = await this.chooseChildren(node.content);
37
+ if (!content) {
38
+ return null;
39
+ }
40
+ return {
41
+ type: "array",
42
+ content,
43
+ };
44
+ }
45
+ async columns(node) {
46
+ const flattened = node.columns.flat();
47
+ if (flattened.length == 0) {
48
+ return null;
49
+ }
50
+ const content = await this.chooseChildren(flattened);
51
+ if (!content) {
52
+ return null;
53
+ }
54
+ return {
55
+ type: "array",
56
+ content,
57
+ };
58
+ }
59
+ async quote(node) {
60
+ if (node.content.length == 0) {
61
+ return null;
62
+ }
63
+ const content = await this.chooseChildren(node.content);
64
+ if (!content) {
65
+ return null;
66
+ }
67
+ return {
68
+ type: "array",
69
+ content,
70
+ };
71
+ }
72
+ image(node) {
73
+ return Promise.resolve({
74
+ type: "paragraph",
75
+ content: [{
76
+ type: "text",
77
+ text: "inline image: ",
78
+ }, {
79
+ type: "text",
80
+ text: node.alt,
81
+ }],
82
+ });
83
+ }
84
+ async figureImage(node) {
85
+ const image = {
86
+ type: "paragraph",
87
+ content: [{
88
+ type: "text",
89
+ text: "inline image: ",
90
+ }, {
91
+ type: "text",
92
+ text: node.alt,
93
+ }],
94
+ };
95
+ if (node.content) {
96
+ const content = await this.chooseChildren(node.content);
97
+ return {
98
+ type: "array",
99
+ content: [
100
+ image,
101
+ ...content,
102
+ ],
103
+ };
104
+ }
105
+ else {
106
+ return image;
107
+ }
108
+ }
109
+ async video(node) {
110
+ const video = {
111
+ type: "paragraph",
112
+ content: [{
113
+ type: "text",
114
+ text: "inline video: ",
115
+ }, {
116
+ type: "text",
117
+ text: node.alt,
118
+ }],
119
+ };
120
+ if (node.content) {
121
+ const content = await this.chooseChildren(node.content);
122
+ return {
123
+ type: "array",
124
+ content: [
125
+ video,
126
+ ...content,
127
+ ],
128
+ };
129
+ }
130
+ else {
131
+ return video;
132
+ }
133
+ }
134
+ async definitionList(node) {
135
+ const content = [];
136
+ for (const d of node.content) {
137
+ const defContent = [];
138
+ const title = await this.chooseChildren(d.title);
139
+ if (title) {
140
+ for (const n of title) {
141
+ defContent.push(n);
142
+ }
143
+ }
144
+ defContent.push({ type: "text", text: " " });
145
+ const abbreviation = await this.chooseChildren(d.abbreviation);
146
+ if (abbreviation) {
147
+ for (const n of abbreviation) {
148
+ defContent.push(n);
149
+ }
150
+ }
151
+ if (d.content.length > 0 && d.content[0].type != "paragraph") {
152
+ defContent.push({ type: "text", text: " " });
153
+ const def = await this.chooseChildren(d.content);
154
+ if (def) {
155
+ for (const n of def) {
156
+ defContent.push(n);
157
+ }
158
+ }
159
+ }
160
+ content.push({
161
+ type: "paragraph",
162
+ content: defContent,
163
+ });
164
+ if (d.content.length > 0 && d.content[0].type == "paragraph") {
165
+ const def = await this.chooseChildren(d.content);
166
+ if (def) {
167
+ for (const n of def) {
168
+ content.push(n);
169
+ }
170
+ }
171
+ }
172
+ }
173
+ return {
174
+ type: "array",
175
+ content,
176
+ };
177
+ }
178
+ // deno-lint-ignore require-await
179
+ async redacted(_node) {
180
+ return null;
181
+ }
182
+ async note(node) {
183
+ if (node.content.length == 0) {
184
+ return null;
185
+ }
186
+ const content = await this.chooseChildren(node.content);
187
+ if (!content) {
188
+ return null;
189
+ }
190
+ return {
191
+ type: "paragraph",
192
+ content: [
193
+ { type: "text", text: "Note: " },
194
+ ...content,
195
+ ],
196
+ };
197
+ }
198
+ async card(node) {
199
+ const content = [];
200
+ if (node.header) {
201
+ const title = await this.chooseChildren(node.header.title);
202
+ if (title.length > 0) {
203
+ content.push({
204
+ type: "paragraph",
205
+ content: title,
206
+ });
207
+ }
208
+ }
209
+ if (node.content) {
210
+ const card = await this.chooseChildren(node.content.content);
211
+ for (const c of card) {
212
+ content.push(c);
213
+ }
214
+ }
215
+ if (node.media) {
216
+ for (const media of node.media.content) {
217
+ const m = await this.choose(media);
218
+ if (m) {
219
+ content.push(m);
220
+ }
221
+ }
222
+ }
223
+ if (node.attribution) {
224
+ const attribution = [];
225
+ if (node.attribution.title) {
226
+ const title = await this.chooseChildren(node.attribution.title);
227
+ for (const n of title) {
228
+ content.push(n);
229
+ }
230
+ }
231
+ if (node.attribution.date) {
232
+ if (content.length > 0) {
233
+ content.push({ type: "text", text: " " });
234
+ }
235
+ content.push({ type: "text", text: `${node.attribution.date}` });
236
+ }
237
+ if (attribution.length > 0) {
238
+ content.push({
239
+ type: "paragraph",
240
+ content: attribution,
241
+ });
242
+ }
243
+ }
244
+ return {
245
+ type: "array",
246
+ content: content,
247
+ };
248
+ }
249
+ }
250
+ exports.DocumentThinningTransformer = DocumentThinningTransformer;
@@ -626,26 +626,32 @@ class IdentityTransformer {
626
626
  }
627
627
  async date(node) {
628
628
  await this.beforeInline();
629
+ const content = node.content && await this.chooseChildren(node.content);
629
630
  await this.afterInline();
630
631
  return {
631
632
  type: "date",
632
- isoDate: `${node.isoDate}`,
633
+ isoDate: node.isoDate,
634
+ content,
633
635
  };
634
636
  }
635
637
  async time(node) {
636
638
  await this.beforeInline();
639
+ const content = node.content && await this.chooseChildren(node.content);
637
640
  await this.afterInline();
638
641
  return {
639
642
  type: "time",
640
- isoTime: `${node.isoTime}`,
643
+ isoTime: node.isoTime,
644
+ content,
641
645
  };
642
646
  }
643
647
  async datetime(node) {
644
648
  await this.beforeInline();
649
+ const content = node.content && await this.chooseChildren(node.content);
645
650
  await this.afterInline();
646
651
  return {
647
652
  type: "datetime",
648
- iso8601: `${node.iso8601}`,
653
+ iso8601: node.iso8601,
654
+ content,
649
655
  };
650
656
  }
651
657
  async subText(node) {
@@ -250,22 +250,19 @@ class NodeVisitor {
250
250
  }
251
251
  }
252
252
  date(node) {
253
- this.text({
254
- type: "text",
255
- text: node.isoDate,
256
- });
253
+ this.beforeInline();
254
+ this.chooseChildren(node.content);
255
+ this.afterInline();
257
256
  }
258
257
  time(node) {
259
- this.text({
260
- type: "text",
261
- text: node.isoTime,
262
- });
258
+ this.beforeInline();
259
+ this.chooseChildren(node.content);
260
+ this.afterInline();
263
261
  }
264
262
  datetime(node) {
265
- this.text({
266
- type: "text",
267
- text: node.iso8601,
268
- });
263
+ this.beforeInline();
264
+ this.chooseChildren(node.content);
265
+ this.afterInline();
269
266
  }
270
267
  subText(node) {
271
268
  this.beforeInline();
@@ -0,0 +1,6 @@
1
+ import { IdentityTransformer } from "./index.js";
2
+ import { DocumentNode } from "./types.js";
3
+ export declare class WordCounterTransformer extends IdentityTransformer {
4
+ constructor();
5
+ transform(node: DocumentNode): Promise<DocumentNode>;
6
+ }
@@ -0,0 +1,87 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.WordCounterTransformer = void 0;
4
+ const DocumentThinningTransformer_js_1 = require("./DocumentThinningTransformer.js");
5
+ const WordCounterVisitor_js_1 = require("./WordCounterVisitor.js");
6
+ const index_js_1 = require("./index.js");
7
+ function convertHierarchy(parent) {
8
+ const docHierarchy = {
9
+ headerText: parent.header,
10
+ headerId: parent.headerId,
11
+ words: 0,
12
+ totalWords: 0,
13
+ children: [],
14
+ };
15
+ const visitor = new WordCounterVisitor_js_1.WordCounterVisitor();
16
+ for (const node of parent.nodes) {
17
+ visitor.visit(node);
18
+ }
19
+ docHierarchy.words = visitor.getCount();
20
+ docHierarchy.totalWords = docHierarchy.words;
21
+ for (const child of parent.children) {
22
+ const childHierarchy = convertHierarchy(child);
23
+ docHierarchy.children.push(childHierarchy);
24
+ docHierarchy.totalWords += childHierarchy.totalWords;
25
+ }
26
+ return docHierarchy;
27
+ }
28
+ class WordCounterTransformer extends index_js_1.IdentityTransformer {
29
+ constructor() {
30
+ super();
31
+ }
32
+ async transform(node) {
33
+ // Isolate it
34
+ const jsonNode = JSON.parse(JSON.stringify(node));
35
+ const thinned = await new DocumentThinningTransformer_js_1.DocumentThinningTransformer().transform(jsonNode);
36
+ const stack = [];
37
+ const root = {
38
+ header: node.title,
39
+ headerId: "title",
40
+ nodes: [],
41
+ children: [],
42
+ depth: 1,
43
+ };
44
+ stack.push(root);
45
+ let depth = 1;
46
+ for (const node of thinned.content) {
47
+ if (node.type == "header") {
48
+ if (node.level == 1) {
49
+ // never pop the root
50
+ continue;
51
+ }
52
+ else if (node.level <= depth) {
53
+ for (let i = stack.length - 1; i > 0; i--) {
54
+ if (stack[i].depth >= node.level) {
55
+ stack.pop();
56
+ }
57
+ }
58
+ }
59
+ const visitor = new index_js_1.TextVisitor();
60
+ visitor.visit(node);
61
+ const h = {
62
+ header: visitor.getText(),
63
+ depth: node.level,
64
+ children: [],
65
+ nodes: [],
66
+ };
67
+ if (node.htmlId) {
68
+ h.headerId = node.htmlId;
69
+ }
70
+ stack[stack.length - 1].children.push(h);
71
+ stack.push(h);
72
+ depth = node.level;
73
+ }
74
+ else {
75
+ stack[stack.length - 1].nodes.push(node);
76
+ }
77
+ }
78
+ // The transformer does not actually walk through the document tree
79
+ // We just append a newly calculated hierarchy object
80
+ const doc = {
81
+ ...node,
82
+ };
83
+ doc.hierarchy = convertHierarchy(root);
84
+ return doc;
85
+ }
86
+ }
87
+ exports.WordCounterTransformer = WordCounterTransformer;
@@ -0,0 +1,13 @@
1
+ import { NodeVisitor } from "./index.js";
2
+ import { DocumentNode, TextNode } from "./types.js";
3
+ export declare class WordCounterVisitor extends NodeVisitor {
4
+ private count;
5
+ private texts;
6
+ constructor();
7
+ private countText;
8
+ protected beforeBlock(): void;
9
+ protected afterBlock(): void;
10
+ protected text(node: TextNode): void;
11
+ protected document(node: DocumentNode): void;
12
+ getCount(): number;
13
+ }
@@ -0,0 +1,47 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.WordCounterVisitor = void 0;
4
+ const index_js_1 = require("./index.js");
5
+ class WordCounterVisitor extends index_js_1.NodeVisitor {
6
+ constructor() {
7
+ super();
8
+ Object.defineProperty(this, "count", {
9
+ enumerable: true,
10
+ configurable: true,
11
+ writable: true,
12
+ value: void 0
13
+ });
14
+ Object.defineProperty(this, "texts", {
15
+ enumerable: true,
16
+ configurable: true,
17
+ writable: true,
18
+ value: void 0
19
+ });
20
+ this.count = 0;
21
+ this.texts = [];
22
+ }
23
+ countText() {
24
+ if (this.texts.length > 0) {
25
+ this.count += this.texts.join("").split(" ").length;
26
+ this.texts = [];
27
+ }
28
+ }
29
+ beforeBlock() {
30
+ this.countText();
31
+ }
32
+ afterBlock() {
33
+ this.countText();
34
+ }
35
+ text(node) {
36
+ this.texts.push(node.text);
37
+ }
38
+ document(node) {
39
+ super.document(node);
40
+ this.countText();
41
+ }
42
+ getCount() {
43
+ this.countText();
44
+ return this.count;
45
+ }
46
+ }
47
+ exports.WordCounterVisitor = WordCounterVisitor;
package/script/index.d.ts CHANGED
@@ -6,3 +6,6 @@ export { WhitespaceTransformer } from "./WhitespaceTransformer.js";
6
6
  export { WhitespaceStretchingTransformer } from "./WhitespaceStretchingTransformer.js";
7
7
  export { NodeVisitor } from "./NodeVisitor.js";
8
8
  export { TextVisitor } from "./TextVisitor.js";
9
+ export { WordCounterTransformer } from "./WordCountTransformer.js";
10
+ export { WordCounterVisitor } from "./WordCounterVisitor.js";
11
+ export { DocumentThinningTransformer } from "./DocumentThinningTransformer.js";
package/script/index.js CHANGED
@@ -14,7 +14,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
14
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
15
  };
16
16
  Object.defineProperty(exports, "__esModule", { value: true });
17
- exports.TextVisitor = exports.NodeVisitor = exports.WhitespaceStretchingTransformer = exports.WhitespaceTransformer = exports.TextCollapseTransformer = exports.IdentityTransformer = exports.ArrayCollapseTransformer = void 0;
17
+ exports.DocumentThinningTransformer = exports.WordCounterVisitor = exports.WordCounterTransformer = exports.TextVisitor = exports.NodeVisitor = exports.WhitespaceStretchingTransformer = exports.WhitespaceTransformer = exports.TextCollapseTransformer = exports.IdentityTransformer = exports.ArrayCollapseTransformer = void 0;
18
18
  __exportStar(require("./types.js"), exports);
19
19
  var ArrayCollapseTransformer_js_1 = require("./ArrayCollapseTransformer.js");
20
20
  Object.defineProperty(exports, "ArrayCollapseTransformer", { enumerable: true, get: function () { return ArrayCollapseTransformer_js_1.ArrayCollapseTransformer; } });
@@ -30,3 +30,9 @@ var NodeVisitor_js_1 = require("./NodeVisitor.js");
30
30
  Object.defineProperty(exports, "NodeVisitor", { enumerable: true, get: function () { return NodeVisitor_js_1.NodeVisitor; } });
31
31
  var TextVisitor_js_1 = require("./TextVisitor.js");
32
32
  Object.defineProperty(exports, "TextVisitor", { enumerable: true, get: function () { return TextVisitor_js_1.TextVisitor; } });
33
+ var WordCountTransformer_js_1 = require("./WordCountTransformer.js");
34
+ Object.defineProperty(exports, "WordCounterTransformer", { enumerable: true, get: function () { return WordCountTransformer_js_1.WordCounterTransformer; } });
35
+ var WordCounterVisitor_js_1 = require("./WordCounterVisitor.js");
36
+ Object.defineProperty(exports, "WordCounterVisitor", { enumerable: true, get: function () { return WordCounterVisitor_js_1.WordCounterVisitor; } });
37
+ var DocumentThinningTransformer_js_1 = require("./DocumentThinningTransformer.js");
38
+ Object.defineProperty(exports, "DocumentThinningTransformer", { enumerable: true, get: function () { return DocumentThinningTransformer_js_1.DocumentThinningTransformer; } });
package/script/types.d.ts CHANGED
@@ -301,14 +301,17 @@ export interface SubTextNode {
301
301
  export interface DateNode {
302
302
  type: "date";
303
303
  isoDate: string;
304
+ content: Node[];
304
305
  }
305
306
  export interface TimeNode {
306
307
  type: "time";
307
308
  isoTime: string;
309
+ content: Node[];
308
310
  }
309
311
  export interface DateTimeNode {
310
312
  type: "datetime";
311
313
  iso8601: string;
314
+ content: Node[];
312
315
  }
313
316
  export type Node = ArrayNode | BlockNode | BlockQuoteNode | BoldNode | BreakNode | BubbleNode | CardNode | CenterNode | CodeNode | ColumnsNode | DefinitionNode | DefinitionListNode | DefinitionReferenceNode | EmbedNode | EmojiNode | FigureNode | FigureCaptionNode | FigureImageNode | FormattedTextNode | HeaderNode | HighTechAlertNode | HorizontalRuleNode | ImageNode | ItalicNode | LinkNode | ListNode | NoteNode | ParagraphNode | QuoteNode | RedactedNode | RegionNode | ScriptNode | SecretNode | SmallerNode | StickerNode | StrikeThroughNode | TextNode | TableNode | SocialNode | UnderlineNode | VideoNode | DateNode | TimeNode | DateTimeNode | SuperTextNode | SubTextNode | WarningNode;
314
317
  export interface DocumentMeta {