hdoc-tools 0.19.7 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/hdoc-stats.js CHANGED
@@ -1,184 +1,187 @@
1
-
2
- (function () {
3
- 'use strict';
4
-
5
- // Required modules
6
- // /const { STATUS_CODES } = require('http');
7
- const fs = require('fs'),
8
- path = require('path'),
9
- dree = require('dree'),
10
- html2text = require('html-to-text'),
11
- wordsCount = require('words-count').default;
12
-
13
- // Regex to remove Hornbill-specific tags
14
- const hbMDTagRegex = /(:{3}[ ]note)|(:{3}[ ]tip)|(:{3}[ ]important)|(:{3}[ ]caution)|(:{3}[ ]warning)|(:{3})/g;
15
-
16
- let stats = {
17
- totalMDFiles: 0,
18
- totalStaticHTMLFiles: 0,
19
- totalWordCount: 0,
20
- mdFiles: {},
21
- staticHTMLFiles: {}
22
- };
23
-
24
- let markdownFiles = [];
25
-
26
- // File callback for scan
27
- const fileCallback = function (element) {
28
- if (element.extension === 'md') {
29
- markdownFiles.push(element);
30
- stats.totalMDFiles++;
31
- } else {
32
- // file must be html
33
- const mdFilePath = element.path.slice(0, element.path.lastIndexOf('.')) + '.md';
34
- //Do we have a matching MD file - if not, then word count the HTML
35
- if (!fs.existsSync(mdFilePath)) {
36
- stats.totalStaticHTMLFiles++;
37
- const html = fs.readFileSync(element.path, 'utf8');
38
- const text = html2text.convert(html, {
39
- wordwrap: null
40
- });
41
- const wordCount = wordsCount(text);
42
- stats.totalWordCount += wordCount;
43
- stats.staticHTMLFiles[element.relativePath] = {
44
- wordCount: wordCount,
45
- sizeInBytes: element.sizeInBytes
46
- };
47
- }
48
- }
49
- };
50
-
51
- const dreeOptions = {
52
- descendants: true,
53
- depth: 10,
54
- extensions: ['md', 'html', 'htm'],
55
- hash: false,
56
- normalize: true,
57
- size: true,
58
- sizeInBytes: true,
59
- stat: false,
60
- symbolicLinks: false
61
- };
62
-
63
- exports.run = function (ui_path, source_path, verbose = false) {
64
-
65
- // GERRY: The stats here are needed to support content development. The idea is to count all of the ]
66
- // words in a HDocBook so we know the size of the book, this helps with 3rd party involvement where
67
- // we generally need to know the word count of the content in order to get a quote for things like
68
- // copy editing. reviewing and translations.
69
- //
70
- // For each .md file, and for each static .HTML file (that is html files that we have not generated) we
71
- // should do a word count, excluding MD or HTML tags
72
-
73
- // STEVE: Get the docId (book root) from the hdocbook-project.json
74
- // From there, loop through looking for:
75
- // * HTML files without a matching MD, and word count those
76
- // * MD files, and word count those
77
-
78
-
79
- console.log('Hornbill HDocBook Stats : verbose=' + verbose, '\r\n');
80
-
81
- const project_json_path = path.join(source_path, 'hdocbook-project.json');
82
-
83
- if (!fs.existsSync(project_json_path)) {
84
- // Book config does not exist
85
- console.error('Required project file does not exist:', project_json_path);
86
- return;
87
- } else {
88
- // Book config exists - load book details
89
- let book_details;
90
- try {
91
- book_details = JSON.parse(fs.readFileSync(project_json_path, 'utf8'));
92
- } catch (e) {
93
- console.error('Error reading book configuration:\r\n', e);
94
- return;
95
- }
96
- const bookPath = path.join(source_path, book_details.docId);
97
-
98
- //Load book config
99
- let bookConfig;
100
- try {
101
- bookConfig = JSON.parse(fs.readFileSync(path.join(bookPath, 'hdocbook.json'), 'utf8'));
102
- } catch (e) {
103
- console.error('Could not load book configuration:\r\n', e);
104
- return;
105
- }
106
- // Load markdown-it module
107
- const md = require('markdown-it')({
108
- html: true,
109
- linkify: true,
110
- typographer: true
111
- });
112
-
113
- // Scan content path directory, send file info to callback for processing
114
- dree.scan(bookPath, dreeOptions, fileCallback);
115
- markdownFiles.forEach(function (element) {
116
- // Load markdown file
117
- let md_txt = fs.readFileSync(element.path, 'utf8');
118
-
119
- var html_txt = md.render(md_txt.toString());
120
- const text = html2text.convert(html_txt, {
121
- wordwrap: null
122
- });
123
-
124
- // Do the wordcount and add to status
125
- const wordCount = wordsCount(text);
126
- stats.totalWordCount += wordCount;
127
- stats.mdFiles[element.relativePath] = {
128
- wordCount: wordCount,
129
- sizeInBytes: element.sizeInBytes
130
- };
131
- });
132
-
133
- if (verbose) {
134
- // Output verbose stats
135
-
136
- // Output information about all markdown files in the book
137
- console.log('--------------------');
138
- console.log(' Markdown Files ');
139
- console.log('--------------------\r\n');
140
- if (stats.totalMDFiles === 0) {
141
- console.log('No markdown files found.\r\n');
142
- } else {
143
- for (const key in stats.mdFiles) {
144
- if (stats.mdFiles.hasOwnProperty(key)) {
145
- console.log('Relative Path:', key);
146
- console.log(' Word Count:', stats.mdFiles[key].wordCount);
147
- console.log('File Size (B):', stats.mdFiles[key].sizeInBytes, '\r\n');
148
- }
149
- }
150
- }
151
-
152
- // Output information about all static HTML in the book
153
- console.log('-----------------------');
154
- console.log(' Static HTML Files ');
155
- console.log('-----------------------\r\n');
156
- if (stats.totalStaticHTMLFiles === 0) {
157
- console.log('No static HTML files found.\r\n');
158
- } else {
159
- for (const key in stats.staticHTMLFiles) {
160
- if (stats.staticHTMLFiles.hasOwnProperty(key)) {
161
- console.log('Relative Path:', key);
162
- console.log(' Word Count:', stats.staticHTMLFiles[key].wordCount);
163
- console.log('File Size (B):', stats.staticHTMLFiles[key].sizeInBytes, '\r\n');
164
- }
165
- }
166
- }
167
- }
168
-
169
- // Output stats
170
- console.log('----------------------');
171
- console.log(' Book Information ');
172
- console.log('----------------------\r\n');
173
- console.log(' Document ID:', bookConfig.docId);
174
- console.log(' Version:', bookConfig.version, '\r\n');
175
- console.log(' Title:', bookConfig.title);
176
- console.log(' Description:', bookConfig.description);
177
- console.log(' Public Source:', bookConfig.publicSource, '\r\n');
178
- console.log('Total Book Word Count:', stats.totalWordCount);
179
- console.log(' Markdown Files:', stats.totalMDFiles);
180
- console.log(' Static HTML Files:', stats.totalStaticHTMLFiles, '\r\n');
181
-
182
- }
183
- };
184
- })();
1
+ (() => {
2
+ // Required modules
3
+ const fs = require("node:fs");
4
+ const path = require("node:path");
5
+ const dree = require("dree");
6
+ const html2text = require("html-to-text");
7
+ const wordsCount = require("words-count").default;
8
+
9
+ // Regex to remove Hornbill-specific tags
10
+ const hbMDTagRegex =
11
+ /(:{3}[ ]note)|(:{3}[ ]tip)|(:{3}[ ]important)|(:{3}[ ]caution)|(:{3}[ ]warning)|(:{3})/g;
12
+
13
+ const stats = {
14
+ totalMDFiles: 0,
15
+ totalStaticHTMLFiles: 0,
16
+ totalWordCount: 0,
17
+ mdFiles: {},
18
+ staticHTMLFiles: {},
19
+ };
20
+
21
+ const markdownFiles = [];
22
+
23
+ // File callback for scan
24
+ const fileCallback = (element) => {
25
+ if (element.extension === "md") {
26
+ markdownFiles.push(element);
27
+ stats.totalMDFiles++;
28
+ } else {
29
+ // file must be html
30
+ const mdFilePath = `${element.path.slice(0, element.path.lastIndexOf("."))}.md`;
31
+ //Do we have a matching MD file - if not, then word count the HTML
32
+ if (!fs.existsSync(mdFilePath)) {
33
+ stats.totalStaticHTMLFiles++;
34
+ const html = fs.readFileSync(element.path, "utf8");
35
+ const text = html2text.convert(html, {
36
+ wordwrap: null,
37
+ });
38
+ const wordCount = wordsCount(text);
39
+ stats.totalWordCount += wordCount;
40
+ stats.staticHTMLFiles[element.relativePath] = {
41
+ wordCount: wordCount,
42
+ sizeInBytes: element.sizeInBytes,
43
+ };
44
+ }
45
+ }
46
+ };
47
+
48
+ const dreeOptions = {
49
+ descendants: true,
50
+ depth: 10,
51
+ extensions: ["md", "html", "htm"],
52
+ hash: false,
53
+ normalize: true,
54
+ size: true,
55
+ sizeInBytes: true,
56
+ stat: false,
57
+ symbolicLinks: false,
58
+ };
59
+
60
+ exports.run = (ui_path, source_path, verbose = false) => {
61
+ // GERRY: The stats here are needed to support content development. The idea is to count all of the ]
62
+ // words in a HDocBook so we know the size of the book, this helps with 3rd party involvement where
63
+ // we generally need to know the word count of the content in order to get a quote for things like
64
+ // copy editing. reviewing and translations.
65
+ //
66
+ // For each .md file, and for each static .HTML file (that is html files that we have not generated) we
67
+ // should do a word count, excluding MD or HTML tags
68
+
69
+ // STEVE: Get the docId (book root) from the hdocbook-project.json
70
+ // From there, loop through looking for:
71
+ // * HTML files without a matching MD, and word count those
72
+ // * MD files, and word count those
73
+
74
+ console.log(`Hornbill HDocBook Stats : verbose=${verbose}\r\n`);
75
+
76
+ const project_json_path = path.join(source_path, "hdocbook-project.json");
77
+
78
+ if (!fs.existsSync(project_json_path)) {
79
+ // Book config does not exist
80
+ console.error("Required project file does not exist:", project_json_path);
81
+ return;
82
+ }
83
+ // Book config exists - load book details
84
+ let book_details;
85
+ try {
86
+ book_details = JSON.parse(fs.readFileSync(project_json_path, "utf8"));
87
+ } catch (e) {
88
+ console.error("Error reading book configuration:\r\n", e);
89
+ return;
90
+ }
91
+ const bookPath = path.join(source_path, book_details.docId);
92
+
93
+ //Load book config
94
+ let bookConfig;
95
+ try {
96
+ bookConfig = JSON.parse(
97
+ fs.readFileSync(path.join(bookPath, "hdocbook.json"), "utf8"),
98
+ );
99
+ } catch (e) {
100
+ console.error("Could not load book configuration:\r\n", e);
101
+ return;
102
+ }
103
+ // Load markdown-it module
104
+ const md = require("markdown-it")({
105
+ html: true,
106
+ linkify: true,
107
+ typographer: true,
108
+ });
109
+
110
+ // Scan content path directory, send file info to callback for processing
111
+ dree.scan(bookPath, dreeOptions, fileCallback);
112
+ for (const element of markdownFiles) {
113
+ // Load markdown file
114
+ const md_txt = fs.readFileSync(element.path, "utf8");
115
+
116
+ const html_txt = md.render(md_txt.toString());
117
+ const text = html2text.convert(html_txt, {
118
+ wordwrap: null,
119
+ });
120
+
121
+ // Do the wordcount and add to status
122
+ const wordCount = wordsCount(text);
123
+ stats.totalWordCount += wordCount;
124
+ stats.mdFiles[element.relativePath] = {
125
+ wordCount: wordCount,
126
+ sizeInBytes: element.sizeInBytes,
127
+ };
128
+ }
129
+
130
+ if (verbose) {
131
+ // Output verbose stats
132
+
133
+ // Output information about all markdown files in the book
134
+ console.log("--------------------");
135
+ console.log(" Markdown Files ");
136
+ console.log("--------------------\r\n");
137
+ if (stats.totalMDFiles === 0) {
138
+ console.log("No markdown files found.\r\n");
139
+ } else {
140
+ for (const key in stats.mdFiles) {
141
+ if (Object.hasOwn(stats.mdFiles, key)) {
142
+ console.log("Relative Path:", key);
143
+ console.log(" Word Count:", stats.mdFiles[key].wordCount);
144
+ console.log(
145
+ "File Size (B):",
146
+ stats.mdFiles[key].sizeInBytes,
147
+ "\r\n",
148
+ );
149
+ }
150
+ }
151
+ }
152
+
153
+ // Output information about all static HTML in the book
154
+ console.log("-----------------------");
155
+ console.log(" Static HTML Files ");
156
+ console.log("-----------------------\r\n");
157
+ if (stats.totalStaticHTMLFiles === 0) {
158
+ console.log("No static HTML files found.\r\n");
159
+ } else {
160
+ for (const key in stats.staticHTMLFiles) {
161
+ if (Object.hasOwn(stats.staticHTMLFiles, key)) {
162
+ console.log("Relative Path:", key);
163
+ console.log(" Word Count:", stats.staticHTMLFiles[key].wordCount);
164
+ console.log(
165
+ "File Size (B):",
166
+ stats.staticHTMLFiles[key].sizeInBytes,
167
+ "\r\n",
168
+ );
169
+ }
170
+ }
171
+ }
172
+ }
173
+
174
+ // Output stats
175
+ console.log("----------------------");
176
+ console.log(" Book Information ");
177
+ console.log("----------------------\r\n");
178
+ console.log(" Document ID:", bookConfig.docId);
179
+ console.log(" Version:", bookConfig.version, "\r\n");
180
+ console.log(" Title:", bookConfig.title);
181
+ console.log(" Description:", bookConfig.description);
182
+ console.log(" Public Source:", bookConfig.publicSource, "\r\n");
183
+ console.log("Total Book Word Count:", stats.totalWordCount);
184
+ console.log(" Markdown Files:", stats.totalMDFiles);
185
+ console.log(" Static HTML Files:", stats.totalStaticHTMLFiles, "\r\n");
186
+ };
187
+ })();