htmlnano 0.2.4 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,54 +1,72 @@
1
- 'use strict';
1
+ "use strict";
2
2
 
3
3
  Object.defineProperty(exports, "__esModule", {
4
- value: true
4
+ value: true
5
5
  });
6
6
  exports.default = removeComments;
7
7
 
8
- var _helpers = require('../helpers');
8
+ var _helpers = require("../helpers");
9
9
 
10
+ const MATCH_EXCERPT_REGEXP = /<!-- ?more ?-->/i;
10
11
  /** Removes HTML comments */
11
- function removeComments(tree, options, removeType) {
12
- if (removeType !== 'all' && removeType !== 'safe') {
13
- removeType = 'safe';
14
- }
15
12
 
16
- tree.walk(function (node) {
17
- if (node.contents && node.contents.length) {
18
- node.contents = node.contents.filter(function (content) {
19
- return !isCommentToRemove(content, removeType);
20
- });
21
- } else if (isCommentToRemove(node, removeType)) {
22
- node = '';
23
- }
13
+ function removeComments(tree, options, removeType) {
14
+ if (removeType !== 'all' && removeType !== 'safe') {
15
+ removeType = 'safe';
16
+ }
24
17
 
25
- return node;
26
- });
18
+ tree.walk(node => {
19
+ if (node.contents && node.contents.length) {
20
+ node.contents = node.contents.filter(content => !isCommentToRemove(content, removeType));
21
+ } else if (isCommentToRemove(node, removeType)) {
22
+ node = '';
23
+ }
27
24
 
28
- return tree;
25
+ return node;
26
+ });
27
+ return tree;
29
28
  }
30
29
 
31
30
  function isCommentToRemove(text, removeType) {
32
- if (typeof text !== 'string') {
33
- return false;
34
- }
31
+ if (typeof text !== 'string') {
32
+ return false;
33
+ }
35
34
 
36
- if (!(0, _helpers.isComment)(text)) {
37
- // Not HTML comment
38
- return false;
39
- }
35
+ if (!(0, _helpers.isComment)(text)) {
36
+ // Not HTML comment
37
+ return false;
38
+ }
39
+
40
+ if (removeType === 'safe') {
41
+ const isNoindex = text === '<!--noindex-->' || text === '<!--/noindex-->'; // Don't remove noindex comments.
42
+ // See: https://yandex.com/support/webmaster/controlling-robot/html.xml
40
43
 
41
- var isNoindex = text === '<!--noindex-->' || text === '<!--/noindex-->';
42
- if (removeType === 'safe' && isNoindex) {
43
- // Don't remove noindex comments.
44
- // See: https://yandex.com/support/webmaster/controlling-robot/html.xml
45
- return false;
44
+ if (isNoindex) {
45
+ return false;
46
46
  }
47
47
 
48
- // https://en.wikipedia.org/wiki/Conditional_comment
49
- if (removeType === 'safe' && (0, _helpers.isConditionalComment)(text)) {
50
- return false;
48
+ const isServerSideExclude = text === '<!--sse-->' || text === '<!--/sse-->'; // Don't remove sse comments.
49
+ // See: https://support.cloudflare.com/hc/en-us/articles/200170036-What-does-Server-Side-Excludes-SSE-do-
50
+
51
+ if (isServerSideExclude) {
52
+ return false;
53
+ } // https://en.wikipedia.org/wiki/Conditional_comment
54
+
55
+
56
+ if ((0, _helpers.isConditionalComment)(text)) {
57
+ return false;
58
+ } // Hexo: https://hexo.io/docs/tag-plugins#Post-Excerpt
59
+ // Hugo: https://gohugo.io/content-management/summaries/#manual-summary-splitting
60
+ // WordPress: https://wordpress.com/support/wordpress-editor/blocks/more-block/2/
61
+ // Jekyll: https://jekyllrb.com/docs/posts/#post-excerpts
62
+
63
+
64
+ const isCMSExcerptComment = MATCH_EXCERPT_REGEXP.test(text);
65
+
66
+ if (isCMSExcerptComment) {
67
+ return false;
51
68
  }
69
+ }
52
70
 
53
- return true;
71
+ return true;
54
72
  }
@@ -1,33 +1,31 @@
1
- 'use strict';
1
+ "use strict";
2
2
 
3
3
  Object.defineProperty(exports, "__esModule", {
4
- value: true
4
+ value: true
5
5
  });
6
6
  exports.default = removeEmptyAttributes;
7
7
  // Source: https://www.w3.org/TR/html4/sgml/dtd.html#events (Generic Attributes)
8
- var safeToRemoveAttrs = new Set(['id', 'class', 'style', 'title', 'lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup']);
9
-
8
+ const safeToRemoveAttrs = new Set(['id', 'class', 'style', 'title', 'lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup']);
10
9
  /** Removes empty attributes */
10
+
11
11
  function removeEmptyAttributes(tree) {
12
- tree.walk(function (node) {
13
- if (!node.attrs) {
14
- return node;
15
- }
12
+ tree.walk(node => {
13
+ if (!node.attrs) {
14
+ return node;
15
+ }
16
16
 
17
- Object.keys(node.attrs).forEach(function (attrName) {
18
- var attrNameLower = attrName.toLowerCase();
19
- if (!safeToRemoveAttrs.has(attrNameLower)) {
20
- return;
21
- }
17
+ Object.entries(node.attrs).forEach(([attrName, attrValue]) => {
18
+ const attrNameLower = attrName.toLowerCase();
22
19
 
23
- var attrValue = node.attrs[attrName];
24
- if (attrValue === '' || (attrValue || '').match(/^\s+$/)) {
25
- delete node.attrs[attrName];
26
- }
27
- });
20
+ if (!safeToRemoveAttrs.has(attrNameLower)) {
21
+ return;
22
+ }
28
23
 
29
- return node;
24
+ if (attrValue === '' || (attrValue || '').match(/^\s+$/)) {
25
+ delete node.attrs[attrName];
26
+ }
30
27
  });
31
-
32
- return tree;
28
+ return node;
29
+ });
30
+ return tree;
33
31
  }
@@ -0,0 +1,220 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.default = removeOptionalTags;
7
+
8
+ var _helpers = require("../helpers");
9
+
10
/** Matches text that begins with at least one whitespace character. */
const startWithWhitespacePattern = /^\s+/;

/**
 * Elements that, when they are the first thing inside "body", forbid omitting
 * the "body" start tag. The optional-tags rules list "meta", "noscript", "link",
 * "script", "style" and "template"; omitting the tag in front of any of them
 * would let the parser place the element in "head" instead of "body".
 */
const bodyStartTagCantBeOmittedWithFirstChildTags = new Set(['meta', 'noscript', 'link', 'script', 'style', 'template']);

/** A "tbody" start tag cannot be omitted when the element is preceded by one of these. */
const tbodyStartTagCantBeOmittedWithPrecededTags = new Set(['tbody', 'thead', 'tfoot']);

/** A "tbody" end tag cannot be omitted when the element is followed by one of these. */
const tbodyEndTagCantBeOmittedWithFollowedTags = new Set(['tbody', 'tfoot']);
14
+
15
/**
 * Tells whether a node is a text node that is empty or contains only whitespace.
 *
 * @param {*} node - Any tree entry (text string, element object, ...).
 * @returns {boolean} `true` only for strings that trim down to the empty string.
 */
function isEmptyTextNode(node) {
  return typeof node === 'string' && node.trim() === '';
}
22
+
23
/**
 * Tells whether an element has no meaningful children.
 *
 * An element counts as empty when it has no `content`, when `content` has no
 * entries, or when every entry is an empty/whitespace-only text node.
 *
 * @param {{content?: Array}} node - Element whose `content` is inspected.
 * @returns {boolean} `true` when nothing but blank text (or nothing at all) is inside.
 */
function isEmptyNode(node) {
  const children = node.content;

  if (!children || !children.length) {
    return true;
  }

  return children.every(child => isEmptyTextNode(child));
}
34
+
35
/**
 * Returns the first child of an element.
 *
 * Despite the name, the result is not necessarily an element: with `nonEmpty`
 * enabled it is the first child that is either an element (has a truthy `tag`)
 * or a text string that is not blank, so it may be a text or comment string.
 *
 * @param {{content?: Array}} node - Element whose `content` is searched.
 * @param {boolean} [nonEmpty=true] - When `true`, skip blank text nodes; when
 *   `false`, return the very first entry of `content` as-is.
 * @returns {*} The matching child, or `null` when there is none (a falsy first
 *   entry such as `''` also yields `null` in the `nonEmpty = false` mode).
 */
function getFirstChildTag(node, nonEmpty = true) {
  const children = node.content;

  if (!children || !children.length) {
    return null;
  }

  if (!nonEmpty) {
    return children[0] || null;
  }

  for (const child of children) {
    // Falsy entries ('', null, undefined) can never qualify; skipping them also
    // avoids a TypeError from reading `.tag` on null/undefined.
    if (!child) continue;
    if (child.tag) return child;
    if (typeof child === 'string' && !isEmptyTextNode(child)) return child;
  }

  return null;
}
49
+
50
/**
 * Returns the sibling that precedes the entry at `currentNodeIndex`.
 *
 * @param {Array} tree - List of sibling nodes (element objects and text strings).
 * @param {number} currentNodeIndex - Index of the reference node inside `tree`.
 * @param {boolean} [nonEmpty=false] - When `true`, walk backwards past blank
 *   text nodes and return the closest element (truthy `tag`) or non-blank
 *   string; when `false`, return the directly adjacent entry.
 * @returns {*} The found sibling, or `null` when there is none (a falsy
 *   adjacent entry such as `''` also yields `null`).
 */
function getPrevNode(tree, currentNodeIndex, nonEmpty = false) {
  if (!nonEmpty) {
    return tree[currentNodeIndex - 1] || null;
  }

  for (let i = currentNodeIndex - 1; i >= 0; i--) {
    const candidate = tree[i];

    // Falsy entries ('', null, undefined) can never qualify; skipping them also
    // avoids a TypeError from reading `.tag` on null/undefined.
    if (!candidate) continue;
    if (candidate.tag) return candidate;
    if (typeof candidate === 'string' && !isEmptyTextNode(candidate)) return candidate;
  }

  return null;
}
63
+
64
/**
 * Returns the sibling that follows the entry at `currentNodeIndex`.
 *
 * @param {Array} tree - List of sibling nodes (element objects and text strings).
 * @param {number} currentNodeIndex - Index of the reference node inside `tree`.
 * @param {boolean} [nonEmpty=false] - When `true`, walk forwards past blank
 *   text nodes and return the closest element (truthy `tag`) or non-blank
 *   string; when `false`, return the directly adjacent entry.
 * @returns {*} The found sibling, or `null` when there is none (a falsy
 *   adjacent entry such as `''` also yields `null`).
 */
function getNextNode(tree, currentNodeIndex, nonEmpty = false) {
  if (!nonEmpty) {
    return tree[currentNodeIndex + 1] || null;
  }

  for (let i = currentNodeIndex + 1; i < tree.length; i++) {
    const candidate = tree[i];

    // Falsy entries ('', null, undefined) can never qualify; skipping them also
    // avoids a TypeError from reading `.tag` on null/undefined.
    if (!candidate) continue;
    if (candidate.tag) return candidate;
    if (typeof candidate === 'string' && !isEmptyTextNode(candidate)) return candidate;
  }

  return null;
}
// Specification https://html.spec.whatwg.org/multipage/syntax.html#optional-tags
77
+
78
/** True when `node` is a text string that begins with whitespace. */
function startsWithWhitespace(node) {
  return typeof node === 'string' && startWithWhitespacePattern.test(node);
}

/** True when `node` is a non-empty text string that the shared helper recognises as a comment. */
function isCommentText(node) {
  return typeof node === 'string' && node !== '' && (0, _helpers.isComment)(node);
}

/** True when `node` is an element; with `tags` given, its tag must also be in that set. */
function isElement(node, tags) {
  return Boolean(node && node.tag && (!tags || tags.has(node.tag)));
}

/**
 * Decides whether BOTH the start and the end tag of `node` may be omitted.
 * The two are checked together because the caller drops them together.
 * Rules: https://html.spec.whatwg.org/multipage/syntax.html#optional-tags
 */
function canOmitBothTags(tree, node, index) {
  const prevNonEmptyNode = getPrevNode(tree, index, true);
  const nextNode = getNextNode(tree, index);
  const nextNonEmptyNode = getNextNode(tree, index, true);
  const firstChildNode = getFirstChildTag(node, false);
  const firstNonEmptyChildNode = getFirstChildTag(node);

  switch (node.tag) {
    case 'html':
      // Start tag: first thing inside must not be a comment.
      // End tag: the element must not be followed by a comment.
      return !isCommentText(firstNonEmptyChildNode) && !isCommentText(nextNonEmptyNode);

    case 'head': {
      // Start tag: element is empty, or the first thing inside is an element.
      const startOmittable = isEmptyNode(node) || isElement(firstNonEmptyChildNode);
      // End tag: not followed by whitespace or a comment. The comment test must
      // look at the same node that was type-checked (the next non-empty one).
      const endOmittable = !startsWithWhitespace(nextNode) && !isCommentText(nextNonEmptyNode);
      return startOmittable && endOmittable;
    }

    case 'body': {
      // Start tag: first thing inside is neither whitespace, a comment, nor one
      // of the elements that must stay explicitly inside "body".
      const startOmittable = !startsWithWhitespace(firstChildNode)
        && !isCommentText(firstNonEmptyChildNode)
        && !isElement(firstNonEmptyChildNode, bodyStartTagCantBeOmittedWithFirstChildTags);
      // End tag: not immediately followed by a comment.
      const endOmittable = !isCommentText(nextNode);
      return startOmittable && endOmittable;
    }

    case 'colgroup': {
      // Start tag: first thing inside is a "col" and no "colgroup" precedes it.
      const startOmittable = isElement(firstNonEmptyChildNode)
        && firstNonEmptyChildNode.tag === 'col'
        && !(isElement(prevNonEmptyNode) && prevNonEmptyNode.tag === 'colgroup');
      // End tag: not followed by whitespace or a comment.
      const endOmittable = !startsWithWhitespace(nextNode) && !isCommentText(nextNonEmptyNode);
      return startOmittable && endOmittable;
    }

    case 'tbody': {
      // Start tag: first thing inside is a "tr" and no "tbody"/"thead"/"tfoot" precedes it.
      const startOmittable = isElement(firstNonEmptyChildNode)
        && firstNonEmptyChildNode.tag === 'tr'
        && !isElement(prevNonEmptyNode, tbodyStartTagCantBeOmittedWithPrecededTags);
      // End tag: not followed by a "tbody" or "tfoot".
      const endOmittable = !isElement(nextNonEmptyNode, tbodyEndTagCantBeOmittedWithFollowedTags);
      return startOmittable && endOmittable;
    }

    default:
      return false;
  }
}

/**
 * Removes optional "html", "head", "body", "colgroup" and "tbody" tags.
 *
 * Walks `tree` and every nested `content` list. An element loses its tags only
 * when it has no attributes and both its start and end tag are omittable; this
 * is recorded by setting `node.tag = false` (presumably the renderer then emits
 * only the children — confirm against the renderer in use). The tree is
 * mutated in place.
 *
 * Elements that carry attributes are never stripped themselves, but their
 * children are still processed.
 *
 * @param {Array} tree - List of sibling nodes (element objects and text strings).
 * @returns {Array} The same `tree` instance.
 */
function removeOptionalTags(tree) {
  tree.forEach((node, index) => {
    if (!node || !node.tag) return;

    const hasAttributes = Boolean(node.attrs && Object.keys(node.attrs).length);

    if (!hasAttributes && canOmitBothTags(tree, node, index)) {
      node.tag = false;
    }

    if (node.content && node.content.length) {
      removeOptionalTags(node.content);
    }
  });
  return tree;
}
@@ -1,87 +1,102 @@
1
- 'use strict';
1
+ "use strict";
2
2
 
3
3
  Object.defineProperty(exports, "__esModule", {
4
- value: true
4
+ value: true
5
5
  });
6
6
  exports.default = removeRedundantAttributes;
7
- var redundantAttributes = {
8
- 'form': {
9
- 'method': 'get'
10
- },
7
+ // https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types#JavaScript_types
8
+ const redundantScriptTypes = new Set(['application/javascript', 'application/ecmascript', 'application/x-ecmascript', 'application/x-javascript', 'text/javascript', 'text/ecmascript', 'text/javascript1.0', 'text/javascript1.1', 'text/javascript1.2', 'text/javascript1.3', 'text/javascript1.4', 'text/javascript1.5', 'text/jscript', 'text/livescript', 'text/x-ecmascript', 'text/x-javascript']);
9
+ const redundantAttributes = {
10
+ 'form': {
11
+ 'method': 'get'
12
+ },
13
+ 'input': {
14
+ 'type': 'text'
15
+ },
16
+ 'button': {
17
+ 'type': 'submit'
18
+ },
19
+ 'script': {
20
+ 'language': 'javascript',
21
+ 'type': node => {
22
+ for (const [attrName, attrValue] of Object.entries(node.attrs)) {
23
+ if (attrName.toLowerCase() !== 'type') {
24
+ continue;
25
+ }
11
26
 
12
- 'input': {
13
- 'type': 'text'
14
- },
27
+ return redundantScriptTypes.has(attrValue);
28
+ }
15
29
 
16
- 'button': {
17
- 'type': 'submit'
30
+ return false;
18
31
  },
32
+ // Remove attribute if the function returns false
33
+ 'charset': node => {
34
+ // The charset attribute only really makes sense on “external” SCRIPT elements:
35
+ // http://perfectionkills.com/optimizing-html/#8_script_charset
36
+ return node.attrs && !node.attrs.src;
37
+ }
38
+ },
39
+ 'style': {
40
+ 'media': 'all',
41
+ 'type': 'text/css'
42
+ },
43
+ 'link': {
44
+ 'media': 'all',
45
+ 'type': node => {
46
+ // https://html.spec.whatwg.org/multipage/links.html#link-type-stylesheet
47
+ let isRelStyleSheet = false;
48
+ let isTypeTextCSS = false;
49
+
50
+ if (node.attrs) {
51
+ for (const [attrName, attrValue] of Object.entries(node.attrs)) {
52
+ if (attrName.toLowerCase() === 'rel' && attrValue === 'stylesheet') {
53
+ isRelStyleSheet = true;
54
+ }
19
55
 
20
- 'script': {
21
- 'language': 'javascript',
22
- 'type': 'text/javascript',
23
- // Remove attribute if the function returns false
24
- 'charset': function charset(node) {
25
- // The charset attribute only really makes sense on “external” SCRIPT elements:
26
- // http://perfectionkills.com/optimizing-html/#8_script_charset
27
- return node.attrs && !node.attrs.src;
56
+ if (attrName.toLowerCase() === 'type' && attrValue === 'text/css') {
57
+ isTypeTextCSS = true;
58
+ }
28
59
  }
29
- },
60
+ } // Only "text/css" is redundant for link[rel=stylesheet]. Otherwise "type" shouldn't be removed
30
61
 
31
- 'style': {
32
- 'media': 'all',
33
- 'type': 'text/css'
34
- },
35
62
 
36
- 'link': {
37
- 'media': 'all'
63
+ return isRelStyleSheet && isTypeTextCSS;
38
64
  }
65
+ },
66
+ // See: https://html.spec.whatwg.org/#lazy-loading-attributes
67
+ 'img': {
68
+ 'loading': 'eager'
69
+ },
70
+ 'iframe': {
71
+ 'loading': 'eager'
72
+ }
39
73
  };
40
-
74
+ const TAG_MATCH_REGEXP = new RegExp('^(' + Object.keys(redundantAttributes).join('|') + ')$');
41
75
  /** Removes redundant attributes */
42
- function removeRedundantAttributes(tree) {
43
- var tags = Object.keys(redundantAttributes);
44
- var tagMatchRegExp = new RegExp('^(' + tags.join('|') + ')$');
45
- tree.match({ tag: tagMatchRegExp }, function (node) {
46
- var tagRedundantAttributes = redundantAttributes[node.tag];
47
- node.attrs = node.attrs || {};
48
- var _iteratorNormalCompletion = true;
49
- var _didIteratorError = false;
50
- var _iteratorError = undefined;
51
76
 
52
- try {
53
- for (var _iterator = Object.keys(tagRedundantAttributes)[Symbol.iterator](), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {
54
- var redundantAttributeName = _step.value;
77
+ function removeRedundantAttributes(tree) {
78
+ tree.match({
79
+ tag: TAG_MATCH_REGEXP
80
+ }, node => {
81
+ const tagRedundantAttributes = redundantAttributes[node.tag];
82
+ node.attrs = node.attrs || {};
55
83
 
56
- var tagRedundantAttributeValue = tagRedundantAttributes[redundantAttributeName];
57
- var isRemove = false;
58
- if (typeof tagRedundantAttributeValue === 'function') {
59
- isRemove = tagRedundantAttributeValue(node);
60
- } else if (node.attrs[redundantAttributeName] === tagRedundantAttributeValue) {
61
- isRemove = true;
62
- }
84
+ for (const redundantAttributeName of Object.keys(tagRedundantAttributes)) {
85
+ let tagRedundantAttributeValue = tagRedundantAttributes[redundantAttributeName];
86
+ let isRemove = false;
63
87
 
64
- if (isRemove) {
65
- delete node.attrs[redundantAttributeName];
66
- }
67
- }
68
- } catch (err) {
69
- _didIteratorError = true;
70
- _iteratorError = err;
71
- } finally {
72
- try {
73
- if (!_iteratorNormalCompletion && _iterator.return) {
74
- _iterator.return();
75
- }
76
- } finally {
77
- if (_didIteratorError) {
78
- throw _iteratorError;
79
- }
80
- }
81
- }
88
+ if (typeof tagRedundantAttributeValue === 'function') {
89
+ isRemove = tagRedundantAttributeValue(node);
90
+ } else if (node.attrs[redundantAttributeName] === tagRedundantAttributeValue) {
91
+ isRemove = true;
92
+ }
82
93
 
83
- return node;
84
- });
94
+ if (isRemove) {
95
+ delete node.attrs[redundantAttributeName];
96
+ }
97
+ }
85
98
 
86
- return tree;
99
+ return node;
100
+ });
101
+ return tree;
87
102
  }