html2any 0.0.6 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -26
- package/dist/index.js +255 -0
- package/package.json +40 -18
- package/lib/index.js +0 -210
- package/lib/index.mjs +0 -209
package/README.md
CHANGED
|
@@ -10,19 +10,7 @@ But I also want to render the whole html easily rather than parse it manually.
|
|
|
10
10
|
|
|
11
11
|
Now `html2any` helps you render an HTML string. It not only parses your HTML but also lets you transform it from its original form into any target form.
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
### Theory
|
|
15
|
-
|
|
16
|
-
Use `html2any` to construct an AST from an HTML string, then convert each node recursively with the `rule` function passed to `transform`.
|
|
17
|
-
|
|
18
|
-
For example, we translate a `<p>` tag into the React Native component `<Text style={styles.paragraph}>` with the prepared styles, then decode the `<p>` tag's content so that HTML entities do not garble the text.
|
|
19
|
-
|
|
20
|
-
### Demo
|
|
21
|
-
|
|
22
|
-
- [Web React Online Demo](https://huozhi.github.io/html2any-web-demo/)
|
|
23
|
-
- React Native demo: https://github.com/huozhi/html2any-rn-demo
|
|
24
|
-
|
|
25
|
-
### Documentation
|
|
13
|
+
### API
|
|
26
14
|
|
|
27
15
|
html2any provides the following APIs
|
|
28
16
|
|
|
@@ -39,29 +27,29 @@ html2any provide following APIs
|
|
|
39
27
|
- html2any
|
|
40
28
|
> Convert the html/xml to the final form directly.
|
|
41
29
|
|
|
42
|
-
|
|
30
|
+
### Usage
|
|
43
31
|
|
|
44
|
-
|
|
32
|
+
```
|
|
33
|
+
npm i -S html2any
|
|
34
|
+
```
|
|
45
35
|
|
|
46
36
|
```js
|
|
47
|
-
import html2any, {parse, transform} from 'html2any'
|
|
37
|
+
import html2any, { parse, transform } from 'html2any'
|
|
48
38
|
|
|
49
|
-
const html =
|
|
50
|
-
<div>123</div>
|
|
51
|
-
`
|
|
39
|
+
const html = escapeHTMLEntity(`<div>123</div>`)
|
|
52
40
|
|
|
53
41
|
const ast = parse(html)[0]
|
|
54
42
|
|
|
55
43
|
function rule(node, children) {
|
|
56
|
-
if (node
|
|
57
|
-
return
|
|
44
|
+
if (typeof node === 'string') {
|
|
45
|
+
return node
|
|
58
46
|
} else {
|
|
59
|
-
return node
|
|
47
|
+
return <div>{node}</div>
|
|
60
48
|
}
|
|
61
49
|
}
|
|
62
50
|
|
|
63
51
|
const vdom = transform(ast, rule)
|
|
64
|
-
// vdom form of
|
|
52
|
+
// JSX vdom form of html
|
|
65
53
|
// { type: 'div', props: {...}, children: '...' }
|
|
66
54
|
|
|
67
55
|
```
|
|
@@ -72,7 +60,9 @@ Or you can just call html2any directly
|
|
|
72
60
|
const vdom = html2any(html, rule)
|
|
73
61
|
```
|
|
74
62
|
|
|
75
|
-
###
|
|
63
|
+
### How It Works
|
|
64
|
+
|
|
65
|
+
Use `html2any` to construct an AST from an HTML string, then convert each node recursively with the `rule` function passed to `transform`.
|
|
66
|
+
|
|
67
|
+
For example, we translate a `<p>` tag into the React Native component `<Text style={styles.paragraph}>` with the prepared styles, then decode the `<p>` tag's content so that HTML entities do not garble the text.
|
|
76
68
|
|
|
77
|
-
Any suggestions or beautifier API you expect, just post issue [here](https://github.com/huozhi/html2any/issues).
|
|
78
|
-
Reporting bugs is welcomed. : )
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
function transform(ast, rule) {
|
|
2
|
+
function next(node, index) {
|
|
3
|
+
if (node) {
|
|
4
|
+
if (typeof node === 'string') {
|
|
5
|
+
return rule(node, undefined, index);
|
|
6
|
+
}
|
|
7
|
+
if (Array.isArray(node)) {
|
|
8
|
+
return node.map((n, index)=>{
|
|
9
|
+
if (typeof n !== 'string') {
|
|
10
|
+
n.index = index; // critical array element index
|
|
11
|
+
}
|
|
12
|
+
return rule(n, next(n.children), index);
|
|
13
|
+
});
|
|
14
|
+
} else {
|
|
15
|
+
return rule(node, next(node.children), index);
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
return null;
|
|
19
|
+
}
|
|
20
|
+
return next(ast);
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
const voidElementTags = [
|
|
24
|
+
'area',
|
|
25
|
+
'base',
|
|
26
|
+
'br',
|
|
27
|
+
'col',
|
|
28
|
+
'embed',
|
|
29
|
+
'hr',
|
|
30
|
+
'img',
|
|
31
|
+
'input',
|
|
32
|
+
'link',
|
|
33
|
+
'meta',
|
|
34
|
+
'param',
|
|
35
|
+
'source',
|
|
36
|
+
'track',
|
|
37
|
+
'wbr'
|
|
38
|
+
];
|
|
39
|
+
function isSelfClose(tagName) {
|
|
40
|
+
return voidElementTags.indexOf(tagName.toLowerCase()) > -1;
|
|
41
|
+
}
|
|
42
|
+
function isPair(tagX, tagY) {
|
|
43
|
+
return tagX.name === tagY.name && tagX.type === 'start' && tagY.type === 'end';
|
|
44
|
+
}
|
|
45
|
+
var utils = {
|
|
46
|
+
isPair,
|
|
47
|
+
isSelfClose
|
|
48
|
+
};
|
|
49
|
+
|
|
50
|
+
const RAW_TEXT_TAGS = [
|
|
51
|
+
'script',
|
|
52
|
+
'style',
|
|
53
|
+
'textarea',
|
|
54
|
+
'title'
|
|
55
|
+
];
|
|
56
|
+
function extraAttrs(str) {
|
|
57
|
+
let i = 0;
|
|
58
|
+
const attrs = {};
|
|
59
|
+
while(i < str.length){
|
|
60
|
+
while(/\s/.test(str[i]))i++;
|
|
61
|
+
if (!str[i] || str[i] === '/') {
|
|
62
|
+
break;
|
|
63
|
+
}
|
|
64
|
+
const nameStart = i;
|
|
65
|
+
while(str[i] && !/[\s=/>]/.test(str[i]))i++;
|
|
66
|
+
const key = str.slice(nameStart, i);
|
|
67
|
+
let value = true;
|
|
68
|
+
while(/\s/.test(str[i]))i++;
|
|
69
|
+
if (str[i] === '=') {
|
|
70
|
+
i++;
|
|
71
|
+
while(/\s/.test(str[i]))i++;
|
|
72
|
+
const quote = str[i];
|
|
73
|
+
if (quote === '"' || quote === "'") {
|
|
74
|
+
i++;
|
|
75
|
+
const valueStart = i;
|
|
76
|
+
while(str[i] && str[i] !== quote)i++;
|
|
77
|
+
value = str.slice(valueStart, i);
|
|
78
|
+
if (str[i] === quote) i++;
|
|
79
|
+
} else {
|
|
80
|
+
const valueStart = i;
|
|
81
|
+
while(str[i] && !/[\s>]/.test(str[i]))i++;
|
|
82
|
+
value = str.slice(valueStart, i);
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
if (key) {
|
|
86
|
+
attrs[key] = value;
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
return attrs;
|
|
90
|
+
}
|
|
91
|
+
function makeToken(tag) {
|
|
92
|
+
const isTag = tag[0] === '<' && tag[tag.length - 1] === '>';
|
|
93
|
+
if (!isTag) {
|
|
94
|
+
return {
|
|
95
|
+
type: 'string',
|
|
96
|
+
value: tag
|
|
97
|
+
};
|
|
98
|
+
} else if (/^<!--/.test(tag) || /^<!doctype/i.test(tag) || /^<\?/.test(tag)) {
|
|
99
|
+
return null;
|
|
100
|
+
} else if (tag.startsWith('</')) {
|
|
101
|
+
return {
|
|
102
|
+
type: 'end',
|
|
103
|
+
name: tag.slice(2, -1).trim().split(/\s+/)[0]
|
|
104
|
+
};
|
|
105
|
+
} else {
|
|
106
|
+
const body = tag.slice(1, -1).trim();
|
|
107
|
+
const match = body.match(/^([^\s/>]+)/);
|
|
108
|
+
if (!match) {
|
|
109
|
+
return null;
|
|
110
|
+
}
|
|
111
|
+
const tagName = match[1];
|
|
112
|
+
const tagBody = body.slice(tagName.length);
|
|
113
|
+
return {
|
|
114
|
+
type: utils.isSelfClose(tagName) || tagBody[tagBody.length - 1] === '/' ? 'self-close' : 'start',
|
|
115
|
+
name: tagName,
|
|
116
|
+
attributes: extraAttrs(tagBody)
|
|
117
|
+
};
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
function findTagEnd(html, start) {
|
|
121
|
+
let quote = null;
|
|
122
|
+
for(let i = start + 1; i < html.length; i++){
|
|
123
|
+
const curr = html[i];
|
|
124
|
+
if (quote) {
|
|
125
|
+
if (curr === quote) quote = null;
|
|
126
|
+
} else if (curr === '"' || curr === "'") {
|
|
127
|
+
quote = curr;
|
|
128
|
+
} else if (curr === '>') {
|
|
129
|
+
return i;
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
return -1;
|
|
133
|
+
}
|
|
134
|
+
function getStartTagName(tag) {
|
|
135
|
+
if (tag.startsWith('</') || tag.startsWith('<!') || tag.startsWith('<?')) {
|
|
136
|
+
return null;
|
|
137
|
+
}
|
|
138
|
+
const match = tag.slice(1, -1).trim().match(/^([^\s/>]+)/);
|
|
139
|
+
return match && match[1];
|
|
140
|
+
}
|
|
141
|
+
function splitTokens(html) {
|
|
142
|
+
let i = 0;
|
|
143
|
+
let j = 0;
|
|
144
|
+
const tokens = [];
|
|
145
|
+
while(i < html.length){
|
|
146
|
+
const curr = html[i];
|
|
147
|
+
if (curr === '<') {
|
|
148
|
+
if (html.startsWith('<!--', i)) {
|
|
149
|
+
const k = html.indexOf('-->', i + 4);
|
|
150
|
+
if (k === -1) break;
|
|
151
|
+
if (j < i) {
|
|
152
|
+
tokens.push(html.slice(j, i));
|
|
153
|
+
}
|
|
154
|
+
tokens.push(html.slice(i, k + 3));
|
|
155
|
+
i = j = k + 3;
|
|
156
|
+
continue;
|
|
157
|
+
}
|
|
158
|
+
if (j < i) {
|
|
159
|
+
tokens.push(html.slice(j, i));
|
|
160
|
+
j = i;
|
|
161
|
+
}
|
|
162
|
+
const k = findTagEnd(html, i);
|
|
163
|
+
if (k === -1) {
|
|
164
|
+
break;
|
|
165
|
+
}
|
|
166
|
+
tokens.push(html.slice(i, k + 1));
|
|
167
|
+
const tagName = getStartTagName(html.slice(i, k + 1));
|
|
168
|
+
if (tagName && RAW_TEXT_TAGS.indexOf(tagName.toLowerCase()) > -1) {
|
|
169
|
+
const closeTagStart = html.toLowerCase().indexOf(`</${tagName.toLowerCase()}`, k + 1);
|
|
170
|
+
if (closeTagStart > -1) {
|
|
171
|
+
const closeTagEnd = findTagEnd(html, closeTagStart);
|
|
172
|
+
if (closeTagEnd > -1) {
|
|
173
|
+
if (k + 1 < closeTagStart) {
|
|
174
|
+
tokens.push(html.slice(k + 1, closeTagStart));
|
|
175
|
+
}
|
|
176
|
+
tokens.push(html.slice(closeTagStart, closeTagEnd + 1));
|
|
177
|
+
i = j = closeTagEnd + 1;
|
|
178
|
+
continue;
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
i = j = k + 1;
|
|
183
|
+
continue;
|
|
184
|
+
}
|
|
185
|
+
i++;
|
|
186
|
+
}
|
|
187
|
+
if (j < html.length) {
|
|
188
|
+
tokens.push(html.slice(j));
|
|
189
|
+
}
|
|
190
|
+
return tokens;
|
|
191
|
+
}
|
|
192
|
+
function tokenize(html) {
|
|
193
|
+
return splitTokens(html).map((s)=>s.replace(/^\n+$/g, '')).map((s)=>s.trim()).filter(Boolean).map(makeToken).filter(Boolean);
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
function isEmpty(stack) {
|
|
197
|
+
return stack.length === 0;
|
|
198
|
+
}
|
|
199
|
+
function getTop(stack) {
|
|
200
|
+
return stack[stack.length - 1];
|
|
201
|
+
}
|
|
202
|
+
function appendChild(node, child) {
|
|
203
|
+
if (!node.children) {
|
|
204
|
+
node.children = [];
|
|
205
|
+
}
|
|
206
|
+
node.children.push(filterProps(child));
|
|
207
|
+
}
|
|
208
|
+
function filterProps(node) {
|
|
209
|
+
if (typeof node === 'string') {
|
|
210
|
+
return node;
|
|
211
|
+
}
|
|
212
|
+
return [
|
|
213
|
+
'name',
|
|
214
|
+
'children',
|
|
215
|
+
'attributes'
|
|
216
|
+
].reduce((r, c)=>Object.assign({}, r, {
|
|
217
|
+
[c]: node[c]
|
|
218
|
+
}), {});
|
|
219
|
+
}
|
|
220
|
+
function parse(src) {
|
|
221
|
+
const tokens = tokenize(src);
|
|
222
|
+
const stack = [];
|
|
223
|
+
const tree = {
|
|
224
|
+
type: 'root',
|
|
225
|
+
children: []
|
|
226
|
+
};
|
|
227
|
+
stack.push(tree);
|
|
228
|
+
while(!isEmpty(stack) && !isEmpty(tokens)){
|
|
229
|
+
const curr = tokens.shift();
|
|
230
|
+
const top = getTop(stack);
|
|
231
|
+
if (curr.type === 'string') {
|
|
232
|
+
appendChild(top, curr.value);
|
|
233
|
+
} else if (utils.isPair(top, curr)) {
|
|
234
|
+
const node = stack.pop();
|
|
235
|
+
if (!isEmpty(stack)) {
|
|
236
|
+
appendChild(getTop(stack), node);
|
|
237
|
+
}
|
|
238
|
+
} else if (curr.type === 'self-close') {
|
|
239
|
+
appendChild(top, curr);
|
|
240
|
+
} else if (curr.type === 'start') {
|
|
241
|
+
stack.push(curr);
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
while(stack.length > 1){
|
|
245
|
+
const node = stack.pop();
|
|
246
|
+
appendChild(getTop(stack), node);
|
|
247
|
+
}
|
|
248
|
+
return tree.children;
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
function html2any(html, rule) {
|
|
252
|
+
return transform(parse(html)[0], rule);
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
export { html2any as default, parse, transform };
|
package/package.json
CHANGED
|
@@ -1,28 +1,50 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "html2any",
|
|
3
|
-
"version": "0.
|
|
4
|
-
"description": "
|
|
5
|
-
"
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
"
|
|
9
|
-
"
|
|
10
|
-
"
|
|
3
|
+
"version": "0.1.1",
|
|
4
|
+
"description": "understand html with flexible transform",
|
|
5
|
+
"keywords": [
|
|
6
|
+
"html",
|
|
7
|
+
"parser",
|
|
8
|
+
"transform",
|
|
9
|
+
"html-transform",
|
|
10
|
+
"html-parser"
|
|
11
|
+
],
|
|
12
|
+
"homepage": "https://github.com/huozhi/html2any#readme",
|
|
13
|
+
"bugs": {
|
|
14
|
+
"url": "https://github.com/huozhi/html2any/issues"
|
|
11
15
|
},
|
|
12
|
-
"
|
|
13
|
-
"
|
|
16
|
+
"repository": {
|
|
17
|
+
"type": "git",
|
|
18
|
+
"url": "git+https://github.com/huozhi/html2any.git"
|
|
19
|
+
},
|
|
20
|
+
"type": "module",
|
|
21
|
+
"main": "dist/index.js",
|
|
22
|
+
"scripts": {
|
|
23
|
+
"test": "bun test",
|
|
24
|
+
"build": "bunchee",
|
|
25
|
+
"prepublishOnly": "bun run build",
|
|
26
|
+
"web:dev": "next dev web",
|
|
27
|
+
"web:build": "next build web",
|
|
28
|
+
"web:check-load": "node web/scripts/check-load.mjs",
|
|
29
|
+
"web:start": "next start web"
|
|
14
30
|
},
|
|
15
31
|
"files": [
|
|
16
32
|
"*.md",
|
|
17
|
-
"
|
|
33
|
+
"dist"
|
|
18
34
|
],
|
|
19
|
-
"author": "huozhi
|
|
20
|
-
"license": "
|
|
35
|
+
"author": "huozhi",
|
|
36
|
+
"license": "MIT",
|
|
21
37
|
"devDependencies": {
|
|
22
|
-
"
|
|
23
|
-
"
|
|
24
|
-
"
|
|
25
|
-
"react": "^
|
|
26
|
-
"
|
|
38
|
+
"bunchee": "^6.10.0",
|
|
39
|
+
"react": "^19.2.6",
|
|
40
|
+
"next": "^16.2.6",
|
|
41
|
+
"react-dom": "^19.2.6",
|
|
42
|
+
"tailwindcss": "^4.3.0",
|
|
43
|
+
"@tailwindcss/postcss": "^4.3.0",
|
|
44
|
+
"sugar-high": "^1.2.0",
|
|
45
|
+
"typescript": "^6.0.3",
|
|
46
|
+
"@types/node": "^25.9.1",
|
|
47
|
+
"@types/react": "^19.2.15",
|
|
48
|
+
"@types/react-dom": "^19.2.3"
|
|
27
49
|
}
|
|
28
50
|
}
|
package/lib/index.js
DELETED
|
@@ -1,210 +0,0 @@
|
|
|
1
|
-
function _defineProperty(obj, key, value) {
|
|
2
|
-
if (key in obj) {
|
|
3
|
-
Object.defineProperty(obj, key, {
|
|
4
|
-
value: value,
|
|
5
|
-
enumerable: true,
|
|
6
|
-
configurable: true,
|
|
7
|
-
writable: true
|
|
8
|
-
});
|
|
9
|
-
} else {
|
|
10
|
-
obj[key] = value;
|
|
11
|
-
}
|
|
12
|
-
|
|
13
|
-
return obj;
|
|
14
|
-
}
|
|
15
|
-
|
|
16
|
-
var voidElementTags = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr'];
|
|
17
|
-
|
|
18
|
-
function isSelfClose(tagName) {
|
|
19
|
-
return voidElementTags.indexOf(tagName) > -1;
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
function isPair(tagX, tagY) {
|
|
23
|
-
return tagX.name === tagY.name && tagX.type === 'start' && tagY.type === 'end';
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
var utils = {
|
|
27
|
-
isPair: isPair,
|
|
28
|
-
isSelfClose: isSelfClose
|
|
29
|
-
};
|
|
30
|
-
|
|
31
|
-
var ATTR_FIND = /((^\w|\s+)[a-zA-Z-:]+)(="[^"]+"|\s+|\s*$)?/;
|
|
32
|
-
|
|
33
|
-
function extraAttrs(str) {
|
|
34
|
-
var i = 0;
|
|
35
|
-
var attrs = {};
|
|
36
|
-
|
|
37
|
-
while (i < str.length) {
|
|
38
|
-
var suffix = str.slice(i);
|
|
39
|
-
var match = ATTR_FIND.exec(suffix);
|
|
40
|
-
|
|
41
|
-
if (!match || !match[1]) {
|
|
42
|
-
break;
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
var result = match[0];
|
|
46
|
-
var key = match[1];
|
|
47
|
-
var value = match[3];
|
|
48
|
-
key = key.trim();
|
|
49
|
-
value = value && value.trim();
|
|
50
|
-
attrs[key] = value && value[0] === '=' ? value.slice(2, -1) : true;
|
|
51
|
-
i += result.length;
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
return attrs;
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
function makeToken(tag) {
|
|
58
|
-
var isTag = tag[0] === '<' && tag[tag.length - 1] === '>';
|
|
59
|
-
|
|
60
|
-
if (!isTag) {
|
|
61
|
-
return {
|
|
62
|
-
type: 'string',
|
|
63
|
-
value: tag
|
|
64
|
-
};
|
|
65
|
-
} else if (tag.startsWith('</')) {
|
|
66
|
-
return {
|
|
67
|
-
type: 'end',
|
|
68
|
-
name: tag.slice(2, -1)
|
|
69
|
-
};
|
|
70
|
-
} else {
|
|
71
|
-
var match = tag.match(/<([\w+:?\w*]+)\s*([^>]*)/);
|
|
72
|
-
var tagName = match[1];
|
|
73
|
-
var tagBody = match[2];
|
|
74
|
-
return {
|
|
75
|
-
type: utils.isSelfClose(tagName) || tagBody[tagBody.length - 1] === '/' ? 'self-close' : 'start',
|
|
76
|
-
name: tagName,
|
|
77
|
-
attributes: extraAttrs(tagBody)
|
|
78
|
-
};
|
|
79
|
-
}
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
function splitTokens(html) {
|
|
83
|
-
var i = 0;
|
|
84
|
-
var j = 0;
|
|
85
|
-
var tokens = [];
|
|
86
|
-
|
|
87
|
-
while (i < html.length) {
|
|
88
|
-
var curr = html[i];
|
|
89
|
-
|
|
90
|
-
if (curr === '<') {
|
|
91
|
-
if (j < i) {
|
|
92
|
-
tokens.push(html.slice(j, i));
|
|
93
|
-
j = i;
|
|
94
|
-
}
|
|
95
|
-
|
|
96
|
-
var k = i;
|
|
97
|
-
|
|
98
|
-
while (html[k] !== '>') {
|
|
99
|
-
k++;
|
|
100
|
-
}
|
|
101
|
-
|
|
102
|
-
tokens.push(html.slice(i, k + 1));
|
|
103
|
-
i = j = k + 1;
|
|
104
|
-
continue;
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
i++;
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
return tokens;
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
function tokenize(html) {
|
|
114
|
-
return splitTokens(html).map(function (s) {
|
|
115
|
-
return s.replace(/^\n|\n$/g, '');
|
|
116
|
-
}).map(function (s) {
|
|
117
|
-
return s.trim();
|
|
118
|
-
}).filter(Boolean).map(makeToken);
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
function isEmpty(stack) {
|
|
122
|
-
return stack.length === 0;
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
function getTop(stack) {
|
|
126
|
-
return stack[stack.length - 1];
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
function appendChild(node, child) {
|
|
130
|
-
if (!node.children) {
|
|
131
|
-
node.children = [];
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
node.children.push(filterProps(child));
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
function filterProps(node) {
|
|
138
|
-
if (typeof node === 'string') {
|
|
139
|
-
return node;
|
|
140
|
-
}
|
|
141
|
-
|
|
142
|
-
return ['name', 'children', 'attributes'].reduce(function (r, c) {
|
|
143
|
-
return Object.assign({}, r, _defineProperty({}, c, node[c]));
|
|
144
|
-
}, {});
|
|
145
|
-
}
|
|
146
|
-
|
|
147
|
-
function parse(src) {
|
|
148
|
-
var tokens = tokenize(src);
|
|
149
|
-
var stack = [];
|
|
150
|
-
var tree = {
|
|
151
|
-
type: 'root',
|
|
152
|
-
children: []
|
|
153
|
-
};
|
|
154
|
-
stack.push(tree);
|
|
155
|
-
|
|
156
|
-
while (!isEmpty(stack) && !isEmpty(tokens)) {
|
|
157
|
-
var curr = tokens.shift();
|
|
158
|
-
var top = getTop(stack);
|
|
159
|
-
|
|
160
|
-
if (curr.type === 'string') {
|
|
161
|
-
appendChild(top, curr.value);
|
|
162
|
-
} else if (utils.isPair(top, curr)) {
|
|
163
|
-
var node = stack.pop();
|
|
164
|
-
|
|
165
|
-
if (!isEmpty(stack)) {
|
|
166
|
-
appendChild(getTop(stack), node);
|
|
167
|
-
}
|
|
168
|
-
} else if (curr.type === 'self-close') {
|
|
169
|
-
appendChild(top, curr);
|
|
170
|
-
} else if (curr.type === 'start') {
|
|
171
|
-
stack.push(curr);
|
|
172
|
-
}
|
|
173
|
-
}
|
|
174
|
-
|
|
175
|
-
return tree.children;
|
|
176
|
-
}
|
|
177
|
-
|
|
178
|
-
function transform(ast, rule) {
|
|
179
|
-
function next(node) {
|
|
180
|
-
if (node) {
|
|
181
|
-
if (typeof node === 'string') {
|
|
182
|
-
return rule(node);
|
|
183
|
-
}
|
|
184
|
-
|
|
185
|
-
if (Array.isArray(node)) {
|
|
186
|
-
return node.map(function (n, index) {
|
|
187
|
-
if (typeof n !== 'string') {
|
|
188
|
-
n.index = index; // critical array element index
|
|
189
|
-
}
|
|
190
|
-
|
|
191
|
-
return rule(n, next(n.children));
|
|
192
|
-
});
|
|
193
|
-
} else {
|
|
194
|
-
return rule(node, next(node.children));
|
|
195
|
-
}
|
|
196
|
-
}
|
|
197
|
-
|
|
198
|
-
return null;
|
|
199
|
-
}
|
|
200
|
-
|
|
201
|
-
return next(ast);
|
|
202
|
-
}
|
|
203
|
-
|
|
204
|
-
var html2any = function html2any(html, rule) {
|
|
205
|
-
return transform(parse(html)[0], rule);
|
|
206
|
-
};
|
|
207
|
-
|
|
208
|
-
exports.default = html2any;
|
|
209
|
-
exports.parse = parse;
|
|
210
|
-
exports.transform = transform;
|
package/lib/index.mjs
DELETED
|
@@ -1,209 +0,0 @@
|
|
|
1
|
-
function _defineProperty(obj, key, value) {
|
|
2
|
-
if (key in obj) {
|
|
3
|
-
Object.defineProperty(obj, key, {
|
|
4
|
-
value: value,
|
|
5
|
-
enumerable: true,
|
|
6
|
-
configurable: true,
|
|
7
|
-
writable: true
|
|
8
|
-
});
|
|
9
|
-
} else {
|
|
10
|
-
obj[key] = value;
|
|
11
|
-
}
|
|
12
|
-
|
|
13
|
-
return obj;
|
|
14
|
-
}
|
|
15
|
-
|
|
16
|
-
var voidElementTags = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr'];
|
|
17
|
-
|
|
18
|
-
function isSelfClose(tagName) {
|
|
19
|
-
return voidElementTags.indexOf(tagName) > -1;
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
function isPair(tagX, tagY) {
|
|
23
|
-
return tagX.name === tagY.name && tagX.type === 'start' && tagY.type === 'end';
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
var utils = {
|
|
27
|
-
isPair: isPair,
|
|
28
|
-
isSelfClose: isSelfClose
|
|
29
|
-
};
|
|
30
|
-
|
|
31
|
-
var ATTR_FIND = /((^\w|\s+)[a-zA-Z-:]+)(="[^"]+"|\s+|\s*$)?/;
|
|
32
|
-
|
|
33
|
-
function extraAttrs(str) {
|
|
34
|
-
var i = 0;
|
|
35
|
-
var attrs = {};
|
|
36
|
-
|
|
37
|
-
while (i < str.length) {
|
|
38
|
-
var suffix = str.slice(i);
|
|
39
|
-
var match = ATTR_FIND.exec(suffix);
|
|
40
|
-
|
|
41
|
-
if (!match || !match[1]) {
|
|
42
|
-
break;
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
var result = match[0];
|
|
46
|
-
var key = match[1];
|
|
47
|
-
var value = match[3];
|
|
48
|
-
key = key.trim();
|
|
49
|
-
value = value && value.trim();
|
|
50
|
-
attrs[key] = value && value[0] === '=' ? value.slice(2, -1) : true;
|
|
51
|
-
i += result.length;
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
return attrs;
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
function makeToken(tag) {
|
|
58
|
-
var isTag = tag[0] === '<' && tag[tag.length - 1] === '>';
|
|
59
|
-
|
|
60
|
-
if (!isTag) {
|
|
61
|
-
return {
|
|
62
|
-
type: 'string',
|
|
63
|
-
value: tag
|
|
64
|
-
};
|
|
65
|
-
} else if (tag.startsWith('</')) {
|
|
66
|
-
return {
|
|
67
|
-
type: 'end',
|
|
68
|
-
name: tag.slice(2, -1)
|
|
69
|
-
};
|
|
70
|
-
} else {
|
|
71
|
-
var match = tag.match(/<([\w+:?\w*]+)\s*([^>]*)/);
|
|
72
|
-
var tagName = match[1];
|
|
73
|
-
var tagBody = match[2];
|
|
74
|
-
return {
|
|
75
|
-
type: utils.isSelfClose(tagName) || tagBody[tagBody.length - 1] === '/' ? 'self-close' : 'start',
|
|
76
|
-
name: tagName,
|
|
77
|
-
attributes: extraAttrs(tagBody)
|
|
78
|
-
};
|
|
79
|
-
}
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
function splitTokens(html) {
|
|
83
|
-
var i = 0;
|
|
84
|
-
var j = 0;
|
|
85
|
-
var tokens = [];
|
|
86
|
-
|
|
87
|
-
while (i < html.length) {
|
|
88
|
-
var curr = html[i];
|
|
89
|
-
|
|
90
|
-
if (curr === '<') {
|
|
91
|
-
if (j < i) {
|
|
92
|
-
tokens.push(html.slice(j, i));
|
|
93
|
-
j = i;
|
|
94
|
-
}
|
|
95
|
-
|
|
96
|
-
var k = i;
|
|
97
|
-
|
|
98
|
-
while (html[k] !== '>') {
|
|
99
|
-
k++;
|
|
100
|
-
}
|
|
101
|
-
|
|
102
|
-
tokens.push(html.slice(i, k + 1));
|
|
103
|
-
i = j = k + 1;
|
|
104
|
-
continue;
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
i++;
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
return tokens;
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
function tokenize(html) {
|
|
114
|
-
return splitTokens(html).map(function (s) {
|
|
115
|
-
return s.replace(/^\n|\n$/g, '');
|
|
116
|
-
}).map(function (s) {
|
|
117
|
-
return s.trim();
|
|
118
|
-
}).filter(Boolean).map(makeToken);
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
function isEmpty(stack) {
|
|
122
|
-
return stack.length === 0;
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
function getTop(stack) {
|
|
126
|
-
return stack[stack.length - 1];
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
function appendChild(node, child) {
|
|
130
|
-
if (!node.children) {
|
|
131
|
-
node.children = [];
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
node.children.push(filterProps(child));
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
function filterProps(node) {
|
|
138
|
-
if (typeof node === 'string') {
|
|
139
|
-
return node;
|
|
140
|
-
}
|
|
141
|
-
|
|
142
|
-
return ['name', 'children', 'attributes'].reduce(function (r, c) {
|
|
143
|
-
return Object.assign({}, r, _defineProperty({}, c, node[c]));
|
|
144
|
-
}, {});
|
|
145
|
-
}
|
|
146
|
-
|
|
147
|
-
function parse(src) {
|
|
148
|
-
var tokens = tokenize(src);
|
|
149
|
-
var stack = [];
|
|
150
|
-
var tree = {
|
|
151
|
-
type: 'root',
|
|
152
|
-
children: []
|
|
153
|
-
};
|
|
154
|
-
stack.push(tree);
|
|
155
|
-
|
|
156
|
-
while (!isEmpty(stack) && !isEmpty(tokens)) {
|
|
157
|
-
var curr = tokens.shift();
|
|
158
|
-
var top = getTop(stack);
|
|
159
|
-
|
|
160
|
-
if (curr.type === 'string') {
|
|
161
|
-
appendChild(top, curr.value);
|
|
162
|
-
} else if (utils.isPair(top, curr)) {
|
|
163
|
-
var node = stack.pop();
|
|
164
|
-
|
|
165
|
-
if (!isEmpty(stack)) {
|
|
166
|
-
appendChild(getTop(stack), node);
|
|
167
|
-
}
|
|
168
|
-
} else if (curr.type === 'self-close') {
|
|
169
|
-
appendChild(top, curr);
|
|
170
|
-
} else if (curr.type === 'start') {
|
|
171
|
-
stack.push(curr);
|
|
172
|
-
}
|
|
173
|
-
}
|
|
174
|
-
|
|
175
|
-
return tree.children;
|
|
176
|
-
}
|
|
177
|
-
|
|
178
|
-
function transform(ast, rule) {
|
|
179
|
-
function next(node) {
|
|
180
|
-
if (node) {
|
|
181
|
-
if (typeof node === 'string') {
|
|
182
|
-
return rule(node);
|
|
183
|
-
}
|
|
184
|
-
|
|
185
|
-
if (Array.isArray(node)) {
|
|
186
|
-
return node.map(function (n, index) {
|
|
187
|
-
if (typeof n !== 'string') {
|
|
188
|
-
n.index = index; // critical array element index
|
|
189
|
-
}
|
|
190
|
-
|
|
191
|
-
return rule(n, next(n.children));
|
|
192
|
-
});
|
|
193
|
-
} else {
|
|
194
|
-
return rule(node, next(node.children));
|
|
195
|
-
}
|
|
196
|
-
}
|
|
197
|
-
|
|
198
|
-
return null;
|
|
199
|
-
}
|
|
200
|
-
|
|
201
|
-
return next(ast);
|
|
202
|
-
}
|
|
203
|
-
|
|
204
|
-
var html2any = function html2any(html, rule) {
|
|
205
|
-
return transform(parse(html)[0], rule);
|
|
206
|
-
};
|
|
207
|
-
|
|
208
|
-
export default html2any;
|
|
209
|
-
export { parse, transform };
|