html2any 0.0.3 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -38
- package/lib/index.js +196 -9
- package/lib/index.js.map +1 -0
- package/lib/index.mjs +196 -0
- package/lib/index.mjs.map +1 -0
- package/package.json +19 -18
- package/lib/parser.js +0 -59
- package/lib/tokenizer.js +0 -84
- package/lib/transform.js +0 -25
- package/lib/utils.js +0 -14
package/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
[](https://badge.fury.io/js/html2any)
|
|
4
4
|
|
|
5
|
-
> A non-dependecy package for coverting html string to your customized format/
|
|
5
|
+
> A dependency-free package for converting HTML/XML strings into your own custom formats/structures.
|
|
6
6
|
|
|
7
7
|
While building websites, people may run into issues when rendering rich text into different formats.
|
|
8
8
|
For example, I've got a `<video>` tag, but I want to render it with my own React video component.
|
|
@@ -10,62 +10,59 @@ But I also want to render the whole html easily rather than parse it manually.
|
|
|
10
10
|
|
|
11
11
|
Now `html2any` helps you render HTML strings. It not only parses your HTML but also gives you the ability to transform it from the original form into the target form.
|
|
12
12
|
|
|
13
|
+
### API
|
|
13
14
|
|
|
14
|
-
|
|
15
|
+
html2any provides the following APIs:
|
|
15
16
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
### Demo
|
|
21
|
-
|
|
22
|
-
- [Web React Online Demo](https://huozhi.github.io/html2any-web-demo/)
|
|
23
|
-
- React Native demo: https://github.com/huozhi/html2any-rn-demo
|
|
24
|
-
|
|
25
|
-
### Documentation
|
|
26
|
-
|
|
27
|
-
html2any provide 3 APIs below:
|
|
28
|
-
|
|
29
|
-
- API
|
|
30
|
-
- `Array[String] tokenizer(String html)`
|
|
31
|
-
- `AST(Object) parse(Array[String] tokens)`
|
|
32
|
-
- `transform(AST ast, function rule)`
|
|
33
|
-
|
|
34
|
-
- tokenizer
|
|
35
|
-
> Give you ability to parse raw html string to tokens
|
|
17
|
+
- `Array[AST(Object)] parse(String source)`
|
|
18
|
+
- `any transform(AST ast, function rule)`
|
|
19
|
+
- `any html2any(String html, function rule)`
|
|
36
20
|
|
|
37
21
|
- parse
|
|
38
|
-
> Build the AST from
|
|
22
|
+
> Build the AST from the source HTML/XML code.
|
|
39
23
|
|
|
40
24
|
- transform
|
|
41
25
|
> Convert the AST to the final form with the specific rule.
|
|
42
26
|
|
|
43
|
-
|
|
27
|
+
- html2any
|
|
28
|
+
> Convert the html/xml to the final form directly.
|
|
44
29
|
|
|
45
|
-
|
|
30
|
+
### Usage
|
|
31
|
+
|
|
32
|
+
```
|
|
33
|
+
npm i -S html2any
|
|
34
|
+
```
|
|
46
35
|
|
|
47
36
|
```js
|
|
48
|
-
import
|
|
37
|
+
import html2any, { parse, transform } from 'html2any'
|
|
38
|
+
|
|
39
|
+
const html = escapeHTMLEntity(`<div>123</div>`)
|
|
49
40
|
|
|
50
|
-
const
|
|
51
|
-
<div>123</div>
|
|
52
|
-
`
|
|
41
|
+
const ast = parse(html)[0]
|
|
53
42
|
|
|
54
|
-
const ast = parse(tokenizer(html))[0]
|
|
55
43
|
function rule(node, children) {
|
|
56
|
-
if (node
|
|
57
|
-
return
|
|
44
|
+
if (typeof node === 'string') {
|
|
45
|
+
return node
|
|
58
46
|
} else {
|
|
59
|
-
return node
|
|
47
|
+
return <div>{node}</div>
|
|
60
48
|
}
|
|
61
49
|
}
|
|
62
50
|
|
|
63
|
-
const
|
|
64
|
-
//
|
|
51
|
+
const vdom = transform(ast, rule)
|
|
52
|
+
// JSX vdom form of html
|
|
65
53
|
// { type: 'div', props: {...}, children: '...' }
|
|
54
|
+
|
|
66
55
|
```
|
|
67
56
|
|
|
68
|
-
|
|
57
|
+
Or you can just call html2any directly
|
|
58
|
+
|
|
59
|
+
```js
|
|
60
|
+
const vdom = html2any(html, rule)
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### How It Works
|
|
64
|
+
|
|
65
|
+
Use `html2any` to construct an AST from an HTML string, then convert each node recursively with the `rule` passed to the transform function.
|
|
66
|
+
|
|
67
|
+
For example, we translate the `<p>` tag into the React Native component `<Text style={styles.paragraph}>` with the prepared styles, then decode the `<p>` tag's content so that HTML entities do not get mangled.
|
|
69
68
|
|
|
70
|
-
Any suggestions or beautifier API you expect, just post issue [here](https://github.com/huozhi/html2any/issues).
|
|
71
|
-
Reporting bugs is welcomed. : )
|
package/lib/index.js
CHANGED
|
@@ -1,11 +1,198 @@
|
|
|
1
|
-
|
|
1
|
+
/**
 * Walks an AST and converts every node with the caller-supplied `rule`.
 *
 * @param {*} ast - A text node (string), an element node (object with an
 *   optional `children` array), or an array of such nodes.
 * @param {Function} rule - Called as `rule(node, convertedChildren)`.
 *   A string passed directly as `ast` is converted with `rule(node)` only.
 * @returns {*} Whatever `rule` returns (an array of results when `ast` is an
 *   array), or `null` when `ast` is falsy.
 */
function transform(ast, rule) {
  // Converts the children first, then hands the node plus the converted
  // children to the rule. Strings have no `children`, so they receive `null`.
  function convert(item) {
    return rule(item, next(item.children));
  }

  function next(node) {
    if (!node) {
      return null;
    }

    if (typeof node === 'string') {
      return rule(node);
    }

    if (!Array.isArray(node)) {
      return convert(node);
    }

    return node.map(function (child, position) {
      if (typeof child !== 'string') {
        // Written onto the AST node itself so the rule can read the
        // element's position among its siblings.
        child.index = position;
      }

      return convert(child);
    });
  }

  return next(ast);
}
|
|
26
|
+
|
|
27
|
+
// Tag names that are tokenized as self-closing even without a trailing `/`.
var voidElementTags = [
  'area',
  'base',
  'br',
  'col',
  'embed',
  'hr',
  'img',
  'input',
  'link',
  'meta',
  'param',
  'source',
  'track',
  'wbr'
];

/**
 * Tells whether `tagName` is one of the known void element names.
 * The comparison is exact (case-sensitive).
 *
 * @param {string} tagName
 * @returns {boolean}
 */
function isSelfClose(tagName) {
  return voidElementTags.some(function (voidTag) {
    return voidTag === tagName;
  });
}

/**
 * Tells whether `tagY` is the end token that closes the start token `tagX`.
 *
 * @param {{name: string, type: string}} tagX - Candidate opening token.
 * @param {{name: string, type: string}} tagY - Candidate closing token.
 * @returns {boolean}
 */
function isPair(tagX, tagY) {
  if (tagX.name !== tagY.name) {
    return false;
  }

  if (tagX.type !== 'start') {
    return false;
  }

  return tagY.type === 'end';
}

// Helpers grouped under one namespace, as the tokenizer and parser use them.
var utils = {
  isPair: isPair,
  isSelfClose: isSelfClose
};
|
|
41
|
+
|
|
42
|
+
// One attribute: a name (group 1) optionally followed by `=` and a value that
// is double-quoted (group 2), single-quoted (group 3) or unquoted (group 4).
// The name may contain anything except whitespace, quotes, `<`, `>`, `/`, `=`,
// so single-character names, digits and underscores are all accepted.
var ATTR_FIND = /([^\s"'<>\/=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+)))?/g;

/**
 * Extracts the attributes from the text that follows the tag name inside a
 * start tag (for example `class="a" disabled`).
 *
 * @param {string} str - Attribute text; a trailing `/` is ignored.
 * @returns {Object} Map from attribute name to its string value, or to `true`
 *   for an attribute written without a value. An explicitly empty value
 *   (`alt=""`) yields the empty string.
 */
function extraAttrs(str) {
  var attrs = {};

  if (!str) {
    return attrs;
  }

  var match;
  // The pattern is global and shared between calls: always start at the
  // beginning so state left by an earlier call cannot skip attributes.
  ATTR_FIND.lastIndex = 0;

  while ((match = ATTR_FIND.exec(str)) !== null) {
    var value = match[2];

    if (value === undefined) {
      value = match[3];
    }

    if (value === undefined) {
      value = match[4];
    }

    attrs[match[1]] = value === undefined ? true : value;
  }

  return attrs;
}
|
|
67
|
+
|
|
68
|
+
/**
 * Classifies one raw piece produced by the splitter into a token object.
 *
 * Token shapes:
 *  - `{ type: 'string', value }`       plain text, or markup that is not a tag
 *  - `{ type: 'comment', value }`      `<!-- ... -->`
 *  - `{ type: 'declaration', value }`  `<!DOCTYPE ...>`, `<?xml ...?>` and similar
 *  - `{ type: 'end', name }`           closing tag
 *  - `{ type: 'start' | 'self-close', name, attributes }`
 *
 * Comment and declaration tokens have no `name`; the parser only acts on
 * string, start, self-close and end tokens, so they stay out of the tree
 * instead of throwing or being treated as a never-closed element.
 *
 * @param {string} tag - Raw text piece, e.g. `<div class="a">` or `hello`.
 * @returns {Object} The token.
 */
function makeToken(tag) {
  var isTag = tag[0] === '<' && tag[tag.length - 1] === '>';

  if (!isTag) {
    return {
      type: 'string',
      value: tag
    };
  }

  if (tag.slice(0, 4) === '<!--') {
    return {
      type: 'comment',
      value: tag
    };
  }

  if (tag[1] === '!' || tag[1] === '?') {
    return {
      type: 'declaration',
      value: tag
    };
  }

  if (tag[1] === '/') {
    return {
      type: 'end',
      // Trim so that `</div >` still pairs with `<div>`.
      name: tag.slice(2, -1).trim()
    };
  }

  // The tag name runs up to the first whitespace, `/` or `>`, which keeps
  // hyphenated and namespaced names (`my-element`, `svg:rect`) intact.
  var match = /^<([^\s\/>]+)\s*([^>]*)/.exec(tag);

  if (!match) {
    // Something like `<>`: not a real tag, keep it as text.
    return {
      type: 'string',
      value: tag
    };
  }

  var tagName = match[1];
  var tagBody = match[2];
  var selfClosing = utils.isSelfClose(tagName) || tagBody[tagBody.length - 1] === '/';

  return {
    type: selfClosing ? 'self-close' : 'start',
    name: tagName,
    attributes: extraAttrs(tagBody)
  };
}
|
|
92
|
+
|
|
93
|
+
/**
 * Splits markup into raw pieces: each `<...>` tag becomes one piece and each
 * run of text between tags becomes one piece, in document order.
 *
 * A tag ends at the first `>` after its `<`; a `>` inside an attribute value
 * is not treated specially. A `<` that is never closed is kept as text
 * together with everything after it.
 *
 * @param {string} html - Source markup.
 * @returns {string[]} Raw tag and text pieces (not trimmed).
 */
function splitTokens(html) {
  var tokens = [];
  var pos = 0;

  while (pos < html.length) {
    var open = html.indexOf('<', pos);
    var close = open === -1 ? -1 : html.indexOf('>', open);

    if (close === -1) {
      // No further complete tag: the remainder is text. This also covers
      // trailing text after the last tag and input without any tag.
      tokens.push(html.slice(pos));
      break;
    }

    if (open > pos) {
      tokens.push(html.slice(pos, open));
    }

    tokens.push(html.slice(open, close + 1));
    pos = close + 1;
  }

  return tokens;
}
|
|
123
|
+
|
|
124
|
+
/**
 * Turns markup into a list of tokens: splits it into raw pieces, strips
 * surrounding whitespace, discards pieces that end up empty and converts the
 * rest with `makeToken`.
 *
 * @param {string} html - Source markup.
 * @returns {Object[]} Tokens in document order.
 */
function tokenize(html) {
  var cleaned = [];

  splitTokens(html).forEach(function (piece) {
    var text = piece.replace(/^\n+$/g, '').trim();

    if (text) {
      cleaned.push(text);
    }
  });

  return cleaned.map(makeToken);
}
|
|
131
|
+
|
|
132
|
+
/**
 * Reports whether an array-like holds no elements.
 *
 * @param {{length: number}} stack
 * @returns {boolean} `true` when `stack.length` is exactly 0.
 */
function isEmpty(stack) {
  var size = stack.length;
  return size === 0;
}
|
|
135
|
+
|
|
136
|
+
/**
 * Returns the last element of the stack without removing it.
 *
 * @param {Array} stack
 * @returns {*} The last element, or `undefined` for an empty stack.
 */
function getTop(stack) {
  var lastIndex = stack.length - 1;
  return stack[lastIndex];
}
|
|
139
|
+
|
|
140
|
+
/**
 * Adds `child` to `node.children`, creating the array on first use.
 * The child is passed through `filterProps` before it is stored.
 *
 * @param {Object} node - Parent node; mutated in place.
 * @param {string|Object} child - Text or element to attach.
 * @returns {void}
 */
function appendChild(node, child) {
  var kids = node.children;

  if (!kids) {
    kids = [];
    node.children = kids;
  }

  kids.push(filterProps(child));
}
|
|
147
|
+
|
|
148
|
+
/**
 * Reduces a token to the fields kept in the AST.
 *
 * @param {string|Object} node - Text (returned unchanged) or a token object.
 * @returns {string|Object} For objects, a new object with exactly the keys
 *   `name`, `children` and `attributes` (each present even when its value is
 *   `undefined`); every other field, such as `type`, is left behind.
 */
function filterProps(node) {
  if (typeof node === 'string') {
    return node;
  }

  return {
    name: node.name,
    children: node.children,
    attributes: node.attributes
  };
}
|
|
159
|
+
|
|
160
|
+
function parse(src) {
|
|
161
|
+
var tokens = tokenize(src);
|
|
162
|
+
var stack = [];
|
|
163
|
+
var tree = {
|
|
164
|
+
type: 'root',
|
|
165
|
+
children: []
|
|
166
|
+
};
|
|
167
|
+
stack.push(tree);
|
|
168
|
+
|
|
169
|
+
while (!isEmpty(stack) && !isEmpty(tokens)) {
|
|
170
|
+
var curr = tokens.shift();
|
|
171
|
+
var top = getTop(stack);
|
|
172
|
+
|
|
173
|
+
if (curr.type === 'string') {
|
|
174
|
+
appendChild(top, curr.value);
|
|
175
|
+
} else if (utils.isPair(top, curr)) {
|
|
176
|
+
var node = stack.pop();
|
|
177
|
+
|
|
178
|
+
if (!isEmpty(stack)) {
|
|
179
|
+
appendChild(getTop(stack), node);
|
|
180
|
+
}
|
|
181
|
+
} else if (curr.type === 'self-close') {
|
|
182
|
+
appendChild(top, curr);
|
|
183
|
+
} else if (curr.type === 'start') {
|
|
184
|
+
stack.push(curr);
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
return tree.children;
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
/**
 * Parses markup and converts its first top-level node in one step.
 *
 * @param {string} html - Source markup.
 * @param {Function} rule - Conversion rule handed to `transform`.
 * @returns {*} The result of `transform` for the first node returned by
 *   `parse`; any further top-level nodes are not converted.
 */
function html2any(html, rule) {
  var nodes = parse(html);
  var firstNode = nodes[0];
  return transform(firstNode, rule);
}
|
|
194
|
+
|
|
195
|
+
exports['default'] = html2any;
|
|
196
|
+
exports.parse = parse;
|
|
197
|
+
exports.transform = transform;
|
|
198
|
+
//# sourceMappingURL=index.js.map
|
package/lib/index.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sources":["../src/transform.js","../src/utils.js","../src/tokenize.js","../src/parse.js","../src/index.js"],"sourcesContent":["function transform(ast, rule) {\n function next(node) {\n if (node) {\n if (typeof node === 'string') {\n return rule(node)\n }\n if (Array.isArray(node)) {\n return node.map((n, index) => {\n if (typeof n !== 'string') {\n n.index = index // critical array element index\n }\n return rule(n, next(n.children))\n })\n } else {\n return rule(node, next(node.children))\n }\n }\n return null\n }\n return next(ast)\n}\n\nexport default transform\n","const voidElementTags = [\n 'area',\n 'base',\n 'br',\n 'col',\n 'embed',\n 'hr',\n 'img',\n 'input',\n 'link',\n 'meta',\n 'param',\n 'source',\n 'track',\n 'wbr',\n]\n\nfunction isSelfClose(tagName) {\n return voidElementTags.indexOf(tagName) > -1\n}\n\nfunction isPair(tagX, tagY) {\n return tagX.name === tagY.name && tagX.type === 'start' && tagY.type === 'end'\n}\n\nexport default { isPair, isSelfClose }\n","import utils from './utils'\n\n// assuming that quato always following equation - `=\"\"`\nconst ATTR_FIND = /((^\\w|\\s+)[a-zA-Z-:]+)(=\"[^\"]+\"|\\s+|\\s*$)?/\n\nfunction extraAttrs(str) {\n let i = 0\n const attrs = {}\n while (i < str.length) {\n const suffix = str.slice(i)\n const match = ATTR_FIND.exec(suffix)\n if (!match || !match[1]) {\n break\n }\n const result = match[0]\n let key = match[1]\n let value = match[3]\n\n key = key.trim()\n value = value && value.trim()\n\n attrs[key] = (value && value[0] === '=') ? 
value.slice(2, -1) : true\n i += result.length\n }\n return attrs\n}\n\nfunction makeToken(tag) {\n const isTag = tag[0] === '<' && tag[tag.length - 1] === '>'\n\n if (!isTag) {\n return {\n type: 'string',\n value: tag,\n }\n } else if (tag.startsWith('</')) {\n return {\n type: 'end',\n name: tag.slice(2, -1),\n }\n } else {\n const match = tag.match(/<([\\w+:?\\w*]+)\\s*([^>]*)/)\n const tagName = match[1]\n const tagBody = match[2]\n return {\n type: (utils.isSelfClose(tagName) || tagBody[tagBody.length - 1] === '/') ? 'self-close' : 'start',\n name: tagName,\n attributes: extraAttrs(tagBody),\n }\n }\n}\n\nfunction splitTokens(html) {\n let i = 0\n let j = 0\n const tokens = []\n while (i < html.length) {\n const curr = html[i]\n if (curr === '<') {\n if (j < i) {\n tokens.push(html.slice(j, i))\n j = i\n }\n let k = i\n while (html[k] !== '>') k++\n tokens.push(html.slice(i, k + 1))\n i = j = k + 1\n continue\n }\n i++\n }\n return tokens\n}\n\nfunction tokenize(html) {\n return splitTokens(html)\n .map(s => s.replace(/^\\n+$/g, ''))\n .map(s => s.trim())\n .filter(Boolean)\n .map(makeToken)\n}\n\nexport default tokenize\n","import tokenize from './tokenize'\nimport utils from './utils'\n\nfunction isEmpty(stack) {\n return stack.length === 0\n}\n\nfunction getTop(stack) {\n return stack[stack.length - 1]\n}\n\nfunction appendChild(node, child) {\n if (!node.children) {\n node.children = []\n }\n node.children.push(filterProps(child))\n}\n\nfunction filterProps(node) {\n if (typeof node === 'string') {\n return node\n }\n return ['name', 'children', 'attributes'].reduce(\n (r, c) => Object.assign({}, r, {[c]: node[c]}),\n {},\n )\n}\n\nfunction parse(src) {\n const tokens = tokenize(src)\n const stack = []\n const tree = {\n type: 'root',\n children: [],\n }\n\n stack.push(tree)\n while (!isEmpty(stack) && !isEmpty(tokens)) {\n const curr = tokens.shift()\n const top = getTop(stack)\n\n if (curr.type === 'string') {\n appendChild(top, curr.value)\n } else if 
(utils.isPair(top, curr)) {\n const node = stack.pop()\n if (!isEmpty(stack)) {\n appendChild(getTop(stack), node)\n }\n } else if (curr.type === 'self-close') {\n appendChild(top, curr)\n } else if (curr.type === 'start') {\n stack.push(curr)\n }\n }\n return tree.children\n}\n\nexport default parse\n","import transform from './transform'\nimport parse from './parse'\n\nfunction html2any(html, rule) {\n return transform(parse(html)[0], rule)\n}\n\nexport { parse, transform }\nexport default html2any\n"],"names":["transform","ast","rule","next","node","Array","isArray","map","n","index","children","voidElementTags","isSelfClose","tagName","indexOf","isPair","tagX","tagY","name","type","ATTR_FIND","extraAttrs","str","i","attrs","length","suffix","slice","match","exec","result","key","value","trim","makeToken","tag","isTag","startsWith","tagBody","utils","attributes","splitTokens","html","j","tokens","curr","push","k","tokenize","s","replace","filter","Boolean","isEmpty","stack","getTop","appendChild","child","filterProps","reduce","r","c","Object","assign","parse","src","tree","shift","top","pop","html2any"],"mappings":"AAAA,SAASA,SAAT,CAAmBC,GAAnB,EAAwBC,IAAxB,EAA8B;AAC5B,WAASC,IAAT,CAAcC,IAAd,EAAoB;AAClB,QAAIA,IAAJ,EAAU;AACR,UAAI,OAAOA,IAAP,KAAgB,QAApB,EAA8B;AAC5B,eAAOF,IAAI,CAACE,IAAD,CAAX;AACD;;AACD,UAAIC,KAAK,CAACC,OAAN,CAAcF,IAAd,CAAJ,EAAyB;AACvB,eAAOA,IAAI,CAACG,GAAL,CAAS,UAACC,CAAD,EAAIC,KAAJ,EAAc;AAC5B,cAAI,OAAOD,CAAP,KAAa,QAAjB,EAA2B;AACzBA,YAAAA,CAAC,CAACC,KAAF,GAAUA,KAAV,CADyB;AAE1B;;AACD,iBAAOP,IAAI,CAACM,CAAD,EAAIL,IAAI,CAACK,CAAC,CAACE,QAAH,CAAR,CAAX;AACD,SALM,CAAP;AAMD,OAPD,MAOO;AACL,eAAOR,IAAI,CAACE,IAAD,EAAOD,IAAI,CAACC,IAAI,CAACM,QAAN,CAAX,CAAX;AACD;AACF;;AACD,WAAO,IAAP;AACD;;AACD,SAAOP,IAAI,CAACF,GAAD,CAAX;AACD;;ACpBD,IAAMU,eAAe,GAAG,CACtB,MADsB,EAEtB,MAFsB,EAGtB,IAHsB,EAItB,KAJsB,EAKtB,OALsB,EAMtB,IANsB,EAOtB,KAPsB,EAQtB,OARsB,EAStB,MATsB,EAUtB,MAVsB,EAWtB,OAXsB,EAYtB,QAZsB,EAatB,OAbsB,EActB,KAdsB,CAAxB;;AAiBA,SAASC,WAAT,CAAqBC,OAArB,EAA8B;AAC5B,
SAAOF,eAAe,CAACG,OAAhB,CAAwBD,OAAxB,IAAmC,CAAC,CAA3C;AACD;;AAED,SAASE,MAAT,CAAgBC,IAAhB,EAAsBC,IAAtB,EAA4B;AAC1B,SAAOD,IAAI,CAACE,IAAL,KAAcD,IAAI,CAACC,IAAnB,IAA2BF,IAAI,CAACG,IAAL,KAAc,OAAzC,IAAoDF,IAAI,CAACE,IAAL,KAAc,KAAzE;AACD;;AAED,YAAe;AAAEJ,EAAAA,MAAM,EAANA,MAAF;AAAUH,EAAAA,WAAW,EAAXA;AAAV,CAAf;;ACtBA,IAAMQ,SAAS,GAAG,4CAAlB;;AAEA,SAASC,UAAT,CAAoBC,GAApB,EAAyB;AACvB,MAAIC,CAAC,GAAG,CAAR;AACA,MAAMC,KAAK,GAAG,EAAd;;AACA,SAAOD,CAAC,GAAGD,GAAG,CAACG,MAAf,EAAuB;AACrB,QAAMC,MAAM,GAAGJ,GAAG,CAACK,KAAJ,CAAUJ,CAAV,CAAf;AACA,QAAMK,KAAK,GAAGR,SAAS,CAACS,IAAV,CAAeH,MAAf,CAAd;;AACA,QAAI,CAACE,KAAD,IAAU,CAACA,KAAK,CAAC,CAAD,CAApB,EAAyB;AACvB;AACD;;AACD,QAAME,MAAM,GAAGF,KAAK,CAAC,CAAD,CAApB;AACA,QAAIG,GAAG,GAAGH,KAAK,CAAC,CAAD,CAAf;AACA,QAAII,KAAK,GAAGJ,KAAK,CAAC,CAAD,CAAjB;AAEAG,IAAAA,GAAG,GAAGA,GAAG,CAACE,IAAJ,EAAN;AACAD,IAAAA,KAAK,GAAGA,KAAK,IAAIA,KAAK,CAACC,IAAN,EAAjB;AAEAT,IAAAA,KAAK,CAACO,GAAD,CAAL,GAAcC,KAAK,IAAIA,KAAK,CAAC,CAAD,CAAL,KAAa,GAAvB,GAA8BA,KAAK,CAACL,KAAN,CAAY,CAAZ,EAAe,CAAC,CAAhB,CAA9B,GAAmD,IAAhE;AACAJ,IAAAA,CAAC,IAAIO,MAAM,CAACL,MAAZ;AACD;;AACD,SAAOD,KAAP;AACD;;AAED,SAASU,SAAT,CAAmBC,GAAnB,EAAwB;AACtB,MAAMC,KAAK,GAAGD,GAAG,CAAC,CAAD,CAAH,KAAW,GAAX,IAAkBA,GAAG,CAACA,GAAG,CAACV,MAAJ,GAAa,CAAd,CAAH,KAAwB,GAAxD;;AAEA,MAAI,CAACW,KAAL,EAAY;AACV,WAAO;AACLjB,MAAAA,IAAI,EAAE,QADD;AAELa,MAAAA,KAAK,EAAEG;AAFF,KAAP;AAID,GALD,MAKO,IAAIA,GAAG,CAACE,UAAJ,CAAe,IAAf,CAAJ,EAA0B;AAC/B,WAAO;AACLlB,MAAAA,IAAI,EAAE,KADD;AAELD,MAAAA,IAAI,EAAEiB,GAAG,CAACR,KAAJ,CAAU,CAAV,EAAa,CAAC,CAAd;AAFD,KAAP;AAID,GALM,MAKA;AACL,QAAMC,KAAK,GAAGO,GAAG,CAACP,KAAJ,CAAU,0BAAV,CAAd;AACA,QAAMf,OAAO,GAAGe,KAAK,CAAC,CAAD,CAArB;AACA,QAAMU,OAAO,GAAGV,KAAK,CAAC,CAAD,CAArB;AACA,WAAO;AACLT,MAAAA,IAAI,EAAGoB,KAAK,CAAC3B,WAAN,CAAkBC,OAAlB,KAA8ByB,OAAO,CAACA,OAAO,CAACb,MAAR,GAAiB,CAAlB,CAAP,KAAgC,GAA/D,GAAsE,YAAtE,GAAqF,OADtF;AAELP,MAAAA,IAAI,EAAEL,OAFD;AAGL2B,MAAAA,UAAU,EAAEnB,UAAU,CAACiB,OAAD;AAHjB,KAAP;AAKD;AACF;;AAED,SAASG,WAAT,CAAqBC,IAArB,EAA2B;AACzB,MAAInB,CAAC,GAAG,CAAR;AACA,MAAIoB,CAAC,GAAG,CAAR;A
ACA,MAAMC,MAAM,GAAG,EAAf;;AACA,SAAOrB,CAAC,GAAGmB,IAAI,CAACjB,MAAhB,EAAwB;AACtB,QAAMoB,IAAI,GAAGH,IAAI,CAACnB,CAAD,CAAjB;;AACA,QAAIsB,IAAI,KAAK,GAAb,EAAkB;AAChB,UAAIF,CAAC,GAAGpB,CAAR,EAAW;AACTqB,QAAAA,MAAM,CAACE,IAAP,CAAYJ,IAAI,CAACf,KAAL,CAAWgB,CAAX,EAAcpB,CAAd,CAAZ;AACAoB,QAAAA,CAAC,GAAGpB,CAAJ;AACD;;AACD,UAAIwB,CAAC,GAAGxB,CAAR;;AACA,aAAOmB,IAAI,CAACK,CAAD,CAAJ,KAAY,GAAnB;AAAwBA,QAAAA,CAAC;AAAzB;;AACAH,MAAAA,MAAM,CAACE,IAAP,CAAYJ,IAAI,CAACf,KAAL,CAAWJ,CAAX,EAAcwB,CAAC,GAAG,CAAlB,CAAZ;AACAxB,MAAAA,CAAC,GAAGoB,CAAC,GAAGI,CAAC,GAAG,CAAZ;AACA;AACD;;AACDxB,IAAAA,CAAC;AACF;;AACD,SAAOqB,MAAP;AACD;;AAED,SAASI,QAAT,CAAkBN,IAAlB,EAAwB;AACtB,SAAOD,WAAW,CAACC,IAAD,CAAX,CACJnC,GADI,CACA,UAAA0C,CAAC;AAAA,WAAIA,CAAC,CAACC,OAAF,CAAU,QAAV,EAAoB,EAApB,CAAJ;AAAA,GADD,EAEJ3C,GAFI,CAEA,UAAA0C,CAAC;AAAA,WAAIA,CAAC,CAAChB,IAAF,EAAJ;AAAA,GAFD,EAGJkB,MAHI,CAGGC,OAHH,EAIJ7C,GAJI,CAIA2B,SAJA,CAAP;AAKD;;AC7ED,SAASmB,OAAT,CAAiBC,KAAjB,EAAwB;AACtB,SAAOA,KAAK,CAAC7B,MAAN,KAAiB,CAAxB;AACD;;AAED,SAAS8B,MAAT,CAAgBD,KAAhB,EAAuB;AACrB,SAAOA,KAAK,CAACA,KAAK,CAAC7B,MAAN,GAAe,CAAhB,CAAZ;AACD;;AAED,SAAS+B,WAAT,CAAqBpD,IAArB,EAA2BqD,KAA3B,EAAkC;AAChC,MAAI,CAACrD,IAAI,CAACM,QAAV,EAAoB;AAClBN,IAAAA,IAAI,CAACM,QAAL,GAAgB,EAAhB;AACD;;AACDN,EAAAA,IAAI,CAACM,QAAL,CAAcoC,IAAd,CAAmBY,WAAW,CAACD,KAAD,CAA9B;AACD;;AAED,SAASC,WAAT,CAAqBtD,IAArB,EAA2B;AACzB,MAAI,OAAOA,IAAP,KAAgB,QAApB,EAA8B;AAC5B,WAAOA,IAAP;AACD;;AACD,SAAO,CAAC,MAAD,EAAS,UAAT,EAAqB,YAArB,EAAmCuD,MAAnC,CACL,UAACC,CAAD,EAAIC,CAAJ;AAAA;;AAAA,WAAUC,MAAM,CAACC,MAAP,CAAc,EAAd,EAAkBH,CAAlB,uCAAuBC,CAAvB,IAA2BzD,IAAI,CAACyD,CAAD,CAA/B,kBAAV;AAAA,GADK,EAEL,EAFK,CAAP;AAID;;AAED,SAASG,KAAT,CAAeC,GAAf,EAAoB;AAClB,MAAMrB,MAAM,GAAGI,QAAQ,CAACiB,GAAD,CAAvB;AACA,MAAMX,KAAK,GAAG,EAAd;AACA,MAAMY,IAAI,GAAG;AACX/C,IAAAA,IAAI,EAAE,MADK;AAEXT,IAAAA,QAAQ,EAAE;AAFC,GAAb;AAKA4C,EAAAA,KAAK,CAACR,IAAN,CAAWoB,IAAX;;AACA,SAAO,CAACb,OAAO,CAACC,KAAD,CAAR,IAAmB,CAACD,OAAO,CAACT,MAAD,CAAlC,EAA4C;AAC1C,QAAMC,IAAI,GAAGD,MAAM,CAACuB,KAAP,EAAb;AACA,QAAMC,GAAG,GAAGb,MAAM,CAACD,KAAD,CAA
lB;;AAEA,QAAIT,IAAI,CAAC1B,IAAL,KAAc,QAAlB,EAA4B;AAC1BqC,MAAAA,WAAW,CAACY,GAAD,EAAMvB,IAAI,CAACb,KAAX,CAAX;AACD,KAFD,MAEO,IAAIO,KAAK,CAACxB,MAAN,CAAaqD,GAAb,EAAkBvB,IAAlB,CAAJ,EAA6B;AAClC,UAAMzC,IAAI,GAAGkD,KAAK,CAACe,GAAN,EAAb;;AACA,UAAI,CAAChB,OAAO,CAACC,KAAD,CAAZ,EAAqB;AACnBE,QAAAA,WAAW,CAACD,MAAM,CAACD,KAAD,CAAP,EAAgBlD,IAAhB,CAAX;AACD;AACF,KALM,MAKA,IAAIyC,IAAI,CAAC1B,IAAL,KAAc,YAAlB,EAAgC;AACrCqC,MAAAA,WAAW,CAACY,GAAD,EAAMvB,IAAN,CAAX;AACD,KAFM,MAEA,IAAIA,IAAI,CAAC1B,IAAL,KAAc,OAAlB,EAA2B;AAChCmC,MAAAA,KAAK,CAACR,IAAN,CAAWD,IAAX;AACD;AACF;;AACD,SAAOqB,IAAI,CAACxD,QAAZ;AACD;;ACpDD,SAAS4D,QAAT,CAAkB5B,IAAlB,EAAwBxC,IAAxB,EAA8B;AAC5B,SAAOF,SAAS,CAACgE,KAAK,CAACtB,IAAD,CAAL,CAAY,CAAZ,CAAD,EAAiBxC,IAAjB,CAAhB;AACD;;;;;;"}
|
package/lib/index.mjs
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
/**
 * Walks an AST and converts every node with the caller-supplied `rule`.
 *
 * @param {*} ast - A text node (string), an element node (object with an
 *   optional `children` array), or an array of such nodes.
 * @param {Function} rule - Called as `rule(node, convertedChildren)`.
 *   A string passed directly as `ast` is converted with `rule(node)` only.
 * @returns {*} Whatever `rule` returns (an array of results when `ast` is an
 *   array), or `null` when `ast` is falsy.
 */
function transform(ast, rule) {
  // Converts the children first, then hands the node plus the converted
  // children to the rule. Strings have no `children`, so they receive `null`.
  function convert(item) {
    return rule(item, next(item.children));
  }

  function next(node) {
    if (!node) {
      return null;
    }

    if (typeof node === 'string') {
      return rule(node);
    }

    if (!Array.isArray(node)) {
      return convert(node);
    }

    return node.map(function (child, position) {
      if (typeof child !== 'string') {
        // Written onto the AST node itself so the rule can read the
        // element's position among its siblings.
        child.index = position;
      }

      return convert(child);
    });
  }

  return next(ast);
}
|
|
26
|
+
|
|
27
|
+
// Tag names that are tokenized as self-closing even without a trailing `/`.
var voidElementTags = [
  'area',
  'base',
  'br',
  'col',
  'embed',
  'hr',
  'img',
  'input',
  'link',
  'meta',
  'param',
  'source',
  'track',
  'wbr'
];

/**
 * Tells whether `tagName` is one of the known void element names.
 * The comparison is exact (case-sensitive).
 *
 * @param {string} tagName
 * @returns {boolean}
 */
function isSelfClose(tagName) {
  return voidElementTags.some(function (voidTag) {
    return voidTag === tagName;
  });
}

/**
 * Tells whether `tagY` is the end token that closes the start token `tagX`.
 *
 * @param {{name: string, type: string}} tagX - Candidate opening token.
 * @param {{name: string, type: string}} tagY - Candidate closing token.
 * @returns {boolean}
 */
function isPair(tagX, tagY) {
  if (tagX.name !== tagY.name) {
    return false;
  }

  if (tagX.type !== 'start') {
    return false;
  }

  return tagY.type === 'end';
}

// Helpers grouped under one namespace, as the tokenizer and parser use them.
var utils = {
  isPair: isPair,
  isSelfClose: isSelfClose
};
|
|
41
|
+
|
|
42
|
+
// One attribute: a name (group 1) optionally followed by `=` and a value that
// is double-quoted (group 2), single-quoted (group 3) or unquoted (group 4).
// The name may contain anything except whitespace, quotes, `<`, `>`, `/`, `=`,
// so single-character names, digits and underscores are all accepted.
var ATTR_FIND = /([^\s"'<>\/=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+)))?/g;

/**
 * Extracts the attributes from the text that follows the tag name inside a
 * start tag (for example `class="a" disabled`).
 *
 * @param {string} str - Attribute text; a trailing `/` is ignored.
 * @returns {Object} Map from attribute name to its string value, or to `true`
 *   for an attribute written without a value. An explicitly empty value
 *   (`alt=""`) yields the empty string.
 */
function extraAttrs(str) {
  var attrs = {};

  if (!str) {
    return attrs;
  }

  var match;
  // The pattern is global and shared between calls: always start at the
  // beginning so state left by an earlier call cannot skip attributes.
  ATTR_FIND.lastIndex = 0;

  while ((match = ATTR_FIND.exec(str)) !== null) {
    var value = match[2];

    if (value === undefined) {
      value = match[3];
    }

    if (value === undefined) {
      value = match[4];
    }

    attrs[match[1]] = value === undefined ? true : value;
  }

  return attrs;
}
|
|
67
|
+
|
|
68
|
+
/**
 * Classifies one raw piece produced by the splitter into a token object.
 *
 * Token shapes:
 *  - `{ type: 'string', value }`       plain text, or markup that is not a tag
 *  - `{ type: 'comment', value }`      `<!-- ... -->`
 *  - `{ type: 'declaration', value }`  `<!DOCTYPE ...>`, `<?xml ...?>` and similar
 *  - `{ type: 'end', name }`           closing tag
 *  - `{ type: 'start' | 'self-close', name, attributes }`
 *
 * Comment and declaration tokens have no `name`; the parser only acts on
 * string, start, self-close and end tokens, so they stay out of the tree
 * instead of throwing or being treated as a never-closed element.
 *
 * @param {string} tag - Raw text piece, e.g. `<div class="a">` or `hello`.
 * @returns {Object} The token.
 */
function makeToken(tag) {
  var isTag = tag[0] === '<' && tag[tag.length - 1] === '>';

  if (!isTag) {
    return {
      type: 'string',
      value: tag
    };
  }

  if (tag.slice(0, 4) === '<!--') {
    return {
      type: 'comment',
      value: tag
    };
  }

  if (tag[1] === '!' || tag[1] === '?') {
    return {
      type: 'declaration',
      value: tag
    };
  }

  if (tag[1] === '/') {
    return {
      type: 'end',
      // Trim so that `</div >` still pairs with `<div>`.
      name: tag.slice(2, -1).trim()
    };
  }

  // The tag name runs up to the first whitespace, `/` or `>`, which keeps
  // hyphenated and namespaced names (`my-element`, `svg:rect`) intact.
  var match = /^<([^\s\/>]+)\s*([^>]*)/.exec(tag);

  if (!match) {
    // Something like `<>`: not a real tag, keep it as text.
    return {
      type: 'string',
      value: tag
    };
  }

  var tagName = match[1];
  var tagBody = match[2];
  var selfClosing = utils.isSelfClose(tagName) || tagBody[tagBody.length - 1] === '/';

  return {
    type: selfClosing ? 'self-close' : 'start',
    name: tagName,
    attributes: extraAttrs(tagBody)
  };
}
|
|
92
|
+
|
|
93
|
+
/**
 * Splits markup into raw pieces: each `<...>` tag becomes one piece and each
 * run of text between tags becomes one piece, in document order.
 *
 * A tag ends at the first `>` after its `<`; a `>` inside an attribute value
 * is not treated specially. A `<` that is never closed is kept as text
 * together with everything after it.
 *
 * @param {string} html - Source markup.
 * @returns {string[]} Raw tag and text pieces (not trimmed).
 */
function splitTokens(html) {
  var tokens = [];
  var pos = 0;

  while (pos < html.length) {
    var open = html.indexOf('<', pos);
    var close = open === -1 ? -1 : html.indexOf('>', open);

    if (close === -1) {
      // No further complete tag: the remainder is text. This also covers
      // trailing text after the last tag and input without any tag.
      tokens.push(html.slice(pos));
      break;
    }

    if (open > pos) {
      tokens.push(html.slice(pos, open));
    }

    tokens.push(html.slice(open, close + 1));
    pos = close + 1;
  }

  return tokens;
}
|
|
123
|
+
|
|
124
|
+
/**
 * Turns markup into a list of tokens: splits it into raw pieces, strips
 * surrounding whitespace, discards pieces that end up empty and converts the
 * rest with `makeToken`.
 *
 * @param {string} html - Source markup.
 * @returns {Object[]} Tokens in document order.
 */
function tokenize(html) {
  var cleaned = [];

  splitTokens(html).forEach(function (piece) {
    var text = piece.replace(/^\n+$/g, '').trim();

    if (text) {
      cleaned.push(text);
    }
  });

  return cleaned.map(makeToken);
}
|
|
131
|
+
|
|
132
|
+
/**
 * Reports whether an array-like holds no elements.
 *
 * @param {{length: number}} stack
 * @returns {boolean} `true` when `stack.length` is exactly 0.
 */
function isEmpty(stack) {
  var size = stack.length;
  return size === 0;
}
|
|
135
|
+
|
|
136
|
+
/**
 * Returns the last element of the stack without removing it.
 *
 * @param {Array} stack
 * @returns {*} The last element, or `undefined` for an empty stack.
 */
function getTop(stack) {
  var lastIndex = stack.length - 1;
  return stack[lastIndex];
}
|
|
139
|
+
|
|
140
|
+
/**
 * Adds `child` to `node.children`, creating the array on first use.
 * The child is passed through `filterProps` before it is stored.
 *
 * @param {Object} node - Parent node; mutated in place.
 * @param {string|Object} child - Text or element to attach.
 * @returns {void}
 */
function appendChild(node, child) {
  var kids = node.children;

  if (!kids) {
    kids = [];
    node.children = kids;
  }

  kids.push(filterProps(child));
}
|
|
147
|
+
|
|
148
|
+
/**
 * Reduces a token to the fields kept in the AST.
 *
 * @param {string|Object} node - Text (returned unchanged) or a token object.
 * @returns {string|Object} For objects, a new object with exactly the keys
 *   `name`, `children` and `attributes` (each present even when its value is
 *   `undefined`); every other field, such as `type`, is left behind.
 */
function filterProps(node) {
  if (typeof node === 'string') {
    return node;
  }

  return {
    name: node.name,
    children: node.children,
    attributes: node.attributes
  };
}
|
|
159
|
+
|
|
160
|
+
function parse(src) {
|
|
161
|
+
var tokens = tokenize(src);
|
|
162
|
+
var stack = [];
|
|
163
|
+
var tree = {
|
|
164
|
+
type: 'root',
|
|
165
|
+
children: []
|
|
166
|
+
};
|
|
167
|
+
stack.push(tree);
|
|
168
|
+
|
|
169
|
+
while (!isEmpty(stack) && !isEmpty(tokens)) {
|
|
170
|
+
var curr = tokens.shift();
|
|
171
|
+
var top = getTop(stack);
|
|
172
|
+
|
|
173
|
+
if (curr.type === 'string') {
|
|
174
|
+
appendChild(top, curr.value);
|
|
175
|
+
} else if (utils.isPair(top, curr)) {
|
|
176
|
+
var node = stack.pop();
|
|
177
|
+
|
|
178
|
+
if (!isEmpty(stack)) {
|
|
179
|
+
appendChild(getTop(stack), node);
|
|
180
|
+
}
|
|
181
|
+
} else if (curr.type === 'self-close') {
|
|
182
|
+
appendChild(top, curr);
|
|
183
|
+
} else if (curr.type === 'start') {
|
|
184
|
+
stack.push(curr);
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
return tree.children;
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
/**
 * Parses markup and converts its first top-level node in one step.
 *
 * @param {string} html - Source markup.
 * @param {Function} rule - Conversion rule handed to `transform`.
 * @returns {*} The result of `transform` for the first node returned by
 *   `parse`; any further top-level nodes are not converted.
 */
function html2any(html, rule) {
  var nodes = parse(html);
  var firstNode = nodes[0];
  return transform(firstNode, rule);
}
|
|
194
|
+
|
|
195
|
+
export { html2any as default, parse, transform };
|
|
196
|
+
//# sourceMappingURL=index.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.mjs","sources":["../src/transform.js","../src/utils.js","../src/tokenize.js","../src/parse.js","../src/index.js"],"sourcesContent":["function transform(ast, rule) {\n function next(node) {\n if (node) {\n if (typeof node === 'string') {\n return rule(node)\n }\n if (Array.isArray(node)) {\n return node.map((n, index) => {\n if (typeof n !== 'string') {\n n.index = index // critical array element index\n }\n return rule(n, next(n.children))\n })\n } else {\n return rule(node, next(node.children))\n }\n }\n return null\n }\n return next(ast)\n}\n\nexport default transform\n","const voidElementTags = [\n 'area',\n 'base',\n 'br',\n 'col',\n 'embed',\n 'hr',\n 'img',\n 'input',\n 'link',\n 'meta',\n 'param',\n 'source',\n 'track',\n 'wbr',\n]\n\nfunction isSelfClose(tagName) {\n return voidElementTags.indexOf(tagName) > -1\n}\n\nfunction isPair(tagX, tagY) {\n return tagX.name === tagY.name && tagX.type === 'start' && tagY.type === 'end'\n}\n\nexport default { isPair, isSelfClose }\n","import utils from './utils'\n\n// assuming that quato always following equation - `=\"\"`\nconst ATTR_FIND = /((^\\w|\\s+)[a-zA-Z-:]+)(=\"[^\"]+\"|\\s+|\\s*$)?/\n\nfunction extraAttrs(str) {\n let i = 0\n const attrs = {}\n while (i < str.length) {\n const suffix = str.slice(i)\n const match = ATTR_FIND.exec(suffix)\n if (!match || !match[1]) {\n break\n }\n const result = match[0]\n let key = match[1]\n let value = match[3]\n\n key = key.trim()\n value = value && value.trim()\n\n attrs[key] = (value && value[0] === '=') ? 
value.slice(2, -1) : true\n i += result.length\n }\n return attrs\n}\n\nfunction makeToken(tag) {\n const isTag = tag[0] === '<' && tag[tag.length - 1] === '>'\n\n if (!isTag) {\n return {\n type: 'string',\n value: tag,\n }\n } else if (tag.startsWith('</')) {\n return {\n type: 'end',\n name: tag.slice(2, -1),\n }\n } else {\n const match = tag.match(/<([\\w+:?\\w*]+)\\s*([^>]*)/)\n const tagName = match[1]\n const tagBody = match[2]\n return {\n type: (utils.isSelfClose(tagName) || tagBody[tagBody.length - 1] === '/') ? 'self-close' : 'start',\n name: tagName,\n attributes: extraAttrs(tagBody),\n }\n }\n}\n\nfunction splitTokens(html) {\n let i = 0\n let j = 0\n const tokens = []\n while (i < html.length) {\n const curr = html[i]\n if (curr === '<') {\n if (j < i) {\n tokens.push(html.slice(j, i))\n j = i\n }\n let k = i\n while (html[k] !== '>') k++\n tokens.push(html.slice(i, k + 1))\n i = j = k + 1\n continue\n }\n i++\n }\n return tokens\n}\n\nfunction tokenize(html) {\n return splitTokens(html)\n .map(s => s.replace(/^\\n+$/g, ''))\n .map(s => s.trim())\n .filter(Boolean)\n .map(makeToken)\n}\n\nexport default tokenize\n","import tokenize from './tokenize'\nimport utils from './utils'\n\nfunction isEmpty(stack) {\n return stack.length === 0\n}\n\nfunction getTop(stack) {\n return stack[stack.length - 1]\n}\n\nfunction appendChild(node, child) {\n if (!node.children) {\n node.children = []\n }\n node.children.push(filterProps(child))\n}\n\nfunction filterProps(node) {\n if (typeof node === 'string') {\n return node\n }\n return ['name', 'children', 'attributes'].reduce(\n (r, c) => Object.assign({}, r, {[c]: node[c]}),\n {},\n )\n}\n\nfunction parse(src) {\n const tokens = tokenize(src)\n const stack = []\n const tree = {\n type: 'root',\n children: [],\n }\n\n stack.push(tree)\n while (!isEmpty(stack) && !isEmpty(tokens)) {\n const curr = tokens.shift()\n const top = getTop(stack)\n\n if (curr.type === 'string') {\n appendChild(top, curr.value)\n } else if 
(utils.isPair(top, curr)) {\n const node = stack.pop()\n if (!isEmpty(stack)) {\n appendChild(getTop(stack), node)\n }\n } else if (curr.type === 'self-close') {\n appendChild(top, curr)\n } else if (curr.type === 'start') {\n stack.push(curr)\n }\n }\n return tree.children\n}\n\nexport default parse\n","import transform from './transform'\nimport parse from './parse'\n\nfunction html2any(html, rule) {\n return transform(parse(html)[0], rule)\n}\n\nexport { parse, transform }\nexport default html2any\n"],"names":["transform","ast","rule","next","node","Array","isArray","map","n","index","children","voidElementTags","isSelfClose","tagName","indexOf","isPair","tagX","tagY","name","type","ATTR_FIND","extraAttrs","str","i","attrs","length","suffix","slice","match","exec","result","key","value","trim","makeToken","tag","isTag","startsWith","tagBody","utils","attributes","splitTokens","html","j","tokens","curr","push","k","tokenize","s","replace","filter","Boolean","isEmpty","stack","getTop","appendChild","child","filterProps","reduce","r","c","Object","assign","parse","src","tree","shift","top","pop","html2any"],"mappings":"AAAA,SAASA,SAAT,CAAmBC,GAAnB,EAAwBC,IAAxB,EAA8B;AAC5B,WAASC,IAAT,CAAcC,IAAd,EAAoB;AAClB,QAAIA,IAAJ,EAAU;AACR,UAAI,OAAOA,IAAP,KAAgB,QAApB,EAA8B;AAC5B,eAAOF,IAAI,CAACE,IAAD,CAAX;AACD;;AACD,UAAIC,KAAK,CAACC,OAAN,CAAcF,IAAd,CAAJ,EAAyB;AACvB,eAAOA,IAAI,CAACG,GAAL,CAAS,UAACC,CAAD,EAAIC,KAAJ,EAAc;AAC5B,cAAI,OAAOD,CAAP,KAAa,QAAjB,EAA2B;AACzBA,YAAAA,CAAC,CAACC,KAAF,GAAUA,KAAV,CADyB;AAE1B;;AACD,iBAAOP,IAAI,CAACM,CAAD,EAAIL,IAAI,CAACK,CAAC,CAACE,QAAH,CAAR,CAAX;AACD,SALM,CAAP;AAMD,OAPD,MAOO;AACL,eAAOR,IAAI,CAACE,IAAD,EAAOD,IAAI,CAACC,IAAI,CAACM,QAAN,CAAX,CAAX;AACD;AACF;;AACD,WAAO,IAAP;AACD;;AACD,SAAOP,IAAI,CAACF,GAAD,CAAX;AACD;;ACpBD,IAAMU,eAAe,GAAG,CACtB,MADsB,EAEtB,MAFsB,EAGtB,IAHsB,EAItB,KAJsB,EAKtB,OALsB,EAMtB,IANsB,EAOtB,KAPsB,EAQtB,OARsB,EAStB,MATsB,EAUtB,MAVsB,EAWtB,OAXsB,EAYtB,QAZsB,EAatB,OAbsB,EActB,KAdsB,CAAxB;;AAiBA,SAASC,WAAT,CAAqBC,OAArB,EAA8B;AAC5B,
SAAOF,eAAe,CAACG,OAAhB,CAAwBD,OAAxB,IAAmC,CAAC,CAA3C;AACD;;AAED,SAASE,MAAT,CAAgBC,IAAhB,EAAsBC,IAAtB,EAA4B;AAC1B,SAAOD,IAAI,CAACE,IAAL,KAAcD,IAAI,CAACC,IAAnB,IAA2BF,IAAI,CAACG,IAAL,KAAc,OAAzC,IAAoDF,IAAI,CAACE,IAAL,KAAc,KAAzE;AACD;;AAED,YAAe;AAAEJ,EAAAA,MAAM,EAANA,MAAF;AAAUH,EAAAA,WAAW,EAAXA;AAAV,CAAf;;ACtBA,IAAMQ,SAAS,GAAG,4CAAlB;;AAEA,SAASC,UAAT,CAAoBC,GAApB,EAAyB;AACvB,MAAIC,CAAC,GAAG,CAAR;AACA,MAAMC,KAAK,GAAG,EAAd;;AACA,SAAOD,CAAC,GAAGD,GAAG,CAACG,MAAf,EAAuB;AACrB,QAAMC,MAAM,GAAGJ,GAAG,CAACK,KAAJ,CAAUJ,CAAV,CAAf;AACA,QAAMK,KAAK,GAAGR,SAAS,CAACS,IAAV,CAAeH,MAAf,CAAd;;AACA,QAAI,CAACE,KAAD,IAAU,CAACA,KAAK,CAAC,CAAD,CAApB,EAAyB;AACvB;AACD;;AACD,QAAME,MAAM,GAAGF,KAAK,CAAC,CAAD,CAApB;AACA,QAAIG,GAAG,GAAGH,KAAK,CAAC,CAAD,CAAf;AACA,QAAII,KAAK,GAAGJ,KAAK,CAAC,CAAD,CAAjB;AAEAG,IAAAA,GAAG,GAAGA,GAAG,CAACE,IAAJ,EAAN;AACAD,IAAAA,KAAK,GAAGA,KAAK,IAAIA,KAAK,CAACC,IAAN,EAAjB;AAEAT,IAAAA,KAAK,CAACO,GAAD,CAAL,GAAcC,KAAK,IAAIA,KAAK,CAAC,CAAD,CAAL,KAAa,GAAvB,GAA8BA,KAAK,CAACL,KAAN,CAAY,CAAZ,EAAe,CAAC,CAAhB,CAA9B,GAAmD,IAAhE;AACAJ,IAAAA,CAAC,IAAIO,MAAM,CAACL,MAAZ;AACD;;AACD,SAAOD,KAAP;AACD;;AAED,SAASU,SAAT,CAAmBC,GAAnB,EAAwB;AACtB,MAAMC,KAAK,GAAGD,GAAG,CAAC,CAAD,CAAH,KAAW,GAAX,IAAkBA,GAAG,CAACA,GAAG,CAACV,MAAJ,GAAa,CAAd,CAAH,KAAwB,GAAxD;;AAEA,MAAI,CAACW,KAAL,EAAY;AACV,WAAO;AACLjB,MAAAA,IAAI,EAAE,QADD;AAELa,MAAAA,KAAK,EAAEG;AAFF,KAAP;AAID,GALD,MAKO,IAAIA,GAAG,CAACE,UAAJ,CAAe,IAAf,CAAJ,EAA0B;AAC/B,WAAO;AACLlB,MAAAA,IAAI,EAAE,KADD;AAELD,MAAAA,IAAI,EAAEiB,GAAG,CAACR,KAAJ,CAAU,CAAV,EAAa,CAAC,CAAd;AAFD,KAAP;AAID,GALM,MAKA;AACL,QAAMC,KAAK,GAAGO,GAAG,CAACP,KAAJ,CAAU,0BAAV,CAAd;AACA,QAAMf,OAAO,GAAGe,KAAK,CAAC,CAAD,CAArB;AACA,QAAMU,OAAO,GAAGV,KAAK,CAAC,CAAD,CAArB;AACA,WAAO;AACLT,MAAAA,IAAI,EAAGoB,KAAK,CAAC3B,WAAN,CAAkBC,OAAlB,KAA8ByB,OAAO,CAACA,OAAO,CAACb,MAAR,GAAiB,CAAlB,CAAP,KAAgC,GAA/D,GAAsE,YAAtE,GAAqF,OADtF;AAELP,MAAAA,IAAI,EAAEL,OAFD;AAGL2B,MAAAA,UAAU,EAAEnB,UAAU,CAACiB,OAAD;AAHjB,KAAP;AAKD;AACF;;AAED,SAASG,WAAT,CAAqBC,IAArB,EAA2B;AACzB,MAAInB,CAAC,GAAG,CAAR;AACA,MAAIoB,CAAC,GAAG,CAAR;A
ACA,MAAMC,MAAM,GAAG,EAAf;;AACA,SAAOrB,CAAC,GAAGmB,IAAI,CAACjB,MAAhB,EAAwB;AACtB,QAAMoB,IAAI,GAAGH,IAAI,CAACnB,CAAD,CAAjB;;AACA,QAAIsB,IAAI,KAAK,GAAb,EAAkB;AAChB,UAAIF,CAAC,GAAGpB,CAAR,EAAW;AACTqB,QAAAA,MAAM,CAACE,IAAP,CAAYJ,IAAI,CAACf,KAAL,CAAWgB,CAAX,EAAcpB,CAAd,CAAZ;AACAoB,QAAAA,CAAC,GAAGpB,CAAJ;AACD;;AACD,UAAIwB,CAAC,GAAGxB,CAAR;;AACA,aAAOmB,IAAI,CAACK,CAAD,CAAJ,KAAY,GAAnB;AAAwBA,QAAAA,CAAC;AAAzB;;AACAH,MAAAA,MAAM,CAACE,IAAP,CAAYJ,IAAI,CAACf,KAAL,CAAWJ,CAAX,EAAcwB,CAAC,GAAG,CAAlB,CAAZ;AACAxB,MAAAA,CAAC,GAAGoB,CAAC,GAAGI,CAAC,GAAG,CAAZ;AACA;AACD;;AACDxB,IAAAA,CAAC;AACF;;AACD,SAAOqB,MAAP;AACD;;AAED,SAASI,QAAT,CAAkBN,IAAlB,EAAwB;AACtB,SAAOD,WAAW,CAACC,IAAD,CAAX,CACJnC,GADI,CACA,UAAA0C,CAAC;AAAA,WAAIA,CAAC,CAACC,OAAF,CAAU,QAAV,EAAoB,EAApB,CAAJ;AAAA,GADD,EAEJ3C,GAFI,CAEA,UAAA0C,CAAC;AAAA,WAAIA,CAAC,CAAChB,IAAF,EAAJ;AAAA,GAFD,EAGJkB,MAHI,CAGGC,OAHH,EAIJ7C,GAJI,CAIA2B,SAJA,CAAP;AAKD;;AC7ED,SAASmB,OAAT,CAAiBC,KAAjB,EAAwB;AACtB,SAAOA,KAAK,CAAC7B,MAAN,KAAiB,CAAxB;AACD;;AAED,SAAS8B,MAAT,CAAgBD,KAAhB,EAAuB;AACrB,SAAOA,KAAK,CAACA,KAAK,CAAC7B,MAAN,GAAe,CAAhB,CAAZ;AACD;;AAED,SAAS+B,WAAT,CAAqBpD,IAArB,EAA2BqD,KAA3B,EAAkC;AAChC,MAAI,CAACrD,IAAI,CAACM,QAAV,EAAoB;AAClBN,IAAAA,IAAI,CAACM,QAAL,GAAgB,EAAhB;AACD;;AACDN,EAAAA,IAAI,CAACM,QAAL,CAAcoC,IAAd,CAAmBY,WAAW,CAACD,KAAD,CAA9B;AACD;;AAED,SAASC,WAAT,CAAqBtD,IAArB,EAA2B;AACzB,MAAI,OAAOA,IAAP,KAAgB,QAApB,EAA8B;AAC5B,WAAOA,IAAP;AACD;;AACD,SAAO,CAAC,MAAD,EAAS,UAAT,EAAqB,YAArB,EAAmCuD,MAAnC,CACL,UAACC,CAAD,EAAIC,CAAJ;AAAA;;AAAA,WAAUC,MAAM,CAACC,MAAP,CAAc,EAAd,EAAkBH,CAAlB,uCAAuBC,CAAvB,IAA2BzD,IAAI,CAACyD,CAAD,CAA/B,kBAAV;AAAA,GADK,EAEL,EAFK,CAAP;AAID;;AAED,SAASG,KAAT,CAAeC,GAAf,EAAoB;AAClB,MAAMrB,MAAM,GAAGI,QAAQ,CAACiB,GAAD,CAAvB;AACA,MAAMX,KAAK,GAAG,EAAd;AACA,MAAMY,IAAI,GAAG;AACX/C,IAAAA,IAAI,EAAE,MADK;AAEXT,IAAAA,QAAQ,EAAE;AAFC,GAAb;AAKA4C,EAAAA,KAAK,CAACR,IAAN,CAAWoB,IAAX;;AACA,SAAO,CAACb,OAAO,CAACC,KAAD,CAAR,IAAmB,CAACD,OAAO,CAACT,MAAD,CAAlC,EAA4C;AAC1C,QAAMC,IAAI,GAAGD,MAAM,CAACuB,KAAP,EAAb;AACA,QAAMC,GAAG,GAAGb,MAAM,CAACD,KAAD,CAA
lB;;AAEA,QAAIT,IAAI,CAAC1B,IAAL,KAAc,QAAlB,EAA4B;AAC1BqC,MAAAA,WAAW,CAACY,GAAD,EAAMvB,IAAI,CAACb,KAAX,CAAX;AACD,KAFD,MAEO,IAAIO,KAAK,CAACxB,MAAN,CAAaqD,GAAb,EAAkBvB,IAAlB,CAAJ,EAA6B;AAClC,UAAMzC,IAAI,GAAGkD,KAAK,CAACe,GAAN,EAAb;;AACA,UAAI,CAAChB,OAAO,CAACC,KAAD,CAAZ,EAAqB;AACnBE,QAAAA,WAAW,CAACD,MAAM,CAACD,KAAD,CAAP,EAAgBlD,IAAhB,CAAX;AACD;AACF,KALM,MAKA,IAAIyC,IAAI,CAAC1B,IAAL,KAAc,YAAlB,EAAgC;AACrCqC,MAAAA,WAAW,CAACY,GAAD,EAAMvB,IAAN,CAAX;AACD,KAFM,MAEA,IAAIA,IAAI,CAAC1B,IAAL,KAAc,OAAlB,EAA2B;AAChCmC,MAAAA,KAAK,CAACR,IAAN,CAAWD,IAAX;AACD;AACF;;AACD,SAAOqB,IAAI,CAACxD,QAAZ;AACD;;ACpDD,SAAS4D,QAAT,CAAkB5B,IAAlB,EAAwBxC,IAAxB,EAA8B;AAC5B,SAAOF,SAAS,CAACgE,KAAK,CAACtB,IAAD,CAAL,CAAY,CAAZ,CAAD,EAAiBxC,IAAjB,CAAhB;AACD;;;;"}
|
package/package.json
CHANGED
|
@@ -1,29 +1,30 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "html2any",
|
|
3
|
-
"version": "0.0.
|
|
4
|
-
"description": "
|
|
5
|
-
"main": "lib/index.js",
|
|
6
|
-
"
|
|
7
|
-
"
|
|
8
|
-
"
|
|
9
|
-
"build": "babel src -d lib",
|
|
10
|
-
"prebuild": ". ./prettier.sh \"./{src,__test__}/**/*.js\""
|
|
3
|
+
"version": "0.0.7",
|
|
4
|
+
"description": "parse html to ast and transform to anything you need",
|
|
5
|
+
"main": "./lib/index.js",
|
|
6
|
+
"exports": {
|
|
7
|
+
"import": "./lib/index.mjs",
|
|
8
|
+
"require": "./lib/index.js"
|
|
11
9
|
},
|
|
12
|
-
"
|
|
13
|
-
"
|
|
10
|
+
"scripts": {
|
|
11
|
+
"test": "yarn node --experimental-vm-modules $(yarn bin jest)",
|
|
12
|
+
"build": "rm -rf lib && bunchee src/index.js"
|
|
14
13
|
},
|
|
15
14
|
"files": [
|
|
16
15
|
"*.md",
|
|
17
16
|
"lib"
|
|
18
17
|
],
|
|
19
|
-
"
|
|
20
|
-
"
|
|
18
|
+
"type": "module",
|
|
19
|
+
"jest": {
|
|
20
|
+
"transform": {}
|
|
21
|
+
},
|
|
22
|
+
"author": "huozhi",
|
|
23
|
+
"license": "MIT",
|
|
21
24
|
"devDependencies": {
|
|
22
|
-
"
|
|
23
|
-
"
|
|
24
|
-
"
|
|
25
|
-
"
|
|
26
|
-
"react": "^15.6.1",
|
|
27
|
-
"react-test-renderer": "^15.6.1"
|
|
25
|
+
"bunchee": "^1.8.2",
|
|
26
|
+
"jest": "^27.4.7",
|
|
27
|
+
"react": "^17.0.2",
|
|
28
|
+
"react-test-renderer": "^17.0.2"
|
|
28
29
|
}
|
|
29
30
|
}
|
package/lib/parser.js
DELETED
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
'use strict';
|
|
2
|
-
|
|
3
|
-
function _defineProperty(obj, key, value) { if (key in obj) { Object.defineProperty(obj, key, { value: value, enumerable: true, configurable: true, writable: true }); } else { obj[key] = value; } return obj; }
|
|
4
|
-
|
|
5
|
-
var utils = require('./utils');
|
|
6
|
-
|
|
7
|
-
function isEmpty(stack) {
|
|
8
|
-
return stack.length === 0;
|
|
9
|
-
}
|
|
10
|
-
|
|
11
|
-
function getTop(stack) {
|
|
12
|
-
return stack[stack.length - 1];
|
|
13
|
-
}
|
|
14
|
-
|
|
15
|
-
function appendChild(node, child) {
|
|
16
|
-
if (!node.children) {
|
|
17
|
-
node.children = [];
|
|
18
|
-
}
|
|
19
|
-
node.children.push(filterProps(child));
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
function filterProps(node) {
|
|
23
|
-
if (typeof node === 'string') {
|
|
24
|
-
return node;
|
|
25
|
-
}
|
|
26
|
-
return ['name', 'children', 'attributes'].reduce(function (r, c) {
|
|
27
|
-
return Object.assign({}, r, _defineProperty({}, c, node[c]));
|
|
28
|
-
}, {});
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
function parse(tokens) {
|
|
32
|
-
var stack = [];
|
|
33
|
-
var tree = {
|
|
34
|
-
children: []
|
|
35
|
-
};
|
|
36
|
-
|
|
37
|
-
stack.push(tree);
|
|
38
|
-
|
|
39
|
-
while (!isEmpty(stack) && !isEmpty(tokens)) {
|
|
40
|
-
var curr = tokens.shift();
|
|
41
|
-
var top = getTop(stack);
|
|
42
|
-
|
|
43
|
-
if (curr.type === 'string') {
|
|
44
|
-
appendChild(top, curr.value);
|
|
45
|
-
} else if (utils.isPair(top, curr)) {
|
|
46
|
-
var node = stack.pop();
|
|
47
|
-
if (!isEmpty(stack)) {
|
|
48
|
-
appendChild(getTop(stack), node);
|
|
49
|
-
}
|
|
50
|
-
} else if (curr.type === 'self-close') {
|
|
51
|
-
appendChild(top, curr);
|
|
52
|
-
} else if (curr.type === 'start') {
|
|
53
|
-
stack.push(curr);
|
|
54
|
-
}
|
|
55
|
-
}
|
|
56
|
-
return tree.children;
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
module.exports = parse;
|
package/lib/tokenizer.js
DELETED
|
@@ -1,84 +0,0 @@
|
|
|
1
|
-
'use strict';
|
|
2
|
-
|
|
3
|
-
var utils = require('./utils');
|
|
4
|
-
|
|
5
|
-
// assuming that quato always following equation - `=""`
|
|
6
|
-
var ATTR_FIND = /((^\w|\s+)[a-z-]+)(="[^"]+"|\s+|\s*$)?/;
|
|
7
|
-
|
|
8
|
-
function extraAttrs(str) {
|
|
9
|
-
var i = 0;
|
|
10
|
-
var attrs = {};
|
|
11
|
-
while (i < str.length) {
|
|
12
|
-
var suffix = str.slice(i);
|
|
13
|
-
var match = ATTR_FIND.exec(suffix);
|
|
14
|
-
if (!match || !match[1]) {
|
|
15
|
-
break;
|
|
16
|
-
}
|
|
17
|
-
var result = match[0];
|
|
18
|
-
var key = match[1];
|
|
19
|
-
var value = match[3];
|
|
20
|
-
|
|
21
|
-
key = key.trim();
|
|
22
|
-
value = value && value.trim();
|
|
23
|
-
|
|
24
|
-
attrs[key] = value && value.startsWith('=') ? value.slice(2, -1) : true;
|
|
25
|
-
i += result.length;
|
|
26
|
-
}
|
|
27
|
-
return attrs;
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
function makeToken(tag) {
|
|
31
|
-
var isTag = tag.startsWith('<') && tag.endsWith('>');
|
|
32
|
-
|
|
33
|
-
if (!isTag) {
|
|
34
|
-
return {
|
|
35
|
-
type: 'string',
|
|
36
|
-
value: tag
|
|
37
|
-
};
|
|
38
|
-
} else if (tag.startsWith('</')) {
|
|
39
|
-
return {
|
|
40
|
-
type: 'end',
|
|
41
|
-
name: tag.slice(2, -1)
|
|
42
|
-
};
|
|
43
|
-
} else {
|
|
44
|
-
var match = tag.match(/<(\w+)\s*([^>]*)/);
|
|
45
|
-
return {
|
|
46
|
-
type: utils.isSelfClose(match[1]) ? 'self-close' : 'start',
|
|
47
|
-
name: match[1],
|
|
48
|
-
attributes: extraAttrs(match[2])
|
|
49
|
-
};
|
|
50
|
-
}
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
function splitTokens(html) {
|
|
54
|
-
var i = 0;
|
|
55
|
-
var j = 0;
|
|
56
|
-
var tokens = [];
|
|
57
|
-
while (i < html.length) {
|
|
58
|
-
var curr = html[i];
|
|
59
|
-
if (curr === '<') {
|
|
60
|
-
if (j < i) {
|
|
61
|
-
tokens.push(html.slice(j, i));
|
|
62
|
-
j = i;
|
|
63
|
-
}
|
|
64
|
-
var k = i;
|
|
65
|
-
while (html[k] !== '>') {
|
|
66
|
-
k++;
|
|
67
|
-
}tokens.push(html.slice(i, k + 1));
|
|
68
|
-
i = j = k + 1;
|
|
69
|
-
continue;
|
|
70
|
-
}
|
|
71
|
-
i++;
|
|
72
|
-
}
|
|
73
|
-
return tokens;
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
function tokenizer(html) {
|
|
77
|
-
return splitTokens(html).map(function (s) {
|
|
78
|
-
return s.replace(/^\n|\n$/g, '');
|
|
79
|
-
}).map(function (s) {
|
|
80
|
-
return s.trim();
|
|
81
|
-
}).filter(Boolean).map(makeToken);
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
module.exports = tokenizer;
|
package/lib/transform.js
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
'use strict';
|
|
2
|
-
|
|
3
|
-
function transform(ast, rule) {
|
|
4
|
-
function next(node) {
|
|
5
|
-
if (node) {
|
|
6
|
-
if (typeof node === 'string') {
|
|
7
|
-
return rule(node);
|
|
8
|
-
}
|
|
9
|
-
if (Array.isArray(node)) {
|
|
10
|
-
return node.map(function (n, index) {
|
|
11
|
-
if (typeof n !== 'string') {
|
|
12
|
-
n.index = index; // critical array element index
|
|
13
|
-
}
|
|
14
|
-
return rule(n, next(n.children));
|
|
15
|
-
});
|
|
16
|
-
} else {
|
|
17
|
-
return rule(node, next(node.children));
|
|
18
|
-
}
|
|
19
|
-
}
|
|
20
|
-
return null;
|
|
21
|
-
}
|
|
22
|
-
return next(ast);
|
|
23
|
-
}
|
|
24
|
-
|
|
25
|
-
module.exports = transform;
|
package/lib/utils.js
DELETED
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
'use strict';
|
|
2
|
-
|
|
3
|
-
var voidElementTags = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr'];
|
|
4
|
-
|
|
5
|
-
function isSelfClose(tagName) {
|
|
6
|
-
return voidElementTags.indexOf(tagName) > -1;
|
|
7
|
-
}
|
|
8
|
-
|
|
9
|
-
function isPair(tagX, tagY) {
|
|
10
|
-
return tagX.name === tagY.name && tagX.type === 'start' && tagY.type === 'end';
|
|
11
|
-
}
|
|
12
|
-
|
|
13
|
-
exports.isPair = isPair;
|
|
14
|
-
exports.isSelfClose = isSelfClose;
|