@thi.ng/hiccup-html-parse 0.3.9 → 0.3.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +1 -1
- package/index.js +99 -123
- package/package.json +10 -7
package/CHANGELOG.md
CHANGED
package/index.js
CHANGED
|
@@ -2,10 +2,7 @@ import { DEFAULT, defmulti } from "@thi.ng/defmulti/defmulti";
|
|
|
2
2
|
import { defContext } from "@thi.ng/parse/context";
|
|
3
3
|
import { defGrammar } from "@thi.ng/parse/grammar";
|
|
4
4
|
import { unescapeEntities } from "@thi.ng/strings/entities";
|
|
5
|
-
|
|
6
|
-
// playground URL:
|
|
7
|
-
// https://demo.thi.ng/umbrella/parse-playground/#l9oD3G5vZGU6ICc8JyEgKDxjb21tZW50PiB8IDxjZGF0YV9lbD4gfCA8dm9pZF9lbD4gfCA8ZWw-KSA7CmVsOiA8bmFtZT4gPGF0dHJpYj4qICg8ZWxfYm9keT4gfCA8ZWxfY2xvc2U-ISApIDsKZWxfYm9keTogPFdTMD4gJz4nISAoPGJvZHk-IHwgPG5vZGU-KSogIjwvIiEgPG5hbWU-ISA8V1MwPiAnPichID0-IGhvaXN0IDsKZWxfY2xvc2U6IDxXUzA-ICIvPiIhIDsKbmFtZTogW0EtWmEtejAtOV86XC1dKyA9PiBqb2luIDsKYXR0cmliOiA8V1MxPiA8bmFtZT4gPGF0dHZhbD4_IDsKYXR0dmFsOiAnPSchICg8dmFsPiB8IDxhbHRfdmFsPiB8IDxlbXB0eT4gfCA8YWx0X2VtcHR5PikgOwp2YWw6ICciJyEgLig_KyciJyEpID0-IGpvaW4gOwphbHRfdmFsOiAnXCcnISAuKD8rJ1wnJyEpID0-IGpvaW4gOwplbXB0eTogJyInICciJyA7CmFsdF9lbXB0eTogJ1wnJyEgJ1wnJyEgOwpib2R5OiAuKD8tJzwnISkgPT4gam9pbiA7Cgp2b2lkX2VsOiA8dm9pZF9uYW1lPiA8YXR0cmliPiogPFdTMD4gJy8nPyEgJz4nISA7CnZvaWRfbmFtZTogKCJhcmVhIiB8ICJiYXNlIiB8ICJiciIgfCAiY29sIiB8ICJlbWJlZCIgfCAiaHIiIHwgImltZyIgfCAiaW5wdXQiIHwgImxpbmsiIHwgIm1ldGEiIHwgInNvdXJjZSIgfCAidHJhY2siIHwgIndiciIpIDsKCmNkYXRhX2VsOiA8Y2RhdGFfbmFtZT4gPGF0dHJpYj4qICc-JyEgPGNkYXRhX2JvZHk-IDsKY2RhdGFfbmFtZTogKCJzY3JpcHQiIHwgInN0eWxlIikgOwpjZGF0YV9ib2R5OiAuKD8tPGNkYXRhX2Nsb3NlPiEpIDxjZGF0YV9jbG9zZT4hID0-IGpvaW4gOwpjZGF0YV9jbG9zZTogIjwvIiEgPGNkYXRhX25hbWU-ISA8V1MwPiAnPichIDsKCmRvY3R5cGU6ICI8ISIhICgiZG9jdHlwZSIgfCAiRE9DVFlQRSIpISA8V1MxPiAuKD8rJz4nISkgPFdTMD4gPT4gam9pbiA7CmNvbW1lbnQ6ICIhLS0iISAuKD8rIi0tPiIhKSA9PiBqb2luIDsKCm1haW46IDxTVEFSVD4gPGRvY3R5cGU-PyA8bm9kZT4rIDxFTkQ-IDukbWFpbtoBMjwhZG9jdHlwZSBodG1sPgo8aHRtbCBsYW5nPSJlbiI-CjxoZWFkPgogIDwhLS0gPGlnbm9yZT48L2lnbm9yZT4gLS0-CiAgPHNjcmlwdCBsYW5nPSJqYXZhc2NyaXB0Ij4KY29uc29sZS5sb2coIjwvIisic2NyaXB0PiIpOwogIDwvc2NyaXB0PgogIDxzdHlsZT4KYm9keSB7IG1hcmdpbjogMDsgfQogIDwvc3R5bGU-CjwvaGVhZD4KPGJvZHk-CiAgPGRpdiBpZD0iZm9vIiBib29sIGRhdGEteHl6PSIiIGVtcHR5PScnPgogICAgPGEgaHJlZj0iI2JhciI-YmF6IDxiPmJvbGQ8L2I-PC9hPjxici8-CiAgPC9kaXY-CjwvYm9keT4KPC9odG1sPqCgoKA
|
|
8
|
-
export const lang = defGrammar(`
|
|
5
|
+
const lang = defGrammar(`
|
|
9
6
|
node: '<'! (<comment> | <cdata_el> | <void_el> | <el>) ;
|
|
10
7
|
el: <name> <attrib>* (<el_body> | <el_close>! ) ;
|
|
11
8
|
el_body: <WS0> '>'! (<body> | <node>)* "</"! <name>! <WS0> '>'! => hoist ;
|
|
@@ -32,143 +29,122 @@ comment: "!--"! .(?+"-->"!) => join ;
|
|
|
32
29
|
|
|
33
30
|
main: <START> <doctype>? <node>+ <END> ;
|
|
34
31
|
`);
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
* transformation.
|
|
39
|
-
*
|
|
40
|
-
* @param src
|
|
41
|
-
* @param opts
|
|
42
|
-
*/
|
|
43
|
-
export const parseRaw = (src, opts) => {
|
|
44
|
-
const ctx = defContext(src, opts);
|
|
45
|
-
return { result: lang.rules.main(ctx), ctx };
|
|
32
|
+
const parseRaw = (src, opts) => {
|
|
33
|
+
const ctx = defContext(src, opts);
|
|
34
|
+
return { result: lang.rules.main(ctx), ctx };
|
|
46
35
|
};
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
36
|
+
const parseHtml = (src, opts) => {
|
|
37
|
+
if (!src)
|
|
38
|
+
return { type: "success", result: [] };
|
|
39
|
+
opts = {
|
|
40
|
+
debug: false,
|
|
41
|
+
collapse: true,
|
|
42
|
+
unescape: true,
|
|
43
|
+
maxDepth: 128,
|
|
44
|
+
...opts
|
|
45
|
+
};
|
|
46
|
+
try {
|
|
47
|
+
const { result, ctx } = parseRaw(src.trim(), {
|
|
48
|
+
debug: opts.debug,
|
|
49
|
+
maxDepth: opts.maxDepth
|
|
50
|
+
});
|
|
51
|
+
const loc = {
|
|
52
|
+
offset: ctx.state.p,
|
|
53
|
+
line: ctx.state.l,
|
|
54
|
+
column: ctx.state.c
|
|
64
55
|
};
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
if (result) {
|
|
76
|
-
const acc = [];
|
|
77
|
-
transformScope(ctx.root, opts, acc);
|
|
78
|
-
return {
|
|
79
|
-
type: ctx.done ? "success" : "partial",
|
|
80
|
-
result: acc,
|
|
81
|
-
loc,
|
|
82
|
-
};
|
|
83
|
-
}
|
|
84
|
-
else {
|
|
85
|
-
return { type: "fail", loc };
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
catch (e) {
|
|
89
|
-
return { type: "error", err: e };
|
|
56
|
+
if (result) {
|
|
57
|
+
const acc = [];
|
|
58
|
+
transformScope(ctx.root, opts, acc);
|
|
59
|
+
return {
|
|
60
|
+
type: ctx.done ? "success" : "partial",
|
|
61
|
+
result: acc,
|
|
62
|
+
loc
|
|
63
|
+
};
|
|
64
|
+
} else {
|
|
65
|
+
return { type: "fail", loc };
|
|
90
66
|
}
|
|
67
|
+
} catch (e) {
|
|
68
|
+
return { type: "error", err: e };
|
|
69
|
+
}
|
|
91
70
|
};
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
* @remarks
|
|
97
|
-
* The dispatch values for the various implementations here correspond to the
|
|
98
|
-
* above grammar rules.
|
|
99
|
-
*
|
|
100
|
-
* @internal
|
|
101
|
-
*/
|
|
102
|
-
const transformScope = defmulti((x) => x.id, { cdata_el: "el", void_el: "el" }, {
|
|
71
|
+
const transformScope = defmulti(
|
|
72
|
+
(x) => x.id,
|
|
73
|
+
{ cdata_el: "el", void_el: "el" },
|
|
74
|
+
{
|
|
103
75
|
[DEFAULT]: (scope) => {
|
|
104
|
-
|
|
76
|
+
throw new Error(`missing impl for scope ID: ${scope.id}`);
|
|
105
77
|
},
|
|
106
78
|
// root node of the parse tree
|
|
107
79
|
root: ({ children }, opts, acc) => {
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
80
|
+
if (!children)
|
|
81
|
+
return;
|
|
82
|
+
children = children[0].children;
|
|
83
|
+
if (opts.doctype && children?.[0]) {
|
|
84
|
+
acc.push(["!DOCTYPE", children[0].result]);
|
|
85
|
+
}
|
|
86
|
+
for (let x of children[1].children)
|
|
87
|
+
transformScope(x, opts, acc);
|
|
116
88
|
},
|
|
117
89
|
node: ({ children }, opts, acc) => {
|
|
118
|
-
|
|
90
|
+
transformScope(children[0], opts, acc);
|
|
119
91
|
},
|
|
120
92
|
comment: ({ result }, opts, acc) => {
|
|
121
|
-
|
|
122
|
-
|
|
93
|
+
if (opts.comments)
|
|
94
|
+
acc.push(["__COMMENT__", result.trim()]);
|
|
123
95
|
},
|
|
124
96
|
// element node transformer, collects & filters attributes/children
|
|
125
97
|
// adds resulting hiccup element to accumulator array
|
|
126
98
|
el: ({ children }, opts, acc) => {
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
}
|
|
147
|
-
}
|
|
99
|
+
const [name, { children: $attribs }, body] = children;
|
|
100
|
+
if (opts.ignoreElements?.includes(name.result))
|
|
101
|
+
return;
|
|
102
|
+
const attribs = {};
|
|
103
|
+
const el = [name.result, attribs];
|
|
104
|
+
if ($attribs) {
|
|
105
|
+
for (let a of $attribs) {
|
|
106
|
+
const name2 = a.children[0].result;
|
|
107
|
+
if (opts.dataAttribs === false && name2.startsWith("data-"))
|
|
108
|
+
continue;
|
|
109
|
+
if (opts.ignoreAttribs?.includes(name2))
|
|
110
|
+
continue;
|
|
111
|
+
if (a.children[1].children) {
|
|
112
|
+
const val = a.children[1].children[0].result;
|
|
113
|
+
if (val != null)
|
|
114
|
+
attribs[name2] = unescapeEntities(val);
|
|
115
|
+
} else {
|
|
116
|
+
attribs[name2] = true;
|
|
117
|
+
}
|
|
148
118
|
}
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
}
|
|
119
|
+
}
|
|
120
|
+
if (body) {
|
|
121
|
+
if (body.result) {
|
|
122
|
+
el.push(body.result.trim());
|
|
123
|
+
} else if (body.children) {
|
|
124
|
+
for (let x of body.children)
|
|
125
|
+
transformScope(x, opts, el);
|
|
157
126
|
}
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
127
|
+
}
|
|
128
|
+
const result = opts.tx ? opts.tx(el) : el;
|
|
129
|
+
if (result != null)
|
|
130
|
+
acc.push(result);
|
|
161
131
|
},
|
|
162
132
|
// plain text transform (by default only resolves HTML entities)
|
|
163
133
|
body: ({ result }, opts, acc) => {
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
}
|
|
174
|
-
}
|
|
134
|
+
if (!opts.whitespace && /^\s+$/.test(result))
|
|
135
|
+
return;
|
|
136
|
+
if (opts.collapse)
|
|
137
|
+
result = result.replace(/\s+/gm, " ");
|
|
138
|
+
if (opts.unescape)
|
|
139
|
+
result = unescapeEntities(result);
|
|
140
|
+
result = opts.txBody ? opts.txBody(result) : result;
|
|
141
|
+
if (result != null)
|
|
142
|
+
acc.push(result);
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
);
|
|
146
|
+
export {
|
|
147
|
+
lang,
|
|
148
|
+
parseHtml,
|
|
149
|
+
parseRaw
|
|
150
|
+
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@thi.ng/hiccup-html-parse",
|
|
3
|
-
"version": "0.3.9",
|
|
3
|
+
"version": "0.3.10",
|
|
4
4
|
"description": "Well-formed HTML parsing and customizable transformation to nested JS arrays in @thi.ng/hiccup format",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"module": "./index.js",
|
|
@@ -24,7 +24,9 @@
|
|
|
24
24
|
"author": "Karsten Schmidt (https://thi.ng)",
|
|
25
25
|
"license": "Apache-2.0",
|
|
26
26
|
"scripts": {
|
|
27
|
-
"build": "yarn
|
|
27
|
+
"build": "yarn build:esbuild && yarn build:decl",
|
|
28
|
+
"build:decl": "tsc --declaration --emitDeclarationOnly",
|
|
29
|
+
"build:esbuild": "esbuild --format=esm --platform=neutral --target=es2022 --tsconfig=tsconfig.json --outdir=. src/**/*.ts",
|
|
28
30
|
"clean": "rimraf --glob '*.js' '*.d.ts' '*.map' doc",
|
|
29
31
|
"doc": "typedoc --excludePrivate --excludeInternal --out doc src/index.ts",
|
|
30
32
|
"doc:ae": "mkdir -p .ae/doc .ae/temp && api-extractor run --local --verbose",
|
|
@@ -33,13 +35,14 @@
|
|
|
33
35
|
"test": "bun test"
|
|
34
36
|
},
|
|
35
37
|
"dependencies": {
|
|
36
|
-
"@thi.ng/api": "^8.9.
|
|
37
|
-
"@thi.ng/defmulti": "^3.0.
|
|
38
|
-
"@thi.ng/parse": "^2.4.
|
|
39
|
-
"@thi.ng/strings": "^3.7.
|
|
38
|
+
"@thi.ng/api": "^8.9.12",
|
|
39
|
+
"@thi.ng/defmulti": "^3.0.10",
|
|
40
|
+
"@thi.ng/parse": "^2.4.10",
|
|
41
|
+
"@thi.ng/strings": "^3.7.3"
|
|
40
42
|
},
|
|
41
43
|
"devDependencies": {
|
|
42
44
|
"@microsoft/api-extractor": "^7.38.3",
|
|
45
|
+
"esbuild": "^0.19.8",
|
|
43
46
|
"rimraf": "^5.0.5",
|
|
44
47
|
"tools": "^0.0.1",
|
|
45
48
|
"typedoc": "^0.25.4",
|
|
@@ -82,5 +85,5 @@
|
|
|
82
85
|
"status": "alpha",
|
|
83
86
|
"year": 2023
|
|
84
87
|
},
|
|
85
|
-
"gitHead": "
|
|
88
|
+
"gitHead": "5e7bafedfc3d53bc131469a28de31dd8e5b4a3ff\n"
|
|
86
89
|
}
|