@thi.ng/hiccup-html-parse 0.3.9 → 0.3.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3):
  1. package/CHANGELOG.md +1 -1
  2. package/index.js +99 -123
  3. package/package.json +10 -7
package/CHANGELOG.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Change Log
2
2
 
3
- - **Last updated**: 2023-12-09T19:12:03Z
3
+ - **Last updated**: 2023-12-11T10:07:09Z
4
4
  - **Generator**: [thi.ng/monopub](https://thi.ng/monopub)
5
5
 
6
6
  All notable changes to this project will be documented in this file.
package/index.js CHANGED
@@ -2,10 +2,7 @@ import { DEFAULT, defmulti } from "@thi.ng/defmulti/defmulti";
2
2
  import { defContext } from "@thi.ng/parse/context";
3
3
  import { defGrammar } from "@thi.ng/parse/grammar";
4
4
  import { unescapeEntities } from "@thi.ng/strings/entities";
5
- // HTML parse grammar rules (see: thi.ng/parse readme for details)
6
- // playground URL:
7
- // https://demo.thi.ng/umbrella/parse-playground/#l9oD3G5vZGU6ICc8JyEgKDxjb21tZW50PiB8IDxjZGF0YV9lbD4gfCA8dm9pZF9lbD4gfCA8ZWw-KSA7CmVsOiA8bmFtZT4gPGF0dHJpYj4qICg8ZWxfYm9keT4gfCA8ZWxfY2xvc2U-ISApIDsKZWxfYm9keTogPFdTMD4gJz4nISAoPGJvZHk-IHwgPG5vZGU-KSogIjwvIiEgPG5hbWU-ISA8V1MwPiAnPichID0-IGhvaXN0IDsKZWxfY2xvc2U6IDxXUzA-ICIvPiIhIDsKbmFtZTogW0EtWmEtejAtOV86XC1dKyA9PiBqb2luIDsKYXR0cmliOiA8V1MxPiA8bmFtZT4gPGF0dHZhbD4_IDsKYXR0dmFsOiAnPSchICg8dmFsPiB8IDxhbHRfdmFsPiB8IDxlbXB0eT4gfCA8YWx0X2VtcHR5PikgOwp2YWw6ICciJyEgLig_KyciJyEpID0-IGpvaW4gOwphbHRfdmFsOiAnXCcnISAuKD8rJ1wnJyEpID0-IGpvaW4gOwplbXB0eTogJyInICciJyA7CmFsdF9lbXB0eTogJ1wnJyEgJ1wnJyEgOwpib2R5OiAuKD8tJzwnISkgPT4gam9pbiA7Cgp2b2lkX2VsOiA8dm9pZF9uYW1lPiA8YXR0cmliPiogPFdTMD4gJy8nPyEgJz4nISA7CnZvaWRfbmFtZTogKCJhcmVhIiB8ICJiYXNlIiB8ICJiciIgfCAiY29sIiB8ICJlbWJlZCIgfCAiaHIiIHwgImltZyIgfCAiaW5wdXQiIHwgImxpbmsiIHwgIm1ldGEiIHwgInNvdXJjZSIgfCAidHJhY2siIHwgIndiciIpIDsKCmNkYXRhX2VsOiA8Y2RhdGFfbmFtZT4gPGF0dHJpYj4qICc-JyEgPGNkYXRhX2JvZHk-IDsKY2RhdGFfbmFtZTogKCJzY3JpcHQiIHwgInN0eWxlIikgOwpjZGF0YV9ib2R5OiAuKD8tPGNkYXRhX2Nsb3NlPiEpIDxjZGF0YV9jbG9zZT4hID0-IGpvaW4gOwpjZGF0YV9jbG9zZTogIjwvIiEgPGNkYXRhX25hbWU-ISA8V1MwPiAnPichIDsKCmRvY3R5cGU6ICI8ISIhICgiZG9jdHlwZSIgfCAiRE9DVFlQRSIpISA8V1MxPiAuKD8rJz4nISkgPFdTMD4gPT4gam9pbiA7CmNvbW1lbnQ6ICIhLS0iISAuKD8rIi0tPiIhKSA9PiBqb2luIDsKCm1haW46IDxTVEFSVD4gPGRvY3R5cGU-PyA8bm9kZT4rIDxFTkQ-IDukbWFpbtoBMjwhZG9jdHlwZSBodG1sPgo8aHRtbCBsYW5nPSJlbiI-CjxoZWFkPgogIDwhLS0gPGlnbm9yZT48L2lnbm9yZT4gLS0-CiAgPHNjcmlwdCBsYW5nPSJqYXZhc2NyaXB0Ij4KY29uc29sZS5sb2coIjwvIisic2NyaXB0PiIpOwogIDwvc2NyaXB0PgogIDxzdHlsZT4KYm9keSB7IG1hcmdpbjogMDsgfQogIDwvc3R5bGU-CjwvaGVhZD4KPGJvZHk-CiAgPGRpdiBpZD0iZm9vIiBib29sIGRhdGEteHl6PSIiIGVtcHR5PScnPgogICAgPGEgaHJlZj0iI2JhciI-YmF6IDxiPmJvbGQ8L2I-PC9hPjxici8-CiAgPC9kaXY-CjwvYm9keT4KPC9odG1sPqCgoKA
8
- export const lang = defGrammar(`
5
+ const lang = defGrammar(`
9
6
  node: '<'! (<comment> | <cdata_el> | <void_el> | <el>) ;
10
7
  el: <name> <attrib>* (<el_body> | <el_close>! ) ;
11
8
  el_body: <WS0> '>'! (<body> | <node>)* "</"! <name>! <WS0> '>'! => hoist ;
@@ -32,143 +29,122 @@ comment: "!--"! .(?+"-->"!) => join ;
32
29
 
33
30
  main: <START> <doctype>? <node>+ <END> ;
34
31
  `);
35
- /**
36
- * Creates a parser context for given source string and calls the main parser
37
- * rule. Returns result object, incl. the context for further inspection and
38
- * transformation.
39
- *
40
- * @param src
41
- * @param opts
42
- */
43
- export const parseRaw = (src, opts) => {
44
- const ctx = defContext(src, opts);
45
- return { result: lang.rules.main(ctx), ctx };
32
+ const parseRaw = (src, opts) => {
33
+ const ctx = defContext(src, opts);
34
+ return { result: lang.rules.main(ctx), ctx };
46
35
  };
47
- /**
48
- * Trims given HTML source string and attempts to parse it into a collection of
49
- * elements in thi.ng/hiccup format, using provided options to transform, clean
50
- * or filter elements.
51
- *
52
- * @param src
53
- * @param opts
54
- */
55
- export const parseHtml = (src, opts) => {
56
- if (!src)
57
- return { type: "success", result: [] };
58
- opts = {
59
- debug: false,
60
- collapse: true,
61
- unescape: true,
62
- maxDepth: 128,
63
- ...opts,
36
+ const parseHtml = (src, opts) => {
37
+ if (!src)
38
+ return { type: "success", result: [] };
39
+ opts = {
40
+ debug: false,
41
+ collapse: true,
42
+ unescape: true,
43
+ maxDepth: 128,
44
+ ...opts
45
+ };
46
+ try {
47
+ const { result, ctx } = parseRaw(src.trim(), {
48
+ debug: opts.debug,
49
+ maxDepth: opts.maxDepth
50
+ });
51
+ const loc = {
52
+ offset: ctx.state.p,
53
+ line: ctx.state.l,
54
+ column: ctx.state.c
64
55
  };
65
- try {
66
- const { result, ctx } = parseRaw(src.trim(), {
67
- debug: opts.debug,
68
- maxDepth: opts.maxDepth,
69
- });
70
- const loc = {
71
- offset: ctx.state.p,
72
- line: ctx.state.l,
73
- column: ctx.state.c,
74
- };
75
- if (result) {
76
- const acc = [];
77
- transformScope(ctx.root, opts, acc);
78
- return {
79
- type: ctx.done ? "success" : "partial",
80
- result: acc,
81
- loc,
82
- };
83
- }
84
- else {
85
- return { type: "fail", loc };
86
- }
87
- }
88
- catch (e) {
89
- return { type: "error", err: e };
56
+ if (result) {
57
+ const acc = [];
58
+ transformScope(ctx.root, opts, acc);
59
+ return {
60
+ type: ctx.done ? "success" : "partial",
61
+ result: acc,
62
+ loc
63
+ };
64
+ } else {
65
+ return { type: "fail", loc };
90
66
  }
67
+ } catch (e) {
68
+ return { type: "error", err: e };
69
+ }
91
70
  };
92
- /**
93
- * Recursive depth-first transformation function to process the parse tree (this
94
- * is where the actual conversion to hiccup format happens).
95
- *
96
- * @remarks
97
- * The dispatch values for the various implementations here correspond to the
98
- * above grammar rules.
99
- *
100
- * @internal
101
- */
102
- const transformScope = defmulti((x) => x.id, { cdata_el: "el", void_el: "el" }, {
71
+ const transformScope = defmulti(
72
+ (x) => x.id,
73
+ { cdata_el: "el", void_el: "el" },
74
+ {
103
75
  [DEFAULT]: (scope) => {
104
- throw new Error(`missing impl for scope ID: ${scope.id}`);
76
+ throw new Error(`missing impl for scope ID: ${scope.id}`);
105
77
  },
106
78
  // root node of the parse tree
107
79
  root: ({ children }, opts, acc) => {
108
- if (!children)
109
- return;
110
- children = children[0].children;
111
- if (opts.doctype && children?.[0]) {
112
- acc.push(["!DOCTYPE", children[0].result]);
113
- }
114
- for (let x of children[1].children)
115
- transformScope(x, opts, acc);
80
+ if (!children)
81
+ return;
82
+ children = children[0].children;
83
+ if (opts.doctype && children?.[0]) {
84
+ acc.push(["!DOCTYPE", children[0].result]);
85
+ }
86
+ for (let x of children[1].children)
87
+ transformScope(x, opts, acc);
116
88
  },
117
89
  node: ({ children }, opts, acc) => {
118
- transformScope(children[0], opts, acc);
90
+ transformScope(children[0], opts, acc);
119
91
  },
120
92
  comment: ({ result }, opts, acc) => {
121
- if (opts.comments)
122
- acc.push(["__COMMENT__", result.trim()]);
93
+ if (opts.comments)
94
+ acc.push(["__COMMENT__", result.trim()]);
123
95
  },
124
96
  // element node transformer, collects & filters attributes/children
125
97
  // adds resulting hiccup element to accumulator array
126
98
  el: ({ children }, opts, acc) => {
127
- const [name, { children: $attribs }, body] = children;
128
- if (opts.ignoreElements?.includes(name.result))
129
- return;
130
- const attribs = {};
131
- const el = [name.result, attribs];
132
- if ($attribs) {
133
- for (let a of $attribs) {
134
- const name = a.children[0].result;
135
- if (opts.dataAttribs === false && name.startsWith("data-"))
136
- continue;
137
- if (opts.ignoreAttribs?.includes(name))
138
- continue;
139
- if (a.children[1].children) {
140
- const val = a.children[1].children[0].result;
141
- if (val != null)
142
- attribs[name] = unescapeEntities(val);
143
- }
144
- else {
145
- attribs[name] = true;
146
- }
147
- }
99
+ const [name, { children: $attribs }, body] = children;
100
+ if (opts.ignoreElements?.includes(name.result))
101
+ return;
102
+ const attribs = {};
103
+ const el = [name.result, attribs];
104
+ if ($attribs) {
105
+ for (let a of $attribs) {
106
+ const name2 = a.children[0].result;
107
+ if (opts.dataAttribs === false && name2.startsWith("data-"))
108
+ continue;
109
+ if (opts.ignoreAttribs?.includes(name2))
110
+ continue;
111
+ if (a.children[1].children) {
112
+ const val = a.children[1].children[0].result;
113
+ if (val != null)
114
+ attribs[name2] = unescapeEntities(val);
115
+ } else {
116
+ attribs[name2] = true;
117
+ }
148
118
  }
149
- if (body) {
150
- if (body.result) {
151
- el.push(body.result.trim());
152
- }
153
- else if (body.children) {
154
- for (let x of body.children)
155
- transformScope(x, opts, el);
156
- }
119
+ }
120
+ if (body) {
121
+ if (body.result) {
122
+ el.push(body.result.trim());
123
+ } else if (body.children) {
124
+ for (let x of body.children)
125
+ transformScope(x, opts, el);
157
126
  }
158
- const result = opts.tx ? opts.tx(el) : el;
159
- if (result != null)
160
- acc.push(result);
127
+ }
128
+ const result = opts.tx ? opts.tx(el) : el;
129
+ if (result != null)
130
+ acc.push(result);
161
131
  },
162
132
  // plain text transform (by default only resolves HTML entities)
163
133
  body: ({ result }, opts, acc) => {
164
- if (!opts.whitespace && /^\s+$/.test(result))
165
- return;
166
- if (opts.collapse)
167
- result = result.replace(/\s+/gm, " ");
168
- if (opts.unescape)
169
- result = unescapeEntities(result);
170
- result = opts.txBody ? opts.txBody(result) : result;
171
- if (result != null)
172
- acc.push(result);
173
- },
174
- });
134
+ if (!opts.whitespace && /^\s+$/.test(result))
135
+ return;
136
+ if (opts.collapse)
137
+ result = result.replace(/\s+/gm, " ");
138
+ if (opts.unescape)
139
+ result = unescapeEntities(result);
140
+ result = opts.txBody ? opts.txBody(result) : result;
141
+ if (result != null)
142
+ acc.push(result);
143
+ }
144
+ }
145
+ );
146
+ export {
147
+ lang,
148
+ parseHtml,
149
+ parseRaw
150
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@thi.ng/hiccup-html-parse",
3
- "version": "0.3.9",
3
+ "version": "0.3.10",
4
4
  "description": "Well-formed HTML parsing and customizable transformation to nested JS arrays in @thi.ng/hiccup format",
5
5
  "type": "module",
6
6
  "module": "./index.js",
@@ -24,7 +24,9 @@
24
24
  "author": "Karsten Schmidt (https://thi.ng)",
25
25
  "license": "Apache-2.0",
26
26
  "scripts": {
27
- "build": "yarn clean && tsc --declaration",
27
+ "build": "yarn build:esbuild && yarn build:decl",
28
+ "build:decl": "tsc --declaration --emitDeclarationOnly",
29
+ "build:esbuild": "esbuild --format=esm --platform=neutral --target=es2022 --tsconfig=tsconfig.json --outdir=. src/**/*.ts",
28
30
  "clean": "rimraf --glob '*.js' '*.d.ts' '*.map' doc",
29
31
  "doc": "typedoc --excludePrivate --excludeInternal --out doc src/index.ts",
30
32
  "doc:ae": "mkdir -p .ae/doc .ae/temp && api-extractor run --local --verbose",
@@ -33,13 +35,14 @@
33
35
  "test": "bun test"
34
36
  },
35
37
  "dependencies": {
36
- "@thi.ng/api": "^8.9.11",
37
- "@thi.ng/defmulti": "^3.0.9",
38
- "@thi.ng/parse": "^2.4.9",
39
- "@thi.ng/strings": "^3.7.2"
38
+ "@thi.ng/api": "^8.9.12",
39
+ "@thi.ng/defmulti": "^3.0.10",
40
+ "@thi.ng/parse": "^2.4.10",
41
+ "@thi.ng/strings": "^3.7.3"
40
42
  },
41
43
  "devDependencies": {
42
44
  "@microsoft/api-extractor": "^7.38.3",
45
+ "esbuild": "^0.19.8",
43
46
  "rimraf": "^5.0.5",
44
47
  "tools": "^0.0.1",
45
48
  "typedoc": "^0.25.4",
@@ -82,5 +85,5 @@
82
85
  "status": "alpha",
83
86
  "year": 2023
84
87
  },
85
- "gitHead": "25f2ac8ff795a432a930119661b364d4d93b59a0\n"
88
+ "gitHead": "5e7bafedfc3d53bc131469a28de31dd8e5b4a3ff\n"
86
89
  }