@qretaio/html2json 0.5.5 → 0.5.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -14
- package/html2json_bg.wasm +0 -0
- package/index.js +27 -0
- package/package.json +5 -4
package/README.md
CHANGED
|
@@ -44,7 +44,7 @@ just install
|
|
|
44
44
|
### JavaScript / TypeScript
|
|
45
45
|
|
|
46
46
|
```javascript
|
|
47
|
-
import { extract } from
|
|
47
|
+
import { extract } from "@qretaio/html2json";
|
|
48
48
|
|
|
49
49
|
const html = `
|
|
50
50
|
<article class="post">
|
|
@@ -57,16 +57,18 @@ const html = `
|
|
|
57
57
|
</article>
|
|
58
58
|
`;
|
|
59
59
|
|
|
60
|
-
const spec =
|
|
60
|
+
const spec = {
|
|
61
61
|
title: "h2",
|
|
62
62
|
author: ".author",
|
|
63
|
-
tags: [
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
|
|
63
|
+
tags: [
|
|
64
|
+
{
|
|
65
|
+
$: ".tags span",
|
|
66
|
+
name: "$",
|
|
67
|
+
},
|
|
68
|
+
],
|
|
69
|
+
};
|
|
70
|
+
|
|
71
|
+
const result = await extract(html, spec);
|
|
70
72
|
console.log(result);
|
|
71
73
|
// {
|
|
72
74
|
// "title": "My Article",
|
|
@@ -129,6 +131,7 @@ The spec is a JSON object where each key defines an output field and each value
|
|
|
129
131
|
```
|
|
130
132
|
|
|
131
133
|
Available pipes:
|
|
134
|
+
|
|
132
135
|
- `trim` - Trim whitespace
|
|
133
136
|
- `lower` - Convert to lowercase
|
|
134
137
|
- `upper` - Convert to uppercase
|
|
@@ -143,11 +146,13 @@ Available pipes:
|
|
|
143
146
|
|
|
144
147
|
```json
|
|
145
148
|
{
|
|
146
|
-
"items": [
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
149
|
+
"items": [
|
|
150
|
+
{
|
|
151
|
+
"$": ".item",
|
|
152
|
+
"title": "h2",
|
|
153
|
+
"description": "p"
|
|
154
|
+
}
|
|
155
|
+
]
|
|
151
156
|
}
|
|
152
157
|
```
|
|
153
158
|
|
package/html2json_bg.wasm
CHANGED
|
Binary file
|
package/index.js
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
// Auto-initializing wrapper for convenience
|
|
2
|
+
import _init from "./html2json.js";
|
|
3
|
+
import { extract as _extract, initSync } from "./html2json.js";
|
|
4
|
+
|
|
5
|
+
let initPromise;
|
|
6
|
+
|
|
7
|
+
// Auto-init on first call
|
|
8
|
+
function init() {
|
|
9
|
+
if (!initPromise) {
|
|
10
|
+
initPromise = _init();
|
|
11
|
+
}
|
|
12
|
+
return initPromise;
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
// Export auto-initialized extract (async for first call)
|
|
16
|
+
export async function extract(html, spec_json) {
|
|
17
|
+
await init();
|
|
18
|
+
return _extract(html, spec_json);
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
// Also export init functions for those who want to control timing
|
|
22
|
+
export { init, initSync };
|
|
23
|
+
|
|
24
|
+
// Export raw extract for advanced use (sync, requires manual init)
|
|
25
|
+
export { _extract as extractSync };
|
|
26
|
+
|
|
27
|
+
export default init;
|
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"Qreta Dev <qretadev@gmail.com>"
|
|
6
6
|
],
|
|
7
7
|
"description": "HTML to JSON extractor",
|
|
8
|
-
"version": "0.5.5",
|
|
8
|
+
"version": "0.5.7",
|
|
9
9
|
"license": "MIT",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -14,11 +14,12 @@
|
|
|
14
14
|
"files": [
|
|
15
15
|
"html2json_bg.wasm",
|
|
16
16
|
"html2json.js",
|
|
17
|
-
"html2json.d.ts"
|
|
17
|
+
"html2json.d.ts",
|
|
18
|
+
"index.js"
|
|
18
19
|
],
|
|
19
|
-
"main": "
|
|
20
|
+
"main": "index.js",
|
|
20
21
|
"types": "html2json.d.ts",
|
|
21
22
|
"sideEffects": [
|
|
22
23
|
"./snippets/*"
|
|
23
24
|
]
|
|
24
|
-
}
|
|
25
|
+
}
|