@qretaio/html2json 0.5.4 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/html2json.d.ts +34 -0
- package/html2json.js +144 -0
- package/html2json_bg.wasm +0 -0
- package/package.json +1 -1
package/html2json.d.ts
CHANGED
|
@@ -1,11 +1,45 @@
|
|
|
1
1
|
/* tslint:disable */
|
|
2
2
|
/* eslint-disable */
|
|
3
3
|
|
|
4
|
+
/**
 * Extract JSON from HTML using a spec.
 *
 * NOTE(review): the JavaScript implementation dereferences the module-level
 * wasm exports object, so the module presumably has to be initialized before
 * the first call. The initialization entry point is not part of this excerpt.
 *
 * @param html - The HTML source to parse.
 * @param spec_json - The extraction specification, as a JSON string.
 * @returns A JSON string with the extracted data.
 * @throws If the HTML parsing or extraction fails, the error value produced by
 * the wasm side is thrown.
 *
 * @example
 * ```javascript
 * import { extract } from '@qretaio/html2json';
 *
 * const html = '<div class="item"><span>Price: $25.00</span></div>';
 * const spec = '{"price": ".item span | regex:\\\\$(\\\\d+\\\\.\\\\d+)"}';
 * const result = extract(html, spec);
 * console.log(result); // {"price":"25.00"}
 * ```
 */
|
|
31
|
+
export function extract(html: string, spec_json: string): string;
|
|
32
|
+
|
|
4
33
|
// NOTE(review): presumably the set of values the module's initialization
// function accepts as the wasm source; that function is outside this excerpt.
export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module;
|
|
5
34
|
|
|
6
35
|
/**
 * Shape of the raw exports of the instantiated wasm module.
 *
 * These are the low-level entry points the JavaScript glue calls; all
 * arguments and results are plain numbers (pointers, lengths, table indices).
 */
export interface InitOutput {
|
|
7
36
|
// Linear memory; the glue reads and writes strings through a Uint8Array view of its buffer.
readonly memory: WebAssembly.Memory;
|
|
37
|
+
// Raw export behind the JS `extract` wrapper. The wrapper passes
// (html ptr, html len, spec ptr, spec len) and reads the result as
// [result ptr, result len, error externref index, error flag].
readonly extract: (a: number, b: number, c: number, d: number) => [number, number, number, number];
|
|
8
38
|
// Table the glue reads thrown values from (via `get(idx)`).
readonly __wbindgen_externrefs: WebAssembly.Table;
|
|
39
|
+
// Allocator; the glue calls it as (size, 1) and treats the result as a pointer.
readonly __wbindgen_malloc: (a: number, b: number) => number;
|
|
40
|
+
// Reallocator; the glue calls it as (ptr, old size, new size, 1).
readonly __wbindgen_realloc: (a: number, b: number, c: number, d: number) => number;
|
|
41
|
+
// Releases an entry of `__wbindgen_externrefs` after the glue has read it.
readonly __externref_table_dealloc: (a: number) => void;
|
|
42
|
+
// Deallocator; the glue calls it as (ptr, len, 1) for the result string of `extract`.
readonly __wbindgen_free: (a: number, b: number, c: number) => void;
|
|
9
43
|
// Called once by the glue right after the exports object is stored.
readonly __wbindgen_start: () => void;
|
|
10
44
|
}
|
|
11
45
|
|
package/html2json.js
CHANGED
|
@@ -1,8 +1,66 @@
|
|
|
1
1
|
/* @ts-self-types="./html2json.d.ts" */
|
|
2
2
|
|
|
3
|
+
/**
 * Extract JSON from HTML using a spec.
 *
 * Both strings are copied into wasm memory as UTF-8, the raw `extract` export
 * is called, and the resulting UTF-8 buffer is decoded back into a JS string.
 *
 * NOTE(review): this function dereferences the module-level `wasm` object,
 * which is only assigned in `__wbg_finalize_init`, so the module presumably
 * has to be initialized before the first call.
 *
 * @param {string} html - The HTML source to parse.
 * @param {string} spec_json - The extraction specification, as a JSON string.
 * @returns {string} A JSON string with the extracted data.
 * @throws If the HTML parsing or extraction fails, the error value produced by
 * the wasm side is thrown.
 *
 * @example
 * ```javascript
 * import { extract } from '@qretaio/html2json';
 *
 * const html = '<div class="item"><span>Price: $25.00</span></div>';
 * const spec = '{"price": ".item span | regex:\\\\$(\\\\d+\\\\.\\\\d+)"}';
 * const result = extract(html, spec);
 * console.log(result); // {"price":"25.00"}
 * ```
 */
|
|
33
|
+
export function extract(html, spec_json) {
|
|
34
|
+
// Pointer and byte length of the result string; both are handed to
// __wbindgen_free in the finally block.
let deferred4_0;
|
|
35
|
+
let deferred4_1;
|
|
36
|
+
try {
|
|
37
|
+
// passStringToWasm0 returns the pointer and reports the encoded byte length
// through WASM_VECTOR_LEN, so the length must be read right after each call.
const ptr0 = passStringToWasm0(html, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
38
|
+
const len0 = WASM_VECTOR_LEN;
|
|
39
|
+
const ptr1 = passStringToWasm0(spec_json, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
40
|
+
const len1 = WASM_VECTOR_LEN;
|
|
41
|
+
// ret is used as [result ptr, result len, error externref index, error flag].
const ret = wasm.extract(ptr0, len0, ptr1, len1);
|
|
42
|
+
var ptr3 = ret[0];
|
|
43
|
+
var len3 = ret[1];
|
|
44
|
+
// A truthy ret[3] signals failure; ret[2] then indexes the thrown value.
if (ret[3]) {
|
|
45
|
+
ptr3 = 0; len3 = 0;
|
|
46
|
+
throw takeFromExternrefTable0(ret[2]);
|
|
47
|
+
}
|
|
48
|
+
deferred4_0 = ptr3;
|
|
49
|
+
deferred4_1 = len3;
|
|
50
|
+
return getStringFromWasm0(ptr3, len3);
|
|
51
|
+
} finally {
|
|
52
|
+
// Runs on both paths. If an exception was thrown before the assignments
// above, both deferred values are still undefined at this point.
wasm.__wbindgen_free(deferred4_0, deferred4_1, 1);
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
|
|
3
56
|
function __wbg_get_imports() {
|
|
4
57
|
const import0 = {
|
|
5
58
|
__proto__: null,
|
|
59
|
+
// Member of the imports object built by __wbg_get_imports. Decodes the UTF-8
// bytes at (arg0 = pointer, arg1 = length) in wasm memory and returns the
// resulting JS string.
__wbindgen_cast_0000000000000001: function(arg0, arg1) {
|
|
60
|
+
// Cast intrinsic for `Ref(String) -> Externref`.
|
|
61
|
+
const ret = getStringFromWasm0(arg0, arg1);
|
|
62
|
+
return ret;
|
|
63
|
+
},
|
|
6
64
|
__wbindgen_init_externref_table: function() {
|
|
7
65
|
const table = wasm.__wbindgen_externrefs;
|
|
8
66
|
const offset = table.grow(4);
|
|
@@ -19,10 +77,96 @@ function __wbg_get_imports() {
|
|
|
19
77
|
};
|
|
20
78
|
}
|
|
21
79
|
|
|
80
|
+
/**
 * Decode `len` bytes of UTF-8 starting at `ptr` in wasm memory into a JS string.
 *
 * @param {number} ptr - Start offset in wasm memory; may arrive as a signed 32-bit value.
 * @param {number} len - Number of bytes to decode.
 * @returns {string} The decoded text.
 */
function getStringFromWasm0(ptr, len) {
|
|
81
|
+
// Reinterpret the pointer as an unsigned 32-bit integer before using it as an offset.
ptr = ptr >>> 0;
|
|
82
|
+
return decodeText(ptr, len);
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
// Cached byte view over wasm memory; null until first use and reset to null
// by __wbg_finalize_init.
let cachedUint8ArrayMemory0 = null;
|
|
86
|
+
/**
 * Return a Uint8Array view over the wasm linear memory, creating it on demand.
 *
 * @returns {Uint8Array} View over `wasm.memory.buffer`.
 */
function getUint8ArrayMemory0() {
|
|
87
|
+
// A zero-length view is treated as stale and rebuilt.
// NOTE(review): presumably this covers the buffer being detached when the
// wasm memory grows; confirm against the WebAssembly.Memory documentation.
if (cachedUint8ArrayMemory0 === null || cachedUint8ArrayMemory0.byteLength === 0) {
|
|
88
|
+
cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer);
|
|
89
|
+
}
|
|
90
|
+
return cachedUint8ArrayMemory0;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
/**
 * Copy a JS string into wasm memory as UTF-8.
 *
 * The number of bytes written is reported through the module-level
 * WASM_VECTOR_LEN rather than through the return value.
 *
 * @param {string} arg - String to encode.
 * @param {Function} malloc - Allocator, called as malloc(size, 1).
 * @param {Function} [realloc] - Reallocator, called as
 *   realloc(ptr, oldSize, newSize, 1). When undefined, the string is encoded
 *   up front and copied in a single step.
 * @returns {number} Pointer (as an unsigned 32-bit integer) to the encoded bytes.
 */
function passStringToWasm0(arg, malloc, realloc) {
|
|
94
|
+
// No reallocator: encode first so the exact size is known before allocating.
if (realloc === undefined) {
|
|
95
|
+
const buf = cachedTextEncoder.encode(arg);
|
|
96
|
+
const ptr = malloc(buf.length, 1) >>> 0;
|
|
97
|
+
getUint8ArrayMemory0().subarray(ptr, ptr + buf.length).set(buf);
|
|
98
|
+
WASM_VECTOR_LEN = buf.length;
|
|
99
|
+
return ptr;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
// Start with one byte per UTF-16 code unit, which is exact for pure-ASCII input.
let len = arg.length;
|
|
103
|
+
let ptr = malloc(len, 1) >>> 0;
|
|
104
|
+
|
|
105
|
+
const mem = getUint8ArrayMemory0();
|
|
106
|
+
|
|
107
|
+
let offset = 0;
|
|
108
|
+
|
|
109
|
+
// Copy the leading ASCII code units directly; stop at the first one above 0x7F.
for (; offset < len; offset++) {
|
|
110
|
+
const code = arg.charCodeAt(offset);
|
|
111
|
+
if (code > 0x7F) break;
|
|
112
|
+
mem[ptr + offset] = code;
|
|
113
|
+
}
|
|
114
|
+
// A non-ASCII remainder exists: grow the buffer and let the encoder handle it.
if (offset !== len) {
|
|
115
|
+
if (offset !== 0) {
|
|
116
|
+
// Drop the prefix that has already been copied.
arg = arg.slice(offset);
|
|
117
|
+
}
|
|
118
|
+
// Arguments evaluate left to right, so the old len is passed as the second
// argument before len is reassigned. The new size reserves 3 bytes for each
// remaining UTF-16 code unit.
ptr = realloc(ptr, len, len = offset + arg.length * 3, 1) >>> 0;
|
|
119
|
+
// Re-fetch the memory view here instead of reusing `mem`.
const view = getUint8ArrayMemory0().subarray(ptr + offset, ptr + len);
|
|
120
|
+
const ret = cachedTextEncoder.encodeInto(arg, view);
|
|
121
|
+
|
|
122
|
+
offset += ret.written;
|
|
123
|
+
// Shrink the allocation to the number of bytes actually written.
ptr = realloc(ptr, len, offset, 1) >>> 0;
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
WASM_VECTOR_LEN = offset;
|
|
127
|
+
return ptr;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
/**
 * Read the value stored at `idx` in the wasm externref table and release that slot.
 *
 * @param {number} idx - Index into `wasm.__wbindgen_externrefs`.
 * @returns {*} The value that was stored at `idx`.
 */
function takeFromExternrefTable0(idx) {
|
|
131
|
+
// Read the value before the slot is deallocated.
const value = wasm.__wbindgen_externrefs.get(idx);
|
|
132
|
+
wasm.__externref_table_dealloc(idx);
|
|
133
|
+
return value;
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
// Shared UTF-8 decoder. `fatal: true` makes decode() throw on malformed input
// instead of substituting replacement characters; `ignoreBOM: true` keeps a
// leading byte order mark in the output.
let cachedTextDecoder = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true });
|
|
137
|
+
// Argument-less decode call made once right after construction.
cachedTextDecoder.decode();
|
|
138
|
+
// Cumulative byte count at which the decoder instance is replaced.
// NOTE(review): going by the name, this works around a Safari limit on how
// much a single TextDecoder can decode; the rationale is not in this file.
const MAX_SAFARI_DECODE_BYTES = 2146435072;
|
|
139
|
+
// Bytes decoded so far by the current decoder instance.
let numBytesDecoded = 0;
|
|
140
|
+
/**
 * Decode `len` bytes of UTF-8 starting at offset `ptr` in wasm memory.
 *
 * @param {number} ptr - Start offset in wasm memory.
 * @param {number} len - Number of bytes to decode.
 * @returns {string} The decoded text.
 */
function decodeText(ptr, len) {
|
|
141
|
+
numBytesDecoded += len;
|
|
142
|
+
// Once the running total reaches the threshold, start over with a fresh
// decoder and count only the current request against it.
if (numBytesDecoded >= MAX_SAFARI_DECODE_BYTES) {
|
|
143
|
+
cachedTextDecoder = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true });
|
|
144
|
+
cachedTextDecoder.decode();
|
|
145
|
+
numBytesDecoded = len;
|
|
146
|
+
}
|
|
147
|
+
return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len));
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
// Shared UTF-8 encoder used by passStringToWasm0.
const cachedTextEncoder = new TextEncoder();
|
|
151
|
+
|
|
152
|
+
// Fallback for environments whose TextEncoder has no encodeInto method:
// encode into a temporary buffer and copy it into the destination view.
if (!('encodeInto' in cachedTextEncoder)) {
|
|
153
|
+
cachedTextEncoder.encodeInto = function (arg, view) {
|
|
154
|
+
const buf = cachedTextEncoder.encode(arg);
|
|
155
|
+
// Copies the whole encoding; this fallback does not truncate to fit `view`.
view.set(buf);
|
|
156
|
+
// Same result shape as the native method: code units read, bytes written.
return {
|
|
157
|
+
read: arg.length,
|
|
158
|
+
written: buf.length
|
|
159
|
+
};
|
|
160
|
+
};
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
// Byte length of the string most recently written by passStringToWasm0;
// callers read it immediately after that call returns.
let WASM_VECTOR_LEN = 0;
|
|
164
|
+
|
|
22
165
|
// Module-level state: the compiled module and the exports object of its
// instance. Both are undefined until __wbg_finalize_init has run.
let wasmModule, wasm;
|
|
23
166
|
/**
 * Store the instantiated module's exports for use by the glue functions and
 * run the module's start export.
 *
 * @param {WebAssembly.Instance} instance - Instance whose exports become `wasm`.
 * @param {WebAssembly.Module} module - Module the instance was created from.
 * @returns {object} The exports object now stored in `wasm`.
 */
function __wbg_finalize_init(instance, module) {
|
|
24
167
|
wasm = instance.exports;
|
|
25
168
|
wasmModule = module;
|
|
169
|
+
// Drop any cached memory view so the next access is built from this instance's memory.
cachedUint8ArrayMemory0 = null;
|
|
26
170
|
wasm.__wbindgen_start();
|
|
27
171
|
return wasm;
|
|
28
172
|
}
|
package/html2json_bg.wasm
CHANGED
|
Binary file
|