@qretaio/html2json 0.5.4 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/html2json.d.ts CHANGED
@@ -1,11 +1,45 @@
1
1
  /* tslint:disable */
2
2
  /* eslint-disable */
3
3
 
4
/**
 * Extract JSON from HTML using a spec.
 *
 * The JavaScript glue behind this declaration reads the module-level wasm
 * exports, so the WebAssembly module has to be instantiated before the call.
 * NOTE(review): the example below omits that initialisation step — confirm
 * how consumers are expected to perform it.
 *
 * @param html - The HTML source to parse.
 * @param spec_json - The extraction specification, as a JSON string.
 * @returns A JSON string with the extracted data.
 * @throws The error value handed back by the WebAssembly side when HTML
 * parsing or extraction fails. The glue surfaces it with `throw`; it is not
 * returned to the caller.
 *
 * @example
 * ```javascript
 * import { extract } from '@qretaio/html2json';
 *
 * const html = '<div class="item"><span>Price: $25.00</span></div>';
 * const spec = '{"price": ".item span | regex:\\\\$(\\\\d+\\\\.\\\\d+)"}';
 * const result = extract(html, spec);
 * console.log(result); // {"price":"25.00"}
 * ```
 */
export function extract(html: string, spec_json: string): string;
32
+
4
33
  export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module;
5
34
 
6
35
  export interface InitOutput {
7
36
  readonly memory: WebAssembly.Memory;
37
+ readonly extract: (a: number, b: number, c: number, d: number) => [number, number, number, number];
8
38
  readonly __wbindgen_externrefs: WebAssembly.Table;
39
+ readonly __wbindgen_malloc: (a: number, b: number) => number;
40
+ readonly __wbindgen_realloc: (a: number, b: number, c: number, d: number) => number;
41
+ readonly __externref_table_dealloc: (a: number) => void;
42
+ readonly __wbindgen_free: (a: number, b: number, c: number) => void;
9
43
  readonly __wbindgen_start: () => void;
10
44
  }
11
45
 
package/html2json.js CHANGED
@@ -1,8 +1,66 @@
1
1
  /* @ts-self-types="./html2json.d.ts" */
2
2
 
3
/**
 * Extract JSON from HTML using a spec.
 *
 * Both strings are copied into wasm memory, the raw `extract` export is
 * invoked, and the resulting string is decoded back out of wasm memory. The
 * module-level `wasm` exports must already be set, i.e. the module has to be
 * initialised before this is called.
 *
 * @example
 * ```javascript
 * import { extract } from '@qretaio/html2json';
 *
 * const html = '<div class="item"><span>Price: $25.00</span></div>';
 * const spec = '{"price": ".item span | regex:\\\\$(\\\\d+\\\\.\\\\d+)"}';
 * const result = extract(html, spec);
 * console.log(result); // {"price":"25.00"}
 * ```
 * @param {string} html - The HTML source to parse
 * @param {string} spec_json - The extraction specification as JSON string
 * @returns {string} A JSON string with the extracted data
 * @throws The error value taken from the externref table when the wasm call
 * reports that HTML parsing or extraction failed.
 */
export function extract(html, spec_json) {
    // Stay undefined unless the call succeeds, so the `finally` block only
    // hands a real allocation back to the wasm allocator on the success path.
    let resultPtr;
    let resultLen;
    try {
        const htmlPtr = passStringToWasm0(html, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
        // WASM_VECTOR_LEN is overwritten by every passStringToWasm0 call, so it
        // has to be captured before the next string is passed.
        const htmlLen = WASM_VECTOR_LEN;
        const specPtr = passStringToWasm0(spec_json, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
        const specLen = WASM_VECTOR_LEN;

        // Result layout: [string pointer, string length, error table index, error flag].
        const [outPtr, outLen, errorIdx, failed] = wasm.extract(htmlPtr, htmlLen, specPtr, specLen);
        if (failed) {
            throw takeFromExternrefTable0(errorIdx);
        }

        resultPtr = outPtr;
        resultLen = outLen;
        return getStringFromWasm0(outPtr, outLen);
    } finally {
        wasm.__wbindgen_free(resultPtr, resultLen, 1);
    }
}
55
+
3
56
  function __wbg_get_imports() {
4
57
  const import0 = {
5
58
  __proto__: null,
59
+ __wbindgen_cast_0000000000000001: function(arg0, arg1) {
60
+ // Cast intrinsic for `Ref(String) -> Externref`.
61
+ const ret = getStringFromWasm0(arg0, arg1);
62
+ return ret;
63
+ },
6
64
  __wbindgen_init_externref_table: function() {
7
65
  const table = wasm.__wbindgen_externrefs;
8
66
  const offset = table.grow(4);
@@ -19,10 +77,96 @@ function __wbg_get_imports() {
19
77
  };
20
78
  }
21
79
 
80
/**
 * Decode `len` bytes of wasm memory starting at `ptr` into a string.
 *
 * @param {number} ptr - Start offset; coerced to an unsigned 32-bit integer.
 * @param {number} len - Number of bytes to decode.
 * @returns {string} Whatever `decodeText` produces for that byte range.
 */
function getStringFromWasm0(ptr, len) {
    // `>>> 0` turns a pointer that arrived as a negative number into its
    // unsigned 32-bit equivalent before it is used as an offset.
    return decodeText(ptr >>> 0, len);
}
84
+
85
// Cached byte view over the wasm memory; null until first use and after
// (re)initialisation.
let cachedUint8ArrayMemory0 = null;

/**
 * Return a `Uint8Array` over the current wasm memory buffer, reusing the
 * cached view while it is still usable.
 *
 * @returns {Uint8Array} The cached (or freshly created) view.
 */
function getUint8ArrayMemory0() {
    const view = cachedUint8ArrayMemory0;
    // A zero-length view is treated as unusable and rebuilt. NOTE(review):
    // presumably this detects a buffer detached by memory growth — confirm.
    if (view !== null && view.byteLength !== 0) {
        return view;
    }
    cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer);
    return cachedUint8ArrayMemory0;
}
92
+
93
/**
 * Copy a JS string into wasm memory as UTF-8 and return its pointer.
 *
 * The encoded byte length is not returned; it is published through the
 * module-level `WASM_VECTOR_LEN`, which the caller must read before the next
 * call overwrites it.
 *
 * @param {string} arg - The string to copy.
 * @param {Function} malloc - Allocator, called as `malloc(byteLength, 1)`.
 * @param {Function} [realloc] - Reallocator, called as
 *   `realloc(ptr, oldLength, newLength, 1)`. When omitted, the string is
 *   encoded up front and allocated at its exact size.
 * @returns {number} Unsigned pointer to the copied bytes.
 */
function passStringToWasm0(arg, malloc, realloc) {
    // Without realloc the buffer cannot be resized afterwards: encode first,
    // then allocate exactly the encoded size.
    if (realloc === undefined) {
        const encoded = cachedTextEncoder.encode(arg);
        const byteLen = encoded.length;
        const dest = malloc(byteLen, 1) >>> 0;
        getUint8ArrayMemory0().subarray(dest, dest + byteLen).set(encoded);
        WASM_VECTOR_LEN = byteLen;
        return dest;
    }

    // Optimistic sizing: one byte per UTF-16 code unit, exact for ASCII.
    let capacity = arg.length;
    let ptr = malloc(capacity, 1) >>> 0;
    const mem = getUint8ArrayMemory0();

    // Fast path: copy leading ASCII code units straight into memory and stop
    // at the first one above 0x7F.
    let written = 0;
    while (written < capacity) {
        const unit = arg.charCodeAt(written);
        if (unit > 0x7F) break;
        mem[ptr + written] = unit;
        written++;
    }

    if (written !== capacity) {
        // Only the part that was not copied by the fast path still needs encoding.
        const rest = written === 0 ? arg : arg.slice(written);

        // Grow to three bytes per remaining code unit, the upper bound the
        // encoder can need for UTF-8 output.
        const grown = written + rest.length * 3;
        ptr = realloc(ptr, capacity, grown, 1) >>> 0;
        capacity = grown;

        // Fetch the memory view again after realloc rather than reusing `mem`.
        const target = getUint8ArrayMemory0().subarray(ptr + written, ptr + capacity);
        const outcome = cachedTextEncoder.encodeInto(rest, target);

        written += outcome.written;
        // Shrink the allocation down to the bytes actually written.
        ptr = realloc(ptr, capacity, written, 1) >>> 0;
    }

    WASM_VECTOR_LEN = written;
    return ptr;
}
129
+
130
/**
 * Read the value stored at `idx` in the externref table, then release that
 * slot through the wasm-side deallocator.
 *
 * @param {number} idx - Table index of the value to take.
 * @returns {*} The value that was stored at `idx`.
 */
function takeFromExternrefTable0(idx) {
    const table = wasm.__wbindgen_externrefs;
    // Read before deallocating: the slot is not touched again afterwards.
    const taken = table.get(idx);
    wasm.__externref_table_dealloc(idx);
    return taken;
}
135
+
136
// Cumulative byte count at which the shared decoder is replaced
// (2147483648 - 1048576). NOTE(review): the name suggests a Safari-specific
// decode limit — confirm against the generator's documentation.
const MAX_SAFARI_DECODE_BYTES = 2146435072;

/**
 * Build a strict UTF-8 decoder (`fatal`, BOM kept) and issue one empty
 * `decode()` call on it before handing it out.
 *
 * @returns {TextDecoder} The freshly created decoder.
 */
function createFatalUtf8Decoder() {
    const decoder = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true });
    decoder.decode();
    return decoder;
}

let cachedTextDecoder = createFatalUtf8Decoder();
// Bytes decoded through the current `cachedTextDecoder` instance.
let numBytesDecoded = 0;

/**
 * Decode `len` bytes of wasm memory starting at `ptr` as UTF-8.
 *
 * @param {number} ptr - Start offset into wasm memory.
 * @param {number} len - Number of bytes to decode.
 * @returns {string} The decoded text.
 */
function decodeText(ptr, len) {
    numBytesDecoded += len;
    if (numBytesDecoded >= MAX_SAFARI_DECODE_BYTES) {
        // Swap in a fresh decoder and restart the count with this request.
        cachedTextDecoder = createFatalUtf8Decoder();
        numBytesDecoded = len;
    }
    const end = ptr + len;
    return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, end));
}
149
+
150
// Byte length of the most recent string copied into wasm memory; written by
// passStringToWasm0 and read by its callers.
let WASM_VECTOR_LEN = 0;

// Shared encoder used for every string that is passed into wasm.
const cachedTextEncoder = new TextEncoder();

// Fallback for encoders without `encodeInto`: encode into a temporary buffer,
// copy it into the destination view, and report the same `{ read, written }`
// shape.
if (!('encodeInto' in cachedTextEncoder)) {
    cachedTextEncoder.encodeInto = (text, dest) => {
        const bytes = cachedTextEncoder.encode(text);
        dest.set(bytes);
        const read = text.length;
        const written = bytes.length;
        return { read, written };
    };
}
164
+
22
165
  let wasmModule, wasm;
23
166
  function __wbg_finalize_init(instance, module) {
24
167
  wasm = instance.exports;
25
168
  wasmModule = module;
169
+ cachedUint8ArrayMemory0 = null;
26
170
  wasm.__wbindgen_start();
27
171
  return wasm;
28
172
  }
package/html2json_bg.wasm CHANGED
Binary file
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "Qreta Dev <qretadev@gmail.com>"
6
6
  ],
7
7
  "description": "HTML to JSON extractor",
8
- "version": "0.5.4",
8
+ "version": "0.5.5",
9
9
  "license": "MIT",
10
10
  "repository": {
11
11
  "type": "git",