@fast-scrape/wasm 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/scrape_wasm.js ADDED
@@ -0,0 +1,1107 @@
1
+ /* @ts-self-types="./scrape_wasm.d.ts" */
2
+
3
+ /**
4
+ * A parsed HTML document.
5
+ *
6
+ * `Soup` is the main entry point for parsing and querying HTML documents.
7
+ * It provides methods for finding elements by CSS selector.
8
+ *
9
+ * @example
10
+ * ```javascript
11
+ * import init, { Soup } from '@fast-scrape/wasm';
12
+ *
13
+ * await init();
14
+ *
15
+ * const soup = new Soup("<div class='item'>Hello</div>");
16
+ * const div = soup.find("div.item");
17
+ * console.log(div.text); // "Hello"
18
+ * ```
19
+ */
20
export class Soup {
    /**
     * Wraps an existing WASM-side pointer in a `Soup` object without running
     * the constructor, and registers it for finalization.
     *
     * @param {number} ptr - Raw pointer; normalized to an unsigned 32-bit value.
     * @returns {Soup}
     */
    static __wrap(ptr) {
        const rawPtr = ptr >>> 0;
        const obj = Object.create(Soup.prototype);
        obj.__wbg_ptr = rawPtr;
        SoupFinalization.register(obj, obj.__wbg_ptr, obj);
        return obj;
    }

    /**
     * Detaches this wrapper from its pointer: zeroes `__wbg_ptr`, removes the
     * finalization registration and hands the pointer to the caller.
     *
     * @returns {number} The pointer previously held by this wrapper.
     */
    __destroy_into_raw() {
        const ptr = this.__wbg_ptr;
        this.__wbg_ptr = 0;
        SoupFinalization.unregister(this);
        return ptr;
    }

    /**
     * Releases the WASM-side document through `__wbg_soup_free`.
     * The wrapper's pointer is zeroed first.
     */
    free() {
        const ptr = this.__destroy_into_raw();
        wasm.__wbg_soup_free(ptr, 0);
    }

    /**
     * Finds the first element matching a CSS selector.
     *
     * @param {string} selector - CSS selector string
     * @returns {Tag | undefined} The first matching Tag, or undefined if not found
     * @throws The error object handed back by the WASM side (documented
     *     upstream as an invalid-selector error).
     */
    find(selector) {
        try {
            const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
            const ptr0 = passStringToWasm0(selector, wasm.__wbindgen_export, wasm.__wbindgen_export2);
            const len0 = WASM_VECTOR_LEN;
            wasm.soup_find(retptr, this.__wbg_ptr, ptr0, len0);
            // Return area: [tag pointer, error heap index, error flag].
            const r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
            const r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
            const r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
            if (r2) {
                throw takeObject(r1);
            }
            return r0 === 0 ? undefined : Tag.__wrap(r0);
        } finally {
            // Always give the 16-byte return area back, even on throw.
            wasm.__wbindgen_add_to_stack_pointer(16);
        }
    }

    /**
     * Finds all elements matching a CSS selector.
     *
     * @param {string} selector - CSS selector string
     * @returns {Tag[]} Array of matching Tag instances
     * @throws The error object handed back by the WASM side (documented
     *     upstream as an invalid-selector error).
     */
    findAll(selector) {
        try {
            const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
            const ptr0 = passStringToWasm0(selector, wasm.__wbindgen_export, wasm.__wbindgen_export2);
            const len0 = WASM_VECTOR_LEN;
            wasm.soup_findAll(retptr, this.__wbg_ptr, ptr0, len0);
            // Return area: [array pointer, array length, error heap index, error flag].
            const r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
            const r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
            const r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
            const r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
            if (r3) {
                throw takeObject(r2);
            }
            const tags = getArrayJsValueFromWasm0(r0, r1).slice();
            // The 4-byte-per-entry handle array is released once it has been copied out.
            wasm.__wbindgen_export3(r0, r1 * 4, 4);
            return tags;
        } finally {
            wasm.__wbindgen_add_to_stack_pointer(16);
        }
    }

    /**
     * Get the number of nodes in the document.
     * @returns {number} The WASM result, read as an unsigned 32-bit integer.
     */
    get length() {
        const ret = wasm.soup_length(this.__wbg_ptr);
        return ret >>> 0;
    }

    /**
     * Parses an HTML string into a Soup document.
     *
     * The `config` object, when given, is consumed: its pointer is moved into
     * the new document via `__destroy_into_raw()`, so the same `SoupConfig`
     * instance must not be reused afterwards.
     *
     * @param {string} html - The HTML string to parse
     * @param {SoupConfig | null} [config] - Optional configuration options
     * @throws {Error} If `config` is neither nullish nor a `SoupConfig`.
     */
    constructor(html, config) {
        // Validate before copying `html` into WASM memory: failing afterwards
        // would leave the freshly allocated string buffer unreleased.
        const hasConfig = !isLikeNone(config);
        if (hasConfig) {
            _assertClass(config, SoupConfig);
        }
        const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
        const len0 = WASM_VECTOR_LEN;
        const ptr1 = hasConfig ? config.__destroy_into_raw() : 0;
        const ret = wasm.soup_new(ptr0, len0, ptr1);
        this.__wbg_ptr = ret >>> 0;
        SoupFinalization.register(this, this.__wbg_ptr, this);
        return this;
    }

    /**
     * Get the root element of the document.
     *
     * @returns {Tag | undefined} The root Tag (usually <html>), or undefined
     *     when the WASM side returns a null pointer.
     */
    get root() {
        const ret = wasm.soup_root(this.__wbg_ptr);
        return ret === 0 ? undefined : Tag.__wrap(ret);
    }

    /**
     * Finds all elements matching a CSS selector (alias for findAll).
     *
     * @param {string} selector - CSS selector string
     * @returns {Tag[]} Array of matching Tag instances
     * @throws The error object handed back by the WASM side, if any.
     */
    select(selector) {
        try {
            const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
            const ptr0 = passStringToWasm0(selector, wasm.__wbindgen_export, wasm.__wbindgen_export2);
            const len0 = WASM_VECTOR_LEN;
            wasm.soup_select(retptr, this.__wbg_ptr, ptr0, len0);
            const r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
            const r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
            const r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
            const r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
            if (r3) {
                throw takeObject(r2);
            }
            const tags = getArrayJsValueFromWasm0(r0, r1).slice();
            wasm.__wbindgen_export3(r0, r1 * 4, 4);
            return tags;
        } finally {
            wasm.__wbindgen_add_to_stack_pointer(16);
        }
    }

    /**
     * Get the text content of the entire document.
     *
     * @returns {string} All text content with HTML tags stripped
     */
    get text() {
        // Remembered so the WASM-side string buffer is released in `finally`,
        // after the JS string has been decoded from it.
        let deferredPtr;
        let deferredLen;
        try {
            const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
            wasm.soup_text(retptr, this.__wbg_ptr);
            const r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
            const r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
            deferredPtr = r0;
            deferredLen = r1;
            return getStringFromWasm0(r0, r1);
        } finally {
            wasm.__wbindgen_add_to_stack_pointer(16);
            wasm.__wbindgen_export3(deferredPtr, deferredLen, 1);
        }
    }

    /**
     * Get the document title.
     *
     * @returns {string | undefined} The title text, or undefined when the
     *     WASM side returns a null string pointer (no <title> element).
     */
    get title() {
        try {
            const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
            wasm.soup_title(retptr, this.__wbg_ptr);
            const r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
            const r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
            let title;
            if (r0 !== 0) {
                title = getStringFromWasm0(r0, r1).slice();
                wasm.__wbindgen_export3(r0, r1 * 1, 1);
            }
            return title;
        } finally {
            wasm.__wbindgen_add_to_stack_pointer(16);
        }
    }

    /**
     * Get the HTML representation of the document.
     *
     * @returns {string} The document as an HTML string
     */
    toHtml() {
        let deferredPtr;
        let deferredLen;
        try {
            const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
            wasm.soup_toHtml(retptr, this.__wbg_ptr);
            const r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
            const r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
            deferredPtr = r0;
            deferredLen = r1;
            return getStringFromWasm0(r0, r1);
        } finally {
            wasm.__wbindgen_add_to_stack_pointer(16);
            wasm.__wbindgen_export3(deferredPtr, deferredLen, 1);
        }
    }
}
// Opt in to explicit resource management (`using`) where the runtime supports it.
if (Symbol.dispose) Soup.prototype[Symbol.dispose] = Soup.prototype.free;
228
+
229
+ /**
230
+ * Configuration options for HTML parsing.
231
+ *
232
+ * All options have sensible defaults. Use setters to customize behavior.
233
+ *
234
+ * @example
235
+ * ```javascript
236
+ * const config = new SoupConfig();
237
+ * config.maxDepth = 256;
238
+ * config.strictMode = true;
239
+ * const soup = new Soup("<div>Hello</div>", config);
240
+ * ```
241
+ */
242
export class SoupConfig {
    /**
     * Creates a new configuration object backed by `soupconfig_new()`.
     *
     * Upstream documents the defaults as maxDepth 512 and strictMode,
     * preserveWhitespace and includeComments all false; those values are set
     * inside the WASM binary.
     */
    constructor() {
        this.__wbg_ptr = wasm.soupconfig_new() >>> 0;
        SoupConfigFinalization.register(this, this.__wbg_ptr, this);
        return this;
    }

    /**
     * Detaches the wrapper from its pointer and drops the finalization
     * registration.
     * @returns {number} The pointer this wrapper held.
     */
    __destroy_into_raw() {
        const rawPtr = this.__wbg_ptr;
        this.__wbg_ptr = 0;
        SoupConfigFinalization.unregister(this);
        return rawPtr;
    }

    /** Releases the WASM-side configuration object. */
    free() {
        wasm.__wbg_soupconfig_free(this.__destroy_into_raw(), 0);
    }

    /**
     * Include comment nodes in DOM.
     * @returns {boolean}
     */
    get includeComments() {
        return wasm.soupconfig_includeComments(this.__wbg_ptr) !== 0;
    }

    /**
     * Sets comment inclusion.
     * @param {boolean} value
     */
    set includeComments(value) {
        wasm.soupconfig_set_includeComments(this.__wbg_ptr, value);
    }

    /**
     * Maximum nesting depth for DOM tree.
     * @returns {number} The stored depth, read as an unsigned 32-bit integer.
     */
    get maxDepth() {
        return wasm.soupconfig_maxDepth(this.__wbg_ptr) >>> 0;
    }

    /**
     * Sets the maximum nesting depth.
     * @param {number} value
     */
    set maxDepth(value) {
        wasm.soupconfig_set_maxDepth(this.__wbg_ptr, value);
    }

    /**
     * Preserve whitespace-only text nodes.
     * @returns {boolean}
     */
    get preserveWhitespace() {
        return wasm.soupconfig_preserveWhitespace(this.__wbg_ptr) !== 0;
    }

    /**
     * Sets whitespace preservation.
     * @param {boolean} value
     */
    set preserveWhitespace(value) {
        wasm.soupconfig_set_preserveWhitespace(this.__wbg_ptr, value);
    }

    /**
     * Enable strict parsing mode (fail on malformed HTML).
     * @returns {boolean}
     */
    get strictMode() {
        return wasm.soupconfig_strictMode(this.__wbg_ptr) !== 0;
    }

    /**
     * Sets strict parsing mode.
     * @param {boolean} value
     */
    set strictMode(value) {
        wasm.soupconfig_set_strictMode(this.__wbg_ptr, value);
    }
}
// Opt in to explicit resource management (`using`) where the runtime supports it.
if (Symbol.dispose) SoupConfig.prototype[Symbol.dispose] = SoupConfig.prototype.free;
330
+
331
+ /**
332
+ * An HTML element in the DOM tree.
333
+ *
334
+ * Provides access to element content, attributes, and tree navigation.
335
+ *
336
+ * @example
337
+ * ```javascript
338
+ * const soup = new Soup('<div class="test">Hello</div>');
339
+ * const div = soup.find("div");
340
+ * console.log(div.name); // "div"
341
+ * console.log(div.text); // "Hello"
342
+ * console.log(div.attr("class")); // "test"
343
+ * ```
344
+ */
345
export class Tag {
    // The helpers below are static on purpose: wrappers built by `__wrap` come
    // from `Object.create` and never run a constructor, so they would not
    // carry instance-private members.

    /** Reads the 32-bit word at position `index` of the return area. */
    static #retWord(retptr, index) {
        return getDataViewMemory0().getInt32(retptr + 4 * index, true);
    }

    /** Copies `text` into WASM memory and returns `[pointer, byteLength]`. */
    static #passText(text) {
        const ptr = passStringToWasm0(text, wasm.__wbindgen_export, wasm.__wbindgen_export2);
        return [ptr, WASM_VECTOR_LEN];
    }

    /** Decodes a `[ptr, len]` return area into a string; a null pointer yields undefined. */
    static #readOptionalString(retptr) {
        const strPtr = Tag.#retWord(retptr, 0);
        const strLen = Tag.#retWord(retptr, 1);
        if (strPtr === 0) {
            return undefined;
        }
        const copy = getStringFromWasm0(strPtr, strLen).slice();
        wasm.__wbindgen_export3(strPtr, strLen * 1, 1);
        return copy;
    }

    /** Decodes a `[ptr, len]` return area into an array and releases the handle buffer. */
    static #readArray(retptr) {
        const arrPtr = Tag.#retWord(retptr, 0);
        const arrLen = Tag.#retWord(retptr, 1);
        const items = getArrayJsValueFromWasm0(arrPtr, arrLen).slice();
        wasm.__wbindgen_export3(arrPtr, arrLen * 4, 4);
        return items;
    }

    /** Like `#readArray`, for a `[ptr, len, errorIndex, errorFlag]` return area. */
    static #readArrayOrThrow(retptr) {
        const arrPtr = Tag.#retWord(retptr, 0);
        const arrLen = Tag.#retWord(retptr, 1);
        const errIndex = Tag.#retWord(retptr, 2);
        const errFlag = Tag.#retWord(retptr, 3);
        if (errFlag) {
            throw takeObject(errIndex);
        }
        const items = getArrayJsValueFromWasm0(arrPtr, arrLen).slice();
        wasm.__wbindgen_export3(arrPtr, arrLen * 4, 4);
        return items;
    }

    /**
     * Runs `invoke(retptr)` and decodes the `[ptr, len]` string it leaves in
     * the return area; the WASM-side buffer is released after decoding.
     */
    static #takeOwnedString(invoke) {
        let deferredPtr;
        let deferredLen;
        try {
            const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
            invoke(retptr);
            const strPtr = Tag.#retWord(retptr, 0);
            const strLen = Tag.#retWord(retptr, 1);
            deferredPtr = strPtr;
            deferredLen = strLen;
            return getStringFromWasm0(strPtr, strLen);
        } finally {
            wasm.__wbindgen_add_to_stack_pointer(16);
            wasm.__wbindgen_export3(deferredPtr, deferredLen, 1);
        }
    }

    /** Maps a null pointer to undefined and anything else to a wrapped Tag. */
    static #optionalTag(ptr) {
        return ptr === 0 ? undefined : Tag.__wrap(ptr);
    }

    /**
     * Wraps an existing WASM-side pointer in a `Tag` and registers it for
     * finalization.
     *
     * @param {number} ptr - Raw pointer; normalized to an unsigned 32-bit value.
     * @returns {Tag}
     */
    static __wrap(ptr) {
        const wrapper = Object.create(Tag.prototype);
        wrapper.__wbg_ptr = ptr >>> 0;
        TagFinalization.register(wrapper, wrapper.__wbg_ptr, wrapper);
        return wrapper;
    }

    /**
     * Detaches the wrapper from its pointer and drops the finalization
     * registration.
     * @returns {number} The pointer this wrapper held.
     */
    __destroy_into_raw() {
        const rawPtr = this.__wbg_ptr;
        this.__wbg_ptr = 0;
        TagFinalization.unregister(this);
        return rawPtr;
    }

    /** Releases the WASM-side element handle. */
    free() {
        wasm.__wbg_tag_free(this.__destroy_into_raw(), 0);
    }

    /**
     * Get an attribute value by name (alias for get).
     *
     * @param {string} name - The attribute name
     * @returns {string | undefined} The attribute value, or undefined if not present
     */
    attr(name) {
        try {
            const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
            const [namePtr, nameLen] = Tag.#passText(name);
            wasm.tag_attr(retptr, this.__wbg_ptr, namePtr, nameLen);
            return Tag.#readOptionalString(retptr);
        } finally {
            wasm.__wbindgen_add_to_stack_pointer(16);
        }
    }

    /**
     * Get all attributes as an object.
     * @returns {object}
     */
    get attrs() {
        return takeObject(wasm.tag_attrs(this.__wbg_ptr));
    }

    /**
     * Get all direct child elements.
     * @returns {Tag[]}
     */
    get children() {
        try {
            const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
            wasm.tag_children(retptr, this.__wbg_ptr);
            return Tag.#readArray(retptr);
        } finally {
            wasm.__wbindgen_add_to_stack_pointer(16);
        }
    }

    /**
     * Get all classes as an array.
     * @returns {string[]}
     */
    get classes() {
        try {
            const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
            wasm.tag_classes(retptr, this.__wbg_ptr);
            return Tag.#readArray(retptr);
        } finally {
            wasm.__wbindgen_add_to_stack_pointer(16);
        }
    }

    /**
     * Get all descendant elements.
     * @returns {Tag[]}
     */
    get descendants() {
        try {
            const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
            wasm.tag_descendants(retptr, this.__wbg_ptr);
            return Tag.#readArray(retptr);
        } finally {
            wasm.__wbindgen_add_to_stack_pointer(16);
        }
    }

    /**
     * Find the first descendant matching a CSS selector.
     *
     * @param {string} selector - CSS selector string
     * @returns {Tag | undefined} The first matching Tag, or undefined if not found
     * @throws The error object handed back by the WASM side (documented
     *     upstream as an invalid-selector error).
     */
    find(selector) {
        try {
            const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
            const [selPtr, selLen] = Tag.#passText(selector);
            wasm.tag_find(retptr, this.__wbg_ptr, selPtr, selLen);
            // Return area: [tag pointer, error heap index, error flag].
            const tagPtr = Tag.#retWord(retptr, 0);
            const errIndex = Tag.#retWord(retptr, 1);
            const errFlag = Tag.#retWord(retptr, 2);
            if (errFlag) {
                throw takeObject(errIndex);
            }
            return Tag.#optionalTag(tagPtr);
        } finally {
            wasm.__wbindgen_add_to_stack_pointer(16);
        }
    }

    /**
     * Find all descendants matching a CSS selector.
     *
     * @param {string} selector - CSS selector string
     * @returns {Tag[]} Array of matching Tag instances
     * @throws The error object handed back by the WASM side (documented
     *     upstream as an invalid-selector error).
     */
    findAll(selector) {
        try {
            const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
            const [selPtr, selLen] = Tag.#passText(selector);
            wasm.tag_findAll(retptr, this.__wbg_ptr, selPtr, selLen);
            return Tag.#readArrayOrThrow(retptr);
        } finally {
            wasm.__wbindgen_add_to_stack_pointer(16);
        }
    }

    /**
     * Get an attribute value by name.
     *
     * @param {string} name - The attribute name
     * @returns {string | undefined} The attribute value, or undefined if not present
     */
    get(name) {
        try {
            const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
            const [namePtr, nameLen] = Tag.#passText(name);
            wasm.tag_get(retptr, this.__wbg_ptr, namePtr, nameLen);
            return Tag.#readOptionalString(retptr);
        } finally {
            wasm.__wbindgen_add_to_stack_pointer(16);
        }
    }

    /**
     * Check if the element has an attribute.
     *
     * @param {string} name - The attribute name
     * @returns {boolean} True if the attribute exists
     */
    hasAttr(name) {
        const [namePtr, nameLen] = Tag.#passText(name);
        return wasm.tag_hasAttr(this.__wbg_ptr, namePtr, nameLen) !== 0;
    }

    /**
     * Check if the element has a specific class.
     *
     * @param {string} class_name - The class name to check
     * @returns {boolean} True if the element has the class
     */
    hasClass(class_name) {
        const [classPtr, classLen] = Tag.#passText(class_name);
        return wasm.tag_hasClass(this.__wbg_ptr, classPtr, classLen) !== 0;
    }

    /**
     * Get the inner HTML content (excluding this element's tags).
     * @returns {string}
     */
    get innerHTML() {
        return Tag.#takeOwnedString((retptr) => wasm.tag_innerHTML(retptr, this.__wbg_ptr));
    }

    /**
     * Get the number of direct child elements.
     * @returns {number} The WASM result, read as an unsigned 32-bit integer.
     */
    get length() {
        return wasm.tag_length(this.__wbg_ptr) >>> 0;
    }

    /**
     * Get the tag name (e.g., "div", "span").
     * @returns {string | undefined} Undefined when the WASM side returns a null string pointer.
     */
    get name() {
        try {
            const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
            wasm.tag_name(retptr, this.__wbg_ptr);
            return Tag.#readOptionalString(retptr);
        } finally {
            wasm.__wbindgen_add_to_stack_pointer(16);
        }
    }

    /**
     * Get the next sibling element.
     * @returns {Tag | undefined}
     */
    get nextSibling() {
        return Tag.#optionalTag(wasm.tag_nextSibling(this.__wbg_ptr));
    }

    /**
     * Get the outer HTML (including this element's tags).
     * @returns {string}
     */
    get outerHTML() {
        return Tag.#takeOwnedString((retptr) => wasm.tag_outerHTML(retptr, this.__wbg_ptr));
    }

    /**
     * Get the parent element.
     * @returns {Tag | undefined}
     */
    get parent() {
        return Tag.#optionalTag(wasm.tag_parent(this.__wbg_ptr));
    }

    /**
     * Get the previous sibling element.
     * @returns {Tag | undefined}
     */
    get prevSibling() {
        return Tag.#optionalTag(wasm.tag_prevSibling(this.__wbg_ptr));
    }

    /**
     * Find all descendants matching a CSS selector (alias for findAll).
     *
     * @param {string} selector - CSS selector string
     * @returns {Tag[]} Array of matching Tag instances
     * @throws The error object handed back by the WASM side, if any.
     */
    select(selector) {
        try {
            const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
            const [selPtr, selLen] = Tag.#passText(selector);
            wasm.tag_select(retptr, this.__wbg_ptr, selPtr, selLen);
            return Tag.#readArrayOrThrow(retptr);
        } finally {
            wasm.__wbindgen_add_to_stack_pointer(16);
        }
    }

    /**
     * Get the text content of this element and all descendants.
     * @returns {string}
     */
    get text() {
        return Tag.#takeOwnedString((retptr) => wasm.tag_text(retptr, this.__wbg_ptr));
    }
}
// Opt in to explicit resource management (`using`) where the runtime supports it.
if (Symbol.dispose) Tag.prototype[Symbol.dispose] = Tag.prototype.free;
699
+
700
+ /**
701
+ * Check if WASM SIMD is supported in the current environment.
702
+ *
703
+ * Returns true if the module was compiled with SIMD support and
704
+ * is running on a platform that supports SIMD128 instructions.
705
+ *
706
+ * SIMD support requires:
707
+ * - Chrome 91+ / Firefox 89+ / Safari 16.4+
708
+ * - Module built with RUSTFLAGS='-C target-feature=+simd128'
709
+ * @returns {boolean}
710
+ */
711
/**
 * Reports the SIMD flag exposed by the WASM module.
 *
 * @returns {boolean} True when the `hasSimdSupport` export returns a non-zero value.
 */
export function hasSimdSupport() {
    return wasm.hasSimdSupport() !== 0;
}
715
+
716
+ /**
717
+ * Initialize the WASM module by calling the `init` export of the loaded WASM instance.
718
+ *
719
+ * Described upstream as installing a panic hook for clearer error messages in the browser console; that logic lives in the WASM binary and is not visible here.
720
+ * NOTE(review): upstream states this also runs automatically at module load, presumably via the WASM start function; confirm before relying on it.
721
+ */
722
+ export function init() {
723
+ wasm.init();
724
+ }
725
+
726
+ /**
727
+ * Parse multiple HTML documents.
728
+ *
729
+ * Note: WASM does not support threads, so this processes documents sequentially.
730
+ * For parallel processing in browsers, use Web Workers with separate WASM instances.
731
+ *
732
+ * @param documents - Array of HTML strings to parse
733
+ * @returns Array of Soup documents
734
+ *
735
+ * @example
736
+ * ```javascript
737
+ * const soups = parseBatch(['<div>A</div>', '<div>B</div>']);
738
+ * console.log(soups.length); // 2
739
+ * ```
740
+ * @param {string[]} documents
741
+ * @returns {Soup[]}
742
+ */
743
/**
 * Parses several HTML documents with a single call into the WASM module.
 *
 * @param {string[]} documents - Array of HTML strings to parse
 * @returns {Soup[]} The values produced by the WASM `parseBatch` export
 */
export function parseBatch(documents) {
    try {
        const retArea = wasm.__wbindgen_add_to_stack_pointer(-16);
        const docsPtr = passArrayJsValueToWasm0(documents, wasm.__wbindgen_export);
        const docsLen = WASM_VECTOR_LEN;
        wasm.parseBatch(retArea, docsPtr, docsLen);
        // Return area: [result array pointer, result array length].
        const outPtr = getDataViewMemory0().getInt32(retArea, true);
        const outLen = getDataViewMemory0().getInt32(retArea + 4, true);
        const soups = getArrayJsValueFromWasm0(outPtr, outLen).slice();
        // Release the 4-byte-per-entry handle array now that it has been copied out.
        wasm.__wbindgen_export3(outPtr, outLen * 4, 4);
        return soups;
    } finally {
        // Always give the 16-byte return area back.
        wasm.__wbindgen_add_to_stack_pointer(16);
    }
}
758
+
759
+ /**
760
+ * Get the library version.
761
+ *
762
+ * @returns Version string (e.g., "0.1.0")
763
+ * @returns {string}
764
+ */
765
/**
 * Get the library version as reported by the WASM module.
 *
 * @returns {string} Version string (e.g., "0.1.0")
 */
export function version() {
    // Remembered so the WASM-side string buffer can be released in `finally`.
    let bufferPtr;
    let bufferLen;
    try {
        const retArea = wasm.__wbindgen_add_to_stack_pointer(-16);
        wasm.version(retArea);
        const strPtr = getDataViewMemory0().getInt32(retArea, true);
        const strLen = getDataViewMemory0().getInt32(retArea + 4, true);
        bufferPtr = strPtr;
        bufferLen = strLen;
        return getStringFromWasm0(strPtr, strLen);
    } finally {
        wasm.__wbindgen_add_to_stack_pointer(16);
        wasm.__wbindgen_export3(bufferPtr, bufferLen, 1);
    }
}
781
+
782
/**
 * Builds the import object handed to the WASM module at instantiation.
 *
 * Every entry is a callback the WASM side invokes by its hashed name; JS
 * values cross the boundary as indices into the `heap` table.
 *
 * @returns {object} Prototype-less object mapping the module specifier
 *     "./scrape_wasm_bg.js" to the callback table.
 */
function __wbg_get_imports() {
    const glue = {
        __proto__: null,
        // Creates an Error from a WASM string and returns its heap index.
        __wbg_Error_8c4e43fe74559d73(arg0, arg1) {
            return addHeapObject(Error(getStringFromWasm0(arg0, arg1)));
        },
        // Writes [ptr, len] of the heap value at arg1 to arg0 when it is a
        // string; otherwise writes ptr 0 and the current WASM_VECTOR_LEN.
        __wbg___wbindgen_string_get_72fb696202c56729(arg0, arg1) {
            const value = getObject(arg1);
            const ptr = typeof value === 'string'
                ? passStringToWasm0(value, wasm.__wbindgen_export, wasm.__wbindgen_export2)
                : 0;
            const len = WASM_VECTOR_LEN;
            getDataViewMemory0().setInt32(arg0 + 4 * 1, len, true);
            getDataViewMemory0().setInt32(arg0 + 4 * 0, ptr, true);
        },
        // Throws a JS Error carrying the WASM-provided message.
        __wbg___wbindgen_throw_be289d5034ed271b(arg0, arg1) {
            throw new Error(getStringFromWasm0(arg0, arg1));
        },
        // Logs a WASM string via console.error, then releases its buffer.
        __wbg_error_7534b8e9a36f1ab4(arg0, arg1) {
            let bufferPtr;
            let bufferLen;
            try {
                bufferPtr = arg0;
                bufferLen = arg1;
                console.error(getStringFromWasm0(arg0, arg1));
            } finally {
                wasm.__wbindgen_export3(bufferPtr, bufferLen, 1);
            }
        },
        // Creates an empty plain object.
        __wbg_new_361308b2356cecd0() {
            return addHeapObject({});
        },
        // Creates an empty Error.
        __wbg_new_8a6f238a6ece86ea() {
            return addHeapObject(new Error());
        },
        // Reflect.set on heap values; exceptions are routed through handleError.
        __wbg_set_6cb8631f80447a67(...args) {
            return handleError(
                (target, key, value) => Reflect.set(getObject(target), getObject(key), getObject(value)),
                args,
            );
        },
        // Wraps a raw Soup pointer and returns the wrapper's heap index.
        __wbg_soup_new(arg0) {
            return addHeapObject(Soup.__wrap(arg0));
        },
        // Writes [ptr, len] of the `.stack` of the heap value at arg1 to arg0.
        __wbg_stack_0ed75d68575b0f3c(arg0, arg1) {
            const { stack } = getObject(arg1);
            const ptr = passStringToWasm0(stack, wasm.__wbindgen_export, wasm.__wbindgen_export2);
            const len = WASM_VECTOR_LEN;
            getDataViewMemory0().setInt32(arg0 + 4 * 1, len, true);
            getDataViewMemory0().setInt32(arg0 + 4 * 0, ptr, true);
        },
        // Wraps a raw Tag pointer and returns the wrapper's heap index.
        __wbg_tag_new(arg0) {
            return addHeapObject(Tag.__wrap(arg0));
        },
        // Cast intrinsic for `Ref(String) -> Externref`.
        __wbindgen_cast_0000000000000001(arg0, arg1) {
            return addHeapObject(getStringFromWasm0(arg0, arg1));
        },
        // Drops the heap slot at arg0.
        __wbindgen_object_drop_ref(arg0) {
            takeObject(arg0);
        },
    };
    return {
        __proto__: null,
        "./scrape_wasm_bg.js": glue,
    };
}
852
+
853
/**
 * Creates the finalization hook for one wrapper class.
 *
 * Without `FinalizationRegistry` a no-op stand-in with the same
 * `register`/`unregister` surface is returned. Otherwise the registry calls
 * the named WASM free export with the held pointer and flag `1` once a
 * wrapper has been garbage-collected.
 *
 * @param {string} freeExport - Name of the WASM export that frees the object.
 */
function createFinalizer(freeExport) {
    if (typeof FinalizationRegistry === 'undefined') {
        return { register: () => {}, unregister: () => {} };
    }
    // `wasm` is looked up lazily: it is only assigned after instantiation.
    return new FinalizationRegistry((ptr) => wasm[freeExport](ptr >>> 0, 1));
}

const SoupFinalization = createFinalizer('__wbg_soup_free');
const SoupConfigFinalization = createFinalizer('__wbg_soupconfig_free');
const TagFinalization = createFinalizer('__wbg_tag_free');
862
+
863
/**
 * Stores a JS value in the `heap` table and returns the slot index.
 *
 * Free slots form a linked list: `heap_next` is the first free slot and each
 * free slot holds the index of the next one.
 *
 * @param {*} obj - Value to store.
 * @returns {number} Index of the slot now holding `obj`.
 */
function addHeapObject(obj) {
    if (heap_next === heap.length) {
        // Free list exhausted: append a slot whose "next free" link points past the end.
        heap.push(heap.length + 1);
    }
    const slot = heap_next;
    heap_next = heap[slot];
    heap[slot] = obj;
    return slot;
}
871
+
872
/**
 * Guards that `instance` was created from `klass` (or a subclass).
 *
 * @param {*} instance - Value to check.
 * @param {Function} klass - Expected constructor.
 * @throws {Error} "expected instance of <klass.name>" when the check fails.
 */
function _assertClass(instance, klass) {
    if (instance instanceof klass) {
        return;
    }
    throw new Error(`expected instance of ${klass.name}`);
}
877
+
878
/**
 * Returns a heap slot to the free list.
 *
 * Indices below 132 are never released; this matches the 128 + 4 entries
 * the `heap` table starts with.
 *
 * @param {number} idx - Slot index to release.
 */
function dropObject(idx) {
    const isReserved = idx < 132;
    if (!isReserved) {
        // Push the slot onto the head of the free list.
        heap[idx] = heap_next;
        heap_next = idx;
    }
}
883
+
884
/**
 * Reads `len` little-endian 32-bit heap indices starting at `ptr` and takes
 * the corresponding objects out of the heap, in order.
 *
 * @param {number} ptr - Start address; normalized to an unsigned 32-bit value.
 * @param {number} len - Number of 4-byte entries.
 * @returns {Array} The objects taken from the heap.
 */
function getArrayJsValueFromWasm0(ptr, len) {
    const start = ptr >>> 0;
    const view = getDataViewMemory0();
    const end = start + 4 * len;
    const items = [];
    let cursor = start;
    while (cursor < end) {
        items.push(takeObject(view.getUint32(cursor, true)));
        cursor += 4;
    }
    return items;
}
893
+
894
let cachedDataViewMemory0 = null;

/**
 * Returns a `DataView` over the WASM linear memory, reusing the cached view
 * while its buffer is still valid.
 *
 * The cache is rebuilt when there is no view yet, when its buffer reports
 * `detached === true`, or, if the runtime has no `detached` property, when
 * its buffer is no longer `wasm.memory.buffer`.
 *
 * @returns {DataView}
 */
function getDataViewMemory0() {
    const view = cachedDataViewMemory0;
    let usable = view !== null;
    if (usable) {
        const { detached } = view.buffer;
        usable = detached === undefined
            ? view.buffer === wasm.memory.buffer
            : detached !== true;
    }
    if (!usable) {
        cachedDataViewMemory0 = new DataView(wasm.memory.buffer);
    }
    return cachedDataViewMemory0;
}
901
+
902
/**
 * Decodes `len` bytes of WASM memory starting at `ptr` into a JS string.
 *
 * @param {number} ptr - Start address; normalized to an unsigned 32-bit value.
 * @param {number} len - Byte length.
 * @returns {string}
 */
function getStringFromWasm0(ptr, len) {
    return decodeText(ptr >>> 0, len);
}
906
+
907
let cachedUint8ArrayMemory0 = null;

/**
 * Returns a `Uint8Array` over the WASM linear memory.
 *
 * The cached view is replaced when it does not exist yet or when its
 * `byteLength` has dropped to 0.
 *
 * @returns {Uint8Array}
 */
function getUint8ArrayMemory0() {
    const needsRefresh = cachedUint8ArrayMemory0 === null
        || cachedUint8ArrayMemory0.byteLength === 0;
    if (needsRefresh) {
        cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer);
    }
    return cachedUint8ArrayMemory0;
}
914
+
915
/**
 * Looks up the value stored at a heap slot without releasing the slot.
 *
 * @param {number} idx - Heap slot index.
 * @returns {*} The stored value.
 */
function getObject(idx) {
    return heap[idx];
}
916
+
917
/**
 * Calls `f` with `args`. A thrown value does not propagate: it is stored on
 * the heap and its index is passed to `wasm.__wbindgen_export4`.
 *
 * @param {Function} f - Callback to run.
 * @param {ArrayLike} args - Arguments forwarded to `f`.
 * @returns {*} The result of `f`, or undefined when `f` threw.
 */
function handleError(f, args) {
    let outcome;
    try {
        outcome = f.apply(this, args);
    } catch (thrown) {
        const handle = addHeapObject(thrown);
        wasm.__wbindgen_export4(handle);
    }
    return outcome;
}
924
+
925
// Table that holds JS values on behalf of the WASM side: 128 undefined
// entries followed by the four constants undefined, null, true, false.
let heap = [...new Array(128).fill(undefined), undefined, null, true, false];

// Index of the first free slot; initially one past the 132 initial entries.
let heap_next = heap.length;
929
+
930
/**
 * Tells whether a value is absent, i.e. strictly `null` or `undefined`.
 *
 * @param {*} x - Value to test.
 * @returns {boolean}
 */
function isLikeNone(x) {
    return x === null || x === undefined;
}
933
+
934
/**
 * Copies an array of JS values into WASM memory as 32-bit heap indices.
 *
 * Each element is stored on the heap and its index written little-endian at
 * `base + 4 * i`. The element count is published through `WASM_VECTOR_LEN`.
 *
 * @param {ArrayLike} array - Values to pass.
 * @param {Function} malloc - Allocator called as `malloc(byteSize, align)`.
 * @returns {number} Address of the index table.
 */
function passArrayJsValueToWasm0(array, malloc) {
    const base = malloc(array.length * 4, 4) >>> 0;
    // Fetched after malloc, so the view reflects the memory state post-allocation.
    const view = getDataViewMemory0();
    let index = 0;
    while (index < array.length) {
        view.setUint32(base + 4 * index, addHeapObject(array[index]), true);
        index += 1;
    }
    WASM_VECTOR_LEN = array.length;
    return base;
}
943
+
944
/**
 * Copies a JS string into WASM memory as UTF-8 and returns its address.
 * The byte length written is published through `WASM_VECTOR_LEN`.
 *
 * Without `realloc` the string is encoded up front and copied in one go.
 * With `realloc` the leading ASCII run is written byte by byte; if a
 * non-ASCII code unit is met, the buffer is grown to hold 3 bytes per
 * remaining UTF-16 code unit, the rest is encoded in place, and the buffer
 * is then shrunk to the bytes actually used.
 *
 * @param {string} arg - String to pass.
 * @param {Function} malloc - Called as `malloc(byteSize, align)`.
 * @param {Function} [realloc] - Called as `realloc(ptr, oldSize, newSize, align)`.
 * @returns {number} Address of the encoded bytes.
 */
function passStringToWasm0(arg, malloc, realloc) {
    if (realloc === undefined) {
        const encoded = cachedTextEncoder.encode(arg);
        const dest = malloc(encoded.length, 1) >>> 0;
        getUint8ArrayMemory0().subarray(dest, dest + encoded.length).set(encoded);
        WASM_VECTOR_LEN = encoded.length;
        return dest;
    }

    let capacity = arg.length;
    let ptr = malloc(capacity, 1) >>> 0;

    const mem = getUint8ArrayMemory0();

    // Fast path: copy code units while they are plain ASCII.
    let offset = 0;
    while (offset < capacity) {
        const code = arg.charCodeAt(offset);
        if (code > 0x7F) break;
        mem[ptr + offset] = code;
        offset += 1;
    }

    if (offset !== capacity) {
        const rest = offset === 0 ? arg : arg.slice(offset);
        const previousCapacity = capacity;
        capacity = offset + rest.length * 3;
        ptr = realloc(ptr, previousCapacity, capacity, 1) >>> 0;
        // Re-fetch the view: growing the allocation may have replaced the memory buffer.
        const view = getUint8ArrayMemory0().subarray(ptr + offset, ptr + capacity);
        const { written } = cachedTextEncoder.encodeInto(rest, view);

        offset += written;
        ptr = realloc(ptr, capacity, offset, 1) >>> 0;
    }

    WASM_VECTOR_LEN = offset;
    return ptr;
}
980
+
981
/**
 * Reads the value at `idx` with `getObject`, then calls `dropObject(idx)`,
 * and returns the value that was read.
 *
 * @param {number} idx - Index passed to both helpers.
 * @returns {*} The value `getObject` returned before the drop.
 */
function takeObject(idx) {
    // Read first: the value must be captured before the slot is dropped.
    const taken = getObject(idx);
    dropObject(idx);
    return taken;
}
986
+
987
// Shared strict UTF-8 decoder (`fatal: true`, `ignoreBOM: true`). As in the
// original, `decode()` is called once with no input right after construction.
let cachedTextDecoder = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true });
cachedTextDecoder.decode();

// Running-total threshold at which the decoder is replaced.
// NOTE(review): judging by the name this works around a Safari limit — confirm.
const MAX_SAFARI_DECODE_BYTES = 2146435072;

// Bytes requested from the current decoder instance so far.
let numBytesDecoded = 0;

/**
 * Decodes `len` bytes starting at `ptr` of the view returned by
 * `getUint8ArrayMemory0` as UTF-8.
 *
 * Once the running byte total reaches `MAX_SAFARI_DECODE_BYTES`, a new
 * decoder is created and the total restarts at `len`.
 *
 * @param {number} ptr - Start offset.
 * @param {number} len - Number of bytes to decode.
 * @returns {string} The decoded text.
 */
function decodeText(ptr, len) {
    numBytesDecoded += len;

    const limitReached = numBytesDecoded >= MAX_SAFARI_DECODE_BYTES;
    if (limitReached) {
        cachedTextDecoder = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true });
        cachedTextDecoder.decode();
        numBytesDecoded = len;
    }

    const bytes = getUint8ArrayMemory0().subarray(ptr, ptr + len);
    return cachedTextDecoder.decode(bytes);
}
1000
+
1001
// Shared encoder used by the string-passing helpers.
const cachedTextEncoder = new TextEncoder();

// When the encoder has no `encodeInto`, install a fallback that encodes the
// whole string, copies the bytes to the start of `target`, and reports the
// UTF-16 length read and the byte count written.
if (!('encodeInto' in cachedTextEncoder)) {
    cachedTextEncoder.encodeInto = function (text, target) {
        const encoded = cachedTextEncoder.encode(text);
        target.set(encoded);
        const summary = { read: text.length, written: encoded.length };
        return summary;
    };
}
1013
+
1014
// Length recorded by the most recent `pass*ToWasm0` helper call.
let WASM_VECTOR_LEN = 0;

// Set by `__wbg_finalize_init`: the module that was instantiated and the
// exports object of its instance.
let wasmModule;
let wasm;

/**
 * Stores the instance's exports and the module in module-level state, resets
 * both memory-view caches, and calls the `__wbindgen_start` export.
 *
 * @param {WebAssembly.Instance} instance - Instance whose `exports` become `wasm`.
 * @param {WebAssembly.Module} module - Stored in `wasmModule`.
 * @returns {object} The exports object now held in `wasm`.
 */
function __wbg_finalize_init(instance, module) {
    ({ exports: wasm } = instance);
    wasmModule = module;

    // Drop any cached views so they are recreated on next use.
    cachedUint8ArrayMemory0 = null;
    cachedDataViewMemory0 = null;

    wasm.__wbindgen_start();
    return wasm;
}
1025
+
1026
/**
 * Instantiates a WebAssembly module from a `Response` or from anything
 * `WebAssembly.instantiate` accepts.
 *
 * For a `Response`, streaming instantiation is tried first when available.
 * If it fails while the response is `ok`, has type `basic`, `cors` or
 * `default`, and its `Content-Type` is not `application/wasm`, a warning is
 * logged and the body is read with `arrayBuffer()` and instantiated instead;
 * any other streaming failure is rethrown.
 *
 * For other inputs, a bare `WebAssembly.Instance` result is wrapped as
 * `{ instance, module }`; any other result is returned as is.
 *
 * @param {*} module - Response, bytes, or compiled module.
 * @param {object} imports - Import object for instantiation.
 * @returns {Promise<object>} Result of instantiation, with `instance` and `module`.
 */
async function __wbg_load(module, imports) {
    const isResponse = typeof Response === 'function' && module instanceof Response;

    if (!isResponse) {
        const result = await WebAssembly.instantiate(module, imports);
        return result instanceof WebAssembly.Instance
            ? { instance: result, module }
            : result;
    }

    if (typeof WebAssembly.instantiateStreaming === 'function') {
        try {
            return await WebAssembly.instantiateStreaming(module, imports);
        } catch (e) {
            const validResponse = module.ok
                && ['basic', 'cors', 'default'].includes(module.type);
            const wrongMimeType = validResponse
                && module.headers.get('Content-Type') !== 'application/wasm';

            if (!wrongMimeType) {
                throw e;
            }
            console.warn("`WebAssembly.instantiateStreaming` failed because your server does not serve Wasm with `application/wasm` MIME type. Falling back to `WebAssembly.instantiate` which is slower. Original error:\n", e);
        }
    }

    const bytes = await module.arrayBuffer();
    return await WebAssembly.instantiate(bytes, imports);
}
1060
+
1061
/**
 * Synchronously compiles (if needed) and instantiates the module, then
 * finishes setup through `__wbg_finalize_init`.
 *
 * Returns the existing `wasm` exports immediately when already initialised.
 * A plain-object argument is unwrapped to its `module` property; any other
 * defined argument is used directly and triggers a deprecation warning.
 *
 * @param {*} module - `{ module }` options object, a `WebAssembly.Module`,
 *   or a value accepted by the `WebAssembly.Module` constructor.
 * @returns {object} The exports object returned by `__wbg_finalize_init`.
 */
function initSync(module) {
    if (wasm !== undefined) {
        return wasm;
    }

    if (module !== undefined) {
        const isOptionsObject = Object.getPrototypeOf(module) === Object.prototype;
        if (isOptionsObject) {
            module = module.module;
        } else {
            console.warn('using deprecated parameters for `initSync()`; pass a single object instead');
        }
    }

    const imports = __wbg_get_imports();
    const compiled = module instanceof WebAssembly.Module
        ? module
        : new WebAssembly.Module(module);
    const instance = new WebAssembly.Instance(compiled, imports);
    return __wbg_finalize_init(instance, compiled);
}
1080
+
1081
/**
 * Asynchronously loads and instantiates the module, then finishes setup
 * through `__wbg_finalize_init`.
 *
 * Returns the existing `wasm` exports immediately when already initialised.
 * A plain-object argument is unwrapped to its `module_or_path` property; any
 * other defined argument is used directly and triggers a deprecation
 * warning. With no source given, `scrape_wasm_bg.wasm` is resolved relative
 * to this module's URL. Strings, `Request`s and `URL`s are passed to
 * `fetch`; the (awaited) source is then handed to `__wbg_load`.
 *
 * @param {*} [module_or_path] - Options object, URL/string/Request, or a
 *   value (or promise of one) that `__wbg_load` accepts.
 * @returns {Promise<object>} The exports object returned by `__wbg_finalize_init`.
 */
async function __wbg_init(module_or_path) {
    if (wasm !== undefined) {
        return wasm;
    }

    if (module_or_path !== undefined) {
        const isOptionsObject = Object.getPrototypeOf(module_or_path) === Object.prototype;
        if (isOptionsObject) {
            module_or_path = module_or_path.module_or_path;
        } else {
            console.warn('using deprecated parameters for the initialization function; pass a single object instead');
        }
    }

    if (module_or_path === undefined) {
        module_or_path = new URL('scrape_wasm_bg.wasm', import.meta.url);
    }
    const imports = __wbg_get_imports();

    // Checked in the same order, with the same guards, as the original.
    const shouldFetch = (source) => {
        if (typeof source === 'string') return true;
        if (typeof Request === 'function' && source instanceof Request) return true;
        return typeof URL === 'function' && source instanceof URL;
    };
    if (shouldFetch(module_or_path)) {
        module_or_path = fetch(module_or_path);
    }

    const source = await module_or_path;
    const loaded = await __wbg_load(source, imports);

    return __wbg_finalize_init(loaded.instance, loaded.module);
}
1106
+
1107
+ export { initSync, __wbg_init as default };