@fast-scrape/wasm 0.1.5 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -1
- package/package.json +1 -1
- package/scrape_wasm.d.ts +242 -0
- package/scrape_wasm.js +549 -0
- package/scrape_wasm_bg.wasm +0 -0
package/README.md
CHANGED
|
@@ -117,13 +117,15 @@ function extractLinks(soup: Soup): string[] {
|
|
|
117
117
|
|
|
118
118
|
## Bundle size
|
|
119
119
|
|
|
120
|
+
The v0.2.0 optimizations bring the package to under 500 KB:
|
|
121
|
+
|
|
120
122
|
| Build | Size |
|
|
121
123
|
|-------|------|
|
|
122
124
|
| Minified + gzip | ~150 KB |
|
|
123
125
|
| Minified | ~400 KB |
|
|
124
126
|
|
|
125
127
|
> [!TIP]
|
|
126
|
-
> SIMD enabled automatically on Chrome 91+, Firefox 89+, Safari 16.4+.
|
|
128
|
+
> SIMD is enabled automatically on Chrome 91+, Firefox 89+, Safari 16.4+. v0.2.0 includes zero-copy serialization for 50-70% memory savings in HTML extraction.
|
|
127
129
|
|
|
128
130
|
## Browser support
|
|
129
131
|
|
|
@@ -134,6 +136,10 @@ function extractLinks(soup: Soup): string[] {
|
|
|
134
136
|
| Safari | 13+ | 16.4+ |
|
|
135
137
|
| Edge | 80+ | 91+ |
|
|
136
138
|
|
|
139
|
+
## Built on Servo
|
|
140
|
+
|
|
141
|
+
Powered by battle-tested libraries from the [Servo](https://servo.org/) browser engine: [html5ever](https://crates.io/crates/html5ever) (HTML5 parser) and [selectors](https://crates.io/crates/selectors) (CSS selector engine).
|
|
142
|
+
|
|
137
143
|
## Related packages
|
|
138
144
|
|
|
139
145
|
| Platform | Package |
|
package/package.json
CHANGED
package/scrape_wasm.d.ts
CHANGED
|
@@ -1,6 +1,43 @@
|
|
|
1
1
|
/* tslint:disable */
|
|
2
2
|
/* eslint-disable */
|
|
3
3
|
|
|
4
|
+
/**
|
|
5
|
+
* A pre-compiled CSS selector for efficient repeated matching.
|
|
6
|
+
*
|
|
7
|
+
* Compiled selectors avoid the overhead of parsing the selector string on each query.
|
|
8
|
+
*
|
|
9
|
+
* @example
|
|
10
|
+
* ```javascript
|
|
11
|
+
* import init, { CompiledSelector, Soup } from '@fast-scrape/wasm';
|
|
12
|
+
*
|
|
13
|
+
* await init();
|
|
14
|
+
*
|
|
15
|
+
* const selector = CompiledSelector.compile("div.item");
|
|
16
|
+
* const soup = new Soup("<div class='item'>A</div><div class='item'>B</div>");
|
|
17
|
+
* const items = soup.selectCompiled(selector);
|
|
18
|
+
* console.log(items.length); // 2
|
|
19
|
+
* ```
|
|
20
|
+
*/
|
|
21
|
+
export class CompiledSelector {
|
|
22
|
+
private constructor();
|
|
23
|
+
free(): void;
|
|
24
|
+
[Symbol.dispose](): void;
|
|
25
|
+
/**
|
|
26
|
+
* Compile a CSS selector string.
|
|
27
|
+
*
|
|
28
|
+
* @param selector - The CSS selector to compile
|
|
29
|
+
* @returns A compiled selector
|
|
30
|
+
* @throws Error if the selector syntax is invalid
|
|
31
|
+
*/
|
|
32
|
+
static compile(selector: string): CompiledSelector;
|
|
33
|
+
/**
|
|
34
|
+
* Get the original selector string.
|
|
35
|
+
*
|
|
36
|
+
* @returns The selector string that was compiled
|
|
37
|
+
*/
|
|
38
|
+
readonly source: string;
|
|
39
|
+
}
|
|
40
|
+
|
|
4
41
|
/**
|
|
5
42
|
* A parsed HTML document.
|
|
6
43
|
*
|
|
@@ -37,6 +74,13 @@ export class Soup {
|
|
|
37
74
|
* @throws Error if the selector syntax is invalid
|
|
38
75
|
*/
|
|
39
76
|
findAll(selector: string): Tag[];
|
|
77
|
+
/**
|
|
78
|
+
* Find the first element matching a compiled selector.
|
|
79
|
+
*
|
|
80
|
+
* @param selector - A compiled CSS selector
|
|
81
|
+
* @returns The first matching Tag, or undefined if not found
|
|
82
|
+
*/
|
|
83
|
+
findCompiled(selector: CompiledSelector): Tag | undefined;
|
|
40
84
|
/**
|
|
41
85
|
* Parses an HTML string into a Soup document.
|
|
42
86
|
*
|
|
@@ -44,6 +88,26 @@ export class Soup {
|
|
|
44
88
|
* @param config - Optional configuration options
|
|
45
89
|
*/
|
|
46
90
|
constructor(html: string, config?: SoupConfig | null);
|
|
91
|
+
/**
|
|
92
|
+
* Parse an HTML fragment without html/body wrapper.
|
|
93
|
+
*
|
|
94
|
+
* @param html - HTML fragment string to parse
|
|
95
|
+
* @param context - Optional context element name (default: "body")
|
|
96
|
+
* @param config - Optional parsing configuration
|
|
97
|
+
* @returns A new Soup instance containing the fragment
|
|
98
|
+
*
|
|
99
|
+
* @example
|
|
100
|
+
* ```javascript
|
|
101
|
+
* // Parse without wrapper
|
|
102
|
+
* const soup = Soup.parseFragment("<div>A</div><div>B</div>");
|
|
103
|
+
* const divs = soup.findAll("div");
|
|
104
|
+
* console.log(divs.length); // 2
|
|
105
|
+
*
|
|
106
|
+
* // Parse a td fragment with tr as the context element
|
|
107
|
+
* const tdSoup = Soup.parseFragment("<td>Cell</td>", "tr");
|
|
108
|
+
* ```
|
|
109
|
+
*/
|
|
110
|
+
static parseFragment(html: string, context?: string | null, config?: SoupConfig | null): Soup;
|
|
47
111
|
/**
|
|
48
112
|
* Finds all elements matching a CSS selector (alias for findAll).
|
|
49
113
|
*
|
|
@@ -51,6 +115,44 @@ export class Soup {
|
|
|
51
115
|
* @returns Array of matching Tag instances
|
|
52
116
|
*/
|
|
53
117
|
select(selector: string): Tag[];
|
|
118
|
+
/**
|
|
119
|
+
* Extract attribute values from all elements matching a selector.
|
|
120
|
+
*
|
|
121
|
+
* @param selector - CSS selector string
|
|
122
|
+
* @param attr - Attribute name to extract
|
|
123
|
+
* @returns Array of attribute values (undefined if attribute is missing)
|
|
124
|
+
* @throws Error if the selector syntax is invalid
|
|
125
|
+
*
|
|
126
|
+
* @example
|
|
127
|
+
* ```javascript
|
|
128
|
+
* const soup = new Soup("<a href='/a'>A</a><a href='/b'>B</a><a>C</a>");
|
|
129
|
+
* const hrefs = soup.selectAttr("a", "href");
|
|
130
|
+
* // hrefs: ["/a", "/b", undefined]
|
|
131
|
+
* ```
|
|
132
|
+
*/
|
|
133
|
+
selectAttr(selector: string, attr: string): any[];
|
|
134
|
+
/**
|
|
135
|
+
* Find all elements matching a compiled selector.
|
|
136
|
+
*
|
|
137
|
+
* @param selector - A compiled CSS selector
|
|
138
|
+
* @returns Array of matching Tag instances
|
|
139
|
+
*/
|
|
140
|
+
selectCompiled(selector: CompiledSelector): Tag[];
|
|
141
|
+
/**
|
|
142
|
+
* Extract text content from all elements matching a selector.
|
|
143
|
+
*
|
|
144
|
+
* @param selector - CSS selector string
|
|
145
|
+
* @returns Array of text content strings
|
|
146
|
+
* @throws Error if the selector syntax is invalid
|
|
147
|
+
*
|
|
148
|
+
* @example
|
|
149
|
+
* ```javascript
|
|
150
|
+
* const soup = new Soup("<div>A</div><div>B</div>");
|
|
151
|
+
* const texts = soup.selectText("div");
|
|
152
|
+
* // texts: ["A", "B"]
|
|
153
|
+
* ```
|
|
154
|
+
*/
|
|
155
|
+
selectText(selector: string): string[];
|
|
54
156
|
/**
|
|
55
157
|
* Get the HTML representation of the document.
|
|
56
158
|
*
|
|
@@ -150,6 +252,44 @@ export class Tag {
|
|
|
150
252
|
* @returns The attribute value, or undefined if not present
|
|
151
253
|
*/
|
|
152
254
|
attr(name: string): string | undefined;
|
|
255
|
+
/**
|
|
256
|
+
* Get all direct child elements with a specific class.
|
|
257
|
+
*
|
|
258
|
+
* @param class_name - The class name to filter by
|
|
259
|
+
* @returns Array of matching child Tag instances
|
|
260
|
+
*
|
|
261
|
+
* @example
|
|
262
|
+
* ```javascript
|
|
263
|
+
* const soup = new Soup("<div><p class='item'>A</p><span>B</span><p class='item'>C</p></div>");
|
|
264
|
+
* const div = soup.find("div");
|
|
265
|
+
* const items = div.childrenByClass("item");
|
|
266
|
+
* // items.length: 2
|
|
267
|
+
* ```
|
|
268
|
+
*/
|
|
269
|
+
childrenByClass(class_name: string): Tag[];
|
|
270
|
+
/**
|
|
271
|
+
* Get all direct child elements with a specific tag name.
|
|
272
|
+
*
|
|
273
|
+
* @param name - The tag name to filter by
|
|
274
|
+
* @returns Array of matching child Tag instances
|
|
275
|
+
*
|
|
276
|
+
* @example
|
|
277
|
+
* ```javascript
|
|
278
|
+
* const soup = new Soup("<div><p>A</p><span>B</span><p>C</p></div>");
|
|
279
|
+
* const div = soup.find("div");
|
|
280
|
+
* const paras = div.childrenByName("p");
|
|
281
|
+
* // paras.length: 2
|
|
282
|
+
* ```
|
|
283
|
+
*/
|
|
284
|
+
childrenByName(name: string): Tag[];
|
|
285
|
+
/**
|
|
286
|
+
* Find the nearest ancestor matching a CSS selector.
|
|
287
|
+
*
|
|
288
|
+
* @param selector - CSS selector string
|
|
289
|
+
* @returns The nearest matching ancestor Tag, or undefined if not found
|
|
290
|
+
* @throws Error if the selector syntax is invalid
|
|
291
|
+
*/
|
|
292
|
+
closest(selector: string): Tag | undefined;
|
|
153
293
|
/**
|
|
154
294
|
* Find the first descendant matching a CSS selector.
|
|
155
295
|
*
|
|
@@ -166,6 +306,13 @@ export class Tag {
|
|
|
166
306
|
* @throws Error if the selector syntax is invalid
|
|
167
307
|
*/
|
|
168
308
|
findAll(selector: string): Tag[];
|
|
309
|
+
/**
|
|
310
|
+
* Find the first descendant matching a compiled selector.
|
|
311
|
+
*
|
|
312
|
+
* @param selector - A compiled CSS selector
|
|
313
|
+
* @returns The first matching Tag, or undefined if not found
|
|
314
|
+
*/
|
|
315
|
+
findCompiled(selector: CompiledSelector): Tag | undefined;
|
|
169
316
|
/**
|
|
170
317
|
* Get an attribute value by name.
|
|
171
318
|
*
|
|
@@ -194,6 +341,50 @@ export class Tag {
|
|
|
194
341
|
* @returns Array of matching Tag instances
|
|
195
342
|
*/
|
|
196
343
|
select(selector: string): Tag[];
|
|
344
|
+
/**
|
|
345
|
+
* Extract attribute values from all descendants matching a selector.
|
|
346
|
+
*
|
|
347
|
+
* @param selector - CSS selector string
|
|
348
|
+
* @param attr - Attribute name to extract
|
|
349
|
+
* @returns Array of attribute values (undefined if attribute is missing)
|
|
350
|
+
* @throws Error if the selector syntax is invalid
|
|
351
|
+
*
|
|
352
|
+
* @example
|
|
353
|
+
* ```javascript
|
|
354
|
+
* const soup = new Soup("<div><a href='/a'>A</a><a href='/b'>B</a></div>");
|
|
355
|
+
* const div = soup.find("div");
|
|
356
|
+
* const hrefs = div.selectAttr("a", "href");
|
|
357
|
+
* // hrefs: ["/a", "/b"]
|
|
358
|
+
* ```
|
|
359
|
+
*/
|
|
360
|
+
selectAttr(selector: string, attr: string): any[];
|
|
361
|
+
/**
|
|
362
|
+
* Find all descendants matching a compiled selector.
|
|
363
|
+
*
|
|
364
|
+
* @param selector - A compiled CSS selector
|
|
365
|
+
* @returns Array of matching Tag instances
|
|
366
|
+
*/
|
|
367
|
+
selectCompiled(selector: CompiledSelector): Tag[];
|
|
368
|
+
/**
|
|
369
|
+
* Extract text content from all descendants matching a selector.
|
|
370
|
+
*
|
|
371
|
+
* @param selector - CSS selector string
|
|
372
|
+
* @returns Array of text content strings
|
|
373
|
+
* @throws Error if the selector syntax is invalid
|
|
374
|
+
*
|
|
375
|
+
* @example
|
|
376
|
+
* ```javascript
|
|
377
|
+
* const soup = new Soup("<div><p>A</p><p>B</p></div>");
|
|
378
|
+
* const div = soup.find("div");
|
|
379
|
+
* const texts = div.selectText("p");
|
|
380
|
+
* // texts: ["A", "B"]
|
|
381
|
+
* ```
|
|
382
|
+
*/
|
|
383
|
+
selectText(selector: string): string[];
|
|
384
|
+
/**
|
|
385
|
+
* Get all ancestor elements (alias for parents).
|
|
386
|
+
*/
|
|
387
|
+
readonly ancestors: Tag[];
|
|
197
388
|
/**
|
|
198
389
|
* Get all attributes as an object.
|
|
199
390
|
*/
|
|
@@ -226,6 +417,10 @@ export class Tag {
|
|
|
226
417
|
* Get the next sibling element.
|
|
227
418
|
*/
|
|
228
419
|
readonly nextSibling: Tag | undefined;
|
|
420
|
+
/**
|
|
421
|
+
* Get all following sibling elements.
|
|
422
|
+
*/
|
|
423
|
+
readonly nextSiblings: Tag[];
|
|
229
424
|
/**
|
|
230
425
|
* Get the outer HTML (including this element's tags).
|
|
231
426
|
*/
|
|
@@ -234,14 +429,40 @@ export class Tag {
|
|
|
234
429
|
* Get the parent element.
|
|
235
430
|
*/
|
|
236
431
|
readonly parent: Tag | undefined;
|
|
432
|
+
/**
|
|
433
|
+
* Get all ancestor elements (from parent toward root).
|
|
434
|
+
*/
|
|
435
|
+
readonly parents: Tag[];
|
|
237
436
|
/**
|
|
238
437
|
* Get the previous sibling element.
|
|
239
438
|
*/
|
|
240
439
|
readonly prevSibling: Tag | undefined;
|
|
440
|
+
/**
|
|
441
|
+
* Get all preceding sibling elements (in reverse order).
|
|
442
|
+
*/
|
|
443
|
+
readonly prevSiblings: Tag[];
|
|
444
|
+
/**
|
|
445
|
+
* Get all sibling elements (excluding self, in document order).
|
|
446
|
+
*/
|
|
447
|
+
readonly siblings: Tag[];
|
|
241
448
|
/**
|
|
242
449
|
* Get the text content of this element and all descendants.
|
|
243
450
|
*/
|
|
244
451
|
readonly text: string;
|
|
452
|
+
/**
|
|
453
|
+
* Get all direct text nodes (excluding descendants).
|
|
454
|
+
*
|
|
455
|
+
* @returns Array of text content strings
|
|
456
|
+
*
|
|
457
|
+
* @example
|
|
458
|
+
* ```javascript
|
|
459
|
+
* const soup = new Soup("<div>Text1<span>Inner</span>Text2</div>");
|
|
460
|
+
* const div = soup.find("div");
|
|
461
|
+
* const texts = div.textNodes;
|
|
462
|
+
* // texts: ["Text1", "Text2"]
|
|
463
|
+
* ```
|
|
464
|
+
*/
|
|
465
|
+
readonly textNodes: string[];
|
|
245
466
|
}
|
|
246
467
|
|
|
247
468
|
/**
|
|
@@ -292,16 +513,24 @@ export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembl
|
|
|
292
513
|
|
|
293
514
|
export interface InitOutput {
|
|
294
515
|
readonly memory: WebAssembly.Memory;
|
|
516
|
+
readonly __wbg_compiledselector_free: (a: number, b: number) => void;
|
|
295
517
|
readonly __wbg_soup_free: (a: number, b: number) => void;
|
|
296
518
|
readonly __wbg_soupconfig_free: (a: number, b: number) => void;
|
|
297
519
|
readonly __wbg_tag_free: (a: number, b: number) => void;
|
|
520
|
+
readonly compiledselector_compile: (a: number, b: number, c: number) => void;
|
|
521
|
+
readonly compiledselector_source: (a: number, b: number) => void;
|
|
298
522
|
readonly hasSimdSupport: () => number;
|
|
299
523
|
readonly parseBatch: (a: number, b: number, c: number) => void;
|
|
300
524
|
readonly soup_find: (a: number, b: number, c: number, d: number) => void;
|
|
301
525
|
readonly soup_findAll: (a: number, b: number, c: number, d: number) => void;
|
|
526
|
+
readonly soup_findCompiled: (a: number, b: number) => number;
|
|
302
527
|
readonly soup_length: (a: number) => number;
|
|
303
528
|
readonly soup_new: (a: number, b: number, c: number) => number;
|
|
529
|
+
readonly soup_parseFragment: (a: number, b: number, c: number, d: number, e: number) => number;
|
|
304
530
|
readonly soup_root: (a: number) => number;
|
|
531
|
+
readonly soup_selectAttr: (a: number, b: number, c: number, d: number, e: number, f: number) => void;
|
|
532
|
+
readonly soup_selectCompiled: (a: number, b: number, c: number) => void;
|
|
533
|
+
readonly soup_selectText: (a: number, b: number, c: number, d: number) => void;
|
|
305
534
|
readonly soup_text: (a: number, b: number) => void;
|
|
306
535
|
readonly soup_title: (a: number, b: number) => void;
|
|
307
536
|
readonly soup_toHtml: (a: number, b: number) => void;
|
|
@@ -314,13 +543,18 @@ export interface InitOutput {
|
|
|
314
543
|
readonly soupconfig_set_preserveWhitespace: (a: number, b: number) => void;
|
|
315
544
|
readonly soupconfig_set_strictMode: (a: number, b: number) => void;
|
|
316
545
|
readonly soupconfig_strictMode: (a: number) => number;
|
|
546
|
+
readonly tag_ancestors: (a: number, b: number) => void;
|
|
317
547
|
readonly tag_attr: (a: number, b: number, c: number, d: number) => void;
|
|
318
548
|
readonly tag_attrs: (a: number) => number;
|
|
319
549
|
readonly tag_children: (a: number, b: number) => void;
|
|
550
|
+
readonly tag_childrenByClass: (a: number, b: number, c: number, d: number) => void;
|
|
551
|
+
readonly tag_childrenByName: (a: number, b: number, c: number, d: number) => void;
|
|
320
552
|
readonly tag_classes: (a: number, b: number) => void;
|
|
553
|
+
readonly tag_closest: (a: number, b: number, c: number, d: number) => void;
|
|
321
554
|
readonly tag_descendants: (a: number, b: number) => void;
|
|
322
555
|
readonly tag_find: (a: number, b: number, c: number, d: number) => void;
|
|
323
556
|
readonly tag_findAll: (a: number, b: number, c: number, d: number) => void;
|
|
557
|
+
readonly tag_findCompiled: (a: number, b: number) => number;
|
|
324
558
|
readonly tag_get: (a: number, b: number, c: number, d: number) => void;
|
|
325
559
|
readonly tag_hasAttr: (a: number, b: number, c: number) => number;
|
|
326
560
|
readonly tag_hasClass: (a: number, b: number, c: number) => number;
|
|
@@ -328,14 +562,22 @@ export interface InitOutput {
|
|
|
328
562
|
readonly tag_length: (a: number) => number;
|
|
329
563
|
readonly tag_name: (a: number, b: number) => void;
|
|
330
564
|
readonly tag_nextSibling: (a: number) => number;
|
|
565
|
+
readonly tag_nextSiblings: (a: number, b: number) => void;
|
|
331
566
|
readonly tag_outerHTML: (a: number, b: number) => void;
|
|
332
567
|
readonly tag_parent: (a: number) => number;
|
|
333
568
|
readonly tag_prevSibling: (a: number) => number;
|
|
569
|
+
readonly tag_prevSiblings: (a: number, b: number) => void;
|
|
570
|
+
readonly tag_selectAttr: (a: number, b: number, c: number, d: number, e: number, f: number) => void;
|
|
571
|
+
readonly tag_selectCompiled: (a: number, b: number, c: number) => void;
|
|
572
|
+
readonly tag_selectText: (a: number, b: number, c: number, d: number) => void;
|
|
573
|
+
readonly tag_siblings: (a: number, b: number) => void;
|
|
334
574
|
readonly tag_text: (a: number, b: number) => void;
|
|
575
|
+
readonly tag_textNodes: (a: number, b: number) => void;
|
|
335
576
|
readonly version: (a: number) => void;
|
|
336
577
|
readonly init: () => void;
|
|
337
578
|
readonly soup_select: (a: number, b: number, c: number, d: number) => void;
|
|
338
579
|
readonly tag_select: (a: number, b: number, c: number, d: number) => void;
|
|
580
|
+
readonly tag_parents: (a: number, b: number) => void;
|
|
339
581
|
readonly __wbindgen_export: (a: number, b: number) => number;
|
|
340
582
|
readonly __wbindgen_export2: (a: number, b: number, c: number, d: number) => number;
|
|
341
583
|
readonly __wbindgen_export3: (a: number, b: number, c: number) => void;
|
package/scrape_wasm.js
CHANGED
|
@@ -1,5 +1,91 @@
|
|
|
1
1
|
/* @ts-self-types="./scrape_wasm.d.ts" */
|
|
2
2
|
|
|
3
|
+
/**
|
|
4
|
+
* A pre-compiled CSS selector for efficient repeated matching.
|
|
5
|
+
*
|
|
6
|
+
* Compiled selectors avoid the overhead of parsing the selector string on each query.
|
|
7
|
+
*
|
|
8
|
+
* @example
|
|
9
|
+
* ```javascript
|
|
10
|
+
* import init, { CompiledSelector, Soup } from '@fast-scrape/wasm';
|
|
11
|
+
*
|
|
12
|
+
* await init();
|
|
13
|
+
*
|
|
14
|
+
* const selector = CompiledSelector.compile("div.item");
|
|
15
|
+
* const soup = new Soup("<div class='item'>A</div><div class='item'>B</div>");
|
|
16
|
+
* const items = soup.selectCompiled(selector);
|
|
17
|
+
* console.log(items.length); // 2
|
|
18
|
+
* ```
|
|
19
|
+
*/
|
|
20
|
+
export class CompiledSelector {
|
|
21
|
+
static __wrap(ptr) {
|
|
22
|
+
ptr = ptr >>> 0;
|
|
23
|
+
const obj = Object.create(CompiledSelector.prototype);
|
|
24
|
+
obj.__wbg_ptr = ptr;
|
|
25
|
+
CompiledSelectorFinalization.register(obj, obj.__wbg_ptr, obj);
|
|
26
|
+
return obj;
|
|
27
|
+
}
|
|
28
|
+
__destroy_into_raw() {
|
|
29
|
+
const ptr = this.__wbg_ptr;
|
|
30
|
+
this.__wbg_ptr = 0;
|
|
31
|
+
CompiledSelectorFinalization.unregister(this);
|
|
32
|
+
return ptr;
|
|
33
|
+
}
|
|
34
|
+
free() {
|
|
35
|
+
const ptr = this.__destroy_into_raw();
|
|
36
|
+
wasm.__wbg_compiledselector_free(ptr, 0);
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Compile a CSS selector string.
|
|
40
|
+
*
|
|
41
|
+
* @param selector - The CSS selector to compile
|
|
42
|
+
* @returns A compiled selector
|
|
43
|
+
* @throws Error if the selector syntax is invalid
|
|
44
|
+
* @param {string} selector
|
|
45
|
+
* @returns {CompiledSelector}
|
|
46
|
+
*/
|
|
47
|
+
static compile(selector) {
|
|
48
|
+
try {
|
|
49
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
50
|
+
const ptr0 = passStringToWasm0(selector, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
51
|
+
const len0 = WASM_VECTOR_LEN;
|
|
52
|
+
wasm.compiledselector_compile(retptr, ptr0, len0);
|
|
53
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
54
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
55
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
56
|
+
if (r2) {
|
|
57
|
+
throw takeObject(r1);
|
|
58
|
+
}
|
|
59
|
+
return CompiledSelector.__wrap(r0);
|
|
60
|
+
} finally {
|
|
61
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
/**
|
|
65
|
+
* Get the original selector string.
|
|
66
|
+
*
|
|
67
|
+
* @returns The selector string that was compiled
|
|
68
|
+
* @returns {string}
|
|
69
|
+
*/
|
|
70
|
+
get source() {
|
|
71
|
+
let deferred1_0;
|
|
72
|
+
let deferred1_1;
|
|
73
|
+
try {
|
|
74
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
75
|
+
wasm.compiledselector_source(retptr, this.__wbg_ptr);
|
|
76
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
77
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
78
|
+
deferred1_0 = r0;
|
|
79
|
+
deferred1_1 = r1;
|
|
80
|
+
return getStringFromWasm0(r0, r1);
|
|
81
|
+
} finally {
|
|
82
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
83
|
+
wasm.__wbindgen_export3(deferred1_0, deferred1_1, 1);
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
if (Symbol.dispose) CompiledSelector.prototype[Symbol.dispose] = CompiledSelector.prototype.free;
|
|
88
|
+
|
|
3
89
|
/**
|
|
4
90
|
* A parsed HTML document.
|
|
5
91
|
*
|
|
@@ -90,6 +176,19 @@ export class Soup {
|
|
|
90
176
|
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
91
177
|
}
|
|
92
178
|
}
|
|
179
|
+
/**
|
|
180
|
+
* Find the first element matching a compiled selector.
|
|
181
|
+
*
|
|
182
|
+
* @param selector - A compiled CSS selector
|
|
183
|
+
* @returns The first matching Tag, or undefined if not found
|
|
184
|
+
* @param {CompiledSelector} selector
|
|
185
|
+
* @returns {Tag | undefined}
|
|
186
|
+
*/
|
|
187
|
+
findCompiled(selector) {
|
|
188
|
+
_assertClass(selector, CompiledSelector);
|
|
189
|
+
const ret = wasm.soup_findCompiled(this.__wbg_ptr, selector.__wbg_ptr);
|
|
190
|
+
return ret === 0 ? undefined : Tag.__wrap(ret);
|
|
191
|
+
}
|
|
93
192
|
/**
|
|
94
193
|
* Get the number of nodes in the document.
|
|
95
194
|
* @returns {number}
|
|
@@ -119,6 +218,42 @@ export class Soup {
|
|
|
119
218
|
SoupFinalization.register(this, this.__wbg_ptr, this);
|
|
120
219
|
return this;
|
|
121
220
|
}
|
|
221
|
+
/**
|
|
222
|
+
* Parse an HTML fragment without html/body wrapper.
|
|
223
|
+
*
|
|
224
|
+
* @param html - HTML fragment string to parse
|
|
225
|
+
* @param context - Optional context element name (default: "body")
|
|
226
|
+
* @param config - Optional parsing configuration
|
|
227
|
+
* @returns A new Soup instance containing the fragment
|
|
228
|
+
*
|
|
229
|
+
* @example
|
|
230
|
+
* ```javascript
|
|
231
|
+
* // Parse without wrapper
|
|
232
|
+
* const soup = Soup.parseFragment("<div>A</div><div>B</div>");
|
|
233
|
+
* const divs = soup.findAll("div");
|
|
234
|
+
* console.log(divs.length); // 2
|
|
235
|
+
*
|
|
236
|
+
* // Parse a td fragment with tr as the context element
|
|
237
|
+
* const tdSoup = Soup.parseFragment("<td>Cell</td>", "tr");
|
|
238
|
+
* ```
|
|
239
|
+
* @param {string} html
|
|
240
|
+
* @param {string | null} [context]
|
|
241
|
+
* @param {SoupConfig | null} [config]
|
|
242
|
+
* @returns {Soup}
|
|
243
|
+
*/
|
|
244
|
+
static parseFragment(html, context, config) {
|
|
245
|
+
const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
246
|
+
const len0 = WASM_VECTOR_LEN;
|
|
247
|
+
var ptr1 = isLikeNone(context) ? 0 : passStringToWasm0(context, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
248
|
+
var len1 = WASM_VECTOR_LEN;
|
|
249
|
+
let ptr2 = 0;
|
|
250
|
+
if (!isLikeNone(config)) {
|
|
251
|
+
_assertClass(config, SoupConfig);
|
|
252
|
+
ptr2 = config.__destroy_into_raw();
|
|
253
|
+
}
|
|
254
|
+
const ret = wasm.soup_parseFragment(ptr0, len0, ptr1, len1, ptr2);
|
|
255
|
+
return Soup.__wrap(ret);
|
|
256
|
+
}
|
|
122
257
|
/**
|
|
123
258
|
* Get the root element of the document.
|
|
124
259
|
*
|
|
@@ -157,6 +292,104 @@ export class Soup {
|
|
|
157
292
|
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
158
293
|
}
|
|
159
294
|
}
|
|
295
|
+
/**
|
|
296
|
+
* Extract attribute values from all elements matching a selector.
|
|
297
|
+
*
|
|
298
|
+
* @param selector - CSS selector string
|
|
299
|
+
* @param attr - Attribute name to extract
|
|
300
|
+
* @returns Array of attribute values (undefined if attribute is missing)
|
|
301
|
+
* @throws Error if the selector syntax is invalid
|
|
302
|
+
*
|
|
303
|
+
* @example
|
|
304
|
+
* ```javascript
|
|
305
|
+
* const soup = new Soup("<a href='/a'>A</a><a href='/b'>B</a><a>C</a>");
|
|
306
|
+
* const hrefs = soup.selectAttr("a", "href");
|
|
307
|
+
* // hrefs: ["/a", "/b", undefined]
|
|
308
|
+
* ```
|
|
309
|
+
* @param {string} selector
|
|
310
|
+
* @param {string} attr
|
|
311
|
+
* @returns {any[]}
|
|
312
|
+
*/
|
|
313
|
+
selectAttr(selector, attr) {
|
|
314
|
+
try {
|
|
315
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
316
|
+
const ptr0 = passStringToWasm0(selector, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
317
|
+
const len0 = WASM_VECTOR_LEN;
|
|
318
|
+
const ptr1 = passStringToWasm0(attr, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
319
|
+
const len1 = WASM_VECTOR_LEN;
|
|
320
|
+
wasm.soup_selectAttr(retptr, this.__wbg_ptr, ptr0, len0, ptr1, len1);
|
|
321
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
322
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
323
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
324
|
+
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
325
|
+
if (r3) {
|
|
326
|
+
throw takeObject(r2);
|
|
327
|
+
}
|
|
328
|
+
var v3 = getArrayJsValueFromWasm0(r0, r1).slice();
|
|
329
|
+
wasm.__wbindgen_export3(r0, r1 * 4, 4);
|
|
330
|
+
return v3;
|
|
331
|
+
} finally {
|
|
332
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
333
|
+
}
|
|
334
|
+
}
|
|
335
|
+
/**
|
|
336
|
+
* Find all elements matching a compiled selector.
|
|
337
|
+
*
|
|
338
|
+
* @param selector - A compiled CSS selector
|
|
339
|
+
* @returns Array of matching Tag instances
|
|
340
|
+
* @param {CompiledSelector} selector
|
|
341
|
+
* @returns {Tag[]}
|
|
342
|
+
*/
|
|
343
|
+
selectCompiled(selector) {
|
|
344
|
+
try {
|
|
345
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
346
|
+
_assertClass(selector, CompiledSelector);
|
|
347
|
+
wasm.soup_selectCompiled(retptr, this.__wbg_ptr, selector.__wbg_ptr);
|
|
348
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
349
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
350
|
+
var v1 = getArrayJsValueFromWasm0(r0, r1).slice();
|
|
351
|
+
wasm.__wbindgen_export3(r0, r1 * 4, 4);
|
|
352
|
+
return v1;
|
|
353
|
+
} finally {
|
|
354
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
355
|
+
}
|
|
356
|
+
}
|
|
357
|
+
/**
|
|
358
|
+
* Extract text content from all elements matching a selector.
|
|
359
|
+
*
|
|
360
|
+
* @param selector - CSS selector string
|
|
361
|
+
* @returns Array of text content strings
|
|
362
|
+
* @throws Error if the selector syntax is invalid
|
|
363
|
+
*
|
|
364
|
+
* @example
|
|
365
|
+
* ```javascript
|
|
366
|
+
* const soup = new Soup("<div>A</div><div>B</div>");
|
|
367
|
+
* const texts = soup.selectText("div");
|
|
368
|
+
* // texts: ["A", "B"]
|
|
369
|
+
* ```
|
|
370
|
+
* @param {string} selector
|
|
371
|
+
* @returns {string[]}
|
|
372
|
+
*/
|
|
373
|
+
selectText(selector) {
|
|
374
|
+
try {
|
|
375
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
376
|
+
const ptr0 = passStringToWasm0(selector, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
377
|
+
const len0 = WASM_VECTOR_LEN;
|
|
378
|
+
wasm.soup_selectText(retptr, this.__wbg_ptr, ptr0, len0);
|
|
379
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
380
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
381
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
382
|
+
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
383
|
+
if (r3) {
|
|
384
|
+
throw takeObject(r2);
|
|
385
|
+
}
|
|
386
|
+
var v2 = getArrayJsValueFromWasm0(r0, r1).slice();
|
|
387
|
+
wasm.__wbindgen_export3(r0, r1 * 4, 4);
|
|
388
|
+
return v2;
|
|
389
|
+
} finally {
|
|
390
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
391
|
+
}
|
|
392
|
+
}
|
|
160
393
|
/**
|
|
161
394
|
* Get the text content of the entire document.
|
|
162
395
|
*
|
|
@@ -360,6 +593,23 @@ export class Tag {
|
|
|
360
593
|
const ptr = this.__destroy_into_raw();
|
|
361
594
|
wasm.__wbg_tag_free(ptr, 0);
|
|
362
595
|
}
|
|
596
|
+
/**
|
|
597
|
+
* Get all ancestor elements (alias for parents).
|
|
598
|
+
* @returns {Tag[]}
|
|
599
|
+
*/
|
|
600
|
+
get ancestors() {
|
|
601
|
+
try {
|
|
602
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
603
|
+
wasm.tag_ancestors(retptr, this.__wbg_ptr);
|
|
604
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
605
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
606
|
+
var v1 = getArrayJsValueFromWasm0(r0, r1).slice();
|
|
607
|
+
wasm.__wbindgen_export3(r0, r1 * 4, 4);
|
|
608
|
+
return v1;
|
|
609
|
+
} finally {
|
|
610
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
611
|
+
}
|
|
612
|
+
}
|
|
363
613
|
/**
|
|
364
614
|
* Get an attribute value by name (alias for get).
|
|
365
615
|
*
|
|
@@ -411,6 +661,68 @@ export class Tag {
|
|
|
411
661
|
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
412
662
|
}
|
|
413
663
|
}
|
|
664
|
+
/**
|
|
665
|
+
* Get all direct child elements with a specific class.
|
|
666
|
+
*
|
|
667
|
+
* @param class_name - The class name to filter by
|
|
668
|
+
* @returns Array of matching child Tag instances
|
|
669
|
+
*
|
|
670
|
+
* @example
|
|
671
|
+
* ```javascript
|
|
672
|
+
* const soup = new Soup("<div><p class='item'>A</p><span>B</span><p class='item'>C</p></div>");
|
|
673
|
+
* const div = soup.find("div");
|
|
674
|
+
* const items = div.childrenByClass("item");
|
|
675
|
+
* // items.length: 2
|
|
676
|
+
* ```
|
|
677
|
+
* @param {string} class_name
|
|
678
|
+
* @returns {Tag[]}
|
|
679
|
+
*/
|
|
680
|
+
childrenByClass(class_name) {
|
|
681
|
+
try {
|
|
682
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
683
|
+
const ptr0 = passStringToWasm0(class_name, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
684
|
+
const len0 = WASM_VECTOR_LEN;
|
|
685
|
+
wasm.tag_childrenByClass(retptr, this.__wbg_ptr, ptr0, len0);
|
|
686
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
687
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
688
|
+
var v2 = getArrayJsValueFromWasm0(r0, r1).slice();
|
|
689
|
+
wasm.__wbindgen_export3(r0, r1 * 4, 4);
|
|
690
|
+
return v2;
|
|
691
|
+
} finally {
|
|
692
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
693
|
+
}
|
|
694
|
+
}
|
|
695
|
+
/**
|
|
696
|
+
* Get all direct child elements with a specific tag name.
|
|
697
|
+
*
|
|
698
|
+
* @param name - The tag name to filter by
|
|
699
|
+
* @returns Array of matching child Tag instances
|
|
700
|
+
*
|
|
701
|
+
* @example
|
|
702
|
+
* ```javascript
|
|
703
|
+
* const soup = new Soup("<div><p>A</p><span>B</span><p>C</p></div>");
|
|
704
|
+
* const div = soup.find("div");
|
|
705
|
+
* const paras = div.childrenByName("p");
|
|
706
|
+
* // paras.length: 2
|
|
707
|
+
* ```
|
|
708
|
+
* @param {string} name
|
|
709
|
+
* @returns {Tag[]}
|
|
710
|
+
*/
|
|
711
|
+
childrenByName(name) {
|
|
712
|
+
try {
|
|
713
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
714
|
+
const ptr0 = passStringToWasm0(name, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
715
|
+
const len0 = WASM_VECTOR_LEN;
|
|
716
|
+
wasm.tag_childrenByName(retptr, this.__wbg_ptr, ptr0, len0);
|
|
717
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
718
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
719
|
+
var v2 = getArrayJsValueFromWasm0(r0, r1).slice();
|
|
720
|
+
wasm.__wbindgen_export3(r0, r1 * 4, 4);
|
|
721
|
+
return v2;
|
|
722
|
+
} finally {
|
|
723
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
724
|
+
}
|
|
725
|
+
}
|
|
414
726
|
/**
|
|
415
727
|
* Get all classes as an array.
|
|
416
728
|
* @returns {string[]}
|
|
@@ -428,6 +740,32 @@ export class Tag {
|
|
|
428
740
|
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
429
741
|
}
|
|
430
742
|
}
|
|
743
|
+
/**
|
|
744
|
+
* Find the nearest ancestor matching a CSS selector.
|
|
745
|
+
*
|
|
746
|
+
* @param selector - CSS selector string
|
|
747
|
+
* @returns The nearest matching ancestor Tag, or undefined if not found
|
|
748
|
+
* @throws Error if the selector syntax is invalid
|
|
749
|
+
* @param {string} selector
|
|
750
|
+
* @returns {Tag | undefined}
|
|
751
|
+
*/
|
|
752
|
+
closest(selector) {
|
|
753
|
+
try {
|
|
754
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
755
|
+
const ptr0 = passStringToWasm0(selector, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
756
|
+
const len0 = WASM_VECTOR_LEN;
|
|
757
|
+
wasm.tag_closest(retptr, this.__wbg_ptr, ptr0, len0);
|
|
758
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
759
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
760
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
761
|
+
if (r2) {
|
|
762
|
+
throw takeObject(r1);
|
|
763
|
+
}
|
|
764
|
+
return r0 === 0 ? undefined : Tag.__wrap(r0);
|
|
765
|
+
} finally {
|
|
766
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
767
|
+
}
|
|
768
|
+
}
|
|
431
769
|
/**
|
|
432
770
|
* Get all descendant elements.
|
|
433
771
|
* @returns {Tag[]}
|
|
@@ -500,6 +838,19 @@ export class Tag {
|
|
|
500
838
|
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
501
839
|
}
|
|
502
840
|
}
|
|
841
|
+
/**
|
|
842
|
+
* Find the first descendant matching a compiled selector.
|
|
843
|
+
*
|
|
844
|
+
* @param selector - A compiled CSS selector
|
|
845
|
+
* @returns The first matching Tag, or undefined if not found
|
|
846
|
+
* @param {CompiledSelector} selector
|
|
847
|
+
* @returns {Tag | undefined}
|
|
848
|
+
*/
|
|
849
|
+
findCompiled(selector) {
|
|
850
|
+
_assertClass(selector, CompiledSelector);
|
|
851
|
+
const ret = wasm.tag_findCompiled(this.__wbg_ptr, selector.__wbg_ptr);
|
|
852
|
+
return ret === 0 ? undefined : Tag.__wrap(ret);
|
|
853
|
+
}
|
|
503
854
|
/**
|
|
504
855
|
* Get an attribute value by name.
|
|
505
856
|
*
|
|
@@ -610,6 +961,23 @@ export class Tag {
|
|
|
610
961
|
const ret = wasm.tag_nextSibling(this.__wbg_ptr);
|
|
611
962
|
return ret === 0 ? undefined : Tag.__wrap(ret);
|
|
612
963
|
}
|
|
964
|
+
/**
|
|
965
|
+
* Get all following sibling elements.
|
|
966
|
+
* @returns {Tag[]}
|
|
967
|
+
*/
|
|
968
|
+
get nextSiblings() {
|
|
969
|
+
try {
|
|
970
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
971
|
+
wasm.tag_nextSiblings(retptr, this.__wbg_ptr);
|
|
972
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
973
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
974
|
+
var v1 = getArrayJsValueFromWasm0(r0, r1).slice();
|
|
975
|
+
wasm.__wbindgen_export3(r0, r1 * 4, 4);
|
|
976
|
+
return v1;
|
|
977
|
+
} finally {
|
|
978
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
979
|
+
}
|
|
980
|
+
}
|
|
613
981
|
/**
|
|
614
982
|
* Get the outer HTML (including this element's tags).
|
|
615
983
|
* @returns {string}
|
|
@@ -638,6 +1006,23 @@ export class Tag {
|
|
|
638
1006
|
const ret = wasm.tag_parent(this.__wbg_ptr);
|
|
639
1007
|
return ret === 0 ? undefined : Tag.__wrap(ret);
|
|
640
1008
|
}
|
|
1009
|
+
/**
|
|
1010
|
+
* Get all ancestor elements (from parent toward root).
|
|
1011
|
+
* @returns {Tag[]}
|
|
1012
|
+
*/
|
|
1013
|
+
get parents() {
|
|
1014
|
+
try {
|
|
1015
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
1016
|
+
wasm.tag_ancestors(retptr, this.__wbg_ptr);
|
|
1017
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
1018
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
1019
|
+
var v1 = getArrayJsValueFromWasm0(r0, r1).slice();
|
|
1020
|
+
wasm.__wbindgen_export3(r0, r1 * 4, 4);
|
|
1021
|
+
return v1;
|
|
1022
|
+
} finally {
|
|
1023
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
1024
|
+
}
|
|
1025
|
+
}
|
|
641
1026
|
/**
|
|
642
1027
|
* Get the previous sibling element.
|
|
643
1028
|
* @returns {Tag | undefined}
|
|
@@ -646,6 +1031,23 @@ export class Tag {
|
|
|
646
1031
|
const ret = wasm.tag_prevSibling(this.__wbg_ptr);
|
|
647
1032
|
return ret === 0 ? undefined : Tag.__wrap(ret);
|
|
648
1033
|
}
|
|
1034
|
+
/**
|
|
1035
|
+
* Get all preceding sibling elements (in reverse order).
|
|
1036
|
+
* @returns {Tag[]}
|
|
1037
|
+
*/
|
|
1038
|
+
get prevSiblings() {
|
|
1039
|
+
try {
|
|
1040
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
1041
|
+
wasm.tag_prevSiblings(retptr, this.__wbg_ptr);
|
|
1042
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
1043
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
1044
|
+
var v1 = getArrayJsValueFromWasm0(r0, r1).slice();
|
|
1045
|
+
wasm.__wbindgen_export3(r0, r1 * 4, 4);
|
|
1046
|
+
return v1;
|
|
1047
|
+
} finally {
|
|
1048
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
1049
|
+
}
|
|
1050
|
+
}
|
|
649
1051
|
/**
|
|
650
1052
|
* Find all descendants matching a CSS selector (alias for findAll).
|
|
651
1053
|
*
|
|
@@ -674,6 +1076,123 @@ export class Tag {
|
|
|
674
1076
|
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
675
1077
|
}
|
|
676
1078
|
}
|
|
1079
|
+
/**
|
|
1080
|
+
* Extract attribute values from all descendants matching a selector.
|
|
1081
|
+
*
|
|
1082
|
+
* @param selector - CSS selector string
|
|
1083
|
+
* @param attr - Attribute name to extract
|
|
1084
|
+
* @returns Array of attribute values (undefined if attribute is missing)
|
|
1085
|
+
* @throws Error if the selector syntax is invalid
|
|
1086
|
+
*
|
|
1087
|
+
* @example
|
|
1088
|
+
* ```javascript
|
|
1089
|
+
* const soup = new Soup("<div><a href='/a'>A</a><a href='/b'>B</a></div>");
|
|
1090
|
+
* const div = soup.find("div");
|
|
1091
|
+
* const hrefs = div.selectAttr("a", "href");
|
|
1092
|
+
* // hrefs: ["/a", "/b"]
|
|
1093
|
+
* ```
|
|
1094
|
+
* @param {string} selector
|
|
1095
|
+
* @param {string} attr
|
|
1096
|
+
* @returns {any[]}
|
|
1097
|
+
*/
|
|
1098
|
+
selectAttr(selector, attr) {
|
|
1099
|
+
try {
|
|
1100
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
1101
|
+
const ptr0 = passStringToWasm0(selector, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
1102
|
+
const len0 = WASM_VECTOR_LEN;
|
|
1103
|
+
const ptr1 = passStringToWasm0(attr, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
1104
|
+
const len1 = WASM_VECTOR_LEN;
|
|
1105
|
+
wasm.tag_selectAttr(retptr, this.__wbg_ptr, ptr0, len0, ptr1, len1);
|
|
1106
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
1107
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
1108
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
1109
|
+
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
1110
|
+
if (r3) {
|
|
1111
|
+
throw takeObject(r2);
|
|
1112
|
+
}
|
|
1113
|
+
var v3 = getArrayJsValueFromWasm0(r0, r1).slice();
|
|
1114
|
+
wasm.__wbindgen_export3(r0, r1 * 4, 4);
|
|
1115
|
+
return v3;
|
|
1116
|
+
} finally {
|
|
1117
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
1118
|
+
}
|
|
1119
|
+
}
|
|
1120
|
+
/**
|
|
1121
|
+
* Find all descendants matching a compiled selector.
|
|
1122
|
+
*
|
|
1123
|
+
* @param selector - A compiled CSS selector
|
|
1124
|
+
* @returns Array of matching Tag instances
|
|
1125
|
+
* @param {CompiledSelector} selector
|
|
1126
|
+
* @returns {Tag[]}
|
|
1127
|
+
*/
|
|
1128
|
+
selectCompiled(selector) {
|
|
1129
|
+
try {
|
|
1130
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
1131
|
+
_assertClass(selector, CompiledSelector);
|
|
1132
|
+
wasm.tag_selectCompiled(retptr, this.__wbg_ptr, selector.__wbg_ptr);
|
|
1133
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
1134
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
1135
|
+
var v1 = getArrayJsValueFromWasm0(r0, r1).slice();
|
|
1136
|
+
wasm.__wbindgen_export3(r0, r1 * 4, 4);
|
|
1137
|
+
return v1;
|
|
1138
|
+
} finally {
|
|
1139
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
1140
|
+
}
|
|
1141
|
+
}
|
|
1142
|
+
/**
|
|
1143
|
+
* Extract text content from all descendants matching a selector.
|
|
1144
|
+
*
|
|
1145
|
+
* @param selector - CSS selector string
|
|
1146
|
+
* @returns Array of text content strings
|
|
1147
|
+
* @throws Error if the selector syntax is invalid
|
|
1148
|
+
*
|
|
1149
|
+
* @example
|
|
1150
|
+
* ```javascript
|
|
1151
|
+
* const soup = new Soup("<div><p>A</p><p>B</p></div>");
|
|
1152
|
+
* const div = soup.find("div");
|
|
1153
|
+
* const texts = div.selectText("p");
|
|
1154
|
+
* // texts: ["A", "B"]
|
|
1155
|
+
* ```
|
|
1156
|
+
* @param {string} selector
|
|
1157
|
+
* @returns {string[]}
|
|
1158
|
+
*/
|
|
1159
|
+
selectText(selector) {
|
|
1160
|
+
try {
|
|
1161
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
1162
|
+
const ptr0 = passStringToWasm0(selector, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
1163
|
+
const len0 = WASM_VECTOR_LEN;
|
|
1164
|
+
wasm.tag_selectText(retptr, this.__wbg_ptr, ptr0, len0);
|
|
1165
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
1166
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
1167
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
1168
|
+
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
1169
|
+
if (r3) {
|
|
1170
|
+
throw takeObject(r2);
|
|
1171
|
+
}
|
|
1172
|
+
var v2 = getArrayJsValueFromWasm0(r0, r1).slice();
|
|
1173
|
+
wasm.__wbindgen_export3(r0, r1 * 4, 4);
|
|
1174
|
+
return v2;
|
|
1175
|
+
} finally {
|
|
1176
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
1177
|
+
}
|
|
1178
|
+
}
|
|
1179
|
+
/**
|
|
1180
|
+
* Get all sibling elements (excluding self, in document order).
|
|
1181
|
+
* @returns {Tag[]}
|
|
1182
|
+
*/
|
|
1183
|
+
get siblings() {
|
|
1184
|
+
try {
|
|
1185
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
1186
|
+
wasm.tag_siblings(retptr, this.__wbg_ptr);
|
|
1187
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
1188
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
1189
|
+
var v1 = getArrayJsValueFromWasm0(r0, r1).slice();
|
|
1190
|
+
wasm.__wbindgen_export3(r0, r1 * 4, 4);
|
|
1191
|
+
return v1;
|
|
1192
|
+
} finally {
|
|
1193
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
1194
|
+
}
|
|
1195
|
+
}
|
|
677
1196
|
/**
|
|
678
1197
|
* Get the text content of this element and all descendants.
|
|
679
1198
|
* @returns {string}
|
|
@@ -694,6 +1213,33 @@ export class Tag {
|
|
|
694
1213
|
wasm.__wbindgen_export3(deferred1_0, deferred1_1, 1);
|
|
695
1214
|
}
|
|
696
1215
|
}
|
|
1216
|
+
/**
|
|
1217
|
+
* Get all direct text nodes (excluding descendants).
|
|
1218
|
+
*
|
|
1219
|
+
* @returns Array of text content strings
|
|
1220
|
+
*
|
|
1221
|
+
* @example
|
|
1222
|
+
* ```javascript
|
|
1223
|
+
* const soup = new Soup("<div>Text1<span>Inner</span>Text2</div>");
|
|
1224
|
+
* const div = soup.find("div");
|
|
1225
|
+
* const texts = div.textNodes;
|
|
1226
|
+
* // texts: ["Text1", "Text2"]
|
|
1227
|
+
* ```
|
|
1228
|
+
* @returns {string[]}
|
|
1229
|
+
*/
|
|
1230
|
+
get textNodes() {
|
|
1231
|
+
try {
|
|
1232
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
1233
|
+
wasm.tag_textNodes(retptr, this.__wbg_ptr);
|
|
1234
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
1235
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
1236
|
+
var v1 = getArrayJsValueFromWasm0(r0, r1).slice();
|
|
1237
|
+
wasm.__wbindgen_export3(r0, r1 * 4, 4);
|
|
1238
|
+
return v1;
|
|
1239
|
+
} finally {
|
|
1240
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
1241
|
+
}
|
|
1242
|
+
}
|
|
697
1243
|
}
|
|
698
1244
|
if (Symbol.dispose) Tag.prototype[Symbol.dispose] = Tag.prototype.free;
|
|
699
1245
|
|
|
@@ -850,6 +1396,9 @@ function __wbg_get_imports() {
|
|
|
850
1396
|
};
|
|
851
1397
|
}
|
|
852
1398
|
|
|
1399
|
+
// Registry whose callback frees a CompiledSelector's wasm-side allocation.
// Where FinalizationRegistry is unavailable, a no-op stand-in with the same
// register/unregister surface is used instead.
const CompiledSelectorFinalization = (typeof FinalizationRegistry !== 'undefined')
    ? new FinalizationRegistry(ptr => wasm.__wbg_compiledselector_free(ptr >>> 0, 1))
    : { register: () => {}, unregister: () => {} };
|
|
853
1402
|
const SoupFinalization = (typeof FinalizationRegistry === 'undefined')
|
|
854
1403
|
? { register: () => {}, unregister: () => {} }
|
|
855
1404
|
: new FinalizationRegistry(ptr => wasm.__wbg_soup_free(ptr >>> 0, 1));
|
package/scrape_wasm_bg.wasm
CHANGED
|
Binary file
|