@fast-scrape/wasm 0.1.5 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -117,13 +117,15 @@ function extractLinks(soup: Soup): string[] {
117
117
 
118
118
  ## Bundle size
119
119
 
120
+ The v0.2.0 optimizations bring the package to under 500 KB:
121
+
120
122
  | Build | Size |
121
123
  |-------|------|
122
124
  | Minified + gzip | ~150 KB |
123
125
  | Minified | ~400 KB |
124
126
 
125
127
  > [!TIP]
126
- > SIMD enabled automatically on Chrome 91+, Firefox 89+, Safari 16.4+.
128
+ > SIMD enabled automatically on Chrome 91+, Firefox 89+, Safari 16.4+. v0.2.0 includes zero-copy serialization for 50-70% memory savings in HTML extraction.
127
129
 
128
130
  ## Browser support
129
131
 
@@ -134,6 +136,10 @@ function extractLinks(soup: Soup): string[] {
134
136
  | Safari | 13+ | 16.4+ |
135
137
  | Edge | 80+ | 91+ |
136
138
 
139
+ ## Built on Servo
140
+
141
+ Powered by battle-tested libraries from the [Servo](https://servo.org/) browser engine: [html5ever](https://crates.io/crates/html5ever) (HTML5 parser) and [selectors](https://crates.io/crates/selectors) (CSS selector engine).
142
+
137
143
  ## Related packages
138
144
 
139
145
  | Platform | Package |
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "scrape-rs contributors"
6
6
  ],
7
7
  "description": "WebAssembly bindings for scrape-rs HTML parsing library",
8
- "version": "0.1.5",
8
+ "version": "0.2.0",
9
9
  "license": "MIT OR Apache-2.0",
10
10
  "repository": {
11
11
  "type": "git",
package/scrape_wasm.d.ts CHANGED
@@ -1,6 +1,43 @@
1
1
  /* tslint:disable */
2
2
  /* eslint-disable */
3
3
 
4
+ /**
5
+ * A pre-compiled CSS selector for efficient repeated matching.
6
+ *
7
+ * Compiled selectors avoid the overhead of parsing the selector string on each query.
8
+ *
9
+ * @example
10
+ * ```javascript
11
+ * import init, { CompiledSelector, Soup } from '@fast-scrape/wasm';
12
+ *
13
+ * await init();
14
+ *
15
+ * const selector = CompiledSelector.compile("div.item");
16
+ * const soup = new Soup("<div class='item'>A</div><div class='item'>B</div>");
17
+ * const items = soup.selectCompiled(selector);
18
+ * console.log(items.length); // 2
19
+ * ```
20
+ */
21
+ export class CompiledSelector {
22
+ private constructor();
23
+ free(): void;
24
+ [Symbol.dispose](): void;
25
+ /**
26
+ * Compile a CSS selector string.
27
+ *
28
+ * @param selector - The CSS selector to compile
29
+ * @returns A compiled selector
30
+ * @throws Error if the selector syntax is invalid
31
+ */
32
+ static compile(selector: string): CompiledSelector;
33
+ /**
34
+ * Get the original selector string.
35
+ *
36
+ * @returns The selector string that was compiled
37
+ */
38
+ readonly source: string;
39
+ }
40
+
4
41
  /**
5
42
  * A parsed HTML document.
6
43
  *
@@ -37,6 +74,13 @@ export class Soup {
37
74
  * @throws Error if the selector syntax is invalid
38
75
  */
39
76
  findAll(selector: string): Tag[];
77
+ /**
78
+ * Find the first element matching a compiled selector.
79
+ *
80
+ * @param selector - A compiled CSS selector
81
+ * @returns The first matching Tag, or undefined if not found
82
+ */
83
+ findCompiled(selector: CompiledSelector): Tag | undefined;
40
84
  /**
41
85
  * Parses an HTML string into a Soup document.
42
86
  *
@@ -44,6 +88,26 @@ export class Soup {
44
88
  * @param config - Optional configuration options
45
89
  */
46
90
  constructor(html: string, config?: SoupConfig | null);
91
+ /**
92
+ * Parse an HTML fragment without html/body wrapper.
93
+ *
94
+ * @param html - HTML fragment string to parse
95
+ * @param context - Optional context element name (default: "body")
96
+ * @param config - Optional parsing configuration
97
+ * @returns A new Soup instance containing the fragment
98
+ *
99
+ * @example
100
+ * ```javascript
101
+ * // Parse without wrapper
102
+ * const soup = Soup.parseFragment("<div>A</div><div>B</div>");
103
+ * const divs = soup.findAll("div");
104
+ * console.log(divs.length); // 2
105
+ *
106
+ * // Parse a <td> fragment using "tr" as the context element
107
+ * const tdSoup = Soup.parseFragment("<td>Cell</td>", "tr");
108
+ * ```
109
+ */
110
+ static parseFragment(html: string, context?: string | null, config?: SoupConfig | null): Soup;
47
111
  /**
48
112
  * Finds all elements matching a CSS selector (alias for findAll).
49
113
  *
@@ -51,6 +115,44 @@ export class Soup {
51
115
  * @returns Array of matching Tag instances
52
116
  */
53
117
  select(selector: string): Tag[];
118
+ /**
119
+ * Extract attribute values from all elements matching a selector.
120
+ *
121
+ * @param selector - CSS selector string
122
+ * @param attr - Attribute name to extract
123
+ * @returns Array of attribute values (undefined if attribute is missing)
124
+ * @throws Error if the selector syntax is invalid
125
+ *
126
+ * @example
127
+ * ```javascript
128
+ * const soup = new Soup("<a href='/a'>A</a><a href='/b'>B</a><a>C</a>");
129
+ * const hrefs = soup.selectAttr("a", "href");
130
+ * // hrefs: ["/a", "/b", undefined]
131
+ * ```
132
+ */
133
+ selectAttr(selector: string, attr: string): any[];
134
+ /**
135
+ * Find all elements matching a compiled selector.
136
+ *
137
+ * @param selector - A compiled CSS selector
138
+ * @returns Array of matching Tag instances
139
+ */
140
+ selectCompiled(selector: CompiledSelector): Tag[];
141
+ /**
142
+ * Extract text content from all elements matching a selector.
143
+ *
144
+ * @param selector - CSS selector string
145
+ * @returns Array of text content strings
146
+ * @throws Error if the selector syntax is invalid
147
+ *
148
+ * @example
149
+ * ```javascript
150
+ * const soup = new Soup("<div>A</div><div>B</div>");
151
+ * const texts = soup.selectText("div");
152
+ * // texts: ["A", "B"]
153
+ * ```
154
+ */
155
+ selectText(selector: string): string[];
54
156
  /**
55
157
  * Get the HTML representation of the document.
56
158
  *
@@ -150,6 +252,44 @@ export class Tag {
150
252
  * @returns The attribute value, or undefined if not present
151
253
  */
152
254
  attr(name: string): string | undefined;
255
+ /**
256
+ * Get all direct child elements with a specific class.
257
+ *
258
+ * @param class_name - The class name to filter by
259
+ * @returns Array of matching child Tag instances
260
+ *
261
+ * @example
262
+ * ```javascript
263
+ * const soup = new Soup("<div><p class='item'>A</p><span>B</span><p class='item'>C</p></div>");
264
+ * const div = soup.find("div");
265
+ * const items = div.childrenByClass("item");
266
+ * // items.length: 2
267
+ * ```
268
+ */
269
+ childrenByClass(class_name: string): Tag[];
270
+ /**
271
+ * Get all direct child elements with a specific tag name.
272
+ *
273
+ * @param name - The tag name to filter by
274
+ * @returns Array of matching child Tag instances
275
+ *
276
+ * @example
277
+ * ```javascript
278
+ * const soup = new Soup("<div><p>A</p><span>B</span><p>C</p></div>");
279
+ * const div = soup.find("div");
280
+ * const paras = div.childrenByName("p");
281
+ * // paras.length: 2
282
+ * ```
283
+ */
284
+ childrenByName(name: string): Tag[];
285
+ /**
286
+ * Find the nearest ancestor matching a CSS selector.
287
+ *
288
+ * @param selector - CSS selector string
289
+ * @returns The nearest matching ancestor Tag, or undefined if not found
290
+ * @throws Error if the selector syntax is invalid
291
+ */
292
+ closest(selector: string): Tag | undefined;
153
293
  /**
154
294
  * Find the first descendant matching a CSS selector.
155
295
  *
@@ -166,6 +306,13 @@ export class Tag {
166
306
  * @throws Error if the selector syntax is invalid
167
307
  */
168
308
  findAll(selector: string): Tag[];
309
+ /**
310
+ * Find the first descendant matching a compiled selector.
311
+ *
312
+ * @param selector - A compiled CSS selector
313
+ * @returns The first matching Tag, or undefined if not found
314
+ */
315
+ findCompiled(selector: CompiledSelector): Tag | undefined;
169
316
  /**
170
317
  * Get an attribute value by name.
171
318
  *
@@ -194,6 +341,50 @@ export class Tag {
194
341
  * @returns Array of matching Tag instances
195
342
  */
196
343
  select(selector: string): Tag[];
344
+ /**
345
+ * Extract attribute values from all descendants matching a selector.
346
+ *
347
+ * @param selector - CSS selector string
348
+ * @param attr - Attribute name to extract
349
+ * @returns Array of attribute values (undefined if attribute is missing)
350
+ * @throws Error if the selector syntax is invalid
351
+ *
352
+ * @example
353
+ * ```javascript
354
+ * const soup = new Soup("<div><a href='/a'>A</a><a href='/b'>B</a></div>");
355
+ * const div = soup.find("div");
356
+ * const hrefs = div.selectAttr("a", "href");
357
+ * // hrefs: ["/a", "/b"]
358
+ * ```
359
+ */
360
+ selectAttr(selector: string, attr: string): any[];
361
+ /**
362
+ * Find all descendants matching a compiled selector.
363
+ *
364
+ * @param selector - A compiled CSS selector
365
+ * @returns Array of matching Tag instances
366
+ */
367
+ selectCompiled(selector: CompiledSelector): Tag[];
368
+ /**
369
+ * Extract text content from all descendants matching a selector.
370
+ *
371
+ * @param selector - CSS selector string
372
+ * @returns Array of text content strings
373
+ * @throws Error if the selector syntax is invalid
374
+ *
375
+ * @example
376
+ * ```javascript
377
+ * const soup = new Soup("<div><p>A</p><p>B</p></div>");
378
+ * const div = soup.find("div");
379
+ * const texts = div.selectText("p");
380
+ * // texts: ["A", "B"]
381
+ * ```
382
+ */
383
+ selectText(selector: string): string[];
384
+ /**
385
+ * Get all ancestor elements (alias for parents).
386
+ */
387
+ readonly ancestors: Tag[];
197
388
  /**
198
389
  * Get all attributes as an object.
199
390
  */
@@ -226,6 +417,10 @@ export class Tag {
226
417
  * Get the next sibling element.
227
418
  */
228
419
  readonly nextSibling: Tag | undefined;
420
+ /**
421
+ * Get all following sibling elements.
422
+ */
423
+ readonly nextSiblings: Tag[];
229
424
  /**
230
425
  * Get the outer HTML (including this element's tags).
231
426
  */
@@ -234,14 +429,40 @@ export class Tag {
234
429
  * Get the parent element.
235
430
  */
236
431
  readonly parent: Tag | undefined;
432
+ /**
433
+ * Get all ancestor elements (from parent toward root).
434
+ */
435
+ readonly parents: Tag[];
237
436
  /**
238
437
  * Get the previous sibling element.
239
438
  */
240
439
  readonly prevSibling: Tag | undefined;
440
+ /**
441
+ * Get all preceding sibling elements (in reverse order).
442
+ */
443
+ readonly prevSiblings: Tag[];
444
+ /**
445
+ * Get all sibling elements (excluding self, in document order).
446
+ */
447
+ readonly siblings: Tag[];
241
448
  /**
242
449
  * Get the text content of this element and all descendants.
243
450
  */
244
451
  readonly text: string;
452
+ /**
453
+ * Get all direct text nodes (excluding descendants).
454
+ *
455
+ * @returns Array of text content strings
456
+ *
457
+ * @example
458
+ * ```javascript
459
+ * const soup = new Soup("<div>Text1<span>Inner</span>Text2</div>");
460
+ * const div = soup.find("div");
461
+ * const texts = div.textNodes;
462
+ * // texts: ["Text1", "Text2"]
463
+ * ```
464
+ */
465
+ readonly textNodes: string[];
245
466
  }
246
467
 
247
468
  /**
@@ -292,16 +513,24 @@ export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembl
292
513
 
293
514
  export interface InitOutput {
294
515
  readonly memory: WebAssembly.Memory;
516
+ readonly __wbg_compiledselector_free: (a: number, b: number) => void;
295
517
  readonly __wbg_soup_free: (a: number, b: number) => void;
296
518
  readonly __wbg_soupconfig_free: (a: number, b: number) => void;
297
519
  readonly __wbg_tag_free: (a: number, b: number) => void;
520
+ readonly compiledselector_compile: (a: number, b: number, c: number) => void;
521
+ readonly compiledselector_source: (a: number, b: number) => void;
298
522
  readonly hasSimdSupport: () => number;
299
523
  readonly parseBatch: (a: number, b: number, c: number) => void;
300
524
  readonly soup_find: (a: number, b: number, c: number, d: number) => void;
301
525
  readonly soup_findAll: (a: number, b: number, c: number, d: number) => void;
526
+ readonly soup_findCompiled: (a: number, b: number) => number;
302
527
  readonly soup_length: (a: number) => number;
303
528
  readonly soup_new: (a: number, b: number, c: number) => number;
529
+ readonly soup_parseFragment: (a: number, b: number, c: number, d: number, e: number) => number;
304
530
  readonly soup_root: (a: number) => number;
531
+ readonly soup_selectAttr: (a: number, b: number, c: number, d: number, e: number, f: number) => void;
532
+ readonly soup_selectCompiled: (a: number, b: number, c: number) => void;
533
+ readonly soup_selectText: (a: number, b: number, c: number, d: number) => void;
305
534
  readonly soup_text: (a: number, b: number) => void;
306
535
  readonly soup_title: (a: number, b: number) => void;
307
536
  readonly soup_toHtml: (a: number, b: number) => void;
@@ -314,13 +543,18 @@ export interface InitOutput {
314
543
  readonly soupconfig_set_preserveWhitespace: (a: number, b: number) => void;
315
544
  readonly soupconfig_set_strictMode: (a: number, b: number) => void;
316
545
  readonly soupconfig_strictMode: (a: number) => number;
546
+ readonly tag_ancestors: (a: number, b: number) => void;
317
547
  readonly tag_attr: (a: number, b: number, c: number, d: number) => void;
318
548
  readonly tag_attrs: (a: number) => number;
319
549
  readonly tag_children: (a: number, b: number) => void;
550
+ readonly tag_childrenByClass: (a: number, b: number, c: number, d: number) => void;
551
+ readonly tag_childrenByName: (a: number, b: number, c: number, d: number) => void;
320
552
  readonly tag_classes: (a: number, b: number) => void;
553
+ readonly tag_closest: (a: number, b: number, c: number, d: number) => void;
321
554
  readonly tag_descendants: (a: number, b: number) => void;
322
555
  readonly tag_find: (a: number, b: number, c: number, d: number) => void;
323
556
  readonly tag_findAll: (a: number, b: number, c: number, d: number) => void;
557
+ readonly tag_findCompiled: (a: number, b: number) => number;
324
558
  readonly tag_get: (a: number, b: number, c: number, d: number) => void;
325
559
  readonly tag_hasAttr: (a: number, b: number, c: number) => number;
326
560
  readonly tag_hasClass: (a: number, b: number, c: number) => number;
@@ -328,14 +562,22 @@ export interface InitOutput {
328
562
  readonly tag_length: (a: number) => number;
329
563
  readonly tag_name: (a: number, b: number) => void;
330
564
  readonly tag_nextSibling: (a: number) => number;
565
+ readonly tag_nextSiblings: (a: number, b: number) => void;
331
566
  readonly tag_outerHTML: (a: number, b: number) => void;
332
567
  readonly tag_parent: (a: number) => number;
333
568
  readonly tag_prevSibling: (a: number) => number;
569
+ readonly tag_prevSiblings: (a: number, b: number) => void;
570
+ readonly tag_selectAttr: (a: number, b: number, c: number, d: number, e: number, f: number) => void;
571
+ readonly tag_selectCompiled: (a: number, b: number, c: number) => void;
572
+ readonly tag_selectText: (a: number, b: number, c: number, d: number) => void;
573
+ readonly tag_siblings: (a: number, b: number) => void;
334
574
  readonly tag_text: (a: number, b: number) => void;
575
+ readonly tag_textNodes: (a: number, b: number) => void;
335
576
  readonly version: (a: number) => void;
336
577
  readonly init: () => void;
337
578
  readonly soup_select: (a: number, b: number, c: number, d: number) => void;
338
579
  readonly tag_select: (a: number, b: number, c: number, d: number) => void;
580
+ readonly tag_parents: (a: number, b: number) => void;
339
581
  readonly __wbindgen_export: (a: number, b: number) => number;
340
582
  readonly __wbindgen_export2: (a: number, b: number, c: number, d: number) => number;
341
583
  readonly __wbindgen_export3: (a: number, b: number, c: number) => void;
package/scrape_wasm.js CHANGED
@@ -1,5 +1,91 @@
1
1
  /* @ts-self-types="./scrape_wasm.d.ts" */
2
2
 
3
+ /**
4
+ * A pre-compiled CSS selector for efficient repeated matching.
5
+ *
6
+ * Compiled selectors avoid the overhead of parsing the selector string on each query.
7
+ *
8
+ * @example
9
+ * ```javascript
10
+ * import init, { CompiledSelector, Soup } from '@fast-scrape/wasm';
11
+ *
12
+ * await init();
13
+ *
14
+ * const selector = CompiledSelector.compile("div.item");
15
+ * const soup = new Soup("<div class='item'>A</div><div class='item'>B</div>");
16
+ * const items = soup.selectCompiled(selector);
17
+ * console.log(items.length); // 2
18
+ * ```
19
+ */
20
+ export class CompiledSelector {
21
+ static __wrap(ptr) {
22
+ ptr = ptr >>> 0;
23
+ const obj = Object.create(CompiledSelector.prototype);
24
+ obj.__wbg_ptr = ptr;
25
+ CompiledSelectorFinalization.register(obj, obj.__wbg_ptr, obj);
26
+ return obj;
27
+ }
28
+ __destroy_into_raw() {
29
+ const ptr = this.__wbg_ptr;
30
+ this.__wbg_ptr = 0;
31
+ CompiledSelectorFinalization.unregister(this);
32
+ return ptr;
33
+ }
34
+ free() {
35
+ const ptr = this.__destroy_into_raw();
36
+ wasm.__wbg_compiledselector_free(ptr, 0);
37
+ }
38
+ /**
39
+ * Compile a CSS selector string.
40
+ *
41
+ * @param selector - The CSS selector to compile
42
+ * @returns A compiled selector
43
+ * @throws Error if the selector syntax is invalid
44
+ * @param {string} selector
45
+ * @returns {CompiledSelector}
46
+ */
47
+ static compile(selector) {
48
+ try {
49
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
50
+ const ptr0 = passStringToWasm0(selector, wasm.__wbindgen_export, wasm.__wbindgen_export2);
51
+ const len0 = WASM_VECTOR_LEN;
52
+ wasm.compiledselector_compile(retptr, ptr0, len0);
53
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
54
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
55
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
56
+ if (r2) {
57
+ throw takeObject(r1);
58
+ }
59
+ return CompiledSelector.__wrap(r0);
60
+ } finally {
61
+ wasm.__wbindgen_add_to_stack_pointer(16);
62
+ }
63
+ }
64
+ /**
65
+ * Get the original selector string.
66
+ *
67
+ * @returns The selector string that was compiled
68
+ * @returns {string}
69
+ */
70
+ get source() {
71
+ let deferred1_0;
72
+ let deferred1_1;
73
+ try {
74
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
75
+ wasm.compiledselector_source(retptr, this.__wbg_ptr);
76
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
77
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
78
+ deferred1_0 = r0;
79
+ deferred1_1 = r1;
80
+ return getStringFromWasm0(r0, r1);
81
+ } finally {
82
+ wasm.__wbindgen_add_to_stack_pointer(16);
83
+ wasm.__wbindgen_export3(deferred1_0, deferred1_1, 1);
84
+ }
85
+ }
86
+ }
87
+ if (Symbol.dispose) CompiledSelector.prototype[Symbol.dispose] = CompiledSelector.prototype.free;
88
+
3
89
  /**
4
90
  * A parsed HTML document.
5
91
  *
@@ -90,6 +176,19 @@ export class Soup {
90
176
  wasm.__wbindgen_add_to_stack_pointer(16);
91
177
  }
92
178
  }
179
+ /**
180
+ * Find the first element matching a compiled selector.
181
+ *
182
+ * @param selector - A compiled CSS selector
183
+ * @returns The first matching Tag, or undefined if not found
184
+ * @param {CompiledSelector} selector
185
+ * @returns {Tag | undefined}
186
+ */
187
+ findCompiled(selector) {
188
+ _assertClass(selector, CompiledSelector);
189
+ const ret = wasm.soup_findCompiled(this.__wbg_ptr, selector.__wbg_ptr);
190
+ return ret === 0 ? undefined : Tag.__wrap(ret);
191
+ }
93
192
  /**
94
193
  * Get the number of nodes in the document.
95
194
  * @returns {number}
@@ -119,6 +218,42 @@ export class Soup {
119
218
  SoupFinalization.register(this, this.__wbg_ptr, this);
120
219
  return this;
121
220
  }
221
+ /**
222
+ * Parse an HTML fragment without html/body wrapper.
223
+ *
224
+ * @param html - HTML fragment string to parse
225
+ * @param context - Optional context element name (default: "body")
226
+ * @param config - Optional parsing configuration
227
+ * @returns A new Soup instance containing the fragment
228
+ *
229
+ * @example
230
+ * ```javascript
231
+ * // Parse without wrapper
232
+ * const soup = Soup.parseFragment("<div>A</div><div>B</div>");
233
+ * const divs = soup.findAll("div");
234
+ * console.log(divs.length); // 2
235
+ *
236
+ * // Parse a <td> fragment using "tr" as the context element
237
+ * const tdSoup = Soup.parseFragment("<td>Cell</td>", "tr");
238
+ * ```
239
+ * @param {string} html
240
+ * @param {string | null} [context]
241
+ * @param {SoupConfig | null} [config]
242
+ * @returns {Soup}
243
+ */
244
+ static parseFragment(html, context, config) {
245
+ const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
246
+ const len0 = WASM_VECTOR_LEN;
247
+ var ptr1 = isLikeNone(context) ? 0 : passStringToWasm0(context, wasm.__wbindgen_export, wasm.__wbindgen_export2);
248
+ var len1 = WASM_VECTOR_LEN;
249
+ let ptr2 = 0;
250
+ if (!isLikeNone(config)) {
251
+ _assertClass(config, SoupConfig);
252
+ ptr2 = config.__destroy_into_raw();
253
+ }
254
+ const ret = wasm.soup_parseFragment(ptr0, len0, ptr1, len1, ptr2);
255
+ return Soup.__wrap(ret);
256
+ }
122
257
  /**
123
258
  * Get the root element of the document.
124
259
  *
@@ -157,6 +292,104 @@ export class Soup {
157
292
  wasm.__wbindgen_add_to_stack_pointer(16);
158
293
  }
159
294
  }
295
+ /**
296
+ * Extract attribute values from all elements matching a selector.
297
+ *
298
+ * @param selector - CSS selector string
299
+ * @param attr - Attribute name to extract
300
+ * @returns Array of attribute values (undefined if attribute is missing)
301
+ * @throws Error if the selector syntax is invalid
302
+ *
303
+ * @example
304
+ * ```javascript
305
+ * const soup = new Soup("<a href='/a'>A</a><a href='/b'>B</a><a>C</a>");
306
+ * const hrefs = soup.selectAttr("a", "href");
307
+ * // hrefs: ["/a", "/b", undefined]
308
+ * ```
309
+ * @param {string} selector
310
+ * @param {string} attr
311
+ * @returns {any[]}
312
+ */
313
+ selectAttr(selector, attr) {
314
+ try {
315
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
316
+ const ptr0 = passStringToWasm0(selector, wasm.__wbindgen_export, wasm.__wbindgen_export2);
317
+ const len0 = WASM_VECTOR_LEN;
318
+ const ptr1 = passStringToWasm0(attr, wasm.__wbindgen_export, wasm.__wbindgen_export2);
319
+ const len1 = WASM_VECTOR_LEN;
320
+ wasm.soup_selectAttr(retptr, this.__wbg_ptr, ptr0, len0, ptr1, len1);
321
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
322
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
323
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
324
+ var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
325
+ if (r3) {
326
+ throw takeObject(r2);
327
+ }
328
+ var v3 = getArrayJsValueFromWasm0(r0, r1).slice();
329
+ wasm.__wbindgen_export3(r0, r1 * 4, 4);
330
+ return v3;
331
+ } finally {
332
+ wasm.__wbindgen_add_to_stack_pointer(16);
333
+ }
334
+ }
335
+ /**
336
+ * Find all elements matching a compiled selector.
337
+ *
338
+ * @param selector - A compiled CSS selector
339
+ * @returns Array of matching Tag instances
340
+ * @param {CompiledSelector} selector
341
+ * @returns {Tag[]}
342
+ */
343
+ selectCompiled(selector) {
344
+ try {
345
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
346
+ _assertClass(selector, CompiledSelector);
347
+ wasm.soup_selectCompiled(retptr, this.__wbg_ptr, selector.__wbg_ptr);
348
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
349
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
350
+ var v1 = getArrayJsValueFromWasm0(r0, r1).slice();
351
+ wasm.__wbindgen_export3(r0, r1 * 4, 4);
352
+ return v1;
353
+ } finally {
354
+ wasm.__wbindgen_add_to_stack_pointer(16);
355
+ }
356
+ }
357
+ /**
358
+ * Extract text content from all elements matching a selector.
359
+ *
360
+ * @param selector - CSS selector string
361
+ * @returns Array of text content strings
362
+ * @throws Error if the selector syntax is invalid
363
+ *
364
+ * @example
365
+ * ```javascript
366
+ * const soup = new Soup("<div>A</div><div>B</div>");
367
+ * const texts = soup.selectText("div");
368
+ * // texts: ["A", "B"]
369
+ * ```
370
+ * @param {string} selector
371
+ * @returns {string[]}
372
+ */
373
+ selectText(selector) {
374
+ try {
375
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
376
+ const ptr0 = passStringToWasm0(selector, wasm.__wbindgen_export, wasm.__wbindgen_export2);
377
+ const len0 = WASM_VECTOR_LEN;
378
+ wasm.soup_selectText(retptr, this.__wbg_ptr, ptr0, len0);
379
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
380
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
381
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
382
+ var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
383
+ if (r3) {
384
+ throw takeObject(r2);
385
+ }
386
+ var v2 = getArrayJsValueFromWasm0(r0, r1).slice();
387
+ wasm.__wbindgen_export3(r0, r1 * 4, 4);
388
+ return v2;
389
+ } finally {
390
+ wasm.__wbindgen_add_to_stack_pointer(16);
391
+ }
392
+ }
160
393
  /**
161
394
  * Get the text content of the entire document.
162
395
  *
@@ -360,6 +593,23 @@ export class Tag {
360
593
  const ptr = this.__destroy_into_raw();
361
594
  wasm.__wbg_tag_free(ptr, 0);
362
595
  }
596
+ /**
597
+ * Get all ancestor elements (alias for parents).
598
+ * @returns {Tag[]}
599
+ */
600
+ get ancestors() {
601
+ try {
602
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
603
+ wasm.tag_ancestors(retptr, this.__wbg_ptr);
604
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
605
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
606
+ var v1 = getArrayJsValueFromWasm0(r0, r1).slice();
607
+ wasm.__wbindgen_export3(r0, r1 * 4, 4);
608
+ return v1;
609
+ } finally {
610
+ wasm.__wbindgen_add_to_stack_pointer(16);
611
+ }
612
+ }
363
613
  /**
364
614
  * Get an attribute value by name (alias for get).
365
615
  *
@@ -411,6 +661,68 @@ export class Tag {
411
661
  wasm.__wbindgen_add_to_stack_pointer(16);
412
662
  }
413
663
  }
664
+ /**
665
+ * Get all direct child elements with a specific class.
666
+ *
667
+ * @param class_name - The class name to filter by
668
+ * @returns Array of matching child Tag instances
669
+ *
670
+ * @example
671
+ * ```javascript
672
+ * const soup = new Soup("<div><p class='item'>A</p><span>B</span><p class='item'>C</p></div>");
673
+ * const div = soup.find("div");
674
+ * const items = div.childrenByClass("item");
675
+ * // items.length: 2
676
+ * ```
677
+ * @param {string} class_name
678
+ * @returns {Tag[]}
679
+ */
680
+ childrenByClass(class_name) {
681
+ try {
682
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
683
+ const ptr0 = passStringToWasm0(class_name, wasm.__wbindgen_export, wasm.__wbindgen_export2);
684
+ const len0 = WASM_VECTOR_LEN;
685
+ wasm.tag_childrenByClass(retptr, this.__wbg_ptr, ptr0, len0);
686
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
687
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
688
+ var v2 = getArrayJsValueFromWasm0(r0, r1).slice();
689
+ wasm.__wbindgen_export3(r0, r1 * 4, 4);
690
+ return v2;
691
+ } finally {
692
+ wasm.__wbindgen_add_to_stack_pointer(16);
693
+ }
694
+ }
695
+ /**
696
+ * Get all direct child elements with a specific tag name.
697
+ *
698
+ * @param name - The tag name to filter by
699
+ * @returns Array of matching child Tag instances
700
+ *
701
+ * @example
702
+ * ```javascript
703
+ * const soup = new Soup("<div><p>A</p><span>B</span><p>C</p></div>");
704
+ * const div = soup.find("div");
705
+ * const paras = div.childrenByName("p");
706
+ * // paras.length: 2
707
+ * ```
708
+ * @param {string} name
709
+ * @returns {Tag[]}
710
+ */
711
+ childrenByName(name) {
712
+ try {
713
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
714
+ const ptr0 = passStringToWasm0(name, wasm.__wbindgen_export, wasm.__wbindgen_export2);
715
+ const len0 = WASM_VECTOR_LEN;
716
+ wasm.tag_childrenByName(retptr, this.__wbg_ptr, ptr0, len0);
717
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
718
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
719
+ var v2 = getArrayJsValueFromWasm0(r0, r1).slice();
720
+ wasm.__wbindgen_export3(r0, r1 * 4, 4);
721
+ return v2;
722
+ } finally {
723
+ wasm.__wbindgen_add_to_stack_pointer(16);
724
+ }
725
+ }
414
726
  /**
415
727
  * Get all classes as an array.
416
728
  * @returns {string[]}
@@ -428,6 +740,32 @@ export class Tag {
428
740
  wasm.__wbindgen_add_to_stack_pointer(16);
429
741
  }
430
742
  }
743
+ /**
744
+ * Find the nearest ancestor matching a CSS selector.
745
+ *
746
+ * @param selector - CSS selector string
747
+ * @returns The nearest matching ancestor Tag, or undefined if not found
748
+ * @throws Error if the selector syntax is invalid
749
+ * @param {string} selector
750
+ * @returns {Tag | undefined}
751
+ */
752
+ closest(selector) {
753
+ try {
754
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
755
+ const ptr0 = passStringToWasm0(selector, wasm.__wbindgen_export, wasm.__wbindgen_export2);
756
+ const len0 = WASM_VECTOR_LEN;
757
+ wasm.tag_closest(retptr, this.__wbg_ptr, ptr0, len0);
758
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
759
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
760
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
761
+ if (r2) {
762
+ throw takeObject(r1);
763
+ }
764
+ return r0 === 0 ? undefined : Tag.__wrap(r0);
765
+ } finally {
766
+ wasm.__wbindgen_add_to_stack_pointer(16);
767
+ }
768
+ }
431
769
  /**
432
770
  * Get all descendant elements.
433
771
  * @returns {Tag[]}
@@ -500,6 +838,19 @@ export class Tag {
500
838
  wasm.__wbindgen_add_to_stack_pointer(16);
501
839
  }
502
840
  }
841
+ /**
842
+ * Find the first descendant matching a compiled selector.
843
+ *
844
+ * @param selector - A compiled CSS selector
845
+ * @returns The first matching Tag, or undefined if not found
846
+ * @param {CompiledSelector} selector
847
+ * @returns {Tag | undefined}
848
+ */
849
+ findCompiled(selector) {
850
+ _assertClass(selector, CompiledSelector);
851
+ const ret = wasm.tag_findCompiled(this.__wbg_ptr, selector.__wbg_ptr);
852
+ return ret === 0 ? undefined : Tag.__wrap(ret);
853
+ }
503
854
  /**
504
855
  * Get an attribute value by name.
505
856
  *
@@ -610,6 +961,23 @@ export class Tag {
610
961
  const ret = wasm.tag_nextSibling(this.__wbg_ptr);
611
962
  return ret === 0 ? undefined : Tag.__wrap(ret);
612
963
  }
964
+ /**
965
+ * Get all following sibling elements.
966
+ * @returns {Tag[]}
967
+ */
968
+ get nextSiblings() {
969
+ try {
970
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
971
+ wasm.tag_nextSiblings(retptr, this.__wbg_ptr);
972
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
973
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
974
+ var v1 = getArrayJsValueFromWasm0(r0, r1).slice();
975
+ wasm.__wbindgen_export3(r0, r1 * 4, 4);
976
+ return v1;
977
+ } finally {
978
+ wasm.__wbindgen_add_to_stack_pointer(16);
979
+ }
980
+ }
613
981
  /**
614
982
  * Get the outer HTML (including this element's tags).
615
983
  * @returns {string}
@@ -638,6 +1006,23 @@ export class Tag {
638
1006
  const ret = wasm.tag_parent(this.__wbg_ptr);
639
1007
  return ret === 0 ? undefined : Tag.__wrap(ret);
640
1008
  }
1009
+ /**
1010
+ * Get all ancestor elements (from parent toward root).
1011
+ * @returns {Tag[]} Array built from the two result words written by the wasm export `tag_ancestors`
1012
+ */
1013
+ get parents() {
1014
+ try {
1015
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); // shift the wasm stack pointer by -16; the returned address receives the results
1016
+ wasm.tag_ancestors(retptr, this.__wbg_ptr); // note: the `parents` getter is backed by the export named `tag_ancestors`
1017
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true); // result word 0, read little-endian
1018
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true); // result word 1
1019
+ var v1 = getArrayJsValueFromWasm0(r0, r1).slice(); // copy taken before the call below; r0/r1 are presumably pointer and element count -- confirm in helper
1020
+ wasm.__wbindgen_export3(r0, r1 * 4, 4); // presumably releases the wasm-side buffer (r1 * 4 bytes, alignment 4) -- confirm
1021
+ return v1;
1022
+ } finally {
1023
+ wasm.__wbindgen_add_to_stack_pointer(16); // undo the -16 shift on every exit path
1024
+ }
1025
+ }
641
1026
  /**
642
1027
  * Get the previous sibling element.
643
1028
  * @returns {Tag | undefined}
@@ -646,6 +1031,23 @@ export class Tag {
646
1031
  const ret = wasm.tag_prevSibling(this.__wbg_ptr);
647
1032
  return ret === 0 ? undefined : Tag.__wrap(ret);
648
1033
  }
1034
+ /**
1035
+ * Get all preceding sibling elements (in reverse order).
1036
+ * @returns {Tag[]} Array built from the two result words written by the wasm export `tag_prevSiblings`
1037
+ */
1038
+ get prevSiblings() {
1039
+ try {
1040
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); // shift the wasm stack pointer by -16; the returned address receives the results
1041
+ wasm.tag_prevSiblings(retptr, this.__wbg_ptr);
1042
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true); // result word 0, read little-endian
1043
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true); // result word 1
1044
+ var v1 = getArrayJsValueFromWasm0(r0, r1).slice(); // copy taken before the call below; r0/r1 are presumably pointer and element count -- confirm in helper
1045
+ wasm.__wbindgen_export3(r0, r1 * 4, 4); // presumably releases the wasm-side buffer (r1 * 4 bytes, alignment 4) -- confirm
1046
+ return v1;
1047
+ } finally {
1048
+ wasm.__wbindgen_add_to_stack_pointer(16); // undo the -16 shift on every exit path
1049
+ }
1050
+ }
649
1051
  /**
650
1052
  * Find all descendants matching a CSS selector (alias for findAll).
651
1053
  *
@@ -674,6 +1076,123 @@ export class Tag {
674
1076
  wasm.__wbindgen_add_to_stack_pointer(16);
675
1077
  }
676
1078
  }
1079
+ /**
1080
+ * Extract attribute values from all descendants matching a selector.
1081
+ *
1082
+ * Both strings go through passStringToWasm0 before the call. A non-zero fourth
1083
+ * result word signals failure: the value obtained from takeObject(r2) is thrown.
1084
+ *
1085
+ * @param {string} selector - CSS selector string
1086
+ * @param {string} attr - Attribute name to extract
1087
+ * @returns {any[]} Array of attribute values (undefined if attribute is missing)
1088
+ * @throws Error if the selector syntax is invalid
1089
+ *
1090
+ * @example
1091
+ * ```javascript
1092
+ * const soup = new Soup("<div><a href='/a'>A</a><a href='/b'>B</a></div>");
1093
+ * const div = soup.find("div");
1094
+ * const hrefs = div.selectAttr("a", "href");
1095
+ * // hrefs: ["/a", "/b"]
1096
+ * ```
1097
+ */
1098
+ selectAttr(selector, attr) {
1099
+ try {
1100
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); // shift the wasm stack pointer by -16; the returned address receives the four result words
1101
+ const ptr0 = passStringToWasm0(selector, wasm.__wbindgen_export, wasm.__wbindgen_export2);
1102
+ const len0 = WASM_VECTOR_LEN; // captured right after the helper call, before the next call can overwrite it
1103
+ const ptr1 = passStringToWasm0(attr, wasm.__wbindgen_export, wasm.__wbindgen_export2);
1104
+ const len1 = WASM_VECTOR_LEN;
1105
+ wasm.tag_selectAttr(retptr, this.__wbg_ptr, ptr0, len0, ptr1, len1);
1106
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true); // result words are read little-endian
1107
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
1108
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true); // consumed only on the error path
1109
+ var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true); // non-zero means the call failed
1110
+ if (r3) {
1111
+ throw takeObject(r2);
1112
+ }
1113
+ var v3 = getArrayJsValueFromWasm0(r0, r1).slice(); // copy taken before the call below; r0/r1 are presumably pointer and element count -- confirm in helper
1114
+ wasm.__wbindgen_export3(r0, r1 * 4, 4); // presumably releases the wasm-side buffer (r1 * 4 bytes, alignment 4) -- confirm
1115
+ return v3;
1116
+ } finally {
1117
+ wasm.__wbindgen_add_to_stack_pointer(16); // undo the -16 shift on every exit path, including the throw above
1118
+ }
1119
+ }
1120
+ /**
1121
+ * Find all descendants matching a compiled selector.
1122
+ *
1123
+ * Only two result words are read after the call; there is no error word to check here.
1124
+ *
1125
+ * @param {CompiledSelector} selector - A compiled CSS selector
1126
+ * @returns {Tag[]} Array of matching Tag instances
1127
+ */
1128
+ selectCompiled(selector) {
1129
+ try {
1130
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); // shift the wasm stack pointer by -16; the returned address receives the results
1131
+ _assertClass(selector, CompiledSelector); // runs inside the try, so the finally below still restores the stack pointer if this throws
1132
+ wasm.tag_selectCompiled(retptr, this.__wbg_ptr, selector.__wbg_ptr);
1133
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true); // result word 0, read little-endian
1134
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true); // result word 1
1135
+ var v1 = getArrayJsValueFromWasm0(r0, r1).slice(); // copy taken before the call below; r0/r1 are presumably pointer and element count -- confirm in helper
1136
+ wasm.__wbindgen_export3(r0, r1 * 4, 4); // presumably releases the wasm-side buffer (r1 * 4 bytes, alignment 4) -- confirm
1137
+ return v1;
1138
+ } finally {
1139
+ wasm.__wbindgen_add_to_stack_pointer(16); // undo the -16 shift on every exit path
1140
+ }
1141
+ }
1142
+ /**
1143
+ * Extract text content from all descendants matching a selector.
1144
+ *
1145
+ * A non-zero fourth result word signals failure: the value obtained from takeObject(r2) is thrown.
1146
+ *
1147
+ * @param {string} selector - CSS selector string
1148
+ * @returns {string[]} Array of text content strings
1149
+ * @throws Error if the selector syntax is invalid
1150
+ *
1151
+ * @example
1152
+ * ```javascript
1153
+ * const soup = new Soup("<div><p>A</p><p>B</p></div>");
1154
+ * const div = soup.find("div");
1155
+ * const texts = div.selectText("p");
1156
+ * // texts: ["A", "B"]
1157
+ * ```
1158
+ */
1159
+ selectText(selector) {
1160
+ try {
1161
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); // shift the wasm stack pointer by -16; the returned address receives the four result words
1162
+ const ptr0 = passStringToWasm0(selector, wasm.__wbindgen_export, wasm.__wbindgen_export2);
1163
+ const len0 = WASM_VECTOR_LEN; // captured right after the helper call
1164
+ wasm.tag_selectText(retptr, this.__wbg_ptr, ptr0, len0);
1165
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true); // result words are read little-endian
1166
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
1167
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true); // consumed only on the error path
1168
+ var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true); // non-zero means the call failed
1169
+ if (r3) {
1170
+ throw takeObject(r2);
1171
+ }
1172
+ var v2 = getArrayJsValueFromWasm0(r0, r1).slice(); // copy taken before the call below; r0/r1 are presumably pointer and element count -- confirm in helper
1173
+ wasm.__wbindgen_export3(r0, r1 * 4, 4); // presumably releases the wasm-side buffer (r1 * 4 bytes, alignment 4) -- confirm
1174
+ return v2;
1175
+ } finally {
1176
+ wasm.__wbindgen_add_to_stack_pointer(16); // undo the -16 shift on every exit path, including the throw above
1177
+ }
1178
+ }
1179
+ /**
1180
+ * Get all sibling elements (excluding self, in document order).
1181
+ * @returns {Tag[]} Array built from the two result words written by the wasm export `tag_siblings`
1182
+ */
1183
+ get siblings() {
1184
+ try {
1185
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); // shift the wasm stack pointer by -16; the returned address receives the results
1186
+ wasm.tag_siblings(retptr, this.__wbg_ptr);
1187
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true); // result word 0, read little-endian
1188
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true); // result word 1
1189
+ var v1 = getArrayJsValueFromWasm0(r0, r1).slice(); // copy taken before the call below; r0/r1 are presumably pointer and element count -- confirm in helper
1190
+ wasm.__wbindgen_export3(r0, r1 * 4, 4); // presumably releases the wasm-side buffer (r1 * 4 bytes, alignment 4) -- confirm
1191
+ return v1;
1192
+ } finally {
1193
+ wasm.__wbindgen_add_to_stack_pointer(16); // undo the -16 shift on every exit path
1194
+ }
1195
+ }
677
1196
  /**
678
1197
  * Get the text content of this element and all descendants.
679
1198
  * @returns {string}
@@ -694,6 +1213,33 @@ export class Tag {
694
1213
  wasm.__wbindgen_export3(deferred1_0, deferred1_1, 1);
695
1214
  }
696
1215
  }
1216
+ /**
1217
+ * Get all direct text nodes (excluding descendants).
1218
+ *
1219
+ * Defined as a getter, so it is read as `tag.textNodes` without a call.
1220
+ *
1221
+ * @returns {string[]} Array of text content strings
1222
+ * @example
1223
+ * ```javascript
1224
+ * const soup = new Soup("<div>Text1<span>Inner</span>Text2</div>");
1225
+ * const div = soup.find("div");
1226
+ * const texts = div.textNodes;
1227
+ * // texts: ["Text1", "Text2"]
1228
+ * ```
1229
+ */
1230
+ get textNodes() {
1231
+ try {
1232
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); // shift the wasm stack pointer by -16; the returned address receives the results
1233
+ wasm.tag_textNodes(retptr, this.__wbg_ptr);
1234
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true); // result word 0, read little-endian
1235
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true); // result word 1
1236
+ var v1 = getArrayJsValueFromWasm0(r0, r1).slice(); // copy taken before the call below; r0/r1 are presumably pointer and element count -- confirm in helper
1237
+ wasm.__wbindgen_export3(r0, r1 * 4, 4); // presumably releases the wasm-side buffer (r1 * 4 bytes, alignment 4) -- confirm
1238
+ return v1;
1239
+ } finally {
1240
+ wasm.__wbindgen_add_to_stack_pointer(16); // undo the -16 shift on every exit path
1241
+ }
1242
+ }
697
1243
  }
698
1244
  if (Symbol.dispose) Tag.prototype[Symbol.dispose] = Tag.prototype.free;
699
1245
 
@@ -850,6 +1396,9 @@ function __wbg_get_imports() {
850
1396
  };
851
1397
  }
852
1398
 
1399
+ const CompiledSelectorFinalization = (typeof FinalizationRegistry === 'undefined') // without FinalizationRegistry, fall back to an object whose register/unregister do nothing
1400
+ ? { register: () => {}, unregister: () => {} }
1401
+ : new FinalizationRegistry(ptr => wasm.__wbg_compiledselector_free(ptr >>> 0, 1)); // cleanup callback passes the held value, coerced to uint32, to the wasm free export
853
1402
  const SoupFinalization = (typeof FinalizationRegistry === 'undefined') // without FinalizationRegistry, fall back to an object whose register/unregister do nothing
854
1403
  ? { register: () => {}, unregister: () => {} }
855
1404
  : new FinalizationRegistry(ptr => wasm.__wbg_soup_free(ptr >>> 0, 1)); // cleanup callback passes the held value, coerced to uint32, to the wasm free export
Binary file