@fast-scrape/wasm 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,54 +1,50 @@
1
- # @scrape-rs/wasm
1
+ # @fast-scrape/wasm
2
2
 
3
- [![npm](https://img.shields.io/npm/v/@scrape-rs/wasm)](https://www.npmjs.com/package/@scrape-rs/wasm)
4
- [![Bundle Size](https://img.shields.io/bundlephobia/minzip/@scrape-rs/wasm)](https://bundlephobia.com/package/@scrape-rs/wasm)
3
+ [![npm](https://img.shields.io/npm/v/@fast-scrape/wasm)](https://www.npmjs.com/package/@fast-scrape/wasm)
4
+ [![Bundle Size](https://img.shields.io/bundlephobia/minzip/@fast-scrape/wasm)](https://bundlephobia.com/package/@fast-scrape/wasm)
5
5
  [![TypeScript](https://img.shields.io/badge/TypeScript-Ready-blue)](https://www.typescriptlang.org/)
6
- [![codecov](https://codecov.io/gh/bug-ops/scrape-rs/graph/badge.svg?token=6MQTONGT95&flag=wasm)](https://codecov.io/gh/bug-ops/scrape-rs)
7
- [![License](https://img.shields.io/npm/l/@scrape-rs/wasm)](../../LICENSE-MIT)
6
+ [![License](https://img.shields.io/npm/l/@fast-scrape/wasm)](../../LICENSE-MIT)
8
7
 
9
- WebAssembly bindings for scrape-rs, a high-performance HTML parsing library. Run native-speed parsing in the browser.
8
+ **10-50x faster** HTML parsing in the browser. Native-speed parsing via WebAssembly.
10
9
 
11
10
  ## Installation
12
11
 
13
12
  ```bash
14
- # npm
15
- npm install @scrape-rs/wasm
16
-
17
- # yarn
18
- yarn add @scrape-rs/wasm
13
+ npm install @fast-scrape/wasm
14
+ ```
19
15
 
20
- # pnpm
21
- pnpm add @scrape-rs/wasm
16
+ <details>
17
+ <summary>Other package managers</summary>
22
18
 
23
- # bun
24
- bun add @scrape-rs/wasm
19
+ ```bash
20
+ yarn add @fast-scrape/wasm
21
+ pnpm add @fast-scrape/wasm
22
+ bun add @fast-scrape/wasm
25
23
  ```
26
24
 
25
+ </details>
26
+
27
27
  ## Quick start
28
28
 
29
29
  ```typescript
30
- import init, { Soup } from '@scrape-rs/wasm';
30
+ import init, { Soup } from '@fast-scrape/wasm';
31
31
 
32
- // Initialize WASM module (required once)
33
- await init();
32
+ await init(); // Initialize WASM module (once)
34
33
 
35
- const html = "<html><body><div class='content'>Hello, World!</div></body></html>";
36
- const soup = new Soup(html);
37
-
38
- const div = soup.find("div");
39
- console.log(div.text);
40
- // Hello, World!
34
+ const soup = new Soup("<html><body><div class='content'>Hello, World!</div></body></html>");
35
+ console.log(soup.find("div").text); // Hello, World!
41
36
  ```
42
37
 
43
38
  > [!IMPORTANT]
44
- > Call `init()` once before using any other functions. It loads and compiles the WASM module.
39
+ > Call `init()` once before using any other functions.
45
40
 
46
41
  ## Usage
47
42
 
48
- ### Find elements
43
+ <details open>
44
+ <summary><strong>Find elements</strong></summary>
49
45
 
50
46
  ```typescript
51
- import init, { Soup } from '@scrape-rs/wasm';
47
+ import init, { Soup } from '@fast-scrape/wasm';
52
48
 
53
49
  await init();
54
50
 
@@ -66,15 +62,16 @@ for (const el of soup.select("div.content > p")) {
66
62
  }
67
63
  ```
68
64
 
69
- ### With bundlers
65
+ </details>
66
+
67
+ <details>
68
+ <summary><strong>Bundlers</strong></summary>
70
69
 
71
70
  **Vite:**
72
71
 
73
72
  ```typescript
74
- import init, { Soup } from '@scrape-rs/wasm';
75
-
76
- // Vite handles WASM automatically
77
- await init();
73
+ import init, { Soup } from '@fast-scrape/wasm';
74
+ await init(); // Vite handles WASM automatically
78
75
  ```
79
76
 
80
77
  **Webpack 5:**
@@ -82,41 +79,32 @@ await init();
82
79
  ```javascript
83
80
  // webpack.config.js
84
81
  module.exports = {
85
- experiments: {
86
- asyncWebAssembly: true,
87
- },
82
+ experiments: { asyncWebAssembly: true },
88
83
  };
89
84
  ```
90
85
 
91
- ### CDN usage
86
+ </details>
87
+
88
+ <details>
89
+ <summary><strong>CDN usage</strong></summary>
92
90
 
93
91
  ```html
94
92
  <script type="module">
95
- import init, { Soup } from 'https://esm.sh/@scrape-rs/wasm';
93
+ import init, { Soup } from 'https://esm.sh/@fast-scrape/wasm';
96
94
 
97
95
  await init();
98
-
99
96
  const soup = new Soup('<div>Hello</div>');
100
97
  console.log(soup.find('div').text);
101
98
  </script>
102
99
  ```
103
100
 
104
- ## Bundle size
101
+ </details>
105
102
 
106
- | Build | Size |
107
- |-------|------|
108
- | Minified + gzip | ~150 KB |
109
- | Minified | ~400 KB |
110
-
111
- > [!TIP]
112
- > The WASM module includes SIMD optimizations. Modern browsers (Chrome 91+, Firefox 89+, Safari 16.4+) run SIMD automatically.
113
-
114
- ## TypeScript
115
-
116
- Full TypeScript support with exported types:
103
+ <details>
104
+ <summary><strong>TypeScript</strong></summary>
117
105
 
118
106
  ```typescript
119
- import init, { Soup, Tag } from '@scrape-rs/wasm';
107
+ import init, { Soup, Tag } from '@fast-scrape/wasm';
120
108
 
121
109
  await init();
122
110
 
@@ -125,6 +113,18 @@ function extractLinks(soup: Soup): string[] {
125
113
  }
126
114
  ```
127
115
 
116
+ </details>
117
+
118
+ ## Bundle size
119
+
120
+ | Build | Size |
121
+ |-------|------|
122
+ | Minified + gzip | ~150 KB |
123
+ | Minified | ~400 KB |
124
+
125
+ > [!TIP]
126
+ > SIMD is enabled automatically on Chrome 91+, Firefox 89+, and Safari 16.4+.
127
+
128
128
  ## Browser support
129
129
 
130
130
  | Browser | Version | SIMD |
@@ -134,20 +134,14 @@ function extractLinks(soup: Soup): string[] {
134
134
  | Safari | 13+ | 16.4+ |
135
135
  | Edge | 80+ | 91+ |
136
136
 
137
- ## Limitations
138
-
139
- - No parallel processing (WASM threads have limited browser support)
140
- - Must call `init()` before using the API
141
- - Slightly higher memory usage than native bindings
142
-
143
137
  ## Related packages
144
138
 
145
- Part of the [scrape-rs](https://github.com/bug-ops/scrape-rs) project:
146
-
147
- - `scrape-core` — Rust core library
148
- - `scrape-rs` (PyPI) — Python bindings
149
- - `scrape-rs` (npm) — Node.js bindings
139
+ | Platform | Package |
140
+ |----------|---------|
141
+ | Rust | [`scrape-core`](https://crates.io/crates/scrape-core) |
142
+ | Python | [`fast-scrape`](https://pypi.org/project/fast-scrape) |
143
+ | Node.js | [`@fast-scrape/node`](https://www.npmjs.com/package/@fast-scrape/node) |
150
144
 
151
145
  ## License
152
146
 
153
- Licensed under either of [Apache License, Version 2.0](../../LICENSE-APACHE) or [MIT License](../../LICENSE-MIT) at your option.
147
+ MIT OR Apache-2.0
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "scrape-rs contributors"
6
6
  ],
7
7
  "description": "WebAssembly bindings for scrape-rs HTML parsing library",
8
- "version": "0.1.0",
8
+ "version": "0.1.2",
9
9
  "license": "MIT OR Apache-2.0",
10
10
  "repository": {
11
11
  "type": "git",
package/scrape_wasm.d.ts CHANGED
@@ -292,7 +292,28 @@ export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembl
292
292
 
293
293
  export interface InitOutput {
294
294
  readonly memory: WebAssembly.Memory;
295
+ readonly __wbg_soup_free: (a: number, b: number) => void;
296
+ readonly __wbg_soupconfig_free: (a: number, b: number) => void;
295
297
  readonly __wbg_tag_free: (a: number, b: number) => void;
298
+ readonly hasSimdSupport: () => number;
299
+ readonly parseBatch: (a: number, b: number, c: number) => void;
300
+ readonly soup_find: (a: number, b: number, c: number, d: number) => void;
301
+ readonly soup_findAll: (a: number, b: number, c: number, d: number) => void;
302
+ readonly soup_length: (a: number) => number;
303
+ readonly soup_new: (a: number, b: number, c: number) => number;
304
+ readonly soup_root: (a: number) => number;
305
+ readonly soup_text: (a: number, b: number) => void;
306
+ readonly soup_title: (a: number, b: number) => void;
307
+ readonly soup_toHtml: (a: number, b: number) => void;
308
+ readonly soupconfig_includeComments: (a: number) => number;
309
+ readonly soupconfig_maxDepth: (a: number) => number;
310
+ readonly soupconfig_new: () => number;
311
+ readonly soupconfig_preserveWhitespace: (a: number) => number;
312
+ readonly soupconfig_set_includeComments: (a: number, b: number) => void;
313
+ readonly soupconfig_set_maxDepth: (a: number, b: number) => void;
314
+ readonly soupconfig_set_preserveWhitespace: (a: number, b: number) => void;
315
+ readonly soupconfig_set_strictMode: (a: number, b: number) => void;
316
+ readonly soupconfig_strictMode: (a: number) => number;
296
317
  readonly tag_attr: (a: number, b: number, c: number, d: number) => void;
297
318
  readonly tag_attrs: (a: number) => number;
298
319
  readonly tag_children: (a: number, b: number) => void;
@@ -310,32 +331,11 @@ export interface InitOutput {
310
331
  readonly tag_outerHTML: (a: number, b: number) => void;
311
332
  readonly tag_parent: (a: number) => number;
312
333
  readonly tag_prevSibling: (a: number) => number;
313
- readonly tag_select: (a: number, b: number, c: number, d: number) => void;
314
334
  readonly tag_text: (a: number, b: number) => void;
315
- readonly __wbg_soupconfig_free: (a: number, b: number) => void;
316
- readonly soupconfig_includeComments: (a: number) => number;
317
- readonly soupconfig_maxDepth: (a: number) => number;
318
- readonly soupconfig_new: () => number;
319
- readonly soupconfig_preserveWhitespace: (a: number) => number;
320
- readonly soupconfig_set_includeComments: (a: number, b: number) => void;
321
- readonly soupconfig_set_maxDepth: (a: number, b: number) => void;
322
- readonly soupconfig_set_preserveWhitespace: (a: number, b: number) => void;
323
- readonly soupconfig_set_strictMode: (a: number, b: number) => void;
324
- readonly soupconfig_strictMode: (a: number) => number;
325
- readonly __wbg_soup_free: (a: number, b: number) => void;
326
- readonly soup_find: (a: number, b: number, c: number, d: number) => void;
327
- readonly soup_findAll: (a: number, b: number, c: number, d: number) => void;
328
- readonly soup_length: (a: number) => number;
329
- readonly soup_new: (a: number, b: number, c: number) => number;
330
- readonly soup_root: (a: number) => number;
331
- readonly soup_select: (a: number, b: number, c: number, d: number) => void;
332
- readonly soup_text: (a: number, b: number) => void;
333
- readonly soup_title: (a: number, b: number) => void;
334
- readonly soup_toHtml: (a: number, b: number) => void;
335
- readonly hasSimdSupport: () => number;
336
- readonly parseBatch: (a: number, b: number, c: number) => void;
337
335
  readonly version: (a: number) => void;
338
336
  readonly init: () => void;
337
+ readonly soup_select: (a: number, b: number, c: number, d: number) => void;
338
+ readonly tag_select: (a: number, b: number, c: number, d: number) => void;
339
339
  readonly __wbindgen_export: (a: number, b: number) => number;
340
340
  readonly __wbindgen_export2: (a: number, b: number, c: number, d: number) => number;
341
341
  readonly __wbindgen_export3: (a: number, b: number, c: number) => void;
package/scrape_wasm.js CHANGED
@@ -142,7 +142,7 @@ export class Soup {
142
142
  const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
143
143
  const ptr0 = passStringToWasm0(selector, wasm.__wbindgen_export, wasm.__wbindgen_export2);
144
144
  const len0 = WASM_VECTOR_LEN;
145
- wasm.soup_select(retptr, this.__wbg_ptr, ptr0, len0);
145
+ wasm.soup_findAll(retptr, this.__wbg_ptr, ptr0, len0);
146
146
  var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
147
147
  var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
148
148
  var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
@@ -659,7 +659,7 @@ export class Tag {
659
659
  const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
660
660
  const ptr0 = passStringToWasm0(selector, wasm.__wbindgen_export, wasm.__wbindgen_export2);
661
661
  const len0 = WASM_VECTOR_LEN;
662
- wasm.tag_select(retptr, this.__wbg_ptr, ptr0, len0);
662
+ wasm.tag_findAll(retptr, this.__wbg_ptr, ptr0, len0);
663
663
  var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
664
664
  var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
665
665
  var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
Binary file