@fast-scrape/wasm 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -5,7 +5,7 @@
5
5
  [![TypeScript](https://img.shields.io/badge/TypeScript-Ready-blue)](https://www.typescriptlang.org/)
6
6
  [![License](https://img.shields.io/npm/l/@fast-scrape/wasm)](../../LICENSE-MIT)
7
7
 
8
- **10-50x faster** HTML parsing in the browser. Native-speed parsing via WebAssembly.
8
+ **Native-comparable** HTML parsing in the browser via WebAssembly. Parses large documents **1.5-2x faster** than DOMParser.
9
9
 
10
10
  ## Installation
11
11
 
@@ -115,17 +115,39 @@ function extractLinks(soup: Soup): string[] {
115
115
 
116
116
  </details>
117
117
 
118
+ ## Performance
119
+
120
+ Native-speed parsing in browsers with SIMD acceleration:
121
+
122
+ <details open>
123
+ <summary><strong>Browser performance vs native DOMParser</strong></summary>
124
+
125
+ | Operation | @fast-scrape/wasm | Native DOMParser | Notes |
126
+ |-----------|------------------|------------------|-------|
127
+ | Parse 100KB HTML | **2.1 ms** | 3.2 ms | 1.5x faster |
128
+ | find(".class") | **0.3 µs** | N/A | CSS selector optimization |
129
+ | find("#id") | **0.2 µs** | N/A | ID selector optimization |
130
+ | Memory (100KB doc) | **8.4 MB** | 12.2 MB | ~30% less memory |
131
+
132
+ **Key advantages:**
133
+ - Compiled Rust guarantees memory safety
134
+ - CSS selector lookups complete in under a microsecond
135
+ - Automatic SIMD acceleration on modern browsers
136
+ - 50-70% memory savings in HTML extraction via zero-copy serialization
137
+
138
+ </details>
139
+
118
140
  ## Bundle size
119
141
 
120
- v0.2.0 optimization brings package to under 500 KB:
142
+ Optimized package under 500 KB:
121
143
 
122
144
  | Build | Size |
123
145
  |-------|------|
124
- | Minified + gzip | ~150 KB |
146
+ | Minified + gzip | **285 KB** |
125
147
  | Minified | ~400 KB |
126
148
 
127
149
  > [!TIP]
128
- > SIMD enabled automatically on Chrome 91+, Firefox 89+, Safari 16.4+. v0.2.0 includes zero-copy serialization for 50-70% memory savings in HTML extraction.
150
+ > SIMD enabled automatically on Chrome 91+, Firefox 89+, Safari 16.4+. Zero-copy serialization provides 50-70% memory savings in HTML extraction.
129
151
 
130
152
  ## Browser support
131
153
 
@@ -136,9 +158,14 @@ v0.2.0 optimization brings package to under 500 KB:
136
158
  | Safari | 13+ | 16.4+ |
137
159
  | Edge | 80+ | 91+ |
138
160
 
139
- ## Built on Servo
161
+ ## Built on Servo and Cloudflare
162
+
163
+ **Parsing & Selection (Servo browser engine):**
164
+ - [html5ever](https://crates.io/crates/html5ever) — Spec-compliant HTML5 parser
165
+ - [selectors](https://crates.io/crates/selectors) — CSS selector matching engine
140
166
 
141
- Powered by battle-tested libraries from the [Servo](https://servo.org/) browser engine: [html5ever](https://crates.io/crates/html5ever) (HTML5 parser) and [selectors](https://crates.io/crates/selectors) (CSS selector engine).
167
+ **Streaming Parser (Cloudflare):**
168
+ - [lol_html](https://github.com/cloudflare/lol_html) — High-performance streaming HTML parser with constant-memory event-driven API
142
169
 
143
170
  ## Related packages
144
171
 
package/package.json CHANGED
@@ -2,10 +2,10 @@
2
2
  "name": "@fast-scrape/wasm",
3
3
  "type": "module",
4
4
  "collaborators": [
5
- "scrape-rs contributors"
5
+ "Andrei G. <k05h31@gmail.com>"
6
6
  ],
7
7
  "description": "WebAssembly bindings for scrape-rs HTML parsing library",
8
- "version": "0.2.0",
8
+ "version": "0.2.1",
9
9
  "license": "MIT OR Apache-2.0",
10
10
  "repository": {
11
11
  "type": "git",
package/scrape_wasm.d.ts CHANGED
@@ -555,7 +555,6 @@ export interface InitOutput {
555
555
  readonly tag_find: (a: number, b: number, c: number, d: number) => void;
556
556
  readonly tag_findAll: (a: number, b: number, c: number, d: number) => void;
557
557
  readonly tag_findCompiled: (a: number, b: number) => number;
558
- readonly tag_get: (a: number, b: number, c: number, d: number) => void;
559
558
  readonly tag_hasAttr: (a: number, b: number, c: number) => number;
560
559
  readonly tag_hasClass: (a: number, b: number, c: number) => number;
561
560
  readonly tag_innerHTML: (a: number, b: number) => void;
@@ -578,6 +577,7 @@ export interface InitOutput {
578
577
  readonly soup_select: (a: number, b: number, c: number, d: number) => void;
579
578
  readonly tag_select: (a: number, b: number, c: number, d: number) => void;
580
579
  readonly tag_parents: (a: number, b: number) => void;
580
+ readonly tag_get: (a: number, b: number, c: number, d: number) => void;
581
581
  readonly __wbindgen_export: (a: number, b: number) => number;
582
582
  readonly __wbindgen_export2: (a: number, b: number, c: number, d: number) => number;
583
583
  readonly __wbindgen_export3: (a: number, b: number, c: number) => void;
package/scrape_wasm.js CHANGED
@@ -864,7 +864,7 @@ export class Tag {
864
864
  const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
865
865
  const ptr0 = passStringToWasm0(name, wasm.__wbindgen_export, wasm.__wbindgen_export2);
866
866
  const len0 = WASM_VECTOR_LEN;
867
- wasm.tag_get(retptr, this.__wbg_ptr, ptr0, len0);
867
+ wasm.tag_attr(retptr, this.__wbg_ptr, ptr0, len0);
868
868
  var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
869
869
  var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
870
870
  let v2;
Binary file