@kreuzberg/html-to-markdown-wasm 2.19.8 → 2.21.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/README.md CHANGED
@@ -40,7 +40,7 @@
40
40
  </div>
41
41
 
42
42
 
43
- <img width="3384" height="573" alt="Linkedin- Banner" src="https://github.com/user-attachments/assets/1bd52e37-c45d-4f5c-8408-ee12997f6cfd" />
43
+ <img width="3384" height="573" alt="Linkedin- Banner" src="https://github.com/user-attachments/assets/478a83da-237b-446b-b3a8-e564c13e00a8" />
44
44
 
45
45
 
46
46
  <div align="center" style="margin-top: 20px;">
@@ -125,6 +125,65 @@ See language-specific documentation for `preserveTags` configuration.
125
125
 
126
126
  </details>
127
127
 
128
+ <details>
129
+ <summary><strong>Skipping Images</strong></summary>
130
+
131
+ Skip all images during conversion using the `skip_images` option (`skipImages` in the TypeScript/Node.js bindings). This is useful for text-only extraction or when you want to filter out visual content.
132
+
133
+ **Rust:**
134
+ ```rust
135
+ use html_to_markdown_rs::{convert, ConversionOptions};
136
+
137
+ let options = ConversionOptions {
138
+ skip_images: true,
139
+ ..Default::default()
140
+ };
141
+
142
+ let html = r#"<p>Text with <img src="image.jpg" alt="pic"> image</p>"#;
143
+ let markdown = convert(html, Some(options))?;
144
+ // Output: "Text with image" (image tags are removed)
145
+ ```
146
+
147
+ **Python:**
148
+ ```python
149
+ from html_to_markdown import convert, ConversionOptions
150
+
151
+ options = ConversionOptions(skip_images=True)
152
+ markdown = convert(html, options)
153
+ ```
154
+
155
+ **TypeScript/Node.js:**
156
+ ```typescript
157
+ import { convert, ConversionOptions } from '@kreuzberg/html-to-markdown-node';
158
+
159
+ const options: ConversionOptions = {
160
+ skipImages: true,
161
+ };
162
+
163
+ const markdown = convert(html, options);
164
+ ```
165
+
166
+ **Ruby:**
167
+ ```ruby
168
+ require 'html_to_markdown'
169
+
170
+ options = HtmlToMarkdown::ConversionOptions.new(skip_images: true)
171
+ markdown = HtmlToMarkdown.convert(html, options)
172
+ ```
173
+
174
+ **PHP:**
175
+ ```php
176
+ use Goldziher\HtmlToMarkdown\HtmlToMarkdown;
177
+ use Goldziher\HtmlToMarkdown\Options;
178
+
179
+ $options = new Options(['skip_images' => true]);
180
+ $markdown = HtmlToMarkdown::convert($html, $options);
181
+ ```
182
+
183
+ This option is available across all language bindings. When enabled, every `<img>` element in the input HTML is dropped, so no Markdown image syntax is emitted in the output.
184
+
185
+ </details>
186
+
128
187
  <details>
129
188
  <summary><strong>Secure by Default</strong></summary>
130
189
 
Binary file
package/dist/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "Na'aman Hirschfeld <nhirschfeld@gmail.com>"
6
6
  ],
7
7
  "description": "HTML to Markdown conversion for WebAssembly targets",
8
- "version": "2.19.8",
8
+ "version": "2.21.1",
9
9
  "license": "MIT",
10
10
  "repository": {
11
11
  "type": "git",
@@ -22,5 +22,11 @@
22
22
  "sideEffects": [
23
23
  "./html_to_markdown_wasm.js",
24
24
  "./snippets/*"
25
+ ],
26
+ "keywords": [
27
+ "html",
28
+ "markdown",
29
+ "wasm",
30
+ "webassembly"
25
31
  ]
26
32
  }
@@ -40,7 +40,7 @@
40
40
  </div>
41
41
 
42
42
 
43
- <img width="3384" height="573" alt="Linkedin- Banner" src="https://github.com/user-attachments/assets/1bd52e37-c45d-4f5c-8408-ee12997f6cfd" />
43
+ <img width="3384" height="573" alt="Linkedin- Banner" src="https://github.com/user-attachments/assets/478a83da-237b-446b-b3a8-e564c13e00a8" />
44
44
 
45
45
 
46
46
  <div align="center" style="margin-top: 20px;">
@@ -125,6 +125,65 @@ See language-specific documentation for `preserveTags` configuration.
125
125
 
126
126
  </details>
127
127
 
128
+ <details>
129
+ <summary><strong>Skipping Images</strong></summary>
130
+
131
+ Skip all images during conversion using the `skip_images` option (`skipImages` in the TypeScript/Node.js bindings). This is useful for text-only extraction or when you want to filter out visual content.
132
+
133
+ **Rust:**
134
+ ```rust
135
+ use html_to_markdown_rs::{convert, ConversionOptions};
136
+
137
+ let options = ConversionOptions {
138
+ skip_images: true,
139
+ ..Default::default()
140
+ };
141
+
142
+ let html = r#"<p>Text with <img src="image.jpg" alt="pic"> image</p>"#;
143
+ let markdown = convert(html, Some(options))?;
144
+ // Output: "Text with image" (image tags are removed)
145
+ ```
146
+
147
+ **Python:**
148
+ ```python
149
+ from html_to_markdown import convert, ConversionOptions
150
+
151
+ options = ConversionOptions(skip_images=True)
152
+ markdown = convert(html, options)
153
+ ```
154
+
155
+ **TypeScript/Node.js:**
156
+ ```typescript
157
+ import { convert, ConversionOptions } from '@kreuzberg/html-to-markdown-node';
158
+
159
+ const options: ConversionOptions = {
160
+ skipImages: true,
161
+ };
162
+
163
+ const markdown = convert(html, options);
164
+ ```
165
+
166
+ **Ruby:**
167
+ ```ruby
168
+ require 'html_to_markdown'
169
+
170
+ options = HtmlToMarkdown::ConversionOptions.new(skip_images: true)
171
+ markdown = HtmlToMarkdown.convert(html, options)
172
+ ```
173
+
174
+ **PHP:**
175
+ ```php
176
+ use Goldziher\HtmlToMarkdown\HtmlToMarkdown;
177
+ use Goldziher\HtmlToMarkdown\Options;
178
+
179
+ $options = new Options(['skip_images' => true]);
180
+ $markdown = HtmlToMarkdown::convert($html, $options);
181
+ ```
182
+
183
+ This option is available across all language bindings. When enabled, every `<img>` element in the input HTML is dropped, so no Markdown image syntax is emitted in the output.
184
+
185
+ </details>
186
+
128
187
  <details>
129
188
  <summary><strong>Secure by Default</strong></summary>
130
189
 
@@ -4,7 +4,7 @@
4
4
  "Na'aman Hirschfeld <nhirschfeld@gmail.com>"
5
5
  ],
6
6
  "description": "HTML to Markdown conversion for WebAssembly targets",
7
- "version": "2.19.8",
7
+ "version": "2.21.1",
8
8
  "license": "MIT",
9
9
  "repository": {
10
10
  "type": "git",
@@ -17,5 +17,11 @@
17
17
  ],
18
18
  "main": "html_to_markdown_wasm.js",
19
19
  "homepage": "https://github.com/kreuzberg-dev/html-to-markdown",
20
- "types": "html_to_markdown_wasm.d.ts"
20
+ "types": "html_to_markdown_wasm.d.ts",
21
+ "keywords": [
22
+ "html",
23
+ "markdown",
24
+ "wasm",
25
+ "webassembly"
26
+ ]
21
27
  }
@@ -40,7 +40,7 @@
40
40
  </div>
41
41
 
42
42
 
43
- <img width="3384" height="573" alt="Linkedin- Banner" src="https://github.com/user-attachments/assets/1bd52e37-c45d-4f5c-8408-ee12997f6cfd" />
43
+ <img width="3384" height="573" alt="Linkedin- Banner" src="https://github.com/user-attachments/assets/478a83da-237b-446b-b3a8-e564c13e00a8" />
44
44
 
45
45
 
46
46
  <div align="center" style="margin-top: 20px;">
@@ -125,6 +125,65 @@ See language-specific documentation for `preserveTags` configuration.
125
125
 
126
126
  </details>
127
127
 
128
+ <details>
129
+ <summary><strong>Skipping Images</strong></summary>
130
+
131
+ Skip all images during conversion using the `skip_images` option (`skipImages` in the TypeScript/Node.js bindings). This is useful for text-only extraction or when you want to filter out visual content.
132
+
133
+ **Rust:**
134
+ ```rust
135
+ use html_to_markdown_rs::{convert, ConversionOptions};
136
+
137
+ let options = ConversionOptions {
138
+ skip_images: true,
139
+ ..Default::default()
140
+ };
141
+
142
+ let html = r#"<p>Text with <img src="image.jpg" alt="pic"> image</p>"#;
143
+ let markdown = convert(html, Some(options))?;
144
+ // Output: "Text with image" (image tags are removed)
145
+ ```
146
+
147
+ **Python:**
148
+ ```python
149
+ from html_to_markdown import convert, ConversionOptions
150
+
151
+ options = ConversionOptions(skip_images=True)
152
+ markdown = convert(html, options)
153
+ ```
154
+
155
+ **TypeScript/Node.js:**
156
+ ```typescript
157
+ import { convert, ConversionOptions } from '@kreuzberg/html-to-markdown-node';
158
+
159
+ const options: ConversionOptions = {
160
+ skipImages: true,
161
+ };
162
+
163
+ const markdown = convert(html, options);
164
+ ```
165
+
166
+ **Ruby:**
167
+ ```ruby
168
+ require 'html_to_markdown'
169
+
170
+ options = HtmlToMarkdown::ConversionOptions.new(skip_images: true)
171
+ markdown = HtmlToMarkdown.convert(html, options)
172
+ ```
173
+
174
+ **PHP:**
175
+ ```php
176
+ use Goldziher\HtmlToMarkdown\HtmlToMarkdown;
177
+ use Goldziher\HtmlToMarkdown\Options;
178
+
179
+ $options = new Options(['skip_images' => true]);
180
+ $markdown = HtmlToMarkdown::convert($html, $options);
181
+ ```
182
+
183
+ This option is available across all language bindings. When enabled, every `<img>` element in the input HTML is dropped, so no Markdown image syntax is emitted in the output.
184
+
185
+ </details>
186
+
128
187
  <details>
129
188
  <summary><strong>Secure by Default</strong></summary>
130
189
 
@@ -5,7 +5,7 @@
5
5
  "Na'aman Hirschfeld <nhirschfeld@gmail.com>"
6
6
  ],
7
7
  "description": "HTML to Markdown conversion for WebAssembly targets",
8
- "version": "2.19.8",
8
+ "version": "2.21.1",
9
9
  "license": "MIT",
10
10
  "repository": {
11
11
  "type": "git",
@@ -21,5 +21,11 @@
21
21
  "types": "html_to_markdown_wasm.d.ts",
22
22
  "sideEffects": [
23
23
  "./snippets/*"
24
+ ],
25
+ "keywords": [
26
+ "html",
27
+ "markdown",
28
+ "wasm",
29
+ "webassembly"
24
30
  ]
25
31
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kreuzberg/html-to-markdown-wasm",
3
- "version": "2.19.8",
3
+ "version": "2.21.1",
4
4
  "description": "High-performance HTML to Markdown converter - WebAssembly bindings",
5
5
  "main": "dist/html_to_markdown_wasm.js",
6
6
  "types": "dist/html_to_markdown_wasm.d.ts",
@@ -61,8 +61,5 @@
61
61
  "publishConfig": {
62
62
  "registry": "https://registry.npmjs.org/",
63
63
  "access": "public"
64
- },
65
- "dependencies": {
66
- "up": "^1.0.2"
67
64
  }
68
65
  }