@kreuzberg/html-to-markdown-wasm 2.19.8 → 2.21.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/README.md +60 -1
- package/dist/html_to_markdown_wasm_bg.wasm +0 -0
- package/dist/package.json +7 -1
- package/dist-node/README.md +60 -1
- package/dist-node/html_to_markdown_wasm_bg.wasm +0 -0
- package/dist-node/package.json +8 -2
- package/dist-web/README.md +60 -1
- package/dist-web/html_to_markdown_wasm_bg.wasm +0 -0
- package/dist-web/package.json +7 -1
- package/package.json +1 -4
package/dist/README.md
CHANGED
|
@@ -40,7 +40,7 @@
|
|
|
40
40
|
</div>
|
|
41
41
|
|
|
42
42
|
|
|
43
|
-
<img width="3384" height="573" alt="Linkedin- Banner" src="https://github.com/user-attachments/assets/
|
|
43
|
+
<img width="3384" height="573" alt="Linkedin- Banner" src="https://github.com/user-attachments/assets/478a83da-237b-446b-b3a8-e564c13e00a8" />
|
|
44
44
|
|
|
45
45
|
|
|
46
46
|
<div align="center" style="margin-top: 20px;">
|
|
@@ -125,6 +125,65 @@ See language-specific documentation for `preserveTags` configuration.
|
|
|
125
125
|
|
|
126
126
|
</details>
|
|
127
127
|
|
|
128
|
+
<details>
|
|
129
|
+
<summary><strong>Skipping Images</strong></summary>
|
|
130
|
+
|
|
131
|
+
Skip all images during conversion using the `skip_images` option. Useful for text-only extraction or when you want to filter out visual content.
|
|
132
|
+
|
|
133
|
+
**Rust:**
|
|
134
|
+
```rust
|
|
135
|
+
use html_to_markdown_rs::{convert, ConversionOptions};
|
|
136
|
+
|
|
137
|
+
let options = ConversionOptions {
|
|
138
|
+
skip_images: true,
|
|
139
|
+
..Default::default()
|
|
140
|
+
};
|
|
141
|
+
|
|
142
|
+
let html = r#"<p>Text with <img src="image.jpg" alt="pic"> image</p>"#;
|
|
143
|
+
let markdown = convert(html, Some(options))?;
|
|
144
|
+
// Output: "Text with image" (image tags are removed)
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
**Python:**
|
|
148
|
+
```python
|
|
149
|
+
from html_to_markdown import convert, ConversionOptions
|
|
150
|
+
|
|
151
|
+
options = ConversionOptions(skip_images=True)
|
|
152
|
+
markdown = convert(html, options)
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
**TypeScript/Node.js:**
|
|
156
|
+
```typescript
|
|
157
|
+
import { convert, ConversionOptions } from '@kreuzberg/html-to-markdown-node';
|
|
158
|
+
|
|
159
|
+
const options: ConversionOptions = {
|
|
160
|
+
skipImages: true,
|
|
161
|
+
};
|
|
162
|
+
|
|
163
|
+
const markdown = convert(html, options);
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
**Ruby:**
|
|
167
|
+
```ruby
|
|
168
|
+
require 'html_to_markdown'
|
|
169
|
+
|
|
170
|
+
options = HtmlToMarkdown::ConversionOptions.new(skip_images: true)
|
|
171
|
+
markdown = HtmlToMarkdown.convert(html, options)
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
**PHP:**
|
|
175
|
+
```php
|
|
176
|
+
use Goldziher\HtmlToMarkdown\HtmlToMarkdown;
|
|
177
|
+
use Goldziher\HtmlToMarkdown\Options;
|
|
178
|
+
|
|
179
|
+
$options = new Options(['skip_images' => true]);
|
|
180
|
+
$markdown = HtmlToMarkdown::convert($html, $options);
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
This option is available across all language bindings. When enabled, all `<img>` tags and their associated markdown image syntax are removed from the output.
|
|
184
|
+
|
|
185
|
+
</details>
|
|
186
|
+
|
|
128
187
|
<details>
|
|
129
188
|
<summary><strong>Secure by Default</strong></summary>
|
|
130
189
|
|
|
Binary file
|
package/dist/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"Na'aman Hirschfeld <nhirschfeld@gmail.com>"
|
|
6
6
|
],
|
|
7
7
|
"description": "HTML to Markdown conversion for WebAssembly targets",
|
|
8
|
-
"version": "2.19.8",
|
|
8
|
+
"version": "2.21.1",
|
|
9
9
|
"license": "MIT",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -22,5 +22,11 @@
|
|
|
22
22
|
"sideEffects": [
|
|
23
23
|
"./html_to_markdown_wasm.js",
|
|
24
24
|
"./snippets/*"
|
|
25
|
+
],
|
|
26
|
+
"keywords": [
|
|
27
|
+
"html",
|
|
28
|
+
"markdown",
|
|
29
|
+
"wasm",
|
|
30
|
+
"webassembly"
|
|
25
31
|
]
|
|
26
32
|
}
|
package/dist-node/README.md
CHANGED
|
@@ -40,7 +40,7 @@
|
|
|
40
40
|
</div>
|
|
41
41
|
|
|
42
42
|
|
|
43
|
-
<img width="3384" height="573" alt="Linkedin- Banner" src="https://github.com/user-attachments/assets/
|
|
43
|
+
<img width="3384" height="573" alt="Linkedin- Banner" src="https://github.com/user-attachments/assets/478a83da-237b-446b-b3a8-e564c13e00a8" />
|
|
44
44
|
|
|
45
45
|
|
|
46
46
|
<div align="center" style="margin-top: 20px;">
|
|
@@ -125,6 +125,65 @@ See language-specific documentation for `preserveTags` configuration.
|
|
|
125
125
|
|
|
126
126
|
</details>
|
|
127
127
|
|
|
128
|
+
<details>
|
|
129
|
+
<summary><strong>Skipping Images</strong></summary>
|
|
130
|
+
|
|
131
|
+
Skip all images during conversion using the `skip_images` option. Useful for text-only extraction or when you want to filter out visual content.
|
|
132
|
+
|
|
133
|
+
**Rust:**
|
|
134
|
+
```rust
|
|
135
|
+
use html_to_markdown_rs::{convert, ConversionOptions};
|
|
136
|
+
|
|
137
|
+
let options = ConversionOptions {
|
|
138
|
+
skip_images: true,
|
|
139
|
+
..Default::default()
|
|
140
|
+
};
|
|
141
|
+
|
|
142
|
+
let html = r#"<p>Text with <img src="image.jpg" alt="pic"> image</p>"#;
|
|
143
|
+
let markdown = convert(html, Some(options))?;
|
|
144
|
+
// Output: "Text with image" (image tags are removed)
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
**Python:**
|
|
148
|
+
```python
|
|
149
|
+
from html_to_markdown import convert, ConversionOptions
|
|
150
|
+
|
|
151
|
+
options = ConversionOptions(skip_images=True)
|
|
152
|
+
markdown = convert(html, options)
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
**TypeScript/Node.js:**
|
|
156
|
+
```typescript
|
|
157
|
+
import { convert, ConversionOptions } from '@kreuzberg/html-to-markdown-node';
|
|
158
|
+
|
|
159
|
+
const options: ConversionOptions = {
|
|
160
|
+
skipImages: true,
|
|
161
|
+
};
|
|
162
|
+
|
|
163
|
+
const markdown = convert(html, options);
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
**Ruby:**
|
|
167
|
+
```ruby
|
|
168
|
+
require 'html_to_markdown'
|
|
169
|
+
|
|
170
|
+
options = HtmlToMarkdown::ConversionOptions.new(skip_images: true)
|
|
171
|
+
markdown = HtmlToMarkdown.convert(html, options)
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
**PHP:**
|
|
175
|
+
```php
|
|
176
|
+
use Goldziher\HtmlToMarkdown\HtmlToMarkdown;
|
|
177
|
+
use Goldziher\HtmlToMarkdown\Options;
|
|
178
|
+
|
|
179
|
+
$options = new Options(['skip_images' => true]);
|
|
180
|
+
$markdown = HtmlToMarkdown::convert($html, $options);
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
This option is available across all language bindings. When enabled, all `<img>` tags and their associated markdown image syntax are removed from the output.
|
|
184
|
+
|
|
185
|
+
</details>
|
|
186
|
+
|
|
128
187
|
<details>
|
|
129
188
|
<summary><strong>Secure by Default</strong></summary>
|
|
130
189
|
|
|
Binary file
|
package/dist-node/package.json
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
"Na'aman Hirschfeld <nhirschfeld@gmail.com>"
|
|
5
5
|
],
|
|
6
6
|
"description": "HTML to Markdown conversion for WebAssembly targets",
|
|
7
|
-
"version": "2.19.8",
|
|
7
|
+
"version": "2.21.1",
|
|
8
8
|
"license": "MIT",
|
|
9
9
|
"repository": {
|
|
10
10
|
"type": "git",
|
|
@@ -17,5 +17,11 @@
|
|
|
17
17
|
],
|
|
18
18
|
"main": "html_to_markdown_wasm.js",
|
|
19
19
|
"homepage": "https://github.com/kreuzberg-dev/html-to-markdown",
|
|
20
|
-
"types": "html_to_markdown_wasm.d.ts"
|
|
20
|
+
"types": "html_to_markdown_wasm.d.ts",
|
|
21
|
+
"keywords": [
|
|
22
|
+
"html",
|
|
23
|
+
"markdown",
|
|
24
|
+
"wasm",
|
|
25
|
+
"webassembly"
|
|
26
|
+
]
|
|
21
27
|
}
|
package/dist-web/README.md
CHANGED
|
@@ -40,7 +40,7 @@
|
|
|
40
40
|
</div>
|
|
41
41
|
|
|
42
42
|
|
|
43
|
-
<img width="3384" height="573" alt="Linkedin- Banner" src="https://github.com/user-attachments/assets/
|
|
43
|
+
<img width="3384" height="573" alt="Linkedin- Banner" src="https://github.com/user-attachments/assets/478a83da-237b-446b-b3a8-e564c13e00a8" />
|
|
44
44
|
|
|
45
45
|
|
|
46
46
|
<div align="center" style="margin-top: 20px;">
|
|
@@ -125,6 +125,65 @@ See language-specific documentation for `preserveTags` configuration.
|
|
|
125
125
|
|
|
126
126
|
</details>
|
|
127
127
|
|
|
128
|
+
<details>
|
|
129
|
+
<summary><strong>Skipping Images</strong></summary>
|
|
130
|
+
|
|
131
|
+
Skip all images during conversion using the `skip_images` option. Useful for text-only extraction or when you want to filter out visual content.
|
|
132
|
+
|
|
133
|
+
**Rust:**
|
|
134
|
+
```rust
|
|
135
|
+
use html_to_markdown_rs::{convert, ConversionOptions};
|
|
136
|
+
|
|
137
|
+
let options = ConversionOptions {
|
|
138
|
+
skip_images: true,
|
|
139
|
+
..Default::default()
|
|
140
|
+
};
|
|
141
|
+
|
|
142
|
+
let html = r#"<p>Text with <img src="image.jpg" alt="pic"> image</p>"#;
|
|
143
|
+
let markdown = convert(html, Some(options))?;
|
|
144
|
+
// Output: "Text with image" (image tags are removed)
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
**Python:**
|
|
148
|
+
```python
|
|
149
|
+
from html_to_markdown import convert, ConversionOptions
|
|
150
|
+
|
|
151
|
+
options = ConversionOptions(skip_images=True)
|
|
152
|
+
markdown = convert(html, options)
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
**TypeScript/Node.js:**
|
|
156
|
+
```typescript
|
|
157
|
+
import { convert, ConversionOptions } from '@kreuzberg/html-to-markdown-node';
|
|
158
|
+
|
|
159
|
+
const options: ConversionOptions = {
|
|
160
|
+
skipImages: true,
|
|
161
|
+
};
|
|
162
|
+
|
|
163
|
+
const markdown = convert(html, options);
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
**Ruby:**
|
|
167
|
+
```ruby
|
|
168
|
+
require 'html_to_markdown'
|
|
169
|
+
|
|
170
|
+
options = HtmlToMarkdown::ConversionOptions.new(skip_images: true)
|
|
171
|
+
markdown = HtmlToMarkdown.convert(html, options)
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
**PHP:**
|
|
175
|
+
```php
|
|
176
|
+
use Goldziher\HtmlToMarkdown\HtmlToMarkdown;
|
|
177
|
+
use Goldziher\HtmlToMarkdown\Options;
|
|
178
|
+
|
|
179
|
+
$options = new Options(['skip_images' => true]);
|
|
180
|
+
$markdown = HtmlToMarkdown::convert($html, $options);
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
This option is available across all language bindings. When enabled, all `<img>` tags and their associated markdown image syntax are removed from the output.
|
|
184
|
+
|
|
185
|
+
</details>
|
|
186
|
+
|
|
128
187
|
<details>
|
|
129
188
|
<summary><strong>Secure by Default</strong></summary>
|
|
130
189
|
|
|
Binary file
|
package/dist-web/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"Na'aman Hirschfeld <nhirschfeld@gmail.com>"
|
|
6
6
|
],
|
|
7
7
|
"description": "HTML to Markdown conversion for WebAssembly targets",
|
|
8
|
-
"version": "2.19.8",
|
|
8
|
+
"version": "2.21.1",
|
|
9
9
|
"license": "MIT",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -21,5 +21,11 @@
|
|
|
21
21
|
"types": "html_to_markdown_wasm.d.ts",
|
|
22
22
|
"sideEffects": [
|
|
23
23
|
"./snippets/*"
|
|
24
|
+
],
|
|
25
|
+
"keywords": [
|
|
26
|
+
"html",
|
|
27
|
+
"markdown",
|
|
28
|
+
"wasm",
|
|
29
|
+
"webassembly"
|
|
24
30
|
]
|
|
25
31
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@kreuzberg/html-to-markdown-wasm",
|
|
3
|
-
"version": "2.19.8",
|
|
3
|
+
"version": "2.21.1",
|
|
4
4
|
"description": "High-performance HTML to Markdown converter - WebAssembly bindings",
|
|
5
5
|
"main": "dist/html_to_markdown_wasm.js",
|
|
6
6
|
"types": "dist/html_to_markdown_wasm.d.ts",
|
|
@@ -61,8 +61,5 @@
|
|
|
61
61
|
"publishConfig": {
|
|
62
62
|
"registry": "https://registry.npmjs.org/",
|
|
63
63
|
"access": "public"
|
|
64
|
-
},
|
|
65
|
-
"dependencies": {
|
|
66
|
-
"up": "^1.0.2"
|
|
67
64
|
}
|
|
68
65
|
}
|