html-to-markdown 2.25.0 → 2.25.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/README.md +1 -1
- data/ext/html-to-markdown-rb/native/Cargo.lock +7 -7
- data/ext/html-to-markdown-rb/native/Cargo.toml +1 -1
- data/lib/html_to_markdown/version.rb +1 -1
- data/rust-vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +20 -5
- data/rust-vendor/html-to-markdown-rs/src/rcdom.rs +3 -0
- data/rust-vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +157 -0
- data/rust-vendor/memmap2/.cargo-checksum.json +1 -1
- data/rust-vendor/memmap2/.cargo_vcs_info.json +1 -1
- data/rust-vendor/memmap2/CHANGELOG.md +8 -0
- data/rust-vendor/memmap2/Cargo.lock +1 -1
- data/rust-vendor/memmap2/Cargo.toml +2 -1
- data/rust-vendor/memmap2/Cargo.toml.orig +2 -1
- data/rust-vendor/memmap2/src/lib.rs +25 -1
- data/rust-vendor/memmap2/src/stub.rs +1 -4
- data/rust-vendor/memmap2/src/unix.rs +14 -1
- data/rust-vendor/syn/.cargo-checksum.json +1 -1
- data/rust-vendor/syn/.cargo_vcs_info.json +1 -1
- data/rust-vendor/syn/Cargo.lock +40 -41
- data/rust-vendor/syn/Cargo.toml +1 -1
- data/rust-vendor/syn/Cargo.toml.orig +1 -1
- data/rust-vendor/syn/src/item.rs +61 -40
- data/rust-vendor/syn/src/lib.rs +2 -1
- data/rust-vendor/syn/tests/test_item.rs +54 -0
- data/rust-vendor/unicode-ident/.cargo-checksum.json +1 -1
- data/rust-vendor/unicode-ident/.cargo_vcs_info.json +1 -1
- data/rust-vendor/unicode-ident/Cargo.lock +21 -21
- data/rust-vendor/unicode-ident/Cargo.toml +1 -1
- data/rust-vendor/unicode-ident/Cargo.toml.orig +1 -1
- data/rust-vendor/unicode-ident/src/lib.rs +1 -1
- data/rust-vendor/unicode-ident/src/tables.rs +87 -97
- data/rust-vendor/unicode-ident/tests/static_size.rs +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: deb434d4fa161dcd1df3b836eac2ce0133bac0124f88a890757042044501acf2
|
|
4
|
+
data.tar.gz: 76a70b3e1a1abafa465b0d076526d1b58d50f3a39453e406bc2d4b4c8b669ce6
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 59404b56deed91b2f0c65bf358fd05b685042804b30a2ee92ebb5cdb220a6b89c9f3dbc62d5576ef7d818c6372c42c1f8a2cc8bfbd2b8db16c8ecdbcdd60d427
|
|
7
|
+
data.tar.gz: 30f12a84b9b2b32aa6a1c1c3499ff8777e22994c84a38a22d4127eb91274ba4c9923270c2e8583e56b6cf758202581c58b38315a38fbd8c28ec737251ae32288
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
html-to-markdown (2.25.0)
|
|
4
|
+
html-to-markdown (2.25.1)
|
|
5
5
|
rb_sys (>= 0.9, < 1.0)
|
|
6
6
|
|
|
7
7
|
GEM
|
|
@@ -161,7 +161,7 @@ CHECKSUMS
|
|
|
161
161
|
ffi (1.17.3-x86_64-darwin) sha256=1f211811eb5cfaa25998322cdd92ab104bfbd26d1c4c08471599c511f2c00bb5
|
|
162
162
|
ffi (1.17.3-x86_64-linux-gnu) sha256=3746b01f677aae7b16dc1acb7cb3cc17b3e35bdae7676a3f568153fb0e2c887f
|
|
163
163
|
fileutils (1.8.0) sha256=8c6b1df54e2540bdb2f39258f08af78853aa70bad52b4d394bbc6424593c6e02
|
|
164
|
-
html-to-markdown (2.25.0)
|
|
164
|
+
html-to-markdown (2.25.1)
|
|
165
165
|
i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5
|
|
166
166
|
json (2.18.1) sha256=fe112755501b8d0466b5ada6cf50c8c3f41e897fa128ac5d263ec09eedc9f986
|
|
167
167
|
language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
|
data/README.md
CHANGED
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/html-to-markdown?label=Java&color=007ec6" alt="Java">
|
|
19
19
|
</a>
|
|
20
20
|
<a href="https://pkg.go.dev/github.com/kreuzberg-dev/html-to-markdown/packages/go/v2/htmltomarkdown">
|
|
21
|
-
<img src="https://img.shields.io/badge/Go-v2.
|
|
21
|
+
<img src="https://img.shields.io/badge/Go-v2.25.1-007ec6" alt="Go">
|
|
22
22
|
</a>
|
|
23
23
|
<a href="https://www.nuget.org/packages/KreuzbergDev.HtmlToMarkdown/">
|
|
24
24
|
<img src="https://img.shields.io/nuget/v/KreuzbergDev.HtmlToMarkdown?label=C%23&color=007ec6" alt="C#">
|
|
data/ext/html-to-markdown-rb/native/Cargo.lock
CHANGED
|
@@ -424,7 +424,7 @@ dependencies = [
|
|
|
424
424
|
|
|
425
425
|
[[package]]
|
|
426
426
|
name = "html-to-markdown-rb"
|
|
427
|
-
version = "2.25.0"
|
|
427
|
+
version = "2.25.1"
|
|
428
428
|
dependencies = [
|
|
429
429
|
"html-to-markdown-rs",
|
|
430
430
|
"magnus",
|
|
@@ -670,9 +670,9 @@ checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
|
|
|
670
670
|
|
|
671
671
|
[[package]]
|
|
672
672
|
name = "memmap2"
|
|
673
|
-
version = "0.9.9"
|
|
673
|
+
version = "0.9.10"
|
|
674
674
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
675
|
-
checksum = "
|
|
675
|
+
checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3"
|
|
676
676
|
dependencies = [
|
|
677
677
|
"libc",
|
|
678
678
|
]
|
|
@@ -1204,9 +1204,9 @@ dependencies = [
|
|
|
1204
1204
|
|
|
1205
1205
|
[[package]]
|
|
1206
1206
|
name = "syn"
|
|
1207
|
-
version = "2.0.
|
|
1207
|
+
version = "2.0.116"
|
|
1208
1208
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1209
|
-
checksum = "
|
|
1209
|
+
checksum = "3df424c70518695237746f84cede799c9c58fcb37450d7b23716568cc8bc69cb"
|
|
1210
1210
|
dependencies = [
|
|
1211
1211
|
"proc-macro2",
|
|
1212
1212
|
"quote",
|
|
@@ -1259,9 +1259,9 @@ dependencies = [
|
|
|
1259
1259
|
|
|
1260
1260
|
[[package]]
|
|
1261
1261
|
name = "unicode-ident"
|
|
1262
|
-
version = "1.0.
|
|
1262
|
+
version = "1.0.24"
|
|
1263
1263
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1264
|
-
checksum = "
|
|
1264
|
+
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
|
1265
1265
|
|
|
1266
1266
|
[[package]]
|
|
1267
1267
|
name = "unicode-xid"
|
|
data/rust-vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs
CHANGED
|
@@ -57,17 +57,30 @@ pub fn detect_heading_paragraph(element: &HocrElement, text: &str) -> Option<Str
|
|
|
57
57
|
return None;
|
|
58
58
|
}
|
|
59
59
|
|
|
60
|
-
let
|
|
60
|
+
let line_children: Vec<&HocrElement> = element
|
|
61
61
|
.children
|
|
62
62
|
.iter()
|
|
63
63
|
.filter(|child| matches!(child.element_type, HocrElementType::OcrLine | HocrElementType::OcrxLine))
|
|
64
|
-
.
|
|
64
|
+
.collect();
|
|
65
65
|
|
|
66
|
-
if
|
|
66
|
+
if line_children.len() != 1 {
|
|
67
67
|
return None;
|
|
68
68
|
}
|
|
69
69
|
|
|
70
|
-
|
|
70
|
+
// Determine effective font size from child line elements.
|
|
71
|
+
// First check x_fsize, then fall back to bbox height as a proxy.
|
|
72
|
+
let font_size = line_children.iter().find_map(|child| {
|
|
73
|
+
child
|
|
74
|
+
.properties
|
|
75
|
+
.x_fsize
|
|
76
|
+
.or_else(|| child.properties.bbox.map(|b| b.height()))
|
|
77
|
+
});
|
|
78
|
+
|
|
79
|
+
let has_large_font = font_size.is_some_and(|size| size >= 14);
|
|
80
|
+
|
|
81
|
+
let char_limit = if has_large_font { 80 } else { 60 };
|
|
82
|
+
|
|
83
|
+
if text.is_empty() || text.len() > char_limit || text.contains(':') || text.contains('\n') {
|
|
71
84
|
return None;
|
|
72
85
|
}
|
|
73
86
|
|
|
@@ -83,7 +96,9 @@ pub fn detect_heading_paragraph(element: &HocrElement, text: &str) -> Option<Str
|
|
|
83
96
|
}
|
|
84
97
|
}
|
|
85
98
|
|
|
86
|
-
|
|
99
|
+
// Allow single-word headings when font size is large
|
|
100
|
+
let min_words = if has_large_font { 1 } else { 2 };
|
|
101
|
+
if word_count < min_words {
|
|
87
102
|
return None;
|
|
88
103
|
}
|
|
89
104
|
|
|
data/rust-vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs
CHANGED
|
@@ -350,3 +350,160 @@ fn test_container_elements() {
|
|
|
350
350
|
assert!(markdown.contains("Layout analysis"));
|
|
351
351
|
assert!(markdown.contains("Block content"));
|
|
352
352
|
}
|
|
353
|
+
|
|
354
|
+
#[test]
|
|
355
|
+
fn test_ocr_header_renders_as_italic_not_heading() {
|
|
356
|
+
// OcrHeader is a "page running header" (repeated at top of pages),
|
|
357
|
+
// NOT a section heading. It must render as italic (*text*), not as # heading.
|
|
358
|
+
let hocr = r#"<div class="ocr_page" title="bbox 0 0 1000 1000">
|
|
359
|
+
<div class="ocr_header" title="bbox 0 0 1000 50">
|
|
360
|
+
<span class="ocr_line" title="bbox 0 0 500 30">
|
|
361
|
+
<span class="ocrx_word" title="bbox 0 0 100 30; x_wconf 95">Chapter</span>
|
|
362
|
+
<span class="ocrx_word" title="bbox 110 0 200 30; x_wconf 95">One</span>
|
|
363
|
+
</span>
|
|
364
|
+
</div>
|
|
365
|
+
<p class="ocr_par" title="bbox 0 100 900 200">
|
|
366
|
+
<span class="ocr_line" title="bbox 0 100 800 130">
|
|
367
|
+
<span class="ocrx_word" title="bbox 0 100 50 130; x_wconf 95">Some</span>
|
|
368
|
+
<span class="ocrx_word" title="bbox 60 100 120 130; x_wconf 95">body</span>
|
|
369
|
+
<span class="ocrx_word" title="bbox 130 100 180 130; x_wconf 95">text</span>
|
|
370
|
+
<span class="ocrx_word" title="bbox 190 100 240 130; x_wconf 95">here</span>
|
|
371
|
+
</span>
|
|
372
|
+
</p>
|
|
373
|
+
</div>"#;
|
|
374
|
+
|
|
375
|
+
let dom = tl::parse(hocr, tl::ParserOptions::default()).unwrap();
|
|
376
|
+
let (elements, _) = extract_hocr_document(&dom);
|
|
377
|
+
let markdown = convert_to_markdown(&elements, true);
|
|
378
|
+
|
|
379
|
+
// OcrHeader must render as italic
|
|
380
|
+
assert!(
|
|
381
|
+
markdown.contains("*Chapter One*"),
|
|
382
|
+
"OcrHeader should render as italic (*text*), got: {markdown}"
|
|
383
|
+
);
|
|
384
|
+
// It must NOT render as a markdown heading
|
|
385
|
+
assert!(
|
|
386
|
+
!markdown.contains("# Chapter One"),
|
|
387
|
+
"OcrHeader must NOT render as a markdown heading, got: {markdown}"
|
|
388
|
+
);
|
|
389
|
+
}
|
|
390
|
+
|
|
391
|
+
#[test]
|
|
392
|
+
fn test_heading_detection_with_x_fsize_on_line_child() {
|
|
393
|
+
// A paragraph containing a single ocr_line child with x_fsize 18 (large font)
|
|
394
|
+
// and short capitalized text should be detected as a heading.
|
|
395
|
+
let hocr = r#"<div class="ocr_page" title="bbox 0 0 1000 1000">
|
|
396
|
+
<div class="ocr_carea" title="bbox 0 0 1000 500">
|
|
397
|
+
<p class="ocr_par" title="bbox 0 0 500 40">
|
|
398
|
+
<span class="ocr_line" title="bbox 0 0 500 30; x_fsize 18">
|
|
399
|
+
<span class="ocrx_word" title="bbox 0 0 120 30; x_wconf 95">Important</span>
|
|
400
|
+
<span class="ocrx_word" title="bbox 130 0 250 30; x_wconf 95">Section</span>
|
|
401
|
+
<span class="ocrx_word" title="bbox 260 0 350 30; x_wconf 95">Title</span>
|
|
402
|
+
</span>
|
|
403
|
+
</p>
|
|
404
|
+
<p class="ocr_par" title="bbox 0 60 900 200">
|
|
405
|
+
<span class="ocr_line" title="bbox 0 60 800 90; x_fsize 12">
|
|
406
|
+
<span class="ocrx_word" title="bbox 0 60 50 90; x_wconf 95">This</span>
|
|
407
|
+
<span class="ocrx_word" title="bbox 60 60 90 90; x_wconf 92">is</span>
|
|
408
|
+
<span class="ocrx_word" title="bbox 100 60 200 90; x_wconf 98">regular</span>
|
|
409
|
+
<span class="ocrx_word" title="bbox 210 60 280 90; x_wconf 98">body</span>
|
|
410
|
+
<span class="ocrx_word" title="bbox 290 60 340 90; x_wconf 98">text</span>
|
|
411
|
+
<span class="ocrx_word" title="bbox 350 60 430 90; x_wconf 98">content.</span>
|
|
412
|
+
</span>
|
|
413
|
+
</p>
|
|
414
|
+
</div>
|
|
415
|
+
</div>"#;
|
|
416
|
+
|
|
417
|
+
let dom = tl::parse(hocr, tl::ParserOptions::default()).unwrap();
|
|
418
|
+
let (elements, _) = extract_hocr_document(&dom);
|
|
419
|
+
let markdown = convert_to_markdown(&elements, true);
|
|
420
|
+
|
|
421
|
+
// The large-font paragraph should be detected as a heading
|
|
422
|
+
assert!(
|
|
423
|
+
markdown.contains("# Important Section Title"),
|
|
424
|
+
"Large font paragraph should be detected as heading, got: {markdown}"
|
|
425
|
+
);
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
#[test]
|
|
429
|
+
fn test_single_word_heading_with_large_font() {
|
|
430
|
+
// A single-word paragraph with large font size should be detected as a heading.
|
|
431
|
+
// Without font size awareness, single-word paragraphs are rejected.
|
|
432
|
+
let hocr = r#"<div class="ocr_page" title="bbox 0 0 1000 1000">
|
|
433
|
+
<div class="ocr_carea" title="bbox 0 0 1000 500">
|
|
434
|
+
<p class="ocr_par" title="bbox 0 0 300 40">
|
|
435
|
+
<span class="ocr_line" title="bbox 0 0 300 30; x_fsize 24">
|
|
436
|
+
<span class="ocrx_word" title="bbox 0 0 200 30; x_wconf 95">Introduction</span>
|
|
437
|
+
</span>
|
|
438
|
+
</p>
|
|
439
|
+
<p class="ocr_par" title="bbox 0 60 900 200">
|
|
440
|
+
<span class="ocr_line" title="bbox 0 60 800 90; x_fsize 12">
|
|
441
|
+
<span class="ocrx_word" title="bbox 0 60 50 90; x_wconf 95">Some</span>
|
|
442
|
+
<span class="ocrx_word" title="bbox 60 60 120 90; x_wconf 92">body</span>
|
|
443
|
+
<span class="ocrx_word" title="bbox 130 60 180 90; x_wconf 98">text</span>
|
|
444
|
+
<span class="ocrx_word" title="bbox 190 60 280 90; x_wconf 98">follows.</span>
|
|
445
|
+
</span>
|
|
446
|
+
</p>
|
|
447
|
+
</div>
|
|
448
|
+
</div>"#;
|
|
449
|
+
|
|
450
|
+
let dom = tl::parse(hocr, tl::ParserOptions::default()).unwrap();
|
|
451
|
+
let (elements, _) = extract_hocr_document(&dom);
|
|
452
|
+
let markdown = convert_to_markdown(&elements, true);
|
|
453
|
+
|
|
454
|
+
// Single word with large font should be detected as heading
|
|
455
|
+
assert!(
|
|
456
|
+
markdown.contains("# Introduction"),
|
|
457
|
+
"Single word with large font should be detected as heading, got: {markdown}"
|
|
458
|
+
);
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
#[test]
|
|
462
|
+
fn test_single_word_without_large_font_not_heading() {
|
|
463
|
+
// A single-word paragraph without large font should NOT be detected as heading.
|
|
464
|
+
// This ensures we haven't broken the existing behavior.
|
|
465
|
+
let hocr = r#"<div class="ocr_page" title="bbox 0 0 1000 1000">
|
|
466
|
+
<div class="ocr_carea" title="bbox 0 0 1000 500">
|
|
467
|
+
<p class="ocr_par" title="bbox 0 0 300 20">
|
|
468
|
+
<span class="ocr_line" title="bbox 0 0 300 12; x_fsize 10">
|
|
469
|
+
<span class="ocrx_word" title="bbox 0 0 100 12; x_wconf 95">Word</span>
|
|
470
|
+
</span>
|
|
471
|
+
</p>
|
|
472
|
+
</div>
|
|
473
|
+
</div>"#;
|
|
474
|
+
|
|
475
|
+
let dom = tl::parse(hocr, tl::ParserOptions::default()).unwrap();
|
|
476
|
+
let (elements, _) = extract_hocr_document(&dom);
|
|
477
|
+
let markdown = convert_to_markdown(&elements, true);
|
|
478
|
+
|
|
479
|
+
// Single word with small font should NOT be a heading
|
|
480
|
+
assert!(
|
|
481
|
+
!markdown.contains("# Word"),
|
|
482
|
+
"Single word with small font should not be detected as heading, got: {markdown}"
|
|
483
|
+
);
|
|
484
|
+
}
|
|
485
|
+
|
|
486
|
+
#[test]
|
|
487
|
+
fn test_heading_detection_with_bbox_height_proxy() {
|
|
488
|
+
// When x_fsize is absent, bbox height should serve as a font-size proxy.
|
|
489
|
+
// A bbox height of 30 pixels (>= 14) indicates large text.
|
|
490
|
+
let hocr = r#"<div class="ocr_page" title="bbox 0 0 1000 1000">
|
|
491
|
+
<div class="ocr_carea" title="bbox 0 0 1000 500">
|
|
492
|
+
<p class="ocr_par" title="bbox 0 0 500 40">
|
|
493
|
+
<span class="ocr_line" title="bbox 0 0 500 30">
|
|
494
|
+
<span class="ocrx_word" title="bbox 0 0 200 30; x_wconf 95">Summary</span>
|
|
495
|
+
</span>
|
|
496
|
+
</p>
|
|
497
|
+
</div>
|
|
498
|
+
</div>"#;
|
|
499
|
+
|
|
500
|
+
let dom = tl::parse(hocr, tl::ParserOptions::default()).unwrap();
|
|
501
|
+
let (elements, _) = extract_hocr_document(&dom);
|
|
502
|
+
let markdown = convert_to_markdown(&elements, true);
|
|
503
|
+
|
|
504
|
+
// bbox height of 30 (y2=30 - y1=0) should serve as proxy for large font
|
|
505
|
+
assert!(
|
|
506
|
+
markdown.contains("# Summary"),
|
|
507
|
+
"Single word with tall bbox (height=30) should be detected as heading via bbox proxy, got: {markdown}"
|
|
508
|
+
);
|
|
509
|
+
}
|
|
data/rust-vendor/memmap2/.cargo-checksum.json
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"files":{".cargo_vcs_info.json":"
|
|
1
|
+
{"files":{".cargo_vcs_info.json":"57a15fc6b2ddb894def64e09a11e5cc52e575be77740a6fc0aa3c451c8c0fa49",".github/workflows/main.yml":"e2b3d5678a31325a616bae0c1ccb223f9ed2f5b43d39a134c5c45858e4fbf4ca","CHANGELOG.md":"832a95917c80f443c113ede708a5602550d081bd79cc33f4152365357ab36bb8","Cargo.lock":"6ddd1fa91a86a6e8bbbc17161703f21799213d823ea26d7f7ce86181295a3fc8","Cargo.toml":"01ff6425d680ddac0a8efb1b41e6b0503d0153388fadcc4bf584ca38c4dca221","Cargo.toml.orig":"507519ce5facd6b78a80e3cfab1437c7b6b5a1fc99c65e24552525c884d4ed3e","LICENSE-APACHE":"04ea4849dba9dcae07113850c6f1b1a69052c625210639914eee352023f750ad","LICENSE-MIT":"0d25d03b5ab49576178ad0cae7a2648d12c17ad0452fe49c07e55e4b59aa5257","README.md":"e3388f55065d69e076d90871c0a91dc97420bd0d07b4f154b08e40ac47b115eb","examples/cat.rs":"594b9457ca6eb4ce9b840133da5076fa7b96334953df03f894233169564622f6","src/advice.rs":"a4c023982a598a77c23b5a4e524de581329d42287d639be88e2ffda3bd929511","src/lib.rs":"d93f73dd80b5bfdecc10836a7ebcd04c124f6283f9a104686fe48a18d34764ab","src/stub.rs":"beccccb0233903df5de1773674b2dcd9b0991889a10c23719f5aee8f7496f958","src/unix.rs":"fea7c7c21a6082bc77052e5e40a2bff1311103a19c2cd281ff383604fa799b35","src/windows.rs":"0fbd1efc122a7e83defb5d0a401e973a3876e90c6a1f14f2ac1976462633dd79"},"package":"714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3"}
|
|
data/rust-vendor/memmap2/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file.
|
|
|
4
4
|
The format is based on [Keep a Changelog](http://keepachangelog.com/)
|
|
5
5
|
and this project adheres to [Semantic Versioning](http://semver.org/).
|
|
6
6
|
|
|
7
|
+
## [0.9.10] - 2026-02-15
|
|
8
|
+
### Fixed
|
|
9
|
+
- Fix compilation on AIX targets.
|
|
10
|
+
[@xingxue-ibm](https://github.com/xingxue-ibm)
|
|
11
|
+
### Changed
|
|
12
|
+
- Return `ErrorKind::Unsupported` on unsupported platforms to allow reliable runtime detection.
|
|
13
|
+
[@daxpedda](https://github.com/daxpedda)
|
|
14
|
+
|
|
7
15
|
## [0.9.9] - 2025-10-21
|
|
8
16
|
### Fixed
|
|
9
17
|
- Fix compilation on visionOS (and other apple operating systems).
|
|
data/rust-vendor/memmap2/Cargo.toml
CHANGED
|
@@ -13,10 +13,11 @@
|
|
|
13
13
|
edition = "2021"
|
|
14
14
|
rust-version = "1.63"
|
|
15
15
|
name = "memmap2"
|
|
16
|
-
version = "0.9.9"
|
|
16
|
+
version = "0.9.10"
|
|
17
17
|
authors = [
|
|
18
18
|
"Dan Burkert <dan@danburkert.com>",
|
|
19
19
|
"Yevhenii Reizner <razrfalcon@gmail.com>",
|
|
20
|
+
"The Contributors",
|
|
20
21
|
]
|
|
21
22
|
build = false
|
|
22
23
|
autolib = false
|
|
data/rust-vendor/memmap2/Cargo.toml.orig
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "memmap2"
|
|
3
|
-
version = "0.9.9"
|
|
3
|
+
version = "0.9.10"
|
|
4
4
|
authors = [
|
|
5
5
|
"Dan Burkert <dan@danburkert.com>",
|
|
6
6
|
"Yevhenii Reizner <razrfalcon@gmail.com>",
|
|
7
|
+
"The Contributors",
|
|
7
8
|
]
|
|
8
9
|
license = "MIT OR Apache-2.0"
|
|
9
10
|
repository = "https://github.com/RazrFalcon/memmap2-rs"
|
|
data/rust-vendor/memmap2/src/lib.rs
CHANGED
|
@@ -402,6 +402,8 @@ impl MmapOptions {
|
|
|
402
402
|
/// This method returns an error when the underlying system call fails, which can happen for a
|
|
403
403
|
/// variety of reasons, such as when the file is not open with read permissions.
|
|
404
404
|
///
|
|
405
|
+
/// Returns [`ErrorKind::Unsupported`] on unsupported platforms.
|
|
406
|
+
///
|
|
405
407
|
/// # Example
|
|
406
408
|
///
|
|
407
409
|
/// ```
|
|
@@ -446,6 +448,8 @@ impl MmapOptions {
|
|
|
446
448
|
///
|
|
447
449
|
/// This method returns an error when the underlying system call fails, which can happen for a
|
|
448
450
|
/// variety of reasons, such as when the file is not open with read permissions.
|
|
451
|
+
///
|
|
452
|
+
/// Returns [`ErrorKind::Unsupported`] on unsupported platforms.
|
|
449
453
|
pub unsafe fn map_exec<T: MmapAsRawDesc>(&self, file: T) -> Result<Mmap> {
|
|
450
454
|
let desc = file.as_raw_desc();
|
|
451
455
|
|
|
@@ -470,6 +474,8 @@ impl MmapOptions {
|
|
|
470
474
|
/// This method returns an error when the underlying system call fails, which can happen for a
|
|
471
475
|
/// variety of reasons, such as when the file is not open with read and write permissions.
|
|
472
476
|
///
|
|
477
|
+
/// Returns [`ErrorKind::Unsupported`] on unsupported platforms.
|
|
478
|
+
///
|
|
473
479
|
/// # Example
|
|
474
480
|
///
|
|
475
481
|
/// ```
|
|
@@ -520,6 +526,8 @@ impl MmapOptions {
|
|
|
520
526
|
/// This method returns an error when the underlying system call fails, which can happen for a
|
|
521
527
|
/// variety of reasons, such as when the file is not open with writable permissions.
|
|
522
528
|
///
|
|
529
|
+
/// Returns [`ErrorKind::Unsupported`] on unsupported platforms.
|
|
530
|
+
///
|
|
523
531
|
/// # Example
|
|
524
532
|
///
|
|
525
533
|
/// ```
|
|
@@ -558,6 +566,8 @@ impl MmapOptions {
|
|
|
558
566
|
/// This method returns an error when the underlying system call fails, which can happen for a
|
|
559
567
|
/// variety of reasons, such as when the file is not open with read permissions.
|
|
560
568
|
///
|
|
569
|
+
/// Returns [`ErrorKind::Unsupported`] on unsupported platforms.
|
|
570
|
+
///
|
|
561
571
|
/// # Example
|
|
562
572
|
///
|
|
563
573
|
/// ```
|
|
@@ -602,6 +612,8 @@ impl MmapOptions {
|
|
|
602
612
|
///
|
|
603
613
|
/// This method returns an error when the underlying system call fails or
|
|
604
614
|
/// when `len > isize::MAX`.
|
|
615
|
+
///
|
|
616
|
+
/// Returns [`ErrorKind::Unsupported`] on unsupported platforms.
|
|
605
617
|
pub fn map_anon(&self) -> Result<MmapMut> {
|
|
606
618
|
let len = self.len.unwrap_or(0);
|
|
607
619
|
|
|
@@ -624,6 +636,8 @@ impl MmapOptions {
|
|
|
624
636
|
///
|
|
625
637
|
/// This method returns an error when the underlying system call fails, which can happen for a
|
|
626
638
|
/// variety of reasons, such as when the file is not open with read and write permissions.
|
|
639
|
+
///
|
|
640
|
+
/// Returns [`ErrorKind::Unsupported`] on unsupported platforms.
|
|
627
641
|
pub fn map_raw<T: MmapAsRawDesc>(&self, file: T) -> Result<MmapRaw> {
|
|
628
642
|
let desc = file.as_raw_desc();
|
|
629
643
|
|
|
@@ -644,7 +658,9 @@ impl MmapOptions {
|
|
|
644
658
|
///
|
|
645
659
|
/// # Errors
|
|
646
660
|
///
|
|
647
|
-
/// This method returns an error when the underlying system call fails
|
|
661
|
+
/// This method returns an error when the underlying system call fails.
|
|
662
|
+
///
|
|
663
|
+
/// Returns [`ErrorKind::Unsupported`] on unsupported platforms.
|
|
648
664
|
pub fn map_raw_read_only<T: MmapAsRawDesc>(&self, file: T) -> Result<MmapRaw> {
|
|
649
665
|
let desc = file.as_raw_desc();
|
|
650
666
|
|
|
@@ -720,6 +736,8 @@ impl Mmap {
|
|
|
720
736
|
/// This method returns an error when the underlying system call fails, which can happen for a
|
|
721
737
|
/// variety of reasons, such as when the file is not open with read permissions.
|
|
722
738
|
///
|
|
739
|
+
/// Returns [`ErrorKind::Unsupported`] on unsupported platforms.
|
|
740
|
+
///
|
|
723
741
|
/// # Example
|
|
724
742
|
///
|
|
725
743
|
/// ```
|
|
@@ -922,6 +940,8 @@ impl MmapRaw {
|
|
|
922
940
|
///
|
|
923
941
|
/// This method returns an error when the underlying system call fails, which can happen for a
|
|
924
942
|
/// variety of reasons, such as when the file is not open with read and write permissions.
|
|
943
|
+
///
|
|
944
|
+
/// Returns [`ErrorKind::Unsupported`] on unsupported platforms.
|
|
925
945
|
pub fn map_raw<T: MmapAsRawDesc>(file: T) -> Result<MmapRaw> {
|
|
926
946
|
MmapOptions::new().map_raw(file)
|
|
927
947
|
}
|
|
@@ -1184,6 +1204,8 @@ impl MmapMut {
|
|
|
1184
1204
|
/// This method returns an error when the underlying system call fails, which can happen for a
|
|
1185
1205
|
/// variety of reasons, such as when the file is not open with read and write permissions.
|
|
1186
1206
|
///
|
|
1207
|
+
/// Returns [`ErrorKind::Unsupported`] on unsupported platforms.
|
|
1208
|
+
///
|
|
1187
1209
|
/// # Example
|
|
1188
1210
|
///
|
|
1189
1211
|
/// ```
|
|
@@ -1222,6 +1244,8 @@ impl MmapMut {
|
|
|
1222
1244
|
///
|
|
1223
1245
|
/// This method returns an error when the underlying system call fails or
|
|
1224
1246
|
/// when `len > isize::MAX`.
|
|
1247
|
+
///
|
|
1248
|
+
/// Returns [`ErrorKind::Unsupported`] on unsupported platforms.
|
|
1225
1249
|
pub fn map_anon(length: usize) -> Result<MmapMut> {
|
|
1226
1250
|
MmapOptions::new().len(length).map_anon()
|
|
1227
1251
|
}
|
|
data/rust-vendor/memmap2/src/stub.rs
CHANGED
|
@@ -10,10 +10,7 @@ pub struct MmapInner {
|
|
|
10
10
|
|
|
11
11
|
impl MmapInner {
|
|
12
12
|
fn new() -> io::Result<MmapInner> {
|
|
13
|
-
Err(io::Error::new(
|
|
14
|
-
io::ErrorKind::Other,
|
|
15
|
-
"platform not supported",
|
|
16
|
-
))
|
|
13
|
+
Err(io::ErrorKind::Unsupported.into())
|
|
17
14
|
}
|
|
18
15
|
|
|
19
16
|
pub fn map(_: usize, _: &File, _: u64, _: bool, _: bool) -> io::Result<MmapInner> {
|
|
data/rust-vendor/memmap2/src/unix.rs
CHANGED
|
@@ -426,7 +426,20 @@ impl MmapInner {
|
|
|
426
426
|
let offset = offset as isize - alignment as isize;
|
|
427
427
|
let len = len + alignment;
|
|
428
428
|
unsafe {
|
|
429
|
-
|
|
429
|
+
let ptr = {
|
|
430
|
+
// The AIX signature of 'madvise()' differs from the POSIX
|
|
431
|
+
// specification, which expects 'void *' as the type of the
|
|
432
|
+
// 'addr' argument, whereas AIX uses 'caddr_t' (i.e., 'char *').
|
|
433
|
+
#[cfg(target_os = "aix")]
|
|
434
|
+
{
|
|
435
|
+
self.ptr.offset(offset) as *mut u8
|
|
436
|
+
}
|
|
437
|
+
#[cfg(not(target_os = "aix"))]
|
|
438
|
+
{
|
|
439
|
+
self.ptr.offset(offset)
|
|
440
|
+
}
|
|
441
|
+
};
|
|
442
|
+
if libc::madvise(ptr, len, advice) != 0 {
|
|
430
443
|
Err(io::Error::last_os_error())
|
|
431
444
|
} else {
|
|
432
445
|
Ok(())
|
|
data/rust-vendor/syn/.cargo-checksum.json
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"files":{".cargo_vcs_info.json":"
|
|
1
|
+
{"files":{".cargo_vcs_info.json":"6402b2ff827b513eb74fc84160cb9c6477299014d33fef8392ee572a7bca8768","Cargo.lock":"6e1651679f9e799959a2b7e920bcb2912a83840ca2894575bff44d36e64d59e2","Cargo.toml":"9a07b798897556cdba0170b758e8ff096d5242a426c9232e3b9b3d233cf2fab3","Cargo.toml.orig":"051087ef93f16c496ce8b532a236ded9fb816d3aaadd01ed1d9167124f96ab6a","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"84d8f027ce3c19ae9cf91e68207b0bc737b71aa25706ba575bef7d4cba6fc937","benches/file.rs":"6f2ae7748d1576cff48e4ef55d4e87e2f5bb7898a36aa99ea6dd6ed0f72a4e3c","benches/rust.rs":"cc050157e925cf54e39575a8de7b3d58aede4afa582c21023bd4bf9f03e3a8ee","src/attr.rs":"2108cf48c98ed0a7833ec73b841fccc254c733453a7e69127b0255541bbc9572","src/bigint.rs":"21cf6cf0c8e175421a6579321878e140d6cd2c975e5bdac4eadde25907d7e879","src/buffer.rs":"5b4daf35958f18dd389bcd49be73087b2c6f1971412eac32c1b66968ec786912","src/classify.rs":"200e81f2245dc23621026849542d95d1e7c320c775362e080857673b51bc82b7","src/custom_keyword.rs":"322114e36ae43a2f8605506fb4568efdbc2986853e2fee74bd10a4ca0fb60c69","src/custom_punctuation.rs":"67134b7685ce89eddb7bf4ee3c9a48e5eb8ecbb1703cd49a01cbb115239fb814","src/data.rs":"7fa9741c5527b790d968430e69a5c0f34ab37360e0a8af2f273a70d8f702b29c","src/derive.rs":"4927cfd64d2c61bb9ee8462b83d5bbce77e3f85c3a03a5565ecf6f08d55367c3","src/discouraged.rs":"d5538b42691e418faf3a0b5fb2b16813efc6a80ffb59dab58a0f170c4a71cbfb","src/drops.rs":"c48e679507424c08716b48256a06d59fe55e877de336c40f0df56cfd80e23826","src/error.rs":"34a045645056452a76862f08084ae9a92aa24a38daa25586e9ab12f5fa9c3b23","src/export.rs":"d9039ba7f1455089139cf8a6871159f0ec57949cc730b42fe3c318a8397d3368","src/expr.rs":"9ab5bf8bd9d45647f6f23fbe3641b90f837226eec124dd0973b7332ae41b7f52","src/ext.rs":"8bc0d0ffab88c73d2305c9f588e6b5285501de8869308162f91019c83bc5bdf1","src/file.rs":"0cc23703e5c0478f14d8ec285ca5c925a0c556c686f4f5d0
fc55e19fc03e33f6","src/fixup.rs":"7647cde30efdce96b1488ae805788c168d4499b464b7d421abc17ea8ffde66f2","src/gen/clone.rs":"7af00b0a240453d7aac178be1b6cdf9df3b33f1281da35e02f66ba7ba55d060c","src/gen/debug.rs":"7d249085b57e0f751f4e8b9b8fe38cd056a4d56b2f474d4b4f8ba3d489068322","src/gen/eq.rs":"fa2c783b0eb58af46d9a599cce0131379c616d396c8e1124970502673e9001a3","src/gen/fold.rs":"30a91e710427a15d197957d7471282f4a1231181736ed7a8686c9b0f27248e41","src/gen/hash.rs":"f4a58047eb829f16c9ce9db08b2e73c2cc2369275cc47ef32147365778de49ba","src/gen/token.css":"3a5882d0b3aefbf56ca5d08c28c117e25ac2327eadf7242202d906b2ddb2232e","src/gen/visit.rs":"fe1443aa7953eaca10d6bf982396e627e31ce6b8aea8eb7cf949e0adeea5badb","src/gen/visit_mut.rs":"a48310aa9580af2eff96ff872b3b5093656cf48ddc0c22c02ef33884e477d633","src/generics.rs":"4c9f5236b029ee7d95a52f607afdc06a6169bf02292b27b904928c0eb8726c67","src/group.rs":"61e067094aa930b6550dc3717a5aaeef7dabfdcf1a177f83fb636598fa7e4e72","src/ident.rs":"4e50859365382708c2c09abfb88eac4b0d35c9b3f884e58cc22392d91be2a0e1","src/item.rs":"278f0f7467169463b430476f10715d893ed2a3f8d9aab0260f8bc00d80ba5c73","src/lib.rs":"a1f43525e61c4c9365e09097dd3ba3dbc2a7b902f79f939db3d17439f748b2b4","src/lifetime.rs":"5bd6d716e46d489f52f4ac3075710328cae5a01a0b37818bfe26f4293f7984c7","src/lit.rs":"7c1007408070abe4c3bdc228c803777274ff842e48e1d1daad0442783c49bb46","src/lookahead.rs":"ea51bbf9a23d68238f8324a51675f5815fb3e5fc08c6bab93bc2648d3342c38f","src/mac.rs":"fdce8291f71adef3f69975f229156dca2309ca232ed943061afaf96220908ab8","src/macros.rs":"9809598f9d7e533c737c2a6df994b40b7c9ca4bbdf90bb2289da72a8065345dd","src/meta.rs":"d8703f8cbc5fd6465c367214eac38efca0f8388bdef8ac5e6f863835fbd3b30f","src/op.rs":"a61757370f802e44efa3c4a1057ae2cd26e64e273f7d76c06d5ffb49602319e2","src/parse.rs":"e692d6e895ac79a14680b89a7c29611995126953238176bdb50d2a3c2fb2d9e3","src/parse_macro_input.rs":"e4e22b63d0496d06a4ca17742a22467ed93f08a739081324773828bad63175ee","src/parse_quote.rs":"32fbdf959ee601cc2d184b6a358c54
2dbede0da5ad69d47588f7c405258a6dec","src/pat.rs":"861e3d8f86da16acf7440f9c0ed477a7c80bb0109b93aff0e3cd941198595b3e","src/path.rs":"8dc389f21f378c5f0dddda17543e59601b5b22cbabfbedf9a6ece499c519d81c","src/precedence.rs":"301f5cc51e1bbb35aeb64f6ed1b21d6f61d83f6c048aec9896976ee0011b948e","src/print.rs":"22910bf0521ab868ebd7c62601c55912d12cfb400c65723e08e5cfa3a2d111c0","src/punctuated.rs":"89b2666be8354215c63f8af7c92d6aed3a6d309cf384965ae360d351a5007afd","src/restriction.rs":"f5099d4cf5f9d61baace5b0e16f1631c72c15bba62529c7d4598565539d29f7d","src/scan_expr.rs":"e199c35e8bbf3e2c70901e1175df8dd446f4cb67b60100647f478f2dc31f6f12","src/sealed.rs":"6ece3b3dcb30f6bb98b93d83759ca7712ee8592bef9c0511141039c38765db0e","src/span.rs":"0a48e375e5c9768f6f64174a91ba6a255f4b021e2fb3548d8494e617f142601b","src/spanned.rs":"b53ce23f10935cf202da0f5a430e9feca9f4858bbd6efbe896acf17d3cfce540","src/stmt.rs":"46a6f36ec8780e69de73eb396021fd6faf609016cf204d7fea46d6aeccc2a42e","src/thread.rs":"b91e33c563c2ea584fcd7c5454998303173f4227bdb19504776e61d09b60675c","src/token.rs":"48da90e8fc2a0419f63f910cf7b043008b3abd5df2eb82a3609adde80a65dae0","src/tt.rs":"d5b9df076504b1506e0b05dafb9006fd0772fdb8d4b8b8dcc315dbca3277331a","src/ty.rs":"ca9f2155a0a39fc78af46fcbbb5189f0c4751642fcd53331a3a38d83dc22db2f","src/verbatim.rs":"aa1958887a7f4adedf824a59e175ffe83572d31ca278e5f95623068dff0a845b","src/whitespace.rs":"9cdcbfe9045b259046329a795bc1105ab5a871471a6d3f7318d275ee53f7a825","tests/common/eq.rs":"29015e3786750552493663396476f8c0347da991906153b20309ee90dc00e844","tests/common/mod.rs":"b752aa8f1faf8c6abf1286a12fb50b6c257ec1889d81bcdb3dc3257134695a89","tests/common/parse.rs":"b1685e9e0f0569f48d66ddd92d787a12933a1d2f95e7c9ae974a3648763f989b","tests/common/visit.rs":"a260ecd2ce7853cd3644e19aba08e8d358a656fd3fb0f1287cea40c59c9e62c9","tests/debug/gen.rs":"d7f27966ca645d6c099d5cfdac5d211f4ed3a9cb4996f6c754181936cd51edde","tests/debug/mod.rs":"1259df940bbcaa968a837e402d6853f2efa38d2260e306d42f17f9e8ef74fae5","tests/macros/
mod.rs":"d2294a79e341c623ae671dd363e99965d78dda7f340b0cc038267207adfacae2","tests/regression.rs":"e9565ea0efecb4136f099164ffcfa26e1996b0a27fb9c6659e90ad9bdd42e7b6","tests/regression/issue1108.rs":"f32db35244a674e22ff824ca9e5bbec2184e287b59f022db68c418b5878a2edc","tests/regression/issue1235.rs":"a2266b10c3f7c7af5734817ab0a3e8b309b51e7d177b63f26e67e6b744d280b0","tests/repo/mod.rs":"4e2d370876192fc0514962e1eeb9e1e4a96e3805b1f87257ba4d1eeda8b1db73","tests/repo/progress.rs":"11219e4ae5e0175d5ddc743907a57b8c4e750e709687c3df53728d6af994a86f","tests/snapshot/mod.rs":"4a101272c5abe6ca9f3501e0cacacee9a0ccf7ca773348a239e5b046d0316a7e","tests/test_asyncness.rs":"971d560d927d5a8494eaa7fce8f0d062d6971c17c4c464fcfc31570572b7d3d7","tests/test_attribute.rs":"8a4429b7cfe2360bb73beae54a62ae3255ebbd5181467a8608d6f858c2711728","tests/test_derive_input.rs":"c8f5dbac6482dadd0fab30d0b1fe3254869256c48ea68ea484cad7f7406c8568","tests/test_expr.rs":"055cb9b33a5bb6ed5dc67491e6f1ae794a53a5a091245debd464ef57144f5edb","tests/test_generics.rs":"5c9b34813a442311fdca36b56736dd15760e7f6d6a668f82673c21711e1aacd8","tests/test_grouping.rs":"fe3de6e8824f0722ab6450c6dfc374f6e0f8fe75c87c4dd56b2cb00a2197ed58","tests/test_ident.rs":"d5850e817720e774cd397a46dbc5298c57933823c18e20805e84503fc9387e8f","tests/test_item.rs":"07cfe67e302d56dd2b2f89e1278d6ee6202c042f5c9e76fdcd88c60534d71a13","tests/test_lit.rs":"4130efa425d14ed3ad9a1c2a00ef4b29782c9d1cf9e29ff9dddd3b23b2e3ddee","tests/test_meta.rs":"5b0fdee0decbd07476c9673403a662de385901b4bf60600c26ac879893f5bf9c","tests/test_parse_buffer.rs":"1c8cbbf457603bd0a83348079591f843f7ef0533a6275a0c76533e2d07dd321a","tests/test_parse_quote.rs":"85d90d2d51b82aab7c30159dd884f26c592ddb28ed31ef2baf371ee31349694c","tests/test_parse_stream.rs":"b6b533432173123d6d01d8d2cb33714bc50b30b16ffbb6116f93937221ad4594","tests/test_pat.rs":"dafa3e1f51812e8c852dc5210640a4adf6fff7cd0a0790ee17d2c4c115321846","tests/test_path.rs":"7a6763a262c41a9522068887702fe7cd4ff72b07da5253ac47761d73315b021d",
"tests/test_precedence.rs":"f0654b7bf85e2852f7229b14aaba7fcae87d9691f6fa91ab315bc6b0249ffccf","tests/test_punctuated.rs":"efed2c281b6965d71b065c7606631ba1989af6e7b5f5d1ca1033f8b968dc076c","tests/test_receiver.rs":"2053028236f95f3cb508ebf2eb606df43cae4f9f4dd27823661459ff6c54a39c","tests/test_round_trip.rs":"aecd7b37cf937f005ee0858f38482782341d7fa037ec8f15b3e8b26bfd6f6104","tests/test_shebang.rs":"9bc24b1ee2947b06a279d2ed40039cb45bba6caf7cd40530d93f7e2355de53c6","tests/test_size.rs":"03efaf829b80b7db1f831474c1d3ce268914fc499d0e2a7eea03cad04a482974","tests/test_stmt.rs":"b3c120059d7b56388963b85234feb2e4d379e32a0bf7f29b6683eca000dd3919","tests/test_token_trees.rs":"c30b921a96739c9334ec2bdd06552729891e0251b9d8fbdf0b8f5cc897babee5","tests/test_ty.rs":"9bb5f632941451ca6b200100310b55e62a9956190df3efe28b80d42843e75362","tests/test_unparenthesize.rs":"e5c047819afd5f70fde1bdd095467b1291d0854641f21e8183e50919986d8ce7","tests/test_visibility.rs":"7d05f05b0782976369d21477ac9f4d35a7c7f36faa42127e3a9c12ada270baf8","tests/zzz_stable.rs":"2a862e59cb446235ed99aec0e6ada8e16d3ecc30229b29d825b7c0bbc2602989"},"package":"3df424c70518695237746f84cede799c9c58fcb37450d7b23716568cc8bc69cb"}
|