kreuzberg 4.7.2 → 4.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/kreuzberg_rb/native/Cargo.lock +3 -3
- data/ext/kreuzberg_rb/native/Cargo.toml +1 -1
- data/lib/kreuzberg/version.rb +1 -1
- data/vendor/Cargo.toml +4 -4
- data/vendor/kreuzberg/Cargo.toml +3 -3
- data/vendor/kreuzberg/README.md +1 -1
- data/vendor/kreuzberg/src/extractors/pdf/mod.rs +15 -5
- data/vendor/kreuzberg/src/ocr/tesseract_backend.rs +22 -8
- data/vendor/kreuzberg/src/pdf/structure/adapters.rs +1 -0
- data/vendor/kreuzberg/src/pdf/structure/bridge.rs +16 -0
- data/vendor/kreuzberg/src/pdf/text.rs +2 -2
- data/vendor/kreuzberg-ffi/Cargo.toml +3 -3
- data/vendor/kreuzberg-ffi/kreuzberg.h +2 -2
- data/vendor/kreuzberg-paddle-ocr/Cargo.toml +1 -1
- data/vendor/kreuzberg-pdfium-render/Cargo.toml +1 -1
- data/vendor/kreuzberg-tesseract/Cargo.toml +1 -1
- data/vendor/kreuzberg-tesseract/src/api.rs +1 -1
- data/vendor/kreuzberg-tesseract/src/choice_iterator.rs +1 -1
- data/vendor/kreuzberg-tesseract/src/leptonica.rs +1 -1
- data/vendor/kreuzberg-tesseract/src/monitor.rs +1 -1
- data/vendor/kreuzberg-tesseract/src/mutable_iterator.rs +1 -1
- data/vendor/kreuzberg-tesseract/src/page_iterator.rs +1 -1
- data/vendor/kreuzberg-tesseract/src/result_iterator.rs +1 -1
- data/vendor/kreuzberg-tesseract/src/result_renderer.rs +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0f26fc086a0221056b94cd10141832240ef9563783835ef66555445e0d33442d
|
|
4
|
+
data.tar.gz: ed8488fcdd8bd12266ec2be8984ec5f5b60a32aaa562e64b442d9a4a641c8841
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 485460f24ddbf58b82a41873a61c5d78408012b415e62a52daeed43df2d150b317115f3ca79bc029a9394188da70d6733f79ba8e61f589c58bee7d1a845b17d8
|
|
7
|
+
data.tar.gz: 5e80e6e34fe8125a934b141ad3ab058240b81f4b82d609e0e2aea822131cbd4954dd7112e152a90542fc56d494519182c62cc14b387f658944ebbc73bc5037a1
|
data/README.md
CHANGED
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
|
|
23
23
|
</a>
|
|
24
24
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
|
|
25
|
-
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.7.
|
|
25
|
+
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.7.3" alt="Go">
|
|
26
26
|
</a>
|
|
27
27
|
<a href="https://www.nuget.org/packages/Kreuzberg/">
|
|
28
28
|
<img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
|
|
@@ -1565,9 +1565,9 @@ dependencies = [
|
|
|
1565
1565
|
|
|
1566
1566
|
[[package]]
|
|
1567
1567
|
name = "fastrand"
|
|
1568
|
-
version = "2.
|
|
1568
|
+
version = "2.4.0"
|
|
1569
1569
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1570
|
-
checksum = "
|
|
1570
|
+
checksum = "a043dc74da1e37d6afe657061213aa6f425f855399a11d3463c6ecccc4dfda1f"
|
|
1571
1571
|
|
|
1572
1572
|
[[package]]
|
|
1573
1573
|
name = "fax"
|
|
@@ -2938,7 +2938,7 @@ dependencies = [
|
|
|
2938
2938
|
|
|
2939
2939
|
[[package]]
|
|
2940
2940
|
name = "kreuzberg-rb"
|
|
2941
|
-
version = "4.7.
|
|
2941
|
+
version = "4.7.2"
|
|
2942
2942
|
dependencies = [
|
|
2943
2943
|
"async-trait",
|
|
2944
2944
|
"html-to-markdown-rs",
|
data/lib/kreuzberg/version.rb
CHANGED
data/vendor/Cargo.toml
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
members = ["kreuzberg", "kreuzberg-ffi", "kreuzberg-tesseract", "kreuzberg-paddle-ocr", "kreuzberg-pdfium-render"]
|
|
3
3
|
|
|
4
4
|
[workspace.package]
|
|
5
|
-
version = "4.7.
|
|
5
|
+
version = "4.7.3"
|
|
6
6
|
edition = "2024"
|
|
7
7
|
rust-version = "1.91"
|
|
8
8
|
authors = ["Na'aman Hirschfeld <naaman@kreuzberg.dev>"]
|
|
@@ -32,8 +32,8 @@ html-to-markdown-rs = { version = "3.1.0", default-features = false }
|
|
|
32
32
|
image = { version = "0.25.10", default-features = false }
|
|
33
33
|
itertools = "0.14"
|
|
34
34
|
js-sys = "0.3"
|
|
35
|
-
kreuzberg = { path = "./crates/kreuzberg", version = "4.7.
|
|
36
|
-
kreuzberg-ffi = { path = "./crates/kreuzberg-ffi", version = "4.7.
|
|
35
|
+
kreuzberg = { path = "./crates/kreuzberg", version = "4.7.3", default-features = false }
|
|
36
|
+
kreuzberg-ffi = { path = "./crates/kreuzberg-ffi", version = "4.7.3" }
|
|
37
37
|
lazy_static = "1.5.0"
|
|
38
38
|
libc = "0.2.184"
|
|
39
39
|
log = "0.4"
|
|
@@ -43,7 +43,7 @@ num_cpus = "1.17.0"
|
|
|
43
43
|
once_cell = "1.21.4"
|
|
44
44
|
ort = { version = "2.0.0-rc.12", features = ["std", "api-18"], default-features = false }
|
|
45
45
|
parking_lot = "0.12.5"
|
|
46
|
-
pdf_oxide = { version = "0.3.
|
|
46
|
+
pdf_oxide = { version = "0.3.20", default-features = false }
|
|
47
47
|
pdfium-render = { package = "kreuzberg-pdfium-render", path = "crates/kreuzberg-pdfium-render", version = "4.3" }
|
|
48
48
|
rayon = "1.11.0"
|
|
49
49
|
reqwest = { version = "0.13.2", default-features = false }
|
data/vendor/kreuzberg/Cargo.toml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "kreuzberg"
|
|
3
|
-
version = "4.7.
|
|
3
|
+
version = "4.7.3"
|
|
4
4
|
edition = "2024"
|
|
5
5
|
rust-version = "1.91"
|
|
6
6
|
authors = ["Na'aman Hirschfeld <naaman@kreuzberg.dev>"]
|
|
@@ -307,7 +307,7 @@ ort = { version = "2.0.0-rc.12", default-features = false, features = [
|
|
|
307
307
|
outlook-pst = { version = "1.2.0", optional = true }
|
|
308
308
|
parking_lot = "0.12.5"
|
|
309
309
|
pastey = "0.2"
|
|
310
|
-
pdf_oxide = { version = "0.3.
|
|
310
|
+
pdf_oxide = { version = "0.3.20", default-features = false, optional = true }
|
|
311
311
|
pdfium-render = { package = "kreuzberg-pdfium-render", path = "../kreuzberg-pdfium-render", features = ["thread_safe", "image_latest"], optional = true }
|
|
312
312
|
pulldown-cmark = { version = "0.13" }
|
|
313
313
|
quick-xml = { version = "0.39.2", features = ["serialize"], optional = true }
|
|
@@ -404,4 +404,4 @@ tempfile = "3.27.0"
|
|
|
404
404
|
tokio = { version = "1.51.0", features = ["macros", "time"] }
|
|
405
405
|
tokio-test = "0.4"
|
|
406
406
|
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
|
407
|
-
zip = { version = ">=7.0.0, <
|
|
407
|
+
zip = { version = ">=7.0.0, <8.6.0", default-features = false, features = ["deflate-flate2"] }
|
data/vendor/kreuzberg/README.md
CHANGED
|
@@ -18,7 +18,7 @@ High-performance document intelligence library for Rust. Extract text, metadata,
|
|
|
18
18
|
|
|
19
19
|
This is the core Rust library that powers the Python, TypeScript, and Ruby bindings.
|
|
20
20
|
|
|
21
|
-
> **🚀 Version 4.7.
|
|
21
|
+
> **🚀 Version 4.7.3 Release**
|
|
22
22
|
> This is a pre-release version. We invite you to test the library and [report any issues](https://github.com/kreuzberg-dev/kreuzberg/issues) you encounter.
|
|
23
23
|
>
|
|
24
24
|
> **Note**: The Rust crate is not currently published to crates.io for this RC. Use git dependencies or language bindings (Python, TypeScript, Ruby) instead.
|
|
@@ -593,11 +593,21 @@ impl PdfExtractor {
|
|
|
593
593
|
);
|
|
594
594
|
(native_text, false)
|
|
595
595
|
} else if decision.fallback || has_font_encoding_issues {
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
596
|
+
match run_ocr_with_layout(content, config, path).await {
|
|
597
|
+
Ok((ocr_text, ocr_tbls, ocr_elems, ocr_doc)) => {
|
|
598
|
+
ocr_tables = ocr_tbls;
|
|
599
|
+
_ocr_elements_from_ocr = ocr_elems;
|
|
600
|
+
ocr_internal_doc = ocr_doc;
|
|
601
|
+
(ocr_text, true)
|
|
602
|
+
}
|
|
603
|
+
Err(e) => {
|
|
604
|
+
tracing::warn!(
|
|
605
|
+
error = %e,
|
|
606
|
+
"OCR fallback failed; using native text extraction result"
|
|
607
|
+
);
|
|
608
|
+
(native_text, false)
|
|
609
|
+
}
|
|
610
|
+
}
|
|
601
611
|
} else {
|
|
602
612
|
(native_text, false)
|
|
603
613
|
}
|
|
@@ -197,13 +197,20 @@ impl OcrBackend for TesseractBackend {
|
|
|
197
197
|
let processor = Arc::clone(&self.processor);
|
|
198
198
|
let image_bytes = image_bytes.to_vec();
|
|
199
199
|
|
|
200
|
-
let ocr_result = tokio::task::spawn_blocking(move ||
|
|
201
|
-
|
|
202
|
-
|
|
200
|
+
let ocr_result = tokio::task::spawn_blocking(move || {
|
|
201
|
+
std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| match output_format {
|
|
202
|
+
Some(fmt) => processor.process_image_with_format(&image_bytes, &tess_config_clone, fmt),
|
|
203
|
+
None => processor.process_image(&image_bytes, &tess_config_clone),
|
|
204
|
+
}))
|
|
205
|
+
.unwrap_or_else(|_| {
|
|
206
|
+
Err(crate::ocr::error::OcrError::ProcessingFailed(
|
|
207
|
+
"Tesseract/Leptonica foreign exception caught".to_string(),
|
|
208
|
+
))
|
|
209
|
+
})
|
|
203
210
|
})
|
|
204
211
|
.await
|
|
205
212
|
.map_err(|e| crate::KreuzbergError::Plugin {
|
|
206
|
-
message: format!("Tesseract task panicked: {}", e),
|
|
213
|
+
message: format!("Tesseract task panicked or caught foreign exception: {}", e),
|
|
207
214
|
plugin_name: "tesseract".to_string(),
|
|
208
215
|
})?
|
|
209
216
|
.map_err(|e| crate::KreuzbergError::Ocr {
|
|
@@ -302,13 +309,20 @@ impl OcrBackend for TesseractBackend {
|
|
|
302
309
|
let processor = Arc::clone(&self.processor);
|
|
303
310
|
let path_str = path.to_string_lossy().to_string();
|
|
304
311
|
|
|
305
|
-
let ocr_result = tokio::task::spawn_blocking(move ||
|
|
306
|
-
|
|
307
|
-
|
|
312
|
+
let ocr_result = tokio::task::spawn_blocking(move || {
|
|
313
|
+
std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| match output_format {
|
|
314
|
+
Some(fmt) => processor.process_image_file_with_format(&path_str, &tess_config_clone, fmt),
|
|
315
|
+
None => processor.process_image_file(&path_str, &tess_config_clone),
|
|
316
|
+
}))
|
|
317
|
+
.unwrap_or_else(|_| {
|
|
318
|
+
Err(crate::ocr::error::OcrError::ProcessingFailed(
|
|
319
|
+
"Tesseract/Leptonica foreign exception caught".to_string(),
|
|
320
|
+
))
|
|
321
|
+
})
|
|
308
322
|
})
|
|
309
323
|
.await
|
|
310
324
|
.map_err(|e| crate::KreuzbergError::Plugin {
|
|
311
|
-
message: format!("Tesseract task panicked: {}", e),
|
|
325
|
+
message: format!("Tesseract task panicked or caught foreign exception: {}", e),
|
|
312
326
|
plugin_name: "tesseract".to_string(),
|
|
313
327
|
})?
|
|
314
328
|
.map_err(|e| crate::KreuzbergError::Ocr {
|
|
@@ -83,6 +83,7 @@ fn map_content_role(role: &ContentRole) -> (SemanticRole, Option<String>) {
|
|
|
83
83
|
/// The resulting paragraphs feed into `apply_layout_overrides` and
|
|
84
84
|
/// `assemble_internal_document`, matching the pdfium native text pipeline.
|
|
85
85
|
#[cfg(feature = "ocr")]
|
|
86
|
+
#[allow(dead_code)] // Called from extractors/pdf/ocr.rs only when layout-detection is also enabled
|
|
86
87
|
pub(crate) fn ocr_doc_to_paragraphs(
|
|
87
88
|
doc: &crate::types::internal::InternalDocument,
|
|
88
89
|
page_height_px: u32,
|
|
@@ -211,6 +211,15 @@ fn finalize_paragraph(
|
|
|
211
211
|
|
|
212
212
|
// Join line texts with newlines (preserving full_text content exactly).
|
|
213
213
|
let text: String = lines.iter().map(|l| l.text.as_str()).collect::<Vec<_>>().join("\n");
|
|
214
|
+
|
|
215
|
+
// Convert embedded HTML to markdown if detected (e.g., PDFs with HTML in text layer).
|
|
216
|
+
#[cfg(feature = "html")]
|
|
217
|
+
let text = if crate::pdf::text::contains_html_markup(&text) {
|
|
218
|
+
crate::pdf::text::convert_html_page_text(&text)
|
|
219
|
+
} else {
|
|
220
|
+
text
|
|
221
|
+
};
|
|
222
|
+
|
|
214
223
|
let trimmed = text.trim();
|
|
215
224
|
if trimmed.is_empty() {
|
|
216
225
|
return None;
|
|
@@ -546,6 +555,13 @@ struct CharFontInfo {
|
|
|
546
555
|
fn extract_page_blocks(page: &PdfPage) -> Option<(Vec<SegmentData>, String, Vec<f32>)> {
|
|
547
556
|
let text_api = page.text().ok()?;
|
|
548
557
|
let full_text = text_api.all();
|
|
558
|
+
// Convert embedded HTML to markdown if detected (PDFs with HTML in text layer).
|
|
559
|
+
#[cfg(feature = "html")]
|
|
560
|
+
let full_text = if crate::pdf::text::contains_html_markup(&full_text) {
|
|
561
|
+
crate::pdf::text::convert_html_page_text(&full_text)
|
|
562
|
+
} else {
|
|
563
|
+
full_text
|
|
564
|
+
};
|
|
549
565
|
if full_text.trim().is_empty() {
|
|
550
566
|
return None;
|
|
551
567
|
}
|
|
@@ -53,7 +53,7 @@ fn fix_pdf_control_chars(text: &str) -> Cow<'_, str> {
|
|
|
53
53
|
/// Some PDFs embed raw HTML in their text layer (e.g. from web-to-PDF converters).
|
|
54
54
|
/// This function detects common HTML tags to determine if the text should be
|
|
55
55
|
/// converted from HTML to markdown rather than used as-is.
|
|
56
|
-
fn contains_html_markup(text: &str) -> bool {
|
|
56
|
+
pub(crate) fn contains_html_markup(text: &str) -> bool {
|
|
57
57
|
if !text.contains('<') {
|
|
58
58
|
return false;
|
|
59
59
|
}
|
|
@@ -72,7 +72,7 @@ fn contains_html_markup(text: &str) -> bool {
|
|
|
72
72
|
/// Falls back to the original text if the `html` feature is not enabled
|
|
73
73
|
/// or if conversion fails.
|
|
74
74
|
#[cfg(feature = "html")]
|
|
75
|
-
fn convert_html_page_text(text: &str) -> String {
|
|
75
|
+
pub(crate) fn convert_html_page_text(text: &str) -> String {
|
|
76
76
|
match crate::extraction::html::convert_html_to_markdown(text, None, None) {
|
|
77
77
|
Ok(converted) => converted,
|
|
78
78
|
Err(_) => text.to_owned(),
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "kreuzberg-ffi"
|
|
3
|
-
version = "4.7.
|
|
3
|
+
version = "4.7.3"
|
|
4
4
|
edition = "2024"
|
|
5
5
|
rust-version = "1.91"
|
|
6
6
|
authors = ["Na'aman Hirschfeld <naaman@kreuzberg.dev>"]
|
|
@@ -41,7 +41,7 @@ serde_json = { version = "1.0.149" }
|
|
|
41
41
|
tokio = { version = "1.51.0", features = ["rt", "rt-multi-thread", "macros", "sync", "process", "fs", "time", "io-util"] }
|
|
42
42
|
|
|
43
43
|
[target.'cfg(all(windows, target_env = "gnu"))'.dependencies]
|
|
44
|
-
kreuzberg = { path = "../kreuzberg", version = "4.7.2", default-features = false, features = [
|
|
44
|
+
kreuzberg = { path = "../kreuzberg", version = "4.7.3", default-features = false, features = [
|
|
45
45
|
"pdf",
|
|
46
46
|
"excel",
|
|
47
47
|
"office",
|
|
@@ -64,7 +64,7 @@ kreuzberg = { path = "../kreuzberg", version = "4.7.2", default-features = false
|
|
|
64
64
|
] }
|
|
65
65
|
|
|
66
66
|
[target.'cfg(not(all(windows, target_env = "gnu")))'.dependencies]
|
|
67
|
-
kreuzberg = { path = "../kreuzberg", version = "4.7.
|
|
67
|
+
kreuzberg = { path = "../kreuzberg", version = "4.7.3", default-features = false, features = ["bundled-pdfium", "full"] }
|
|
68
68
|
|
|
69
69
|
[build-dependencies]
|
|
70
70
|
cbindgen = "0.29"
|
|
@@ -2160,7 +2160,7 @@ impl Clone for TesseractAPI {
|
|
|
2160
2160
|
}
|
|
2161
2161
|
|
|
2162
2162
|
#[cfg(any(feature = "build-tesseract", feature = "build-tesseract-wasm"))]
|
|
2163
|
-
unsafe extern "C" {
|
|
2163
|
+
unsafe extern "C-unwind" {
|
|
2164
2164
|
fn TessBaseAPIMeanTextConf(handle: *mut c_void) -> c_int;
|
|
2165
2165
|
fn TessBaseAPISetVariable(handle: *mut c_void, name: *const c_char, value: *const c_char) -> c_int;
|
|
2166
2166
|
fn TessBaseAPIGetStringVariable(handle: *mut c_void, name: *const c_char) -> *const c_char;
|
|
@@ -69,7 +69,7 @@ impl Drop for ChoiceIterator {
|
|
|
69
69
|
}
|
|
70
70
|
}
|
|
71
71
|
|
|
72
|
-
unsafe extern "C" {
|
|
72
|
+
unsafe extern "C-unwind" {
|
|
73
73
|
fn TessChoiceIteratorDelete(handle: *mut c_void);
|
|
74
74
|
fn TessChoiceIteratorNext(handle: *mut c_void) -> c_int;
|
|
75
75
|
fn TessChoiceIteratorGetUTF8Text(handle: *mut c_void) -> *mut c_char;
|
|
@@ -29,7 +29,7 @@ use std::ffi::c_void;
|
|
|
29
29
|
// ---------------------------------------------------------------------------
|
|
30
30
|
|
|
31
31
|
#[cfg(any(feature = "build-tesseract", feature = "build-tesseract-wasm"))]
|
|
32
|
-
unsafe extern "C" {
|
|
32
|
+
unsafe extern "C-unwind" {
|
|
33
33
|
/// Allocates a new Pix with the given dimensions and bit depth.
|
|
34
34
|
fn pixCreate(width: i32, height: i32, depth: i32) -> *mut c_void;
|
|
35
35
|
|
|
@@ -60,7 +60,7 @@ impl Drop for TessMonitor {
|
|
|
60
60
|
}
|
|
61
61
|
}
|
|
62
62
|
|
|
63
|
-
unsafe extern "C" {
|
|
63
|
+
unsafe extern "C-unwind" {
|
|
64
64
|
pub fn TessMonitorCreate() -> *mut c_void;
|
|
65
65
|
pub fn TessMonitorDelete(monitor: *mut c_void);
|
|
66
66
|
pub fn TessMonitorSetDeadlineMSecs(monitor: *mut c_void, deadline: c_int);
|
|
@@ -380,7 +380,7 @@ impl Drop for PageIterator {
|
|
|
380
380
|
}
|
|
381
381
|
}
|
|
382
382
|
|
|
383
|
-
unsafe extern "C" {
|
|
383
|
+
unsafe extern "C-unwind" {
|
|
384
384
|
pub fn TessPageIteratorDelete(handle: *mut c_void);
|
|
385
385
|
pub fn TessPageIteratorBegin(handle: *mut c_void);
|
|
386
386
|
pub fn TessPageIteratorNext(handle: *mut c_void, level: c_int) -> c_int;
|
|
@@ -555,7 +555,7 @@ impl Drop for ResultIterator {
|
|
|
555
555
|
}
|
|
556
556
|
|
|
557
557
|
#[cfg(any(feature = "build-tesseract", feature = "build-tesseract-wasm"))]
|
|
558
|
-
unsafe extern "C" {
|
|
558
|
+
unsafe extern "C-unwind" {
|
|
559
559
|
pub fn TessResultIteratorDelete(handle: *mut c_void);
|
|
560
560
|
pub fn TessPageIteratorBegin(handle: *mut c_void);
|
|
561
561
|
pub fn TessResultIteratorGetUTF8Text(handle: *mut c_void, level: c_int) -> *mut c_char;
|
|
@@ -198,7 +198,7 @@ impl Drop for TessResultRenderer {
|
|
|
198
198
|
}
|
|
199
199
|
}
|
|
200
200
|
|
|
201
|
-
unsafe extern "C" {
|
|
201
|
+
unsafe extern "C-unwind" {
|
|
202
202
|
pub fn TessTextRendererCreate(outputbase: *const c_char) -> *mut c_void;
|
|
203
203
|
pub fn TessHOcrRendererCreate(outputbase: *const c_char) -> *mut c_void;
|
|
204
204
|
pub fn TessPDFRendererCreate(outputbase: *const c_char, datadir: *const c_char, textonly: c_int) -> *mut c_void;
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: kreuzberg
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 4.7.
|
|
4
|
+
version: 4.7.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Na'aman Hirschfeld
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-04-
|
|
11
|
+
date: 2026-04-05 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rb_sys
|