kreuzberg 4.0.0.pre.rc.6 → 4.0.0.pre.rc.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +5 -5
- data/README.md +15 -9
- data/ext/kreuzberg_rb/native/.cargo/config.toml +2 -0
- data/ext/kreuzberg_rb/native/Cargo.lock +511 -325
- data/ext/kreuzberg_rb/native/Cargo.toml +13 -3
- data/ext/kreuzberg_rb/native/src/lib.rs +139 -2
- data/kreuzberg.gemspec +38 -4
- data/lib/kreuzberg/config.rb +34 -1
- data/lib/kreuzberg/result.rb +77 -14
- data/lib/kreuzberg/version.rb +1 -1
- data/sig/kreuzberg.rbs +23 -6
- data/vendor/kreuzberg/Cargo.toml +32 -11
- data/vendor/kreuzberg/README.md +54 -8
- data/vendor/kreuzberg/build.rs +549 -132
- data/vendor/kreuzberg/src/chunking/mod.rs +1279 -79
- data/vendor/kreuzberg/src/chunking/processor.rs +220 -0
- data/vendor/kreuzberg/src/core/config.rs +49 -1
- data/vendor/kreuzberg/src/core/extractor.rs +134 -2
- data/vendor/kreuzberg/src/core/mod.rs +4 -2
- data/vendor/kreuzberg/src/core/pipeline.rs +188 -1
- data/vendor/kreuzberg/src/extraction/docx.rs +358 -0
- data/vendor/kreuzberg/src/extraction/html.rs +24 -8
- data/vendor/kreuzberg/src/extraction/image.rs +124 -1
- data/vendor/kreuzberg/src/extraction/libreoffice.rs +1 -2
- data/vendor/kreuzberg/src/extraction/office_metadata/odt_properties.rs +0 -3
- data/vendor/kreuzberg/src/extraction/pptx.rs +187 -87
- data/vendor/kreuzberg/src/extractors/archive.rs +1 -0
- data/vendor/kreuzberg/src/extractors/bibtex.rs +1 -0
- data/vendor/kreuzberg/src/extractors/docbook.rs +2 -0
- data/vendor/kreuzberg/src/extractors/docx.rs +50 -17
- data/vendor/kreuzberg/src/extractors/email.rs +29 -15
- data/vendor/kreuzberg/src/extractors/epub.rs +1 -0
- data/vendor/kreuzberg/src/extractors/excel.rs +2 -0
- data/vendor/kreuzberg/src/extractors/fictionbook.rs +1 -0
- data/vendor/kreuzberg/src/extractors/html.rs +29 -15
- data/vendor/kreuzberg/src/extractors/image.rs +25 -4
- data/vendor/kreuzberg/src/extractors/jats.rs +3 -0
- data/vendor/kreuzberg/src/extractors/jupyter.rs +1 -0
- data/vendor/kreuzberg/src/extractors/latex.rs +1 -0
- data/vendor/kreuzberg/src/extractors/markdown.rs +1 -0
- data/vendor/kreuzberg/src/extractors/mod.rs +78 -14
- data/vendor/kreuzberg/src/extractors/odt.rs +3 -3
- data/vendor/kreuzberg/src/extractors/opml.rs +1 -0
- data/vendor/kreuzberg/src/extractors/orgmode.rs +1 -0
- data/vendor/kreuzberg/src/extractors/pdf.rs +197 -17
- data/vendor/kreuzberg/src/extractors/pptx.rs +32 -13
- data/vendor/kreuzberg/src/extractors/rst.rs +1 -0
- data/vendor/kreuzberg/src/extractors/rtf.rs +3 -4
- data/vendor/kreuzberg/src/extractors/structured.rs +2 -0
- data/vendor/kreuzberg/src/extractors/text.rs +7 -2
- data/vendor/kreuzberg/src/extractors/typst.rs +1 -0
- data/vendor/kreuzberg/src/extractors/xml.rs +27 -15
- data/vendor/kreuzberg/src/keywords/processor.rs +9 -1
- data/vendor/kreuzberg/src/language_detection/mod.rs +43 -0
- data/vendor/kreuzberg/src/language_detection/processor.rs +219 -0
- data/vendor/kreuzberg/src/lib.rs +10 -2
- data/vendor/kreuzberg/src/mcp/mod.rs +3 -0
- data/vendor/kreuzberg/src/mcp/server.rs +120 -12
- data/vendor/kreuzberg/src/ocr/tesseract_backend.rs +2 -0
- data/vendor/kreuzberg/src/pdf/bundled.rs +328 -0
- data/vendor/kreuzberg/src/pdf/error.rs +8 -0
- data/vendor/kreuzberg/src/pdf/metadata.rs +238 -95
- data/vendor/kreuzberg/src/pdf/mod.rs +18 -2
- data/vendor/kreuzberg/src/pdf/rendering.rs +1 -2
- data/vendor/kreuzberg/src/pdf/table.rs +26 -2
- data/vendor/kreuzberg/src/pdf/text.rs +89 -7
- data/vendor/kreuzberg/src/plugins/extractor.rs +34 -3
- data/vendor/kreuzberg/src/plugins/mod.rs +3 -0
- data/vendor/kreuzberg/src/plugins/ocr.rs +22 -3
- data/vendor/kreuzberg/src/plugins/processor.rs +8 -0
- data/vendor/kreuzberg/src/plugins/registry.rs +2 -0
- data/vendor/kreuzberg/src/plugins/validator.rs +11 -0
- data/vendor/kreuzberg/src/text/mod.rs +6 -0
- data/vendor/kreuzberg/src/text/quality_processor.rs +219 -0
- data/vendor/kreuzberg/src/types.rs +173 -21
- data/vendor/kreuzberg/tests/archive_integration.rs +2 -0
- data/vendor/kreuzberg/tests/batch_processing.rs +5 -3
- data/vendor/kreuzberg/tests/concurrency_stress.rs +14 -6
- data/vendor/kreuzberg/tests/config_features.rs +15 -1
- data/vendor/kreuzberg/tests/config_loading_tests.rs +1 -0
- data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +2 -0
- data/vendor/kreuzberg/tests/email_integration.rs +2 -0
- data/vendor/kreuzberg/tests/error_handling.rs +43 -34
- data/vendor/kreuzberg/tests/format_integration.rs +2 -0
- data/vendor/kreuzberg/tests/image_integration.rs +2 -0
- data/vendor/kreuzberg/tests/mime_detection.rs +17 -16
- data/vendor/kreuzberg/tests/ocr_configuration.rs +4 -0
- data/vendor/kreuzberg/tests/ocr_errors.rs +22 -0
- data/vendor/kreuzberg/tests/ocr_quality.rs +2 -0
- data/vendor/kreuzberg/tests/odt_extractor_tests.rs +0 -21
- data/vendor/kreuzberg/tests/pdf_integration.rs +2 -0
- data/vendor/kreuzberg/tests/pdfium_linking.rs +374 -0
- data/vendor/kreuzberg/tests/pipeline_integration.rs +25 -0
- data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +5 -0
- data/vendor/kreuzberg/tests/plugin_system.rs +6 -0
- data/vendor/kreuzberg/tests/registry_integration_tests.rs +1 -0
- data/vendor/kreuzberg/tests/rst_extractor_tests.rs +2 -0
- data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +0 -1
- data/vendor/kreuzberg/tests/security_validation.rs +1 -0
- data/vendor/kreuzberg/tests/test_fastembed.rs +45 -23
- data/vendor/kreuzberg/tests/typst_behavioral_tests.rs +1 -0
- data/vendor/kreuzberg/tests/typst_extractor_tests.rs +3 -2
- data/vendor/rb-sys/.cargo_vcs_info.json +2 -2
- data/vendor/rb-sys/Cargo.lock +15 -15
- data/vendor/rb-sys/Cargo.toml +4 -4
- data/vendor/rb-sys/Cargo.toml.orig +4 -4
- data/vendor/rb-sys/build/features.rs +5 -2
- data/vendor/rb-sys/build/main.rs +55 -15
- data/vendor/rb-sys/build/stable_api_config.rs +4 -2
- data/vendor/rb-sys/build/version.rs +3 -1
- data/vendor/rb-sys/src/lib.rs +1 -0
- data/vendor/rb-sys/src/macros.rs +2 -2
- data/vendor/rb-sys/src/special_consts.rs +1 -1
- data/vendor/rb-sys/src/stable_api/compiled.rs +1 -1
- data/vendor/rb-sys/src/stable_api/ruby_2_7.rs +12 -4
- data/vendor/rb-sys/src/stable_api/ruby_3_0.rs +12 -4
- data/vendor/rb-sys/src/stable_api/ruby_3_1.rs +12 -4
- data/vendor/rb-sys/src/stable_api/ruby_3_2.rs +12 -4
- data/vendor/rb-sys/src/stable_api/ruby_3_3.rs +19 -6
- data/vendor/rb-sys/src/stable_api/ruby_3_4.rs +17 -5
- data/vendor/rb-sys/src/stable_api.rs +0 -1
- data/vendor/rb-sys/src/tracking_allocator.rs +1 -3
- metadata +13 -10
- data/vendor/kreuzberg/src/extractors/fictionbook.rs.backup2 +0 -738
- data/vendor/rb-sys/.cargo-ok +0 -1
- data/vendor/rb-sys/src/stable_api/ruby_2_6.rs +0 -316
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
use super::StableApiDefinition;
|
|
2
2
|
use crate::{
|
|
3
|
-
|
|
3
|
+
debug_ruby_assert_type,
|
|
4
4
|
internal::{RArray, RString, RTypedData},
|
|
5
5
|
ruby_value_type::RUBY_T_DATA,
|
|
6
|
-
value_type,
|
|
6
|
+
value_type, VALUE,
|
|
7
7
|
};
|
|
8
8
|
use std::{
|
|
9
9
|
ffi::c_void,
|
|
@@ -300,7 +300,11 @@ impl StableApiDefinition for Definition {
|
|
|
300
300
|
|
|
301
301
|
#[inline]
|
|
302
302
|
unsafe fn rtypeddata_type(&self, obj: VALUE) -> *const crate::rb_data_type_t {
|
|
303
|
-
debug_ruby_assert_type!(
|
|
303
|
+
debug_ruby_assert_type!(
|
|
304
|
+
obj,
|
|
305
|
+
RUBY_T_DATA,
|
|
306
|
+
"rtypeddata_type called on non-T_DATA object"
|
|
307
|
+
);
|
|
304
308
|
|
|
305
309
|
let rdata = obj as *const RTypedData;
|
|
306
310
|
(*rdata).type_
|
|
@@ -308,7 +312,11 @@ impl StableApiDefinition for Definition {
|
|
|
308
312
|
|
|
309
313
|
#[inline]
|
|
310
314
|
unsafe fn rtypeddata_get_data(&self, obj: VALUE) -> *mut c_void {
|
|
311
|
-
debug_ruby_assert_type!(
|
|
315
|
+
debug_ruby_assert_type!(
|
|
316
|
+
obj,
|
|
317
|
+
RUBY_T_DATA,
|
|
318
|
+
"rtypeddata_get_data called on non-T_DATA object"
|
|
319
|
+
);
|
|
312
320
|
|
|
313
321
|
// For Ruby 3.1 and lower, simply return the data field
|
|
314
322
|
let rdata = obj as *const RTypedData;
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
use super::StableApiDefinition;
|
|
2
2
|
use crate::{
|
|
3
|
-
|
|
3
|
+
debug_ruby_assert_type,
|
|
4
4
|
internal::{RArray, RString, RTypedData},
|
|
5
5
|
ruby_value_type::RUBY_T_DATA,
|
|
6
|
-
value_type,
|
|
6
|
+
value_type, VALUE,
|
|
7
7
|
};
|
|
8
8
|
use std::{
|
|
9
9
|
ffi::c_void,
|
|
@@ -298,7 +298,11 @@ impl StableApiDefinition for Definition {
|
|
|
298
298
|
|
|
299
299
|
#[inline]
|
|
300
300
|
unsafe fn rtypeddata_type(&self, obj: VALUE) -> *const crate::rb_data_type_t {
|
|
301
|
-
debug_ruby_assert_type!(
|
|
301
|
+
debug_ruby_assert_type!(
|
|
302
|
+
obj,
|
|
303
|
+
RUBY_T_DATA,
|
|
304
|
+
"rtypeddata_type called on non-T_DATA object"
|
|
305
|
+
);
|
|
302
306
|
|
|
303
307
|
let rdata = obj as *const RTypedData;
|
|
304
308
|
(*rdata).type_
|
|
@@ -306,7 +310,11 @@ impl StableApiDefinition for Definition {
|
|
|
306
310
|
|
|
307
311
|
#[inline]
|
|
308
312
|
unsafe fn rtypeddata_get_data(&self, obj: VALUE) -> *mut c_void {
|
|
309
|
-
debug_ruby_assert_type!(
|
|
313
|
+
debug_ruby_assert_type!(
|
|
314
|
+
obj,
|
|
315
|
+
RUBY_T_DATA,
|
|
316
|
+
"rtypeddata_get_data called on non-T_DATA object"
|
|
317
|
+
);
|
|
310
318
|
|
|
311
319
|
// For Ruby 3.2 and lower, simply return the data field
|
|
312
320
|
let rdata = obj as *const RTypedData;
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
use super::StableApiDefinition;
|
|
2
2
|
use crate::{
|
|
3
|
-
|
|
3
|
+
debug_ruby_assert_type,
|
|
4
4
|
internal::{RArray, RString},
|
|
5
5
|
ruby_value_type::RUBY_T_DATA,
|
|
6
|
-
value_type,
|
|
6
|
+
value_type, VALUE,
|
|
7
7
|
};
|
|
8
8
|
use std::{
|
|
9
9
|
os::raw::{c_char, c_long},
|
|
@@ -284,7 +284,11 @@ impl StableApiDefinition for Definition {
|
|
|
284
284
|
|
|
285
285
|
#[inline]
|
|
286
286
|
unsafe fn rtypeddata_embedded_p(&self, obj: VALUE) -> bool {
|
|
287
|
-
debug_ruby_assert_type!(
|
|
287
|
+
debug_ruby_assert_type!(
|
|
288
|
+
obj,
|
|
289
|
+
RUBY_T_DATA,
|
|
290
|
+
"rtypeddata_embedded_p called on non-T_DATA object"
|
|
291
|
+
);
|
|
288
292
|
|
|
289
293
|
let rdata = obj as *const crate::internal::RTypedData;
|
|
290
294
|
let typed_flag = (*rdata).typed_flag;
|
|
@@ -298,7 +302,11 @@ impl StableApiDefinition for Definition {
|
|
|
298
302
|
|
|
299
303
|
#[inline]
|
|
300
304
|
unsafe fn rtypeddata_type(&self, obj: VALUE) -> *const crate::rb_data_type_t {
|
|
301
|
-
debug_ruby_assert_type!(
|
|
305
|
+
debug_ruby_assert_type!(
|
|
306
|
+
obj,
|
|
307
|
+
RUBY_T_DATA,
|
|
308
|
+
"rtypeddata_type called on non-T_DATA object"
|
|
309
|
+
);
|
|
302
310
|
|
|
303
311
|
let rdata = obj as *const crate::internal::RTypedData;
|
|
304
312
|
(*rdata).type_
|
|
@@ -306,14 +314,19 @@ impl StableApiDefinition for Definition {
|
|
|
306
314
|
|
|
307
315
|
#[inline]
|
|
308
316
|
unsafe fn rtypeddata_get_data(&self, obj: VALUE) -> *mut std::ffi::c_void {
|
|
309
|
-
debug_ruby_assert_type!(
|
|
317
|
+
debug_ruby_assert_type!(
|
|
318
|
+
obj,
|
|
319
|
+
RUBY_T_DATA,
|
|
320
|
+
"rtypeddata_get_data called on non-T_DATA object"
|
|
321
|
+
);
|
|
310
322
|
|
|
311
323
|
if self.rtypeddata_embedded_p(obj) {
|
|
312
324
|
// For embedded data, calculate pointer based on struct layout
|
|
313
325
|
// The formula matches Ruby's implementation:
|
|
314
326
|
// embedded_typed_data_size = sizeof(RTypedData) - sizeof(void *)
|
|
315
327
|
const EMBEDDED_TYPED_DATA_SIZE: usize =
|
|
316
|
-
std::mem::size_of::<crate::internal::RTypedData>()
|
|
328
|
+
std::mem::size_of::<crate::internal::RTypedData>()
|
|
329
|
+
- std::mem::size_of::<*mut std::ffi::c_void>();
|
|
317
330
|
|
|
318
331
|
// Return address after the header as the data pointer
|
|
319
332
|
(obj as *mut u8).add(EMBEDDED_TYPED_DATA_SIZE) as *mut std::ffi::c_void
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
use super::StableApiDefinition;
|
|
2
2
|
use crate::{
|
|
3
|
-
|
|
3
|
+
debug_ruby_assert_type,
|
|
4
4
|
internal::{RArray, RString, RTypedData},
|
|
5
5
|
ruby_value_type::RUBY_T_DATA,
|
|
6
|
-
value_type,
|
|
6
|
+
value_type, VALUE,
|
|
7
7
|
};
|
|
8
8
|
use std::{
|
|
9
9
|
ffi::c_void,
|
|
@@ -285,7 +285,11 @@ impl StableApiDefinition for Definition {
|
|
|
285
285
|
|
|
286
286
|
#[inline]
|
|
287
287
|
unsafe fn rtypeddata_embedded_p(&self, obj: VALUE) -> bool {
|
|
288
|
-
debug_ruby_assert_type!(
|
|
288
|
+
debug_ruby_assert_type!(
|
|
289
|
+
obj,
|
|
290
|
+
RUBY_T_DATA,
|
|
291
|
+
"rtypeddata_embedded_p called on non-T_DATA object"
|
|
292
|
+
);
|
|
289
293
|
|
|
290
294
|
let rdata = obj as *const RTypedData;
|
|
291
295
|
let typed_flag = (*rdata).typed_flag;
|
|
@@ -299,7 +303,11 @@ impl StableApiDefinition for Definition {
|
|
|
299
303
|
|
|
300
304
|
#[inline]
|
|
301
305
|
unsafe fn rtypeddata_type(&self, obj: VALUE) -> *const crate::rb_data_type_t {
|
|
302
|
-
debug_ruby_assert_type!(
|
|
306
|
+
debug_ruby_assert_type!(
|
|
307
|
+
obj,
|
|
308
|
+
RUBY_T_DATA,
|
|
309
|
+
"rtypeddata_type called on non-T_DATA object"
|
|
310
|
+
);
|
|
303
311
|
|
|
304
312
|
let rdata = obj as *const RTypedData;
|
|
305
313
|
(*rdata).type_
|
|
@@ -307,7 +315,11 @@ impl StableApiDefinition for Definition {
|
|
|
307
315
|
|
|
308
316
|
#[inline]
|
|
309
317
|
unsafe fn rtypeddata_get_data(&self, obj: VALUE) -> *mut c_void {
|
|
310
|
-
debug_ruby_assert_type!(
|
|
318
|
+
debug_ruby_assert_type!(
|
|
319
|
+
obj,
|
|
320
|
+
RUBY_T_DATA,
|
|
321
|
+
"rtypeddata_get_data called on non-T_DATA object"
|
|
322
|
+
);
|
|
311
323
|
|
|
312
324
|
if self.rtypeddata_embedded_p(obj) {
|
|
313
325
|
// For embedded data, calculate pointer based on struct layout
|
|
@@ -226,7 +226,6 @@ mod compiled;
|
|
|
226
226
|
use compiled as api;
|
|
227
227
|
|
|
228
228
|
#[cfg(stable_api_include_rust_impl)]
|
|
229
|
-
#[cfg_attr(ruby_eq_2_6, path = "stable_api/ruby_2_6.rs")]
|
|
230
229
|
#[cfg_attr(ruby_eq_2_7, path = "stable_api/ruby_2_7.rs")]
|
|
231
230
|
#[cfg_attr(ruby_eq_3_0, path = "stable_api/ruby_3_0.rs")]
|
|
232
231
|
#[cfg_attr(ruby_eq_3_1, path = "stable_api/ruby_3_1.rs")]
|
|
@@ -3,8 +3,8 @@
|
|
|
3
3
|
use std::{
|
|
4
4
|
fmt::Formatter,
|
|
5
5
|
sync::{
|
|
6
|
-
Arc,
|
|
7
6
|
atomic::{AtomicIsize, Ordering},
|
|
7
|
+
Arc,
|
|
8
8
|
},
|
|
9
9
|
};
|
|
10
10
|
|
|
@@ -59,13 +59,11 @@ mod mri {
|
|
|
59
59
|
|
|
60
60
|
unsafe {
|
|
61
61
|
if is_ruby_vm_started() {
|
|
62
|
-
// On Windows, ssize_t is i32 even on 64-bit, so cast i64 to i32
|
|
63
62
|
#[cfg(all(target_pointer_width = "64", target_os = "windows"))]
|
|
64
63
|
rb_gc_adjust_memory_usage(delta as i32);
|
|
65
64
|
|
|
66
65
|
#[cfg(not(all(target_pointer_width = "64", target_os = "windows")))]
|
|
67
66
|
rb_gc_adjust_memory_usage(delta);
|
|
68
|
-
|
|
69
67
|
delta as isize
|
|
70
68
|
} else {
|
|
71
69
|
0
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: kreuzberg
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 4.0.0.pre.rc.6
|
|
4
|
+
version: 4.0.0.pre.rc.8
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Na'aman Hirschfeld
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2025-12-
|
|
11
|
+
date: 2025-12-14 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -186,6 +186,7 @@ files:
|
|
|
186
186
|
- Steepfile
|
|
187
187
|
- examples/async_patterns.rb
|
|
188
188
|
- ext/kreuzberg_rb/extconf.rb
|
|
189
|
+
- ext/kreuzberg_rb/native/.cargo/config.toml
|
|
189
190
|
- ext/kreuzberg_rb/native/Cargo.lock
|
|
190
191
|
- ext/kreuzberg_rb/native/Cargo.toml
|
|
191
192
|
- ext/kreuzberg_rb/native/README.md
|
|
@@ -241,6 +242,7 @@ files:
|
|
|
241
242
|
- vendor/kreuzberg/src/api/types.rs
|
|
242
243
|
- vendor/kreuzberg/src/cache/mod.rs
|
|
243
244
|
- vendor/kreuzberg/src/chunking/mod.rs
|
|
245
|
+
- vendor/kreuzberg/src/chunking/processor.rs
|
|
244
246
|
- vendor/kreuzberg/src/core/batch_mode.rs
|
|
245
247
|
- vendor/kreuzberg/src/core/config.rs
|
|
246
248
|
- vendor/kreuzberg/src/core/extractor.rs
|
|
@@ -277,7 +279,6 @@ files:
|
|
|
277
279
|
- vendor/kreuzberg/src/extractors/epub.rs
|
|
278
280
|
- vendor/kreuzberg/src/extractors/excel.rs
|
|
279
281
|
- vendor/kreuzberg/src/extractors/fictionbook.rs
|
|
280
|
-
- vendor/kreuzberg/src/extractors/fictionbook.rs.backup2
|
|
281
282
|
- vendor/kreuzberg/src/extractors/html.rs
|
|
282
283
|
- vendor/kreuzberg/src/extractors/image.rs
|
|
283
284
|
- vendor/kreuzberg/src/extractors/jats.rs
|
|
@@ -309,6 +310,7 @@ files:
|
|
|
309
310
|
- vendor/kreuzberg/src/keywords/types.rs
|
|
310
311
|
- vendor/kreuzberg/src/keywords/yake.rs
|
|
311
312
|
- vendor/kreuzberg/src/language_detection/mod.rs
|
|
313
|
+
- vendor/kreuzberg/src/language_detection/processor.rs
|
|
312
314
|
- vendor/kreuzberg/src/lib.rs
|
|
313
315
|
- vendor/kreuzberg/src/mcp/mod.rs
|
|
314
316
|
- vendor/kreuzberg/src/mcp/server.rs
|
|
@@ -324,6 +326,7 @@ files:
|
|
|
324
326
|
- vendor/kreuzberg/src/ocr/utils.rs
|
|
325
327
|
- vendor/kreuzberg/src/ocr/validation.rs
|
|
326
328
|
- vendor/kreuzberg/src/panic_context.rs
|
|
329
|
+
- vendor/kreuzberg/src/pdf/bundled.rs
|
|
327
330
|
- vendor/kreuzberg/src/pdf/error.rs
|
|
328
331
|
- vendor/kreuzberg/src/pdf/images.rs
|
|
329
332
|
- vendor/kreuzberg/src/pdf/metadata.rs
|
|
@@ -341,6 +344,7 @@ files:
|
|
|
341
344
|
- vendor/kreuzberg/src/stopwords/mod.rs
|
|
342
345
|
- vendor/kreuzberg/src/text/mod.rs
|
|
343
346
|
- vendor/kreuzberg/src/text/quality.rs
|
|
347
|
+
- vendor/kreuzberg/src/text/quality_processor.rs
|
|
344
348
|
- vendor/kreuzberg/src/text/string_utils.rs
|
|
345
349
|
- vendor/kreuzberg/src/text/token_reduction/cjk_utils.rs
|
|
346
350
|
- vendor/kreuzberg/src/text/token_reduction/config.rs
|
|
@@ -455,6 +459,7 @@ files:
|
|
|
455
459
|
- vendor/kreuzberg/tests/opml_extractor_tests.rs
|
|
456
460
|
- vendor/kreuzberg/tests/orgmode_extractor_tests.rs
|
|
457
461
|
- vendor/kreuzberg/tests/pdf_integration.rs
|
|
462
|
+
- vendor/kreuzberg/tests/pdfium_linking.rs
|
|
458
463
|
- vendor/kreuzberg/tests/pipeline_integration.rs
|
|
459
464
|
- vendor/kreuzberg/tests/plugin_ocr_backend_test.rs
|
|
460
465
|
- vendor/kreuzberg/tests/plugin_postprocessor_test.rs
|
|
@@ -469,7 +474,6 @@ files:
|
|
|
469
474
|
- vendor/kreuzberg/tests/typst_behavioral_tests.rs
|
|
470
475
|
- vendor/kreuzberg/tests/typst_extractor_tests.rs
|
|
471
476
|
- vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs
|
|
472
|
-
- vendor/rb-sys/.cargo-ok
|
|
473
477
|
- vendor/rb-sys/.cargo_vcs_info.json
|
|
474
478
|
- vendor/rb-sys/Cargo.lock
|
|
475
479
|
- vendor/rb-sys/Cargo.toml
|
|
@@ -492,7 +496,6 @@ files:
|
|
|
492
496
|
- vendor/rb-sys/src/stable_api.rs
|
|
493
497
|
- vendor/rb-sys/src/stable_api/compiled.c
|
|
494
498
|
- vendor/rb-sys/src/stable_api/compiled.rs
|
|
495
|
-
- vendor/rb-sys/src/stable_api/ruby_2_6.rs
|
|
496
499
|
- vendor/rb-sys/src/stable_api/ruby_2_7.rs
|
|
497
500
|
- vendor/rb-sys/src/stable_api/ruby_3_0.rs
|
|
498
501
|
- vendor/rb-sys/src/stable_api/ruby_3_1.rs
|
|
@@ -507,13 +510,13 @@ homepage: https://github.com/kreuzberg-dev/kreuzberg
|
|
|
507
510
|
licenses:
|
|
508
511
|
- MIT
|
|
509
512
|
metadata:
|
|
510
|
-
|
|
513
|
+
homepage_uri: https://github.com/kreuzberg-dev/kreuzberg
|
|
514
|
+
source_code_uri: https://github.com/kreuzberg-dev/kreuzberg
|
|
511
515
|
changelog_uri: https://github.com/kreuzberg-dev/kreuzberg/blob/main/CHANGELOG.md
|
|
512
516
|
documentation_uri: https://docs.kreuzberg.dev
|
|
513
|
-
|
|
514
|
-
keywords: document-intelligence,document-extraction,ocr,rust,bindings
|
|
517
|
+
bug_tracker_uri: https://github.com/kreuzberg-dev/kreuzberg/issues
|
|
515
518
|
rubygems_mfa_required: 'true'
|
|
516
|
-
|
|
519
|
+
keywords: document-intelligence,document-extraction,ocr,rust,bindings
|
|
517
520
|
post_install_message:
|
|
518
521
|
rdoc_options: []
|
|
519
522
|
require_paths:
|
|
@@ -529,7 +532,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
529
532
|
- !ruby/object:Gem::Version
|
|
530
533
|
version: '0'
|
|
531
534
|
requirements: []
|
|
532
|
-
rubygems_version: 3.
|
|
535
|
+
rubygems_version: 3.5.22
|
|
533
536
|
signing_key:
|
|
534
537
|
specification_version: 4
|
|
535
538
|
summary: High-performance document intelligence framework
|