kreuzberg 4.0.0.pre.rc.6 → 4.0.0.pre.rc.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +5 -3
  3. data/README.md +15 -9
  4. data/ext/kreuzberg_rb/native/.cargo/config.toml +2 -0
  5. data/ext/kreuzberg_rb/native/Cargo.lock +516 -324
  6. data/ext/kreuzberg_rb/native/Cargo.toml +13 -3
  7. data/ext/kreuzberg_rb/native/src/lib.rs +139 -2
  8. data/kreuzberg.gemspec +38 -4
  9. data/lib/kreuzberg/config.rb +34 -1
  10. data/lib/kreuzberg/result.rb +77 -14
  11. data/lib/kreuzberg/version.rb +1 -1
  12. data/sig/kreuzberg.rbs +23 -6
  13. data/vendor/kreuzberg/Cargo.toml +25 -11
  14. data/vendor/kreuzberg/README.md +13 -8
  15. data/vendor/kreuzberg/build.rs +17 -6
  16. data/vendor/kreuzberg/src/api/mod.rs +2 -0
  17. data/vendor/kreuzberg/src/chunking/mod.rs +1279 -79
  18. data/vendor/kreuzberg/src/chunking/processor.rs +220 -0
  19. data/vendor/kreuzberg/src/core/config.rs +49 -1
  20. data/vendor/kreuzberg/src/core/extractor.rs +134 -2
  21. data/vendor/kreuzberg/src/core/mod.rs +4 -2
  22. data/vendor/kreuzberg/src/core/pipeline.rs +188 -1
  23. data/vendor/kreuzberg/src/extraction/docx.rs +358 -0
  24. data/vendor/kreuzberg/src/extraction/html.rs +24 -8
  25. data/vendor/kreuzberg/src/extraction/image.rs +124 -1
  26. data/vendor/kreuzberg/src/extraction/libreoffice.rs +1 -2
  27. data/vendor/kreuzberg/src/extraction/office_metadata/odt_properties.rs +0 -3
  28. data/vendor/kreuzberg/src/extraction/pptx.rs +187 -87
  29. data/vendor/kreuzberg/src/extractors/archive.rs +1 -0
  30. data/vendor/kreuzberg/src/extractors/bibtex.rs +1 -0
  31. data/vendor/kreuzberg/src/extractors/docbook.rs +2 -0
  32. data/vendor/kreuzberg/src/extractors/docx.rs +50 -17
  33. data/vendor/kreuzberg/src/extractors/email.rs +29 -15
  34. data/vendor/kreuzberg/src/extractors/epub.rs +1 -0
  35. data/vendor/kreuzberg/src/extractors/excel.rs +2 -0
  36. data/vendor/kreuzberg/src/extractors/fictionbook.rs +1 -0
  37. data/vendor/kreuzberg/src/extractors/html.rs +29 -15
  38. data/vendor/kreuzberg/src/extractors/image.rs +25 -4
  39. data/vendor/kreuzberg/src/extractors/jats.rs +3 -0
  40. data/vendor/kreuzberg/src/extractors/jupyter.rs +1 -0
  41. data/vendor/kreuzberg/src/extractors/latex.rs +1 -0
  42. data/vendor/kreuzberg/src/extractors/markdown.rs +1 -0
  43. data/vendor/kreuzberg/src/extractors/mod.rs +78 -14
  44. data/vendor/kreuzberg/src/extractors/odt.rs +3 -3
  45. data/vendor/kreuzberg/src/extractors/opml.rs +1 -0
  46. data/vendor/kreuzberg/src/extractors/orgmode.rs +1 -0
  47. data/vendor/kreuzberg/src/extractors/pdf.rs +194 -17
  48. data/vendor/kreuzberg/src/extractors/pptx.rs +32 -13
  49. data/vendor/kreuzberg/src/extractors/rst.rs +1 -0
  50. data/vendor/kreuzberg/src/extractors/rtf.rs +3 -4
  51. data/vendor/kreuzberg/src/extractors/structured.rs +2 -0
  52. data/vendor/kreuzberg/src/extractors/text.rs +7 -2
  53. data/vendor/kreuzberg/src/extractors/typst.rs +1 -0
  54. data/vendor/kreuzberg/src/extractors/xml.rs +27 -15
  55. data/vendor/kreuzberg/src/keywords/processor.rs +9 -1
  56. data/vendor/kreuzberg/src/language_detection/mod.rs +43 -0
  57. data/vendor/kreuzberg/src/language_detection/processor.rs +219 -0
  58. data/vendor/kreuzberg/src/lib.rs +10 -2
  59. data/vendor/kreuzberg/src/mcp/mod.rs +2 -0
  60. data/vendor/kreuzberg/src/mcp/server.rs +14 -12
  61. data/vendor/kreuzberg/src/ocr/tesseract_backend.rs +2 -0
  62. data/vendor/kreuzberg/src/pdf/error.rs +8 -0
  63. data/vendor/kreuzberg/src/pdf/metadata.rs +238 -95
  64. data/vendor/kreuzberg/src/pdf/mod.rs +14 -2
  65. data/vendor/kreuzberg/src/pdf/rendering.rs +1 -2
  66. data/vendor/kreuzberg/src/pdf/table.rs +26 -2
  67. data/vendor/kreuzberg/src/pdf/text.rs +89 -7
  68. data/vendor/kreuzberg/src/plugins/extractor.rs +34 -3
  69. data/vendor/kreuzberg/src/plugins/mod.rs +3 -0
  70. data/vendor/kreuzberg/src/plugins/ocr.rs +22 -3
  71. data/vendor/kreuzberg/src/plugins/processor.rs +8 -0
  72. data/vendor/kreuzberg/src/plugins/registry.rs +2 -0
  73. data/vendor/kreuzberg/src/plugins/validator.rs +11 -0
  74. data/vendor/kreuzberg/src/text/mod.rs +6 -0
  75. data/vendor/kreuzberg/src/text/quality_processor.rs +219 -0
  76. data/vendor/kreuzberg/src/types.rs +173 -21
  77. data/vendor/kreuzberg/tests/archive_integration.rs +2 -0
  78. data/vendor/kreuzberg/tests/batch_processing.rs +5 -3
  79. data/vendor/kreuzberg/tests/concurrency_stress.rs +14 -6
  80. data/vendor/kreuzberg/tests/config_features.rs +15 -1
  81. data/vendor/kreuzberg/tests/config_loading_tests.rs +1 -0
  82. data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +2 -0
  83. data/vendor/kreuzberg/tests/email_integration.rs +2 -0
  84. data/vendor/kreuzberg/tests/error_handling.rs +43 -34
  85. data/vendor/kreuzberg/tests/format_integration.rs +2 -0
  86. data/vendor/kreuzberg/tests/image_integration.rs +2 -0
  87. data/vendor/kreuzberg/tests/mime_detection.rs +17 -16
  88. data/vendor/kreuzberg/tests/ocr_configuration.rs +4 -0
  89. data/vendor/kreuzberg/tests/ocr_errors.rs +22 -0
  90. data/vendor/kreuzberg/tests/ocr_quality.rs +2 -0
  91. data/vendor/kreuzberg/tests/odt_extractor_tests.rs +0 -21
  92. data/vendor/kreuzberg/tests/pdf_integration.rs +2 -0
  93. data/vendor/kreuzberg/tests/pipeline_integration.rs +25 -0
  94. data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +5 -0
  95. data/vendor/kreuzberg/tests/plugin_system.rs +6 -0
  96. data/vendor/kreuzberg/tests/registry_integration_tests.rs +1 -0
  97. data/vendor/kreuzberg/tests/rst_extractor_tests.rs +2 -0
  98. data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +0 -1
  99. data/vendor/kreuzberg/tests/security_validation.rs +1 -0
  100. data/vendor/kreuzberg/tests/test_fastembed.rs +45 -23
  101. data/vendor/kreuzberg/tests/typst_behavioral_tests.rs +1 -0
  102. data/vendor/kreuzberg/tests/typst_extractor_tests.rs +3 -2
  103. data/vendor/rb-sys/.cargo_vcs_info.json +2 -2
  104. data/vendor/rb-sys/Cargo.lock +15 -15
  105. data/vendor/rb-sys/Cargo.toml +4 -4
  106. data/vendor/rb-sys/Cargo.toml.orig +4 -4
  107. data/vendor/rb-sys/bin/release.sh +9 -8
  108. data/vendor/rb-sys/build/features.rs +5 -2
  109. data/vendor/rb-sys/build/main.rs +55 -15
  110. data/vendor/rb-sys/build/stable_api_config.rs +4 -2
  111. data/vendor/rb-sys/build/version.rs +3 -1
  112. data/vendor/rb-sys/src/macros.rs +2 -2
  113. data/vendor/rb-sys/src/special_consts.rs +1 -1
  114. data/vendor/rb-sys/src/stable_api/compiled.rs +1 -1
  115. data/vendor/rb-sys/src/stable_api/ruby_2_7.rs +12 -4
  116. data/vendor/rb-sys/src/stable_api/ruby_3_0.rs +12 -4
  117. data/vendor/rb-sys/src/stable_api/ruby_3_1.rs +12 -4
  118. data/vendor/rb-sys/src/stable_api/ruby_3_2.rs +12 -4
  119. data/vendor/rb-sys/src/stable_api/ruby_3_3.rs +19 -6
  120. data/vendor/rb-sys/src/stable_api/ruby_3_4.rs +17 -5
  121. data/vendor/rb-sys/src/stable_api.rs +0 -1
  122. data/vendor/rb-sys/src/tracking_allocator.rs +1 -3
  123. metadata +11 -10
  124. data/vendor/kreuzberg/src/extractors/fictionbook.rs.backup2 +0 -738
  125. data/vendor/rb-sys/.cargo-ok +0 -1
  126. data/vendor/rb-sys/src/stable_api/ruby_2_6.rs +0 -316
@@ -1,9 +1,9 @@
1
1
  use super::StableApiDefinition;
2
2
  use crate::{
3
- VALUE, debug_ruby_assert_type,
3
+ debug_ruby_assert_type,
4
4
  internal::{RArray, RString, RTypedData},
5
5
  ruby_value_type::RUBY_T_DATA,
6
- value_type,
6
+ value_type, VALUE,
7
7
  };
8
8
  use std::{
9
9
  ffi::c_void,
@@ -307,7 +307,11 @@ impl StableApiDefinition for Definition {
307
307
 
308
308
  #[inline]
309
309
  unsafe fn rtypeddata_type(&self, obj: VALUE) -> *const crate::rb_data_type_t {
310
- debug_ruby_assert_type!(obj, RUBY_T_DATA, "rtypeddata_type called on non-T_DATA object");
310
+ debug_ruby_assert_type!(
311
+ obj,
312
+ RUBY_T_DATA,
313
+ "rtypeddata_type called on non-T_DATA object"
314
+ );
311
315
 
312
316
  let rdata = obj as *const RTypedData;
313
317
  (*rdata).type_
@@ -315,7 +319,11 @@ impl StableApiDefinition for Definition {
315
319
 
316
320
  #[inline]
317
321
  unsafe fn rtypeddata_get_data(&self, obj: VALUE) -> *mut c_void {
318
- debug_ruby_assert_type!(obj, RUBY_T_DATA, "rtypeddata_get_data called on non-T_DATA object");
322
+ debug_ruby_assert_type!(
323
+ obj,
324
+ RUBY_T_DATA,
325
+ "rtypeddata_get_data called on non-T_DATA object"
326
+ );
319
327
 
320
328
  // For Ruby 3.0 and lower, simply return the data field
321
329
  let rdata = obj as *const RTypedData;
@@ -1,9 +1,9 @@
1
1
  use super::StableApiDefinition;
2
2
  use crate::{
3
- VALUE, debug_ruby_assert_type,
3
+ debug_ruby_assert_type,
4
4
  internal::{RArray, RString, RTypedData},
5
5
  ruby_value_type::RUBY_T_DATA,
6
- value_type,
6
+ value_type, VALUE,
7
7
  };
8
8
  use std::{
9
9
  ffi::c_void,
@@ -300,7 +300,11 @@ impl StableApiDefinition for Definition {
300
300
 
301
301
  #[inline]
302
302
  unsafe fn rtypeddata_type(&self, obj: VALUE) -> *const crate::rb_data_type_t {
303
- debug_ruby_assert_type!(obj, RUBY_T_DATA, "rtypeddata_type called on non-T_DATA object");
303
+ debug_ruby_assert_type!(
304
+ obj,
305
+ RUBY_T_DATA,
306
+ "rtypeddata_type called on non-T_DATA object"
307
+ );
304
308
 
305
309
  let rdata = obj as *const RTypedData;
306
310
  (*rdata).type_
@@ -308,7 +312,11 @@ impl StableApiDefinition for Definition {
308
312
 
309
313
  #[inline]
310
314
  unsafe fn rtypeddata_get_data(&self, obj: VALUE) -> *mut c_void {
311
- debug_ruby_assert_type!(obj, RUBY_T_DATA, "rtypeddata_get_data called on non-T_DATA object");
315
+ debug_ruby_assert_type!(
316
+ obj,
317
+ RUBY_T_DATA,
318
+ "rtypeddata_get_data called on non-T_DATA object"
319
+ );
312
320
 
313
321
  // For Ruby 3.1 and lower, simply return the data field
314
322
  let rdata = obj as *const RTypedData;
@@ -1,9 +1,9 @@
1
1
  use super::StableApiDefinition;
2
2
  use crate::{
3
- VALUE, debug_ruby_assert_type,
3
+ debug_ruby_assert_type,
4
4
  internal::{RArray, RString, RTypedData},
5
5
  ruby_value_type::RUBY_T_DATA,
6
- value_type,
6
+ value_type, VALUE,
7
7
  };
8
8
  use std::{
9
9
  ffi::c_void,
@@ -298,7 +298,11 @@ impl StableApiDefinition for Definition {
298
298
 
299
299
  #[inline]
300
300
  unsafe fn rtypeddata_type(&self, obj: VALUE) -> *const crate::rb_data_type_t {
301
- debug_ruby_assert_type!(obj, RUBY_T_DATA, "rtypeddata_type called on non-T_DATA object");
301
+ debug_ruby_assert_type!(
302
+ obj,
303
+ RUBY_T_DATA,
304
+ "rtypeddata_type called on non-T_DATA object"
305
+ );
302
306
 
303
307
  let rdata = obj as *const RTypedData;
304
308
  (*rdata).type_
@@ -306,7 +310,11 @@ impl StableApiDefinition for Definition {
306
310
 
307
311
  #[inline]
308
312
  unsafe fn rtypeddata_get_data(&self, obj: VALUE) -> *mut c_void {
309
- debug_ruby_assert_type!(obj, RUBY_T_DATA, "rtypeddata_get_data called on non-T_DATA object");
313
+ debug_ruby_assert_type!(
314
+ obj,
315
+ RUBY_T_DATA,
316
+ "rtypeddata_get_data called on non-T_DATA object"
317
+ );
310
318
 
311
319
  // For Ruby 3.2 and lower, simply return the data field
312
320
  let rdata = obj as *const RTypedData;
@@ -1,9 +1,9 @@
1
1
  use super::StableApiDefinition;
2
2
  use crate::{
3
- VALUE, debug_ruby_assert_type,
3
+ debug_ruby_assert_type,
4
4
  internal::{RArray, RString},
5
5
  ruby_value_type::RUBY_T_DATA,
6
- value_type,
6
+ value_type, VALUE,
7
7
  };
8
8
  use std::{
9
9
  os::raw::{c_char, c_long},
@@ -284,7 +284,11 @@ impl StableApiDefinition for Definition {
284
284
 
285
285
  #[inline]
286
286
  unsafe fn rtypeddata_embedded_p(&self, obj: VALUE) -> bool {
287
- debug_ruby_assert_type!(obj, RUBY_T_DATA, "rtypeddata_embedded_p called on non-T_DATA object");
287
+ debug_ruby_assert_type!(
288
+ obj,
289
+ RUBY_T_DATA,
290
+ "rtypeddata_embedded_p called on non-T_DATA object"
291
+ );
288
292
 
289
293
  let rdata = obj as *const crate::internal::RTypedData;
290
294
  let typed_flag = (*rdata).typed_flag;
@@ -298,7 +302,11 @@ impl StableApiDefinition for Definition {
298
302
 
299
303
  #[inline]
300
304
  unsafe fn rtypeddata_type(&self, obj: VALUE) -> *const crate::rb_data_type_t {
301
- debug_ruby_assert_type!(obj, RUBY_T_DATA, "rtypeddata_type called on non-T_DATA object");
305
+ debug_ruby_assert_type!(
306
+ obj,
307
+ RUBY_T_DATA,
308
+ "rtypeddata_type called on non-T_DATA object"
309
+ );
302
310
 
303
311
  let rdata = obj as *const crate::internal::RTypedData;
304
312
  (*rdata).type_
@@ -306,14 +314,19 @@ impl StableApiDefinition for Definition {
306
314
 
307
315
  #[inline]
308
316
  unsafe fn rtypeddata_get_data(&self, obj: VALUE) -> *mut std::ffi::c_void {
309
- debug_ruby_assert_type!(obj, RUBY_T_DATA, "rtypeddata_get_data called on non-T_DATA object");
317
+ debug_ruby_assert_type!(
318
+ obj,
319
+ RUBY_T_DATA,
320
+ "rtypeddata_get_data called on non-T_DATA object"
321
+ );
310
322
 
311
323
  if self.rtypeddata_embedded_p(obj) {
312
324
  // For embedded data, calculate pointer based on struct layout
313
325
  // The formula matches Ruby's implementation:
314
326
  // embedded_typed_data_size = sizeof(RTypedData) - sizeof(void *)
315
327
  const EMBEDDED_TYPED_DATA_SIZE: usize =
316
- std::mem::size_of::<crate::internal::RTypedData>() - std::mem::size_of::<*mut std::ffi::c_void>();
328
+ std::mem::size_of::<crate::internal::RTypedData>()
329
+ - std::mem::size_of::<*mut std::ffi::c_void>();
317
330
 
318
331
  // Return address after the header as the data pointer
319
332
  (obj as *mut u8).add(EMBEDDED_TYPED_DATA_SIZE) as *mut std::ffi::c_void
@@ -1,9 +1,9 @@
1
1
  use super::StableApiDefinition;
2
2
  use crate::{
3
- VALUE, debug_ruby_assert_type,
3
+ debug_ruby_assert_type,
4
4
  internal::{RArray, RString, RTypedData},
5
5
  ruby_value_type::RUBY_T_DATA,
6
- value_type,
6
+ value_type, VALUE,
7
7
  };
8
8
  use std::{
9
9
  ffi::c_void,
@@ -285,7 +285,11 @@ impl StableApiDefinition for Definition {
285
285
 
286
286
  #[inline]
287
287
  unsafe fn rtypeddata_embedded_p(&self, obj: VALUE) -> bool {
288
- debug_ruby_assert_type!(obj, RUBY_T_DATA, "rtypeddata_embedded_p called on non-T_DATA object");
288
+ debug_ruby_assert_type!(
289
+ obj,
290
+ RUBY_T_DATA,
291
+ "rtypeddata_embedded_p called on non-T_DATA object"
292
+ );
289
293
 
290
294
  let rdata = obj as *const RTypedData;
291
295
  let typed_flag = (*rdata).typed_flag;
@@ -299,7 +303,11 @@ impl StableApiDefinition for Definition {
299
303
 
300
304
  #[inline]
301
305
  unsafe fn rtypeddata_type(&self, obj: VALUE) -> *const crate::rb_data_type_t {
302
- debug_ruby_assert_type!(obj, RUBY_T_DATA, "rtypeddata_type called on non-T_DATA object");
306
+ debug_ruby_assert_type!(
307
+ obj,
308
+ RUBY_T_DATA,
309
+ "rtypeddata_type called on non-T_DATA object"
310
+ );
303
311
 
304
312
  let rdata = obj as *const RTypedData;
305
313
  (*rdata).type_
@@ -307,7 +315,11 @@ impl StableApiDefinition for Definition {
307
315
 
308
316
  #[inline]
309
317
  unsafe fn rtypeddata_get_data(&self, obj: VALUE) -> *mut c_void {
310
- debug_ruby_assert_type!(obj, RUBY_T_DATA, "rtypeddata_get_data called on non-T_DATA object");
318
+ debug_ruby_assert_type!(
319
+ obj,
320
+ RUBY_T_DATA,
321
+ "rtypeddata_get_data called on non-T_DATA object"
322
+ );
311
323
 
312
324
  if self.rtypeddata_embedded_p(obj) {
313
325
  // For embedded data, calculate pointer based on struct layout
@@ -226,7 +226,6 @@ mod compiled;
226
226
  use compiled as api;
227
227
 
228
228
  #[cfg(stable_api_include_rust_impl)]
229
- #[cfg_attr(ruby_eq_2_6, path = "stable_api/ruby_2_6.rs")]
230
229
  #[cfg_attr(ruby_eq_2_7, path = "stable_api/ruby_2_7.rs")]
231
230
  #[cfg_attr(ruby_eq_3_0, path = "stable_api/ruby_3_0.rs")]
232
231
  #[cfg_attr(ruby_eq_3_1, path = "stable_api/ruby_3_1.rs")]
@@ -3,8 +3,8 @@
3
3
  use std::{
4
4
  fmt::Formatter,
5
5
  sync::{
6
- Arc,
7
6
  atomic::{AtomicIsize, Ordering},
7
+ Arc,
8
8
  },
9
9
  };
10
10
 
@@ -59,13 +59,11 @@ mod mri {
59
59
 
60
60
  unsafe {
61
61
  if is_ruby_vm_started() {
62
- // On Windows, ssize_t is i32 even on 64-bit, so cast i64 to i32
63
62
  #[cfg(all(target_pointer_width = "64", target_os = "windows"))]
64
63
  rb_gc_adjust_memory_usage(delta as i32);
65
64
 
66
65
  #[cfg(not(all(target_pointer_width = "64", target_os = "windows")))]
67
66
  rb_gc_adjust_memory_usage(delta);
68
-
69
67
  delta as isize
70
68
  } else {
71
69
  0
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kreuzberg
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.0.pre.rc.6
4
+ version: 4.0.0.pre.rc.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Na'aman Hirschfeld
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-12-10 00:00:00.000000000 Z
11
+ date: 2025-12-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -186,6 +186,7 @@ files:
186
186
  - Steepfile
187
187
  - examples/async_patterns.rb
188
188
  - ext/kreuzberg_rb/extconf.rb
189
+ - ext/kreuzberg_rb/native/.cargo/config.toml
189
190
  - ext/kreuzberg_rb/native/Cargo.lock
190
191
  - ext/kreuzberg_rb/native/Cargo.toml
191
192
  - ext/kreuzberg_rb/native/README.md
@@ -241,6 +242,7 @@ files:
241
242
  - vendor/kreuzberg/src/api/types.rs
242
243
  - vendor/kreuzberg/src/cache/mod.rs
243
244
  - vendor/kreuzberg/src/chunking/mod.rs
245
+ - vendor/kreuzberg/src/chunking/processor.rs
244
246
  - vendor/kreuzberg/src/core/batch_mode.rs
245
247
  - vendor/kreuzberg/src/core/config.rs
246
248
  - vendor/kreuzberg/src/core/extractor.rs
@@ -277,7 +279,6 @@ files:
277
279
  - vendor/kreuzberg/src/extractors/epub.rs
278
280
  - vendor/kreuzberg/src/extractors/excel.rs
279
281
  - vendor/kreuzberg/src/extractors/fictionbook.rs
280
- - vendor/kreuzberg/src/extractors/fictionbook.rs.backup2
281
282
  - vendor/kreuzberg/src/extractors/html.rs
282
283
  - vendor/kreuzberg/src/extractors/image.rs
283
284
  - vendor/kreuzberg/src/extractors/jats.rs
@@ -309,6 +310,7 @@ files:
309
310
  - vendor/kreuzberg/src/keywords/types.rs
310
311
  - vendor/kreuzberg/src/keywords/yake.rs
311
312
  - vendor/kreuzberg/src/language_detection/mod.rs
313
+ - vendor/kreuzberg/src/language_detection/processor.rs
312
314
  - vendor/kreuzberg/src/lib.rs
313
315
  - vendor/kreuzberg/src/mcp/mod.rs
314
316
  - vendor/kreuzberg/src/mcp/server.rs
@@ -341,6 +343,7 @@ files:
341
343
  - vendor/kreuzberg/src/stopwords/mod.rs
342
344
  - vendor/kreuzberg/src/text/mod.rs
343
345
  - vendor/kreuzberg/src/text/quality.rs
346
+ - vendor/kreuzberg/src/text/quality_processor.rs
344
347
  - vendor/kreuzberg/src/text/string_utils.rs
345
348
  - vendor/kreuzberg/src/text/token_reduction/cjk_utils.rs
346
349
  - vendor/kreuzberg/src/text/token_reduction/config.rs
@@ -469,7 +472,6 @@ files:
469
472
  - vendor/kreuzberg/tests/typst_behavioral_tests.rs
470
473
  - vendor/kreuzberg/tests/typst_extractor_tests.rs
471
474
  - vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs
472
- - vendor/rb-sys/.cargo-ok
473
475
  - vendor/rb-sys/.cargo_vcs_info.json
474
476
  - vendor/rb-sys/Cargo.lock
475
477
  - vendor/rb-sys/Cargo.toml
@@ -492,7 +494,6 @@ files:
492
494
  - vendor/rb-sys/src/stable_api.rs
493
495
  - vendor/rb-sys/src/stable_api/compiled.c
494
496
  - vendor/rb-sys/src/stable_api/compiled.rs
495
- - vendor/rb-sys/src/stable_api/ruby_2_6.rs
496
497
  - vendor/rb-sys/src/stable_api/ruby_2_7.rs
497
498
  - vendor/rb-sys/src/stable_api/ruby_3_0.rs
498
499
  - vendor/rb-sys/src/stable_api/ruby_3_1.rs
@@ -507,13 +508,13 @@ homepage: https://github.com/kreuzberg-dev/kreuzberg
507
508
  licenses:
508
509
  - MIT
509
510
  metadata:
510
- bug_tracker_uri: https://github.com/kreuzberg-dev/kreuzberg/issues
511
+ homepage_uri: https://github.com/kreuzberg-dev/kreuzberg
512
+ source_code_uri: https://github.com/kreuzberg-dev/kreuzberg
511
513
  changelog_uri: https://github.com/kreuzberg-dev/kreuzberg/blob/main/CHANGELOG.md
512
514
  documentation_uri: https://docs.kreuzberg.dev
513
- homepage_uri: https://github.com/kreuzberg-dev/kreuzberg
514
- keywords: document-intelligence,document-extraction,ocr,rust,bindings
515
+ bug_tracker_uri: https://github.com/kreuzberg-dev/kreuzberg/issues
515
516
  rubygems_mfa_required: 'true'
516
- source_code_uri: https://github.com/kreuzberg-dev/kreuzberg
517
+ keywords: document-intelligence,document-extraction,ocr,rust,bindings
517
518
  post_install_message:
518
519
  rdoc_options: []
519
520
  require_paths:
@@ -529,7 +530,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
529
530
  - !ruby/object:Gem::Version
530
531
  version: '0'
531
532
  requirements: []
532
- rubygems_version: 3.4.20
533
+ rubygems_version: 3.5.22
533
534
  signing_key:
534
535
  specification_version: 4
535
536
  summary: High-performance document intelligence framework