kreuzberg 4.4.6-aarch64-linux → 4.5.2-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b52fd8ab029d63ee54f697c758806e2f32566d3b112cf48865f59f9ddb9b4d5f
4
- data.tar.gz: 65c4f41bd2157f5d79e86fc9bda2b8389a991ffaed9568f602700e95adef55e9
3
+ metadata.gz: 5d37d9bc188b071a5a193db3a4cf610caaff6e8b9a84ef04bebfbf2abd5f7f16
4
+ data.tar.gz: 7d568f4f6b5e8e1be95ac814b55974d348a460c1ab4949aaafe3b0de9a3d9dbe
5
5
  SHA512:
6
- metadata.gz: b2c65f9ea098867bda920c3732f6b81c4e52c35aa30d25f15f5755808c34b97a780d35e2f39fc05273658b629c3276577878b4cda1893aa901ccf801e7f39953
7
- data.tar.gz: 5837f3f6e04712ad82d02bc121e3cb483d28a8b5b32a27f6b3af4f57e8ba875a339fc8e63d1fbc5a466e96d78968b63b5a303a9a58c812e4064e367094fbc511
6
+ metadata.gz: 9d5a0a4d9e6917ef0ac38e319410752e0032de7742a167a2a34e5c48356f1af145d1a2f56f0d7d6d28fef439c4f82710655116f03f1077c851f521e5fc4ba09c
7
+ data.tar.gz: '01898e03048b285ce8347ec975041bea2e633fed829abeaf8d1842fcc5137d8dacecf54f971ddc5a9a61261dc45ac30de3c408f8bab696af4597a648e0683a31'
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- kreuzberg (4.4.6)
4
+ kreuzberg (4.5.2)
5
5
  rb_sys (~> 0.9.119)
6
6
  sorbet-runtime (~> 0.5)
7
7
 
@@ -49,7 +49,7 @@ GEM
49
49
  i18n (1.14.8)
50
50
  concurrent-ruby (~> 1.0)
51
51
  io-console (0.8.2)
52
- json (2.19.1)
52
+ json (2.19.2)
53
53
  json-schema (6.2.0)
54
54
  addressable (~> 2.8)
55
55
  bigdecimal (>= 3.1, < 5)
@@ -60,7 +60,7 @@ GEM
60
60
  rb-fsevent (~> 0.10, >= 0.10.3)
61
61
  rb-inotify (~> 0.9, >= 0.9.10)
62
62
  logger (1.7.0)
63
- mcp (0.8.0)
63
+ mcp (0.9.0)
64
64
  json-schema (>= 4.1)
65
65
  method_source (1.1.0)
66
66
  minitest (6.0.2)
@@ -134,7 +134,7 @@ GEM
134
134
  rubocop (~> 1.81)
135
135
  ruby-progressbar (1.13.0)
136
136
  securerandom (0.4.1)
137
- sorbet-runtime (0.6.13023)
137
+ sorbet-runtime (0.6.13055)
138
138
  steep (1.10.0)
139
139
  activesupport (>= 5.1)
140
140
  concurrent-ruby (>= 1.1.10)
@@ -220,14 +220,14 @@ CHECKSUMS
220
220
  fileutils (1.8.0) sha256=8c6b1df54e2540bdb2f39258f08af78853aa70bad52b4d394bbc6424593c6e02
221
221
  i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5
222
222
  io-console (0.8.2) sha256=d6e3ae7a7cc7574f4b8893b4fca2162e57a825b223a177b7afa236c5ef9814cc
223
- json (2.19.1) sha256=dd94fdc59e48bff85913829a32350b3148156bc4fd2a95a2568a78b11344082d
223
+ json (2.19.2) sha256=e7e1bd318b2c37c4ceee2444841c86539bc462e81f40d134cf97826cb14e83cf
224
224
  json-schema (6.2.0) sha256=e8bff46ed845a22c1ab2bd0d7eccf831c01fe23bb3920caa4c74db4306813666
225
- kreuzberg (4.4.6)
225
+ kreuzberg (4.5.2)
226
226
  language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
227
227
  lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
228
228
  listen (3.10.0) sha256=c6e182db62143aeccc2e1960033bebe7445309c7272061979bb098d03760c9d2
229
229
  logger (1.7.0) sha256=196edec7cc44b66cfb40f9755ce11b392f21f7967696af15d274dde7edff0203
230
- mcp (0.8.0) sha256=ae8bd146bb8e168852866fd26f805f52744f6326afb3211e073f78a95e0c34fb
230
+ mcp (0.9.0) sha256=a0a3737b0ac9df0772f4ef7e2b013c260ddbcf217a5d50a66bff0baeddf03e47
231
231
  method_source (1.1.0) sha256=181301c9c45b731b4769bc81e8860e72f9161ad7d66dd99103c9ab84f560f5c5
232
232
  minitest (6.0.2) sha256=db6e57956f6ecc6134683b4c87467d6dd792323c7f0eea7b93f66bd284adbc3d
233
233
  mutex_m (0.3.0) sha256=cfcb04ac16b69c4813777022fdceda24e9f798e48092a2b817eb4c0a782b0751
@@ -259,7 +259,7 @@ CHECKSUMS
259
259
  rubocop-rspec (3.9.0) sha256=8fa70a3619408237d789aeecfb9beef40576acc855173e60939d63332fdb55e2
260
260
  ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33
261
261
  securerandom (0.4.1) sha256=cc5193d414a4341b6e225f0cb4446aceca8e50d5e1888743fac16987638ea0b1
262
- sorbet-runtime (0.6.13023) sha256=c00d11cc54951efbc0aece994dd6b20b1d1cb2a2606100c24d4ae7f840383073
262
+ sorbet-runtime (0.6.13055) sha256=c8ae8c81310e0a28d290b11f44ddca59659b7d7f13752c0ef5d16964bbb84d18
263
263
  steep (1.10.0) sha256=1b295b55f9aaff1b8d3ee42453ee55bc2a1078fda0268f288edb2dc014f4d7d1
264
264
  strscan (3.1.7) sha256=5f76462b94a3ea50b44973225b7d75b2cb96d4e1bee9ef1319b99ca117b72c8c
265
265
  terminal-table (4.0.0) sha256=f504793203f8251b2ea7c7068333053f0beeea26093ec9962e62ea79f94301d2
data/README.md CHANGED
@@ -22,7 +22,7 @@
22
22
  <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
23
23
  </a>
24
24
  <a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
25
- <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.4.6" alt="Go">
25
+ <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.5.2" alt="Go">
26
26
  </a>
27
27
  <a href="https://www.nuget.org/packages/Kreuzberg/">
28
28
  <img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
@@ -47,6 +47,9 @@
47
47
  <a href="https://docs.kreuzberg.dev">
48
48
  <img src="https://img.shields.io/badge/docs-kreuzberg.dev-blue" alt="Documentation">
49
49
  </a>
50
+ <a href="https://huggingface.co/Kreuzberg">
51
+ <img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Models-yellow" alt="Hugging Face">
52
+ </a>
50
53
  </div>
51
54
 
52
55
  <img width="1128" height="191" alt="Banner2" src="https://github.com/user-attachments/assets/419fc06c-8313-4324-b159-4b4d3cfce5c0" />
@@ -392,7 +392,8 @@ module Kreuzberg
392
392
  #
393
393
  class PDF
394
394
  attr_reader :extract_images, :passwords, :extract_metadata, :font_config, :hierarchy,
395
- :extract_annotations, :top_margin_fraction, :bottom_margin_fraction
395
+ :extract_annotations, :top_margin_fraction, :bottom_margin_fraction,
396
+ :allow_single_column_tables
396
397
 
397
398
  def initialize(
398
399
  extract_images: false,
@@ -402,7 +403,8 @@ module Kreuzberg
402
403
  hierarchy: nil,
403
404
  extract_annotations: false,
404
405
  top_margin_fraction: nil,
405
- bottom_margin_fraction: nil
406
+ bottom_margin_fraction: nil,
407
+ allow_single_column_tables: false
406
408
  )
407
409
  @extract_images = extract_images ? true : false
408
410
  @passwords = if passwords.is_a?(Array)
@@ -416,6 +418,7 @@ module Kreuzberg
416
418
  @extract_annotations = extract_annotations ? true : false
417
419
  @top_margin_fraction = top_margin_fraction&.to_f
418
420
  @bottom_margin_fraction = bottom_margin_fraction&.to_f
421
+ @allow_single_column_tables = allow_single_column_tables ? true : false
419
422
  end
420
423
 
421
424
  def to_h
@@ -427,7 +430,8 @@ module Kreuzberg
427
430
  hierarchy: @hierarchy&.to_h,
428
431
  extract_annotations: @extract_annotations,
429
432
  top_margin_fraction: @top_margin_fraction,
430
- bottom_margin_fraction: @bottom_margin_fraction
433
+ bottom_margin_fraction: @bottom_margin_fraction,
434
+ allow_single_column_tables: @allow_single_column_tables
431
435
  }.compact
432
436
  end
433
437
 
@@ -803,6 +807,85 @@ module Kreuzberg
803
807
  end
804
808
  end
805
809
 
810
+ # Hardware acceleration configuration for ONNX Runtime
811
+ #
812
+ # Controls which execution provider (CPU, CoreML, CUDA, TensorRT) is used
813
+ # for inference in layout detection and embedding generation.
814
+ #
815
+ # @example Auto-select provider (CoreML on macOS, CUDA on Linux, CPU elsewhere)
816
+ # acceleration = Acceleration.new
817
+ #
818
+ # @example Force CPU only
819
+ # acceleration = Acceleration.new(provider: 'cpu')
820
+ #
821
+ # @example Use CUDA with specific device
822
+ # acceleration = Acceleration.new(provider: 'cuda', device_id: 0)
823
+ #
824
+ class Acceleration
825
+ attr_reader :provider, :device_id
826
+
827
+ def initialize(provider: 'auto', device_id: 0)
828
+ @provider = provider.to_s
829
+ @device_id = device_id.to_i
830
+ end
831
+
832
+ def to_h
833
+ {
834
+ provider: @provider,
835
+ device_id: @device_id
836
+ }
837
+ end
838
+ end
839
+
840
+ # Layout detection configuration
841
+ #
842
+ # @example Basic usage with fast preset
843
+ # layout = LayoutDetection.new(preset: "fast")
844
+ #
845
+ # @example Accurate preset with custom threshold
846
+ # layout = LayoutDetection.new(
847
+ # preset: "accurate",
848
+ # confidence_threshold: 0.5,
849
+ # apply_heuristics: true
850
+ # )
851
+ #
852
+ class LayoutDetection
853
+ attr_reader :preset, :confidence_threshold, :apply_heuristics
854
+
855
+ def initialize(preset: 'fast', confidence_threshold: nil, apply_heuristics: true)
856
+ @preset = preset.to_s
857
+ @confidence_threshold = confidence_threshold&.to_f
858
+ @apply_heuristics = apply_heuristics ? true : false
859
+ end
860
+
861
+ def to_h
862
+ {
863
+ preset: @preset,
864
+ confidence_threshold: @confidence_threshold,
865
+ apply_heuristics: @apply_heuristics
866
+ }.compact
867
+ end
868
+ end
869
+
870
+ # Concurrency configuration for thread pool management
871
+ #
872
+ # @example Limit max threads
873
+ # concurrency = Concurrency.new(max_threads: 4)
874
+ #
875
+ class Concurrency
876
+ attr_reader :max_threads
877
+
878
+ def initialize(max_threads: nil)
879
+ @max_threads = max_threads&.to_i
880
+ end
881
+
882
+ def to_h
883
+ h = {}
884
+ h[:max_threads] = @max_threads unless @max_threads.nil?
885
+ h
886
+ end
887
+ end
888
+
806
889
  # Main extraction configuration
807
890
  #
808
891
  # @example Basic usage
@@ -847,7 +930,8 @@ module Kreuzberg
847
930
  :images, :postprocessor,
848
931
  :token_reduction, :keywords, :html_options, :pages,
849
932
  :max_concurrent_extractions, :output_format, :result_format,
850
- :security_limits
933
+ :security_limits, :layout, :concurrency,
934
+ :cache_namespace, :cache_ttl_secs
851
935
 
852
936
  # Alias for backward compatibility - image_extraction is the canonical name
853
937
  alias image_extraction images
@@ -872,7 +956,7 @@ module Kreuzberg
872
956
  language_detection pdf_options image_extraction
873
957
  postprocessor token_reduction keywords html_options pages
874
958
  max_concurrent_extractions output_format result_format
875
- security_limits
959
+ security_limits layout concurrency cache_namespace cache_ttl_secs
876
960
  ].freeze
877
961
 
878
962
  # Aliases for backward compatibility
@@ -947,7 +1031,11 @@ module Kreuzberg
947
1031
  max_concurrent_extractions: nil,
948
1032
  output_format: nil,
949
1033
  result_format: nil,
950
- security_limits: nil)
1034
+ security_limits: nil,
1035
+ layout: nil,
1036
+ concurrency: nil,
1037
+ cache_namespace: nil,
1038
+ cache_ttl_secs: nil)
951
1039
  kwargs = {
952
1040
  use_cache: use_cache, enable_quality_processing: enable_quality_processing,
953
1041
  force_ocr: force_ocr, include_document_structure: include_document_structure,
@@ -957,7 +1045,10 @@ module Kreuzberg
957
1045
  token_reduction: token_reduction, keywords: keywords, html_options: html_options,
958
1046
  pages: pages, max_concurrent_extractions: max_concurrent_extractions,
959
1047
  output_format: output_format, result_format: result_format,
960
- security_limits: security_limits
1048
+ security_limits: security_limits, layout: layout,
1049
+ concurrency: concurrency,
1050
+ cache_namespace: cache_namespace,
1051
+ cache_ttl_secs: cache_ttl_secs
961
1052
  }
962
1053
  extracted = extract_from_hash(hash, kwargs)
963
1054
 
@@ -971,7 +1062,7 @@ module Kreuzberg
971
1062
  defaults.merge(hash.slice(*defaults.keys))
972
1063
  end
973
1064
 
974
- def assign_attributes(params)
1065
+ def assign_attributes(params) # rubocop:disable Metrics/MethodLength
975
1066
  @use_cache = params[:use_cache] ? true : false
976
1067
  @enable_quality_processing = params[:enable_quality_processing] ? true : false
977
1068
  @force_ocr = params[:force_ocr] ? true : false
@@ -986,9 +1077,13 @@ module Kreuzberg
986
1077
  @keywords = normalize_config(params[:keywords], Keywords)
987
1078
  @html_options = normalize_config(params[:html_options], HtmlOptions)
988
1079
  @pages = normalize_config(params[:pages], PageConfig)
1080
+ @layout = normalize_config(params[:layout], LayoutDetection)
1081
+ @concurrency = normalize_config(params[:concurrency], Concurrency)
989
1082
  @max_concurrent_extractions = params[:max_concurrent_extractions]&.to_i
990
1083
  @output_format = validate_output_format(params[:output_format])
991
1084
  @result_format = validate_result_format(params[:result_format])
1085
+ @cache_namespace = params[:cache_namespace]
1086
+ @cache_ttl_secs = params[:cache_ttl_secs]&.to_i
992
1087
  @security_limits = params[:security_limits]
993
1088
  end
994
1089
 
@@ -1024,7 +1119,9 @@ module Kreuzberg
1024
1119
  include_document_structure: @include_document_structure,
1025
1120
  max_concurrent_extractions: @max_concurrent_extractions,
1026
1121
  output_format: @output_format,
1027
- result_format: @result_format
1122
+ result_format: @result_format,
1123
+ cache_namespace: @cache_namespace,
1124
+ cache_ttl_secs: @cache_ttl_secs
1028
1125
  }
1029
1126
  end
1030
1127
 
@@ -1034,7 +1131,8 @@ module Kreuzberg
1034
1131
  language_detection: @language_detection&.to_h, pdf_options: @pdf_options&.to_h,
1035
1132
  image_extraction: @images&.to_h, postprocessor: @postprocessor&.to_h,
1036
1133
  token_reduction: @token_reduction&.to_h, keywords: @keywords&.to_h,
1037
- html_options: @html_options&.to_h, pages: @pages&.to_h
1134
+ html_options: @html_options&.to_h, pages: @pages&.to_h,
1135
+ layout: @layout&.to_h, concurrency: @concurrency&.to_h
1038
1136
  }
1039
1137
  end
1040
1138
 
@@ -1172,12 +1270,20 @@ module Kreuzberg
1172
1270
  @html_options = normalize_config(value, HtmlOptions)
1173
1271
  when :pages
1174
1272
  @pages = normalize_config(value, PageConfig)
1273
+ when :layout
1274
+ @layout = normalize_config(value, LayoutDetection)
1275
+ when :concurrency
1276
+ @concurrency = normalize_config(value, Concurrency)
1175
1277
  when :max_concurrent_extractions
1176
1278
  @max_concurrent_extractions = value&.to_i
1177
1279
  when :output_format
1178
1280
  @output_format = validate_output_format(value)
1179
1281
  when :result_format
1180
1282
  @result_format = validate_result_format(value)
1283
+ when :cache_namespace
1284
+ @cache_namespace = value
1285
+ when :cache_ttl_secs
1286
+ @cache_ttl_secs = value&.to_i
1181
1287
  else
1182
1288
  raise ArgumentError, "Unknown configuration key: #{key}"
1183
1289
  end
@@ -1228,6 +1334,12 @@ module Kreuzberg
1228
1334
  end
1229
1335
 
1230
1336
  def update_from_merged(merged)
1337
+ update_core_options(merged)
1338
+ update_processing_options(merged)
1339
+ update_output_options(merged)
1340
+ end
1341
+
1342
+ def update_core_options(merged)
1231
1343
  @use_cache = merged.use_cache
1232
1344
  @enable_quality_processing = merged.enable_quality_processing
1233
1345
  @force_ocr = merged.force_ocr
@@ -1235,6 +1347,9 @@ module Kreuzberg
1235
1347
  @ocr = merged.ocr
1236
1348
  @chunking = merged.chunking
1237
1349
  @language_detection = merged.language_detection
1350
+ end
1351
+
1352
+ def update_processing_options(merged)
1238
1353
  @pdf_options = merged.pdf_options
1239
1354
  @images = merged.image_extraction
1240
1355
  @postprocessor = merged.postprocessor
@@ -1242,9 +1357,16 @@ module Kreuzberg
1242
1357
  @keywords = merged.keywords
1243
1358
  @html_options = merged.html_options
1244
1359
  @pages = merged.pages
1360
+ @layout = merged.layout
1361
+ end
1362
+
1363
+ def update_output_options(merged)
1364
+ @concurrency = merged.concurrency
1245
1365
  @max_concurrent_extractions = merged.max_concurrent_extractions
1246
1366
  @output_format = merged.output_format
1247
1367
  @result_format = merged.result_format
1368
+ @cache_namespace = merged.cache_namespace
1369
+ @cache_ttl_secs = merged.cache_ttl_secs
1248
1370
  end
1249
1371
  end
1250
1372
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Kreuzberg
4
- VERSION = '4.4.6'
4
+ VERSION = '4.5.2'
5
5
  end
data/lib/kreuzberg_rb.so CHANGED
Binary file
data/sig/kreuzberg.rbs CHANGED
@@ -240,7 +240,9 @@ module Kreuzberg
240
240
  attr_reader det_db_unclip_ratio: Float?
241
241
  attr_reader det_limit_side_len: Integer?
242
242
  attr_reader rec_batch_num: Integer?
243
- def initialize: (?language: String?, ?cache_dir: String?, ?use_angle_cls: bool?, ?enable_table_detection: bool?, ?det_db_thresh: Float?, ?det_db_box_thresh: Float?, ?det_db_unclip_ratio: Float?, ?det_limit_side_len: Integer?, ?rec_batch_num: Integer?) -> void
243
+ attr_reader padding: Integer?
244
+ attr_reader model_tier: String?
245
+ def initialize: (?language: String?, ?cache_dir: String?, ?use_angle_cls: bool?, ?enable_table_detection: bool?, ?det_db_thresh: Float?, ?det_db_box_thresh: Float?, ?det_db_unclip_ratio: Float?, ?det_limit_side_len: Integer?, ?rec_batch_num: Integer?, ?padding: Integer?, ?model_tier: String?) -> void
244
246
  def to_h: () -> Hash[Symbol, untyped]
245
247
  end
246
248
 
@@ -332,8 +334,9 @@ module Kreuzberg
332
334
  attr_reader extract_annotations: bool
333
335
  attr_reader top_margin_fraction: Float?
334
336
  attr_reader bottom_margin_fraction: Float?
337
+ attr_reader allow_single_column_tables: bool
335
338
 
336
- def initialize: (?extract_images: bool, ?passwords: (Array[String] | String)?, ?extract_metadata: bool, ?font_config: (FontConfig | Hash[Symbol, untyped])?, ?hierarchy: (Hierarchy | Hash[Symbol, untyped])?, ?extract_annotations: bool, ?top_margin_fraction: Float?, ?bottom_margin_fraction: Float?) -> void
339
+ def initialize: (?extract_images: bool, ?passwords: (Array[String] | String)?, ?extract_metadata: bool, ?font_config: (FontConfig | Hash[Symbol, untyped])?, ?hierarchy: (Hierarchy | Hash[Symbol, untyped])?, ?extract_annotations: bool, ?top_margin_fraction: Float?, ?bottom_margin_fraction: Float?, ?allow_single_column_tables: bool) -> void
337
340
  def to_h: () -> Hash[Symbol, untyped]
338
341
  end
339
342
 
@@ -456,9 +459,27 @@ module Kreuzberg
456
459
  def to_h: () -> Hash[Symbol, untyped]
457
460
  end
458
461
 
462
+ class LayoutDetection
463
+ attr_reader preset: String
464
+ attr_reader confidence_threshold: Float?
465
+ attr_reader apply_heuristics: bool
466
+
467
+ def initialize: (?preset: String, ?confidence_threshold: Float?, ?apply_heuristics: bool) -> void
468
+ def to_h: () -> Hash[Symbol, untyped]
469
+ end
470
+
471
+ class Concurrency
472
+ attr_reader max_threads: Integer?
473
+
474
+ def initialize: (?max_threads: Integer?) -> void
475
+ def to_h: () -> Hash[Symbol, untyped]
476
+ end
477
+
459
478
  class Extraction
460
479
  attr_reader use_cache: bool
461
480
  attr_reader enable_quality_processing: bool
481
+ attr_reader cache_namespace: String?
482
+ attr_reader cache_ttl_secs: Integer?
462
483
  attr_reader force_ocr: bool
463
484
  attr_reader include_document_structure: bool
464
485
  attr_reader ocr: OCR?
@@ -471,6 +492,8 @@ module Kreuzberg
471
492
  attr_reader keywords: Keywords?
472
493
  attr_reader html_options: HtmlOptions?
473
494
  attr_reader pages: PageConfig?
495
+ attr_reader layout: LayoutDetection?
496
+ attr_reader concurrency: Concurrency?
474
497
  attr_reader max_concurrent_extractions: Integer?
475
498
  attr_reader output_format: String?
476
499
  attr_reader result_format: String?
@@ -495,9 +518,13 @@ module Kreuzberg
495
518
  ?keywords: (Keywords | Hash[Symbol, untyped])?,
496
519
  ?html_options: (HtmlOptions | Hash[Symbol, untyped])?,
497
520
  ?pages: (PageConfig | Hash[Symbol, untyped])?,
521
+ ?layout: (LayoutDetection | Hash[Symbol, untyped])?,
522
+ ?concurrency: (Concurrency | Hash[Symbol, untyped])?,
498
523
  ?max_concurrent_extractions: Integer?,
499
524
  ?output_format: String?,
500
- ?result_format: String?
525
+ ?result_format: String?,
526
+ ?cache_namespace: String?,
527
+ ?cache_ttl_secs: Integer?
501
528
  ) -> void
502
529
  def to_h: () -> Hash[Symbol, untyped]
503
530
  def to_json: (*untyped) -> String
@@ -755,6 +782,7 @@ module Kreuzberg
755
782
 
756
783
  type config_hash = Hash[Symbol, untyped]
757
784
  type config_input = config_hash | _ToH
785
+ type file_config_input = Hash[Symbol, untyped]?
758
786
 
759
787
  interface _ToH
760
788
  def to_h: () -> config_hash
@@ -1179,13 +1207,15 @@ module Kreuzberg
1179
1207
 
1180
1208
  def self.batch_extract_files_sync: (
1181
1209
  paths: Array[String | Pathname],
1182
- ?config: config_input?
1210
+ ?config: config_input?,
1211
+ ?file_configs: Array[file_config_input]?
1183
1212
  ) -> Array[Result]
1184
1213
 
1185
1214
  def self.batch_extract_bytes_sync: (
1186
1215
  data_array: Array[String],
1187
1216
  mime_types: Array[String],
1188
- ?config: config_input?
1217
+ ?config: config_input?,
1218
+ ?file_configs: Array[file_config_input]?
1189
1219
  ) -> Array[Result]
1190
1220
 
1191
1221
  def self.extract_file: (
@@ -1202,13 +1232,15 @@ module Kreuzberg
1202
1232
 
1203
1233
  def self.batch_extract_files: (
1204
1234
  paths: Array[String | Pathname],
1205
- ?config: config_input?
1235
+ ?config: config_input?,
1236
+ ?file_configs: Array[file_config_input]?
1206
1237
  ) -> Array[Result]
1207
1238
 
1208
1239
  def self.batch_extract_bytes: (
1209
1240
  data_array: Array[String],
1210
1241
  mime_types: Array[String],
1211
- ?config: config_input?
1242
+ ?config: config_input?,
1243
+ ?file_configs: Array[file_config_input]?
1212
1244
  ) -> Array[Result]
1213
1245
 
1214
1246
  # Cache API
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kreuzberg
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.4.6
4
+ version: 4.5.2
5
5
  platform: aarch64-linux
6
6
  authors:
7
7
  - Na'aman Hirschfeld
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-03-13 00:00:00.000000000 Z
11
+ date: 2026-03-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler