kreuzberg 4.5.4-aarch64-linux → 4.6.1-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dfd3586f04b1b2b6dcdf06af30712755fd3fd8d76c7bc03cee05a67133134998
4
- data.tar.gz: c402149a2765063da19d639eb2aa2b75340db6292b680c9218bc826d5997346c
3
+ metadata.gz: 7d632985bd042b73c0e51c84e292d9626267f205992bf49c9c62bcef78d9656f
4
+ data.tar.gz: ded364d7414b8fb4eafbda15af492698338799eb86749f9708f3f595ab7cad6d
5
5
  SHA512:
6
- metadata.gz: aa8f6ba036c180200182377b90b15f293cf008b440c49d732d5dfa84df203373b173b9928138d58917fd7dddfe41c3b4ea89cab582c69707253ed87163932349
7
- data.tar.gz: 211e120a58693b5c385ff0c577391d542a4ee9ca3e1194ca96e2e975a2636d78cdf9526b25852dad70d543142576317c2b344260f83148143d4d9ab6ac6822bd
6
+ metadata.gz: 5714ba27e6e41a99db90b829cd0c02986eab8c522673f62635ec0c5457d3d7367d290ef1146486fcccdda44fdb55751a99b24758454fdd58c90d93b83b77656e
7
+ data.tar.gz: ff4e355557401115eb34c227ddc4cd573e6b877f830b5b1a13b749508d202cd691866122e2be4132a883e7c80a4e7bae9f388296c79df7890adbd9aabcb822ca
data/README.md CHANGED
@@ -22,7 +22,7 @@
22
22
  <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
23
23
  </a>
24
24
  <a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
25
- <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.5.4" alt="Go">
25
+ <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.6.1" alt="Go">
26
26
  </a>
27
27
  <a href="https://www.nuget.org/packages/Kreuzberg/">
28
28
  <img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
@@ -926,14 +926,14 @@ module Kreuzberg
926
926
  # )
927
927
  #
928
928
  class Extraction
929
- attr_reader :use_cache, :enable_quality_processing, :force_ocr,
929
+ attr_reader :use_cache, :enable_quality_processing, :force_ocr, :force_ocr_pages,
930
930
  :include_document_structure,
931
931
  :ocr, :chunking, :language_detection, :pdf_options,
932
932
  :images, :postprocessor,
933
933
  :token_reduction, :keywords, :html_options, :pages,
934
934
  :max_concurrent_extractions, :output_format, :result_format,
935
935
  :security_limits, :layout, :concurrency,
936
- :cache_namespace, :cache_ttl_secs
936
+ :cache_namespace, :cache_ttl_secs, :extraction_timeout_secs
937
937
 
938
938
  # Alias for backward compatibility - image_extraction is the canonical name
939
939
  alias image_extraction images
@@ -954,11 +954,11 @@ module Kreuzberg
954
954
  #
955
955
  # Keys that are allowed in the Extraction config
956
956
  ALLOWED_KEYS = %i[
957
- use_cache enable_quality_processing force_ocr include_document_structure ocr chunking
957
+ use_cache enable_quality_processing force_ocr force_ocr_pages include_document_structure ocr chunking
958
958
  language_detection pdf_options image_extraction
959
959
  postprocessor token_reduction keywords html_options pages
960
960
  max_concurrent_extractions output_format result_format
961
- security_limits layout concurrency cache_namespace cache_ttl_secs
961
+ security_limits layout concurrency cache_namespace cache_ttl_secs extraction_timeout_secs
962
962
  ].freeze
963
963
 
964
964
  # Aliases for backward compatibility
@@ -1019,6 +1019,7 @@ module Kreuzberg
1019
1019
  use_cache: true,
1020
1020
  enable_quality_processing: true,
1021
1021
  force_ocr: false,
1022
+ force_ocr_pages: nil,
1022
1023
  include_document_structure: false,
1023
1024
  ocr: nil,
1024
1025
  chunking: nil,
@@ -1037,10 +1038,12 @@ module Kreuzberg
1037
1038
  layout: nil,
1038
1039
  concurrency: nil,
1039
1040
  cache_namespace: nil,
1040
- cache_ttl_secs: nil)
1041
+ cache_ttl_secs: nil,
1042
+ extraction_timeout_secs: nil)
1041
1043
  kwargs = {
1042
1044
  use_cache: use_cache, enable_quality_processing: enable_quality_processing,
1043
- force_ocr: force_ocr, include_document_structure: include_document_structure,
1045
+ force_ocr: force_ocr, force_ocr_pages: force_ocr_pages,
1046
+ include_document_structure: include_document_structure,
1044
1047
  ocr: ocr, chunking: chunking, language_detection: language_detection,
1045
1048
  pdf_options: pdf_options, image_extraction: image_extraction,
1046
1049
  postprocessor: postprocessor,
@@ -1050,7 +1053,8 @@ module Kreuzberg
1050
1053
  security_limits: security_limits, layout: layout,
1051
1054
  concurrency: concurrency,
1052
1055
  cache_namespace: cache_namespace,
1053
- cache_ttl_secs: cache_ttl_secs
1056
+ cache_ttl_secs: cache_ttl_secs,
1057
+ extraction_timeout_secs: extraction_timeout_secs
1054
1058
  }
1055
1059
  extracted = extract_from_hash(hash, kwargs)
1056
1060
 
@@ -1068,6 +1072,7 @@ module Kreuzberg
1068
1072
  @use_cache = params[:use_cache] ? true : false
1069
1073
  @enable_quality_processing = params[:enable_quality_processing] ? true : false
1070
1074
  @force_ocr = params[:force_ocr] ? true : false
1075
+ @force_ocr_pages = params[:force_ocr_pages]
1071
1076
  @include_document_structure = params[:include_document_structure] ? true : false
1072
1077
  @ocr = normalize_config(params[:ocr], OCR)
1073
1078
  @chunking = normalize_config(params[:chunking], Chunking)
@@ -1086,6 +1091,7 @@ module Kreuzberg
1086
1091
  @result_format = validate_result_format(params[:result_format])
1087
1092
  @cache_namespace = params[:cache_namespace]
1088
1093
  @cache_ttl_secs = params[:cache_ttl_secs]&.to_i
1094
+ @extraction_timeout_secs = params[:extraction_timeout_secs]&.to_i
1089
1095
  @security_limits = params[:security_limits]
1090
1096
  end
1091
1097
 
@@ -1118,12 +1124,14 @@ module Kreuzberg
1118
1124
  use_cache: @use_cache,
1119
1125
  enable_quality_processing: @enable_quality_processing,
1120
1126
  force_ocr: @force_ocr,
1127
+ force_ocr_pages: @force_ocr_pages,
1121
1128
  include_document_structure: @include_document_structure,
1122
1129
  max_concurrent_extractions: @max_concurrent_extractions,
1123
1130
  output_format: @output_format,
1124
1131
  result_format: @result_format,
1125
1132
  cache_namespace: @cache_namespace,
1126
- cache_ttl_secs: @cache_ttl_secs
1133
+ cache_ttl_secs: @cache_ttl_secs,
1134
+ extraction_timeout_secs: @extraction_timeout_secs
1127
1135
  }
1128
1136
  end
1129
1137
 
@@ -1250,6 +1258,8 @@ module Kreuzberg
1250
1258
  @enable_quality_processing = value ? true : false
1251
1259
  when :force_ocr
1252
1260
  @force_ocr = value ? true : false
1261
+ when :force_ocr_pages
1262
+ @force_ocr_pages = value
1253
1263
  when :include_document_structure
1254
1264
  @include_document_structure = value ? true : false
1255
1265
  when :ocr
@@ -1286,6 +1296,8 @@ module Kreuzberg
1286
1296
  @cache_namespace = value
1287
1297
  when :cache_ttl_secs
1288
1298
  @cache_ttl_secs = value&.to_i
1299
+ when :extraction_timeout_secs
1300
+ @extraction_timeout_secs = value&.to_i
1289
1301
  else
1290
1302
  raise ArgumentError, "Unknown configuration key: #{key}"
1291
1303
  end
@@ -1345,6 +1357,7 @@ module Kreuzberg
1345
1357
  @use_cache = merged.use_cache
1346
1358
  @enable_quality_processing = merged.enable_quality_processing
1347
1359
  @force_ocr = merged.force_ocr
1360
+ @force_ocr_pages = merged.force_ocr_pages
1348
1361
  @include_document_structure = merged.include_document_structure
1349
1362
  @ocr = merged.ocr
1350
1363
  @chunking = merged.chunking
@@ -1369,6 +1382,7 @@ module Kreuzberg
1369
1382
  @result_format = merged.result_format
1370
1383
  @cache_namespace = merged.cache_namespace
1371
1384
  @cache_ttl_secs = merged.cache_ttl_secs
1385
+ @extraction_timeout_secs = merged.extraction_timeout_secs
1372
1386
  end
1373
1387
  end
1374
1388
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Kreuzberg
4
- VERSION = '4.5.4'
4
+ VERSION = '4.6.1'
5
5
  end
data/lib/kreuzberg_rb.so CHANGED
Binary file
data/sig/kreuzberg.rbs CHANGED
@@ -481,7 +481,9 @@ module Kreuzberg
481
481
  attr_reader enable_quality_processing: bool
482
482
  attr_reader cache_namespace: String?
483
483
  attr_reader cache_ttl_secs: Integer?
484
+ attr_reader extraction_timeout_secs: Integer?
484
485
  attr_reader force_ocr: bool
486
+ attr_reader force_ocr_pages: Array[Integer]?
485
487
  attr_reader include_document_structure: bool
486
488
  attr_reader ocr: OCR?
487
489
  attr_reader chunking: Chunking?
@@ -508,6 +510,7 @@ module Kreuzberg
508
510
  ?use_cache: bool,
509
511
  ?enable_quality_processing: bool,
510
512
  ?force_ocr: bool,
513
+ ?force_ocr_pages: Array[Integer]?,
511
514
  ?include_document_structure: bool,
512
515
  ?ocr: (OCR | Hash[Symbol, untyped])?,
513
516
  ?chunking: (Chunking | Hash[Symbol, untyped])?,
@@ -525,7 +528,8 @@ module Kreuzberg
525
528
  ?output_format: String?,
526
529
  ?result_format: String?,
527
530
  ?cache_namespace: String?,
528
- ?cache_ttl_secs: Integer?
531
+ ?cache_ttl_secs: Integer?,
532
+ ?extraction_timeout_secs: Integer?
529
533
  ) -> void
530
534
  def to_h: () -> Hash[Symbol, untyped]
531
535
  def to_json: (*untyped) -> String
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kreuzberg
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.5.4
4
+ version: 4.6.1
5
5
  platform: aarch64-linux
6
6
  authors:
7
7
  - Na'aman Hirschfeld
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-03-23 00:00:00.000000000 Z
11
+ date: 2026-03-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -175,18 +175,7 @@ executables: []
175
175
  extensions: []
176
176
  extra_rdoc_files: []
177
177
  files:
178
- - ".gitignore"
179
- - ".rspec"
180
- - ".rubocop.yaml"
181
- - ".rubocop.yml"
182
- - Gemfile
183
- - Gemfile.lock
184
178
  - README.md
185
- - Rakefile
186
- - Steepfile
187
- - examples/async_patterns.rb
188
- - extconf.rb
189
- - kreuzberg.gemspec
190
179
  - lib/kreuzberg.rb
191
180
  - lib/kreuzberg/api_proxy.rb
192
181
  - lib/kreuzberg/cache_api.rb
@@ -232,9 +221,6 @@ files:
232
221
  - spec/binding/plugins/postprocessor_spec.rb
233
222
  - spec/binding/plugins/validator_spec.rb
234
223
  - spec/binding/tables_spec.rb
235
- - spec/fixtures/config.toml
236
- - spec/fixtures/config.yaml
237
- - spec/fixtures/invalid_config.toml
238
224
  - spec/serialization_spec.rb
239
225
  - spec/smoke/package_spec.rb
240
226
  - spec/spec_helper.rb
@@ -254,7 +240,6 @@ files:
254
240
  - spec/unit/config/postprocessor_config_spec.rb
255
241
  - spec/unit/config/tesseract_config_spec.rb
256
242
  - spec/unit/config/token_reduction_config_spec.rb
257
- - test/metadata_types_test.rb
258
243
  homepage: https://github.com/kreuzberg-dev/kreuzberg
259
244
  licenses:
260
245
  - MIT
data/.gitignore DELETED
@@ -1,14 +0,0 @@
1
- tmp/
2
- lib/*.bundle
3
- lib/*.dylib
4
- lib/*.so
5
- lib/*.dll
6
- lib/*.dylib
7
- lib/*.so
8
- lib/*.dll
9
-
10
- # Vendor directory for local development (symlink to ../../crates/kreuzberg)
11
- # In CI, this is replaced by the actual vendored crate files
12
- # Exception: vendor/rb-sys is patched and committed
13
- !vendor/rb-sys/
14
- vendor/
data/.rspec DELETED
@@ -1,3 +0,0 @@
1
- --require spec_helper
2
- --format documentation
3
- --color
data/.rubocop.yaml DELETED
@@ -1 +0,0 @@
1
- inherit_from: .rubocop.yml