kreuzberg 4.6.2-aarch64-linux → 4.6.3-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 406c791db1a8cb29e3ff2e89a60e0a29ed73c0ff0548338b60c3574bd5944f6f
4
- data.tar.gz: 7565bbe0708afceadc13f43b57103a03529992f705caf21f1ddaa00e68ad27d6
3
+ metadata.gz: adae55dc7f30e68a211cc0493985d0d1687b3988e76509fbffd87f91fee45207
4
+ data.tar.gz: 6071b7d76b01dc15b47a11fc5eaeb4292fbb07630d20c3ac113751bbded3de0f
5
5
  SHA512:
6
- metadata.gz: a680fd2406f8dac338a53ab303ef29d117462cbf376d48be4af48846f7d4f82d4e7999e2aeac17a404e5da7feb79f1cccfb8f240153c0c36a4274e16eeb50f6f
7
- data.tar.gz: dc1c1dc215020a65560490e159e38d6780b703bae597279cb0e3d2c369afce7a7bee06984972987d1c3a69fe2040da1cb1e2812df5c542590300dfcb09e5e7d7
6
+ metadata.gz: 1e90683694a29205d479b3cda7fb367658bf311520884f7c1faa1b4ec1d6be69dad491b620e33d521b8dea893e062c620738b0974157a096670e825a0fd1a434
7
+ data.tar.gz: ff29c19cb5b0085b84ba1a3ad9f97602ed83211e934d5eff1b8a630868a2ff7e919bba5937d7c49cb7a424fbc2239b726750cdca5c8893fdeb6cb4d98540f5fa
data/README.md CHANGED
@@ -22,7 +22,7 @@
22
22
  <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
23
23
  </a>
24
24
  <a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
25
- <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.6.2" alt="Go">
25
+ <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.6.3" alt="Go">
26
26
  </a>
27
27
  <a href="https://www.nuget.org/packages/Kreuzberg/">
28
28
  <img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
@@ -837,6 +837,25 @@ module Kreuzberg
837
837
  end
838
838
  end
839
839
 
840
+ # Email extraction configuration
841
+ #
842
+ # @example With fallback codepage
843
+ # email = Email.new(msg_fallback_codepage: 1251)
844
+ #
845
+ class Email
846
+ attr_reader :msg_fallback_codepage
847
+
848
+ def initialize(msg_fallback_codepage: nil)
849
+ @msg_fallback_codepage = msg_fallback_codepage&.to_i
850
+ end
851
+
852
+ def to_h
853
+ h = {}
854
+ h[:msg_fallback_codepage] = @msg_fallback_codepage unless @msg_fallback_codepage.nil?
855
+ h
856
+ end
857
+ end
858
+
840
859
  # Layout detection configuration
841
860
  #
842
861
  # @example Basic usage with fast preset
@@ -933,7 +952,8 @@ module Kreuzberg
933
952
  :token_reduction, :keywords, :html_options, :pages,
934
953
  :max_concurrent_extractions, :output_format, :result_format,
935
954
  :security_limits, :layout, :concurrency,
936
- :cache_namespace, :cache_ttl_secs, :extraction_timeout_secs
955
+ :cache_namespace, :cache_ttl_secs, :extraction_timeout_secs,
956
+ :max_archive_depth, :acceleration, :email
937
957
 
938
958
  # Alias for backward compatibility - image_extraction is the canonical name
939
959
  alias image_extraction images
@@ -959,6 +979,7 @@ module Kreuzberg
959
979
  postprocessor token_reduction keywords html_options pages
960
980
  max_concurrent_extractions output_format result_format
961
981
  security_limits layout concurrency cache_namespace cache_ttl_secs extraction_timeout_secs
982
+ max_archive_depth acceleration email
962
983
  ].freeze
963
984
 
964
985
  # Aliases for backward compatibility
@@ -1015,7 +1036,7 @@ module Kreuzberg
1015
1036
  new(**normalize_hash_keys(hash))
1016
1037
  end
1017
1038
 
1018
- def initialize(hash = nil,
1039
+ def initialize(hash = nil, # rubocop:disable Metrics/MethodLength
1019
1040
  use_cache: true,
1020
1041
  enable_quality_processing: true,
1021
1042
  force_ocr: false,
@@ -1039,7 +1060,10 @@ module Kreuzberg
1039
1060
  concurrency: nil,
1040
1061
  cache_namespace: nil,
1041
1062
  cache_ttl_secs: nil,
1042
- extraction_timeout_secs: nil)
1063
+ extraction_timeout_secs: nil,
1064
+ max_archive_depth: 3,
1065
+ acceleration: nil,
1066
+ email: nil)
1043
1067
  kwargs = {
1044
1068
  use_cache: use_cache, enable_quality_processing: enable_quality_processing,
1045
1069
  force_ocr: force_ocr, force_ocr_pages: force_ocr_pages,
@@ -1054,7 +1078,10 @@ module Kreuzberg
1054
1078
  concurrency: concurrency,
1055
1079
  cache_namespace: cache_namespace,
1056
1080
  cache_ttl_secs: cache_ttl_secs,
1057
- extraction_timeout_secs: extraction_timeout_secs
1081
+ extraction_timeout_secs: extraction_timeout_secs,
1082
+ max_archive_depth: max_archive_depth,
1083
+ acceleration: acceleration,
1084
+ email: email
1058
1085
  }
1059
1086
  extracted = extract_from_hash(hash, kwargs)
1060
1087
 
@@ -1086,7 +1113,10 @@ module Kreuzberg
1086
1113
  @pages = normalize_config(params[:pages], PageConfig)
1087
1114
  @layout = normalize_config(params[:layout], LayoutDetection)
1088
1115
  @concurrency = normalize_config(params[:concurrency], Concurrency)
1116
+ @acceleration = normalize_config(params[:acceleration], Acceleration)
1117
+ @email = normalize_config(params[:email], Email)
1089
1118
  @max_concurrent_extractions = params[:max_concurrent_extractions]&.to_i
1119
+ @max_archive_depth = params[:max_archive_depth]&.to_i || 3
1090
1120
  @output_format = validate_output_format(params[:output_format])
1091
1121
  @result_format = validate_result_format(params[:result_format])
1092
1122
  @cache_namespace = params[:cache_namespace]
@@ -1127,6 +1157,7 @@ module Kreuzberg
1127
1157
  force_ocr_pages: @force_ocr_pages,
1128
1158
  include_document_structure: @include_document_structure,
1129
1159
  max_concurrent_extractions: @max_concurrent_extractions,
1160
+ max_archive_depth: @max_archive_depth,
1130
1161
  output_format: @output_format,
1131
1162
  result_format: @result_format,
1132
1163
  cache_namespace: @cache_namespace,
@@ -1142,7 +1173,8 @@ module Kreuzberg
1142
1173
  image_extraction: @images&.to_h, postprocessor: @postprocessor&.to_h,
1143
1174
  token_reduction: @token_reduction&.to_h, keywords: @keywords&.to_h,
1144
1175
  html_options: @html_options&.to_h, pages: @pages&.to_h,
1145
- layout: @layout&.to_h, concurrency: @concurrency&.to_h
1176
+ layout: @layout&.to_h, concurrency: @concurrency&.to_h,
1177
+ acceleration: @acceleration&.to_h, email: @email&.to_h
1146
1178
  }
1147
1179
  end
1148
1180
 
@@ -1286,6 +1318,12 @@ module Kreuzberg
1286
1318
  @layout = normalize_config(value, LayoutDetection)
1287
1319
  when :concurrency
1288
1320
  @concurrency = normalize_config(value, Concurrency)
1321
+ when :acceleration
1322
+ @acceleration = normalize_config(value, Acceleration)
1323
+ when :email
1324
+ @email = normalize_config(value, Email)
1325
+ when :max_archive_depth
1326
+ @max_archive_depth = value&.to_i || 3
1289
1327
  when :max_concurrent_extractions
1290
1328
  @max_concurrent_extractions = value&.to_i
1291
1329
  when :output_format
@@ -1373,6 +1411,9 @@ module Kreuzberg
1373
1411
  @html_options = merged.html_options
1374
1412
  @pages = merged.pages
1375
1413
  @layout = merged.layout
1414
+ @acceleration = merged.acceleration
1415
+ @email = merged.email
1416
+ @max_archive_depth = merged.max_archive_depth
1376
1417
  end
1377
1418
 
1378
1419
  def update_output_options(merged)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Kreuzberg
4
- VERSION = '4.6.2'
4
+ VERSION = '4.6.3'
5
5
  end
data/lib/kreuzberg_rb.so CHANGED
Binary file
data/sig/kreuzberg.rbs CHANGED
@@ -459,6 +459,21 @@ module Kreuzberg
459
459
  def to_h: () -> Hash[Symbol, untyped]
460
460
  end
461
461
 
462
+ class Acceleration
463
+ attr_reader provider: String
464
+ attr_reader device_id: Integer
465
+
466
+ def initialize: (?provider: String, ?device_id: Integer) -> void
467
+ def to_h: () -> Hash[Symbol, untyped]
468
+ end
469
+
470
+ class Email
471
+ attr_reader msg_fallback_codepage: Integer?
472
+
473
+ def initialize: (?msg_fallback_codepage: Integer?) -> void
474
+ def to_h: () -> Hash[Symbol, untyped]
475
+ end
476
+
462
477
  class LayoutDetection
463
478
  attr_reader preset: String
464
479
  attr_reader confidence_threshold: Float?
@@ -497,7 +512,10 @@ module Kreuzberg
497
512
  attr_reader pages: PageConfig?
498
513
  attr_reader layout: LayoutDetection?
499
514
  attr_reader concurrency: Concurrency?
515
+ attr_reader acceleration: Acceleration?
516
+ attr_reader email: Email?
500
517
  attr_reader max_concurrent_extractions: Integer?
518
+ attr_reader max_archive_depth: Integer
501
519
  attr_reader output_format: String?
502
520
  attr_reader result_format: String?
503
521
  attr_reader security_limits: Hash[String, Integer]?
@@ -524,7 +542,10 @@ module Kreuzberg
524
542
  ?pages: (PageConfig | Hash[Symbol, untyped])?,
525
543
  ?layout: (LayoutDetection | Hash[Symbol, untyped])?,
526
544
  ?concurrency: (Concurrency | Hash[Symbol, untyped])?,
545
+ ?acceleration: (Acceleration | Hash[Symbol, untyped])?,
546
+ ?email: (Email | Hash[Symbol, untyped])?,
527
547
  ?max_concurrent_extractions: Integer?,
548
+ ?max_archive_depth: Integer,
528
549
  ?output_format: String?,
529
550
  ?result_format: String?,
530
551
  ?cache_namespace: String?,
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kreuzberg
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.6.2
4
+ version: 4.6.3
5
5
  platform: aarch64-linux
6
6
  authors:
7
7
  - Na'aman Hirschfeld
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-03-26 00:00:00.000000000 Z
11
+ date: 2026-03-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler