kreuzberg 4.6.2-aarch64-linux → 4.6.3-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/kreuzberg/config.rb +46 -5
- data/lib/kreuzberg/version.rb +1 -1
- data/lib/kreuzberg_rb.so +0 -0
- data/sig/kreuzberg.rbs +21 -0
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: adae55dc7f30e68a211cc0493985d0d1687b3988e76509fbffd87f91fee45207
|
|
4
|
+
data.tar.gz: 6071b7d76b01dc15b47a11fc5eaeb4292fbb07630d20c3ac113751bbded3de0f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 1e90683694a29205d479b3cda7fb367658bf311520884f7c1faa1b4ec1d6be69dad491b620e33d521b8dea893e062c620738b0974157a096670e825a0fd1a434
|
|
7
|
+
data.tar.gz: ff29c19cb5b0085b84ba1a3ad9f97602ed83211e934d5eff1b8a630868a2ff7e919bba5937d7c49cb7a424fbc2239b726750cdca5c8893fdeb6cb4d98540f5fa
|
data/README.md
CHANGED
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
|
|
23
23
|
</a>
|
|
24
24
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
|
|
25
|
-
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.6.2" alt="Go">
|
|
25
|
+
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.6.3" alt="Go">
|
|
26
26
|
</a>
|
|
27
27
|
<a href="https://www.nuget.org/packages/Kreuzberg/">
|
|
28
28
|
<img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
|
data/lib/kreuzberg/config.rb
CHANGED
|
@@ -837,6 +837,25 @@ module Kreuzberg
|
|
|
837
837
|
end
|
|
838
838
|
end
|
|
839
839
|
|
|
840
|
+
# Email extraction configuration
|
|
841
|
+
#
|
|
842
|
+
# @example With fallback codepage
|
|
843
|
+
# email = Email.new(msg_fallback_codepage: 1251)
|
|
844
|
+
#
|
|
845
|
+
class Email
|
|
846
|
+
attr_reader :msg_fallback_codepage
|
|
847
|
+
|
|
848
|
+
def initialize(msg_fallback_codepage: nil)
|
|
849
|
+
@msg_fallback_codepage = msg_fallback_codepage&.to_i
|
|
850
|
+
end
|
|
851
|
+
|
|
852
|
+
def to_h
|
|
853
|
+
h = {}
|
|
854
|
+
h[:msg_fallback_codepage] = @msg_fallback_codepage unless @msg_fallback_codepage.nil?
|
|
855
|
+
h
|
|
856
|
+
end
|
|
857
|
+
end
|
|
858
|
+
|
|
840
859
|
# Layout detection configuration
|
|
841
860
|
#
|
|
842
861
|
# @example Basic usage with fast preset
|
|
@@ -933,7 +952,8 @@ module Kreuzberg
|
|
|
933
952
|
:token_reduction, :keywords, :html_options, :pages,
|
|
934
953
|
:max_concurrent_extractions, :output_format, :result_format,
|
|
935
954
|
:security_limits, :layout, :concurrency,
|
|
936
|
-
:cache_namespace, :cache_ttl_secs, :extraction_timeout_secs
|
|
955
|
+
:cache_namespace, :cache_ttl_secs, :extraction_timeout_secs,
|
|
956
|
+
:max_archive_depth, :acceleration, :email
|
|
937
957
|
|
|
938
958
|
# Alias for backward compatibility - image_extraction is the canonical name
|
|
939
959
|
alias image_extraction images
|
|
@@ -959,6 +979,7 @@ module Kreuzberg
|
|
|
959
979
|
postprocessor token_reduction keywords html_options pages
|
|
960
980
|
max_concurrent_extractions output_format result_format
|
|
961
981
|
security_limits layout concurrency cache_namespace cache_ttl_secs extraction_timeout_secs
|
|
982
|
+
max_archive_depth acceleration email
|
|
962
983
|
].freeze
|
|
963
984
|
|
|
964
985
|
# Aliases for backward compatibility
|
|
@@ -1015,7 +1036,7 @@ module Kreuzberg
|
|
|
1015
1036
|
new(**normalize_hash_keys(hash))
|
|
1016
1037
|
end
|
|
1017
1038
|
|
|
1018
|
-
def initialize(hash = nil,
|
|
1039
|
+
def initialize(hash = nil, # rubocop:disable Metrics/MethodLength
|
|
1019
1040
|
use_cache: true,
|
|
1020
1041
|
enable_quality_processing: true,
|
|
1021
1042
|
force_ocr: false,
|
|
@@ -1039,7 +1060,10 @@ module Kreuzberg
|
|
|
1039
1060
|
concurrency: nil,
|
|
1040
1061
|
cache_namespace: nil,
|
|
1041
1062
|
cache_ttl_secs: nil,
|
|
1042
|
-
extraction_timeout_secs: nil
|
|
1063
|
+
extraction_timeout_secs: nil,
|
|
1064
|
+
max_archive_depth: 3,
|
|
1065
|
+
acceleration: nil,
|
|
1066
|
+
email: nil)
|
|
1043
1067
|
kwargs = {
|
|
1044
1068
|
use_cache: use_cache, enable_quality_processing: enable_quality_processing,
|
|
1045
1069
|
force_ocr: force_ocr, force_ocr_pages: force_ocr_pages,
|
|
@@ -1054,7 +1078,10 @@ module Kreuzberg
|
|
|
1054
1078
|
concurrency: concurrency,
|
|
1055
1079
|
cache_namespace: cache_namespace,
|
|
1056
1080
|
cache_ttl_secs: cache_ttl_secs,
|
|
1057
|
-
extraction_timeout_secs: extraction_timeout_secs
|
|
1081
|
+
extraction_timeout_secs: extraction_timeout_secs,
|
|
1082
|
+
max_archive_depth: max_archive_depth,
|
|
1083
|
+
acceleration: acceleration,
|
|
1084
|
+
email: email
|
|
1058
1085
|
}
|
|
1059
1086
|
extracted = extract_from_hash(hash, kwargs)
|
|
1060
1087
|
|
|
@@ -1086,7 +1113,10 @@ module Kreuzberg
|
|
|
1086
1113
|
@pages = normalize_config(params[:pages], PageConfig)
|
|
1087
1114
|
@layout = normalize_config(params[:layout], LayoutDetection)
|
|
1088
1115
|
@concurrency = normalize_config(params[:concurrency], Concurrency)
|
|
1116
|
+
@acceleration = normalize_config(params[:acceleration], Acceleration)
|
|
1117
|
+
@email = normalize_config(params[:email], Email)
|
|
1089
1118
|
@max_concurrent_extractions = params[:max_concurrent_extractions]&.to_i
|
|
1119
|
+
@max_archive_depth = params[:max_archive_depth]&.to_i || 3
|
|
1090
1120
|
@output_format = validate_output_format(params[:output_format])
|
|
1091
1121
|
@result_format = validate_result_format(params[:result_format])
|
|
1092
1122
|
@cache_namespace = params[:cache_namespace]
|
|
@@ -1127,6 +1157,7 @@ module Kreuzberg
|
|
|
1127
1157
|
force_ocr_pages: @force_ocr_pages,
|
|
1128
1158
|
include_document_structure: @include_document_structure,
|
|
1129
1159
|
max_concurrent_extractions: @max_concurrent_extractions,
|
|
1160
|
+
max_archive_depth: @max_archive_depth,
|
|
1130
1161
|
output_format: @output_format,
|
|
1131
1162
|
result_format: @result_format,
|
|
1132
1163
|
cache_namespace: @cache_namespace,
|
|
@@ -1142,7 +1173,8 @@ module Kreuzberg
|
|
|
1142
1173
|
image_extraction: @images&.to_h, postprocessor: @postprocessor&.to_h,
|
|
1143
1174
|
token_reduction: @token_reduction&.to_h, keywords: @keywords&.to_h,
|
|
1144
1175
|
html_options: @html_options&.to_h, pages: @pages&.to_h,
|
|
1145
|
-
layout: @layout&.to_h, concurrency: @concurrency&.to_h
|
|
1176
|
+
layout: @layout&.to_h, concurrency: @concurrency&.to_h,
|
|
1177
|
+
acceleration: @acceleration&.to_h, email: @email&.to_h
|
|
1146
1178
|
}
|
|
1147
1179
|
end
|
|
1148
1180
|
|
|
@@ -1286,6 +1318,12 @@ module Kreuzberg
|
|
|
1286
1318
|
@layout = normalize_config(value, LayoutDetection)
|
|
1287
1319
|
when :concurrency
|
|
1288
1320
|
@concurrency = normalize_config(value, Concurrency)
|
|
1321
|
+
when :acceleration
|
|
1322
|
+
@acceleration = normalize_config(value, Acceleration)
|
|
1323
|
+
when :email
|
|
1324
|
+
@email = normalize_config(value, Email)
|
|
1325
|
+
when :max_archive_depth
|
|
1326
|
+
@max_archive_depth = value&.to_i || 3
|
|
1289
1327
|
when :max_concurrent_extractions
|
|
1290
1328
|
@max_concurrent_extractions = value&.to_i
|
|
1291
1329
|
when :output_format
|
|
@@ -1373,6 +1411,9 @@ module Kreuzberg
|
|
|
1373
1411
|
@html_options = merged.html_options
|
|
1374
1412
|
@pages = merged.pages
|
|
1375
1413
|
@layout = merged.layout
|
|
1414
|
+
@acceleration = merged.acceleration
|
|
1415
|
+
@email = merged.email
|
|
1416
|
+
@max_archive_depth = merged.max_archive_depth
|
|
1376
1417
|
end
|
|
1377
1418
|
|
|
1378
1419
|
def update_output_options(merged)
|
data/lib/kreuzberg/version.rb
CHANGED
data/lib/kreuzberg_rb.so
CHANGED
|
Binary file
|
data/sig/kreuzberg.rbs
CHANGED
|
@@ -459,6 +459,21 @@ module Kreuzberg
|
|
|
459
459
|
def to_h: () -> Hash[Symbol, untyped]
|
|
460
460
|
end
|
|
461
461
|
|
|
462
|
+
class Acceleration
|
|
463
|
+
attr_reader provider: String
|
|
464
|
+
attr_reader device_id: Integer
|
|
465
|
+
|
|
466
|
+
def initialize: (?provider: String, ?device_id: Integer) -> void
|
|
467
|
+
def to_h: () -> Hash[Symbol, untyped]
|
|
468
|
+
end
|
|
469
|
+
|
|
470
|
+
class Email
|
|
471
|
+
attr_reader msg_fallback_codepage: Integer?
|
|
472
|
+
|
|
473
|
+
def initialize: (?msg_fallback_codepage: Integer?) -> void
|
|
474
|
+
def to_h: () -> Hash[Symbol, untyped]
|
|
475
|
+
end
|
|
476
|
+
|
|
462
477
|
class LayoutDetection
|
|
463
478
|
attr_reader preset: String
|
|
464
479
|
attr_reader confidence_threshold: Float?
|
|
@@ -497,7 +512,10 @@ module Kreuzberg
|
|
|
497
512
|
attr_reader pages: PageConfig?
|
|
498
513
|
attr_reader layout: LayoutDetection?
|
|
499
514
|
attr_reader concurrency: Concurrency?
|
|
515
|
+
attr_reader acceleration: Acceleration?
|
|
516
|
+
attr_reader email: Email?
|
|
500
517
|
attr_reader max_concurrent_extractions: Integer?
|
|
518
|
+
attr_reader max_archive_depth: Integer
|
|
501
519
|
attr_reader output_format: String?
|
|
502
520
|
attr_reader result_format: String?
|
|
503
521
|
attr_reader security_limits: Hash[String, Integer]?
|
|
@@ -524,7 +542,10 @@ module Kreuzberg
|
|
|
524
542
|
?pages: (PageConfig | Hash[Symbol, untyped])?,
|
|
525
543
|
?layout: (LayoutDetection | Hash[Symbol, untyped])?,
|
|
526
544
|
?concurrency: (Concurrency | Hash[Symbol, untyped])?,
|
|
545
|
+
?acceleration: (Acceleration | Hash[Symbol, untyped])?,
|
|
546
|
+
?email: (Email | Hash[Symbol, untyped])?,
|
|
527
547
|
?max_concurrent_extractions: Integer?,
|
|
548
|
+
?max_archive_depth: Integer,
|
|
528
549
|
?output_format: String?,
|
|
529
550
|
?result_format: String?,
|
|
530
551
|
?cache_namespace: String?,
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: kreuzberg
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 4.6.2
|
|
4
|
+
version: 4.6.3
|
|
5
5
|
platform: aarch64-linux
|
|
6
6
|
authors:
|
|
7
7
|
- Na'aman Hirschfeld
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-03-
|
|
11
|
+
date: 2026-03-27 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|