kreuzberg 4.5.1-aarch64-linux → 4.5.4-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +4 -4
- data/README.md +4 -4
- data/lib/kreuzberg/config.rb +25 -8
- data/lib/kreuzberg/version.rb +1 -1
- data/lib/kreuzberg_rb.so +0 -0
- data/sig/kreuzberg.rbs +7 -2
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: dfd3586f04b1b2b6dcdf06af30712755fd3fd8d76c7bc03cee05a67133134998
|
|
4
|
+
data.tar.gz: c402149a2765063da19d639eb2aa2b75340db6292b680c9218bc826d5997346c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: aa8f6ba036c180200182377b90b15f293cf008b440c49d732d5dfa84df203373b173b9928138d58917fd7dddfe41c3b4ea89cab582c69707253ed87163932349
|
|
7
|
+
data.tar.gz: 211e120a58693b5c385ff0c577391d542a4ee9ca3e1194ca96e2e975a2636d78cdf9526b25852dad70d543142576317c2b344260f83148143d4d9ab6ac6822bd
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
kreuzberg (4.5.1)
|
|
4
|
+
kreuzberg (4.5.4)
|
|
5
5
|
rb_sys (~> 0.9.119)
|
|
6
6
|
sorbet-runtime (~> 0.5)
|
|
7
7
|
|
|
@@ -134,7 +134,7 @@ GEM
|
|
|
134
134
|
rubocop (~> 1.81)
|
|
135
135
|
ruby-progressbar (1.13.0)
|
|
136
136
|
securerandom (0.4.1)
|
|
137
|
-
sorbet-runtime (0.6.
|
|
137
|
+
sorbet-runtime (0.6.13055)
|
|
138
138
|
steep (1.10.0)
|
|
139
139
|
activesupport (>= 5.1)
|
|
140
140
|
concurrent-ruby (>= 1.1.10)
|
|
@@ -222,7 +222,7 @@ CHECKSUMS
|
|
|
222
222
|
io-console (0.8.2) sha256=d6e3ae7a7cc7574f4b8893b4fca2162e57a825b223a177b7afa236c5ef9814cc
|
|
223
223
|
json (2.19.2) sha256=e7e1bd318b2c37c4ceee2444841c86539bc462e81f40d134cf97826cb14e83cf
|
|
224
224
|
json-schema (6.2.0) sha256=e8bff46ed845a22c1ab2bd0d7eccf831c01fe23bb3920caa4c74db4306813666
|
|
225
|
-
kreuzberg (4.5.1)
|
|
225
|
+
kreuzberg (4.5.4)
|
|
226
226
|
language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
|
|
227
227
|
lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
|
|
228
228
|
listen (3.10.0) sha256=c6e182db62143aeccc2e1960033bebe7445309c7272061979bb098d03760c9d2
|
|
@@ -259,7 +259,7 @@ CHECKSUMS
|
|
|
259
259
|
rubocop-rspec (3.9.0) sha256=8fa70a3619408237d789aeecfb9beef40576acc855173e60939d63332fdb55e2
|
|
260
260
|
ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33
|
|
261
261
|
securerandom (0.4.1) sha256=cc5193d414a4341b6e225f0cb4446aceca8e50d5e1888743fac16987638ea0b1
|
|
262
|
-
sorbet-runtime (0.6.
|
|
262
|
+
sorbet-runtime (0.6.13055) sha256=c8ae8c81310e0a28d290b11f44ddca59659b7d7f13752c0ef5d16964bbb84d18
|
|
263
263
|
steep (1.10.0) sha256=1b295b55f9aaff1b8d3ee42453ee55bc2a1078fda0268f288edb2dc014f4d7d1
|
|
264
264
|
strscan (3.1.7) sha256=5f76462b94a3ea50b44973225b7d75b2cb96d4e1bee9ef1319b99ca117b72c8c
|
|
265
265
|
terminal-table (4.0.0) sha256=f504793203f8251b2ea7c7068333053f0beeea26093ec9962e62ea79f94301d2
|
data/README.md
CHANGED
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
|
|
23
23
|
</a>
|
|
24
24
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
|
|
25
|
-
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.5.1" alt="Go">
|
|
25
|
+
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.5.4" alt="Go">
|
|
26
26
|
</a>
|
|
27
27
|
<a href="https://www.nuget.org/packages/Kreuzberg/">
|
|
28
28
|
<img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
|
|
@@ -61,7 +61,7 @@
|
|
|
61
61
|
</div>
|
|
62
62
|
|
|
63
63
|
|
|
64
|
-
Extract text, tables, images, and metadata from
|
|
64
|
+
Extract text, tables, images, and metadata from 91+ file formats including PDF, Office documents, and images. Ruby bindings with idiomatic Ruby API and native performance.
|
|
65
65
|
|
|
66
66
|
|
|
67
67
|
## Installation
|
|
@@ -211,9 +211,9 @@ puts "Processing time: #{result.metadata&.dig('processing_time')}ms"
|
|
|
211
211
|
|
|
212
212
|
## Features
|
|
213
213
|
|
|
214
|
-
### Supported File Formats (
|
|
214
|
+
### Supported File Formats (91+)
|
|
215
215
|
|
|
216
|
-
|
|
216
|
+
91+ file formats across 8 major categories with intelligent format detection and comprehensive metadata extraction.
|
|
217
217
|
|
|
218
218
|
#### Office Documents
|
|
219
219
|
|
data/lib/kreuzberg/config.rb
CHANGED
|
@@ -850,19 +850,21 @@ module Kreuzberg
|
|
|
850
850
|
# )
|
|
851
851
|
#
|
|
852
852
|
class LayoutDetection
|
|
853
|
-
attr_reader :preset, :confidence_threshold, :apply_heuristics
|
|
853
|
+
attr_reader :preset, :confidence_threshold, :apply_heuristics, :table_model
|
|
854
854
|
|
|
855
|
-
def initialize(preset: 'fast', confidence_threshold: nil, apply_heuristics: true)
|
|
855
|
+
def initialize(preset: 'fast', confidence_threshold: nil, apply_heuristics: true, table_model: nil)
|
|
856
856
|
@preset = preset.to_s
|
|
857
857
|
@confidence_threshold = confidence_threshold&.to_f
|
|
858
858
|
@apply_heuristics = apply_heuristics ? true : false
|
|
859
|
+
@table_model = table_model&.to_s
|
|
859
860
|
end
|
|
860
861
|
|
|
861
862
|
def to_h
|
|
862
863
|
{
|
|
863
864
|
preset: @preset,
|
|
864
865
|
confidence_threshold: @confidence_threshold,
|
|
865
|
-
apply_heuristics: @apply_heuristics
|
|
866
|
+
apply_heuristics: @apply_heuristics,
|
|
867
|
+
table_model: @table_model
|
|
866
868
|
}.compact
|
|
867
869
|
end
|
|
868
870
|
end
|
|
@@ -930,7 +932,8 @@ module Kreuzberg
|
|
|
930
932
|
:images, :postprocessor,
|
|
931
933
|
:token_reduction, :keywords, :html_options, :pages,
|
|
932
934
|
:max_concurrent_extractions, :output_format, :result_format,
|
|
933
|
-
:security_limits, :layout, :concurrency
|
|
935
|
+
:security_limits, :layout, :concurrency,
|
|
936
|
+
:cache_namespace, :cache_ttl_secs
|
|
934
937
|
|
|
935
938
|
# Alias for backward compatibility - image_extraction is the canonical name
|
|
936
939
|
alias image_extraction images
|
|
@@ -955,7 +958,7 @@ module Kreuzberg
|
|
|
955
958
|
language_detection pdf_options image_extraction
|
|
956
959
|
postprocessor token_reduction keywords html_options pages
|
|
957
960
|
max_concurrent_extractions output_format result_format
|
|
958
|
-
security_limits layout concurrency
|
|
961
|
+
security_limits layout concurrency cache_namespace cache_ttl_secs
|
|
959
962
|
].freeze
|
|
960
963
|
|
|
961
964
|
# Aliases for backward compatibility
|
|
@@ -1032,7 +1035,9 @@ module Kreuzberg
|
|
|
1032
1035
|
result_format: nil,
|
|
1033
1036
|
security_limits: nil,
|
|
1034
1037
|
layout: nil,
|
|
1035
|
-
concurrency: nil
|
|
1038
|
+
concurrency: nil,
|
|
1039
|
+
cache_namespace: nil,
|
|
1040
|
+
cache_ttl_secs: nil)
|
|
1036
1041
|
kwargs = {
|
|
1037
1042
|
use_cache: use_cache, enable_quality_processing: enable_quality_processing,
|
|
1038
1043
|
force_ocr: force_ocr, include_document_structure: include_document_structure,
|
|
@@ -1043,7 +1048,9 @@ module Kreuzberg
|
|
|
1043
1048
|
pages: pages, max_concurrent_extractions: max_concurrent_extractions,
|
|
1044
1049
|
output_format: output_format, result_format: result_format,
|
|
1045
1050
|
security_limits: security_limits, layout: layout,
|
|
1046
|
-
concurrency: concurrency
|
|
1051
|
+
concurrency: concurrency,
|
|
1052
|
+
cache_namespace: cache_namespace,
|
|
1053
|
+
cache_ttl_secs: cache_ttl_secs
|
|
1047
1054
|
}
|
|
1048
1055
|
extracted = extract_from_hash(hash, kwargs)
|
|
1049
1056
|
|
|
@@ -1077,6 +1084,8 @@ module Kreuzberg
|
|
|
1077
1084
|
@max_concurrent_extractions = params[:max_concurrent_extractions]&.to_i
|
|
1078
1085
|
@output_format = validate_output_format(params[:output_format])
|
|
1079
1086
|
@result_format = validate_result_format(params[:result_format])
|
|
1087
|
+
@cache_namespace = params[:cache_namespace]
|
|
1088
|
+
@cache_ttl_secs = params[:cache_ttl_secs]&.to_i
|
|
1080
1089
|
@security_limits = params[:security_limits]
|
|
1081
1090
|
end
|
|
1082
1091
|
|
|
@@ -1112,7 +1121,9 @@ module Kreuzberg
|
|
|
1112
1121
|
include_document_structure: @include_document_structure,
|
|
1113
1122
|
max_concurrent_extractions: @max_concurrent_extractions,
|
|
1114
1123
|
output_format: @output_format,
|
|
1115
|
-
result_format: @result_format
|
|
1124
|
+
result_format: @result_format,
|
|
1125
|
+
cache_namespace: @cache_namespace,
|
|
1126
|
+
cache_ttl_secs: @cache_ttl_secs
|
|
1116
1127
|
}
|
|
1117
1128
|
end
|
|
1118
1129
|
|
|
@@ -1271,6 +1282,10 @@ module Kreuzberg
|
|
|
1271
1282
|
@output_format = validate_output_format(value)
|
|
1272
1283
|
when :result_format
|
|
1273
1284
|
@result_format = validate_result_format(value)
|
|
1285
|
+
when :cache_namespace
|
|
1286
|
+
@cache_namespace = value
|
|
1287
|
+
when :cache_ttl_secs
|
|
1288
|
+
@cache_ttl_secs = value&.to_i
|
|
1274
1289
|
else
|
|
1275
1290
|
raise ArgumentError, "Unknown configuration key: #{key}"
|
|
1276
1291
|
end
|
|
@@ -1352,6 +1367,8 @@ module Kreuzberg
|
|
|
1352
1367
|
@max_concurrent_extractions = merged.max_concurrent_extractions
|
|
1353
1368
|
@output_format = merged.output_format
|
|
1354
1369
|
@result_format = merged.result_format
|
|
1370
|
+
@cache_namespace = merged.cache_namespace
|
|
1371
|
+
@cache_ttl_secs = merged.cache_ttl_secs
|
|
1355
1372
|
end
|
|
1356
1373
|
end
|
|
1357
1374
|
end
|
data/lib/kreuzberg/version.rb
CHANGED
data/lib/kreuzberg_rb.so
CHANGED
|
Binary file
|
data/sig/kreuzberg.rbs
CHANGED
|
@@ -463,8 +463,9 @@ module Kreuzberg
|
|
|
463
463
|
attr_reader preset: String
|
|
464
464
|
attr_reader confidence_threshold: Float?
|
|
465
465
|
attr_reader apply_heuristics: bool
|
|
466
|
+
attr_reader table_model: String?
|
|
466
467
|
|
|
467
|
-
def initialize: (?preset: String, ?confidence_threshold: Float?, ?apply_heuristics: bool) -> void
|
|
468
|
+
def initialize: (?preset: String, ?confidence_threshold: Float?, ?apply_heuristics: bool, ?table_model: String?) -> void
|
|
468
469
|
def to_h: () -> Hash[Symbol, untyped]
|
|
469
470
|
end
|
|
470
471
|
|
|
@@ -478,6 +479,8 @@ module Kreuzberg
|
|
|
478
479
|
class Extraction
|
|
479
480
|
attr_reader use_cache: bool
|
|
480
481
|
attr_reader enable_quality_processing: bool
|
|
482
|
+
attr_reader cache_namespace: String?
|
|
483
|
+
attr_reader cache_ttl_secs: Integer?
|
|
481
484
|
attr_reader force_ocr: bool
|
|
482
485
|
attr_reader include_document_structure: bool
|
|
483
486
|
attr_reader ocr: OCR?
|
|
@@ -520,7 +523,9 @@ module Kreuzberg
|
|
|
520
523
|
?concurrency: (Concurrency | Hash[Symbol, untyped])?,
|
|
521
524
|
?max_concurrent_extractions: Integer?,
|
|
522
525
|
?output_format: String?,
|
|
523
|
-
?result_format: String
|
|
526
|
+
?result_format: String?,
|
|
527
|
+
?cache_namespace: String?,
|
|
528
|
+
?cache_ttl_secs: Integer?
|
|
524
529
|
) -> void
|
|
525
530
|
def to_h: () -> Hash[Symbol, untyped]
|
|
526
531
|
def to_json: (*untyped) -> String
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: kreuzberg
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 4.5.1
|
|
4
|
+
version: 4.5.4
|
|
5
5
|
platform: aarch64-linux
|
|
6
6
|
authors:
|
|
7
7
|
- Na'aman Hirschfeld
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-03-
|
|
11
|
+
date: 2026-03-23 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|