kreuzberg 4.5.1-aarch64-linux → 4.5.4-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a4f7421dd696f83a87681b1250e5a2271dd4dbad4cf31999b272921c87a6cbc3
4
- data.tar.gz: a9fb76d70dedec40e317f36c432a715a5076ac9b2cb66a5e1030900003dfcb73
3
+ metadata.gz: dfd3586f04b1b2b6dcdf06af30712755fd3fd8d76c7bc03cee05a67133134998
4
+ data.tar.gz: c402149a2765063da19d639eb2aa2b75340db6292b680c9218bc826d5997346c
5
5
  SHA512:
6
- metadata.gz: efaec6320e12b8f8d56016d26067a8ba5de8a92e2aee51ffc530e189fc5df6c0fc14125fae03cd6bfcd6b5e8386066741b089170257d20e047a28592652a1019
7
- data.tar.gz: 3fd93ff62a4d0e63b782e4da4f93288a2705a5d84907910fc475062861e7c32a4dbe9cd1d3d8aaa194d7f977891b653cfc19ee38a35ded03ed94aace011dc419
6
+ metadata.gz: aa8f6ba036c180200182377b90b15f293cf008b440c49d732d5dfa84df203373b173b9928138d58917fd7dddfe41c3b4ea89cab582c69707253ed87163932349
7
+ data.tar.gz: 211e120a58693b5c385ff0c577391d542a4ee9ca3e1194ca96e2e975a2636d78cdf9526b25852dad70d543142576317c2b344260f83148143d4d9ab6ac6822bd
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- kreuzberg (4.5.1)
4
+ kreuzberg (4.5.4)
5
5
  rb_sys (~> 0.9.119)
6
6
  sorbet-runtime (~> 0.5)
7
7
 
@@ -134,7 +134,7 @@ GEM
134
134
  rubocop (~> 1.81)
135
135
  ruby-progressbar (1.13.0)
136
136
  securerandom (0.4.1)
137
- sorbet-runtime (0.6.13051)
137
+ sorbet-runtime (0.6.13055)
138
138
  steep (1.10.0)
139
139
  activesupport (>= 5.1)
140
140
  concurrent-ruby (>= 1.1.10)
@@ -222,7 +222,7 @@ CHECKSUMS
222
222
  io-console (0.8.2) sha256=d6e3ae7a7cc7574f4b8893b4fca2162e57a825b223a177b7afa236c5ef9814cc
223
223
  json (2.19.2) sha256=e7e1bd318b2c37c4ceee2444841c86539bc462e81f40d134cf97826cb14e83cf
224
224
  json-schema (6.2.0) sha256=e8bff46ed845a22c1ab2bd0d7eccf831c01fe23bb3920caa4c74db4306813666
225
- kreuzberg (4.5.1)
225
+ kreuzberg (4.5.4)
226
226
  language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
227
227
  lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
228
228
  listen (3.10.0) sha256=c6e182db62143aeccc2e1960033bebe7445309c7272061979bb098d03760c9d2
@@ -259,7 +259,7 @@ CHECKSUMS
259
259
  rubocop-rspec (3.9.0) sha256=8fa70a3619408237d789aeecfb9beef40576acc855173e60939d63332fdb55e2
260
260
  ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33
261
261
  securerandom (0.4.1) sha256=cc5193d414a4341b6e225f0cb4446aceca8e50d5e1888743fac16987638ea0b1
262
- sorbet-runtime (0.6.13051) sha256=ae5495bf229c5e3e5e3a2e17ac4853798d993437f5b5f12b763d462183852452
262
+ sorbet-runtime (0.6.13055) sha256=c8ae8c81310e0a28d290b11f44ddca59659b7d7f13752c0ef5d16964bbb84d18
263
263
  steep (1.10.0) sha256=1b295b55f9aaff1b8d3ee42453ee55bc2a1078fda0268f288edb2dc014f4d7d1
264
264
  strscan (3.1.7) sha256=5f76462b94a3ea50b44973225b7d75b2cb96d4e1bee9ef1319b99ca117b72c8c
265
265
  terminal-table (4.0.0) sha256=f504793203f8251b2ea7c7068333053f0beeea26093ec9962e62ea79f94301d2
data/README.md CHANGED
@@ -22,7 +22,7 @@
22
22
  <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
23
23
  </a>
24
24
  <a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
25
- <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.5.1" alt="Go">
25
+ <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.5.4" alt="Go">
26
26
  </a>
27
27
  <a href="https://www.nuget.org/packages/Kreuzberg/">
28
28
  <img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
@@ -61,7 +61,7 @@
61
61
  </div>
62
62
 
63
63
 
64
- Extract text, tables, images, and metadata from 88+ file formats including PDF, Office documents, and images. Ruby bindings with idiomatic Ruby API and native performance.
64
+ Extract text, tables, images, and metadata from 91+ file formats including PDF, Office documents, and images. Ruby bindings with idiomatic Ruby API and native performance.
65
65
 
66
66
 
67
67
  ## Installation
@@ -211,9 +211,9 @@ puts "Processing time: #{result.metadata&.dig('processing_time')}ms"
211
211
 
212
212
  ## Features
213
213
 
214
- ### Supported File Formats (88+)
214
+ ### Supported File Formats (91+)
215
215
 
216
- 88+ file formats across 8 major categories with intelligent format detection and comprehensive metadata extraction.
216
+ 91+ file formats across 8 major categories with intelligent format detection and comprehensive metadata extraction.
217
217
 
218
218
  #### Office Documents
219
219
 
@@ -850,19 +850,21 @@ module Kreuzberg
850
850
  # )
851
851
  #
852
852
  class LayoutDetection
853
- attr_reader :preset, :confidence_threshold, :apply_heuristics
853
+ attr_reader :preset, :confidence_threshold, :apply_heuristics, :table_model
854
854
 
855
- def initialize(preset: 'fast', confidence_threshold: nil, apply_heuristics: true)
855
+ def initialize(preset: 'fast', confidence_threshold: nil, apply_heuristics: true, table_model: nil)
856
856
  @preset = preset.to_s
857
857
  @confidence_threshold = confidence_threshold&.to_f
858
858
  @apply_heuristics = apply_heuristics ? true : false
859
+ @table_model = table_model&.to_s
859
860
  end
860
861
 
861
862
  def to_h
862
863
  {
863
864
  preset: @preset,
864
865
  confidence_threshold: @confidence_threshold,
865
- apply_heuristics: @apply_heuristics
866
+ apply_heuristics: @apply_heuristics,
867
+ table_model: @table_model
866
868
  }.compact
867
869
  end
868
870
  end
@@ -930,7 +932,8 @@ module Kreuzberg
930
932
  :images, :postprocessor,
931
933
  :token_reduction, :keywords, :html_options, :pages,
932
934
  :max_concurrent_extractions, :output_format, :result_format,
933
- :security_limits, :layout, :concurrency
935
+ :security_limits, :layout, :concurrency,
936
+ :cache_namespace, :cache_ttl_secs
934
937
 
935
938
  # Alias for backward compatibility - image_extraction is the canonical name
936
939
  alias image_extraction images
@@ -955,7 +958,7 @@ module Kreuzberg
955
958
  language_detection pdf_options image_extraction
956
959
  postprocessor token_reduction keywords html_options pages
957
960
  max_concurrent_extractions output_format result_format
958
- security_limits layout concurrency
961
+ security_limits layout concurrency cache_namespace cache_ttl_secs
959
962
  ].freeze
960
963
 
961
964
  # Aliases for backward compatibility
@@ -1032,7 +1035,9 @@ module Kreuzberg
1032
1035
  result_format: nil,
1033
1036
  security_limits: nil,
1034
1037
  layout: nil,
1035
- concurrency: nil)
1038
+ concurrency: nil,
1039
+ cache_namespace: nil,
1040
+ cache_ttl_secs: nil)
1036
1041
  kwargs = {
1037
1042
  use_cache: use_cache, enable_quality_processing: enable_quality_processing,
1038
1043
  force_ocr: force_ocr, include_document_structure: include_document_structure,
@@ -1043,7 +1048,9 @@ module Kreuzberg
1043
1048
  pages: pages, max_concurrent_extractions: max_concurrent_extractions,
1044
1049
  output_format: output_format, result_format: result_format,
1045
1050
  security_limits: security_limits, layout: layout,
1046
- concurrency: concurrency
1051
+ concurrency: concurrency,
1052
+ cache_namespace: cache_namespace,
1053
+ cache_ttl_secs: cache_ttl_secs
1047
1054
  }
1048
1055
  extracted = extract_from_hash(hash, kwargs)
1049
1056
 
@@ -1077,6 +1084,8 @@ module Kreuzberg
1077
1084
  @max_concurrent_extractions = params[:max_concurrent_extractions]&.to_i
1078
1085
  @output_format = validate_output_format(params[:output_format])
1079
1086
  @result_format = validate_result_format(params[:result_format])
1087
+ @cache_namespace = params[:cache_namespace]
1088
+ @cache_ttl_secs = params[:cache_ttl_secs]&.to_i
1080
1089
  @security_limits = params[:security_limits]
1081
1090
  end
1082
1091
 
@@ -1112,7 +1121,9 @@ module Kreuzberg
1112
1121
  include_document_structure: @include_document_structure,
1113
1122
  max_concurrent_extractions: @max_concurrent_extractions,
1114
1123
  output_format: @output_format,
1115
- result_format: @result_format
1124
+ result_format: @result_format,
1125
+ cache_namespace: @cache_namespace,
1126
+ cache_ttl_secs: @cache_ttl_secs
1116
1127
  }
1117
1128
  end
1118
1129
 
@@ -1271,6 +1282,10 @@ module Kreuzberg
1271
1282
  @output_format = validate_output_format(value)
1272
1283
  when :result_format
1273
1284
  @result_format = validate_result_format(value)
1285
+ when :cache_namespace
1286
+ @cache_namespace = value
1287
+ when :cache_ttl_secs
1288
+ @cache_ttl_secs = value&.to_i
1274
1289
  else
1275
1290
  raise ArgumentError, "Unknown configuration key: #{key}"
1276
1291
  end
@@ -1352,6 +1367,8 @@ module Kreuzberg
1352
1367
  @max_concurrent_extractions = merged.max_concurrent_extractions
1353
1368
  @output_format = merged.output_format
1354
1369
  @result_format = merged.result_format
1370
+ @cache_namespace = merged.cache_namespace
1371
+ @cache_ttl_secs = merged.cache_ttl_secs
1355
1372
  end
1356
1373
  end
1357
1374
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Kreuzberg
4
- VERSION = '4.5.1'
4
+ VERSION = '4.5.4'
5
5
  end
data/lib/kreuzberg_rb.so CHANGED
Binary file
data/sig/kreuzberg.rbs CHANGED
@@ -463,8 +463,9 @@ module Kreuzberg
463
463
  attr_reader preset: String
464
464
  attr_reader confidence_threshold: Float?
465
465
  attr_reader apply_heuristics: bool
466
+ attr_reader table_model: String?
466
467
 
467
- def initialize: (?preset: String, ?confidence_threshold: Float?, ?apply_heuristics: bool) -> void
468
+ def initialize: (?preset: String, ?confidence_threshold: Float?, ?apply_heuristics: bool, ?table_model: String?) -> void
468
469
  def to_h: () -> Hash[Symbol, untyped]
469
470
  end
470
471
 
@@ -478,6 +479,8 @@ module Kreuzberg
478
479
  class Extraction
479
480
  attr_reader use_cache: bool
480
481
  attr_reader enable_quality_processing: bool
482
+ attr_reader cache_namespace: String?
483
+ attr_reader cache_ttl_secs: Integer?
481
484
  attr_reader force_ocr: bool
482
485
  attr_reader include_document_structure: bool
483
486
  attr_reader ocr: OCR?
@@ -520,7 +523,9 @@ module Kreuzberg
520
523
  ?concurrency: (Concurrency | Hash[Symbol, untyped])?,
521
524
  ?max_concurrent_extractions: Integer?,
522
525
  ?output_format: String?,
523
- ?result_format: String?
526
+ ?result_format: String?,
527
+ ?cache_namespace: String?,
528
+ ?cache_ttl_secs: Integer?
524
529
  ) -> void
525
530
  def to_h: () -> Hash[Symbol, untyped]
526
531
  def to_json: (*untyped) -> String
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kreuzberg
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.5.1
4
+ version: 4.5.4
5
5
  platform: aarch64-linux
6
6
  authors:
7
7
  - Na'aman Hirschfeld
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-03-21 00:00:00.000000000 Z
11
+ date: 2026-03-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler