kreuzberg 4.4.4-aarch64-linux → 4.4.6-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e3e805d656ad3069d15a0daf5c0b4fe5bfd39c9e6770c912d8d6a32dad6fa43c
4
- data.tar.gz: 52b2eaf315ccd95a1825f53fb714c15788e3ebc953b8563424f5012090f38f97
3
+ metadata.gz: b52fd8ab029d63ee54f697c758806e2f32566d3b112cf48865f59f9ddb9b4d5f
4
+ data.tar.gz: 65c4f41bd2157f5d79e86fc9bda2b8389a991ffaed9568f602700e95adef55e9
5
5
  SHA512:
6
- metadata.gz: 40501bcd37865f165bf90c375e49ff5209e8bd4d597b7d274ed628bce8c505fdfccc972dc9ed626c1ad272ba200a54c0c177595ff1bdb63dd63fb24e036df897
7
- data.tar.gz: bc4fad7dd2301d04c5ba4dd54bf29e33fba37035c7e7618ac3f7deaf700b7d6b6354d3ee1fb7241c3ecef2f67303a596327f449fb4d09c01a1c228161484c64d
6
+ metadata.gz: b2c65f9ea098867bda920c3732f6b81c4e52c35aa30d25f15f5755808c34b97a780d35e2f39fc05273658b629c3276577878b4cda1893aa901ccf801e7f39953
7
+ data.tar.gz: 5837f3f6e04712ad82d02bc121e3cb483d28a8b5b32a27f6b3af4f57e8ba875a339fc8e63d1fbc5a466e96d78968b63b5a303a9a58c812e4064e367094fbc511
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- kreuzberg (4.4.4)
4
+ kreuzberg (4.4.6)
5
5
  rb_sys (~> 0.9.119)
6
6
  sorbet-runtime (~> 0.5)
7
7
 
@@ -49,7 +49,7 @@ GEM
49
49
  i18n (1.14.8)
50
50
  concurrent-ruby (~> 1.0)
51
51
  io-console (0.8.2)
52
- json (2.19.0)
52
+ json (2.19.1)
53
53
  json-schema (6.2.0)
54
54
  addressable (~> 2.8)
55
55
  bigdecimal (>= 3.1, < 5)
@@ -122,7 +122,7 @@ GEM
122
122
  rubocop-ast (>= 1.49.0, < 2.0)
123
123
  ruby-progressbar (~> 1.7)
124
124
  unicode-display_width (>= 2.4.0, < 4.0)
125
- rubocop-ast (1.49.0)
125
+ rubocop-ast (1.49.1)
126
126
  parser (>= 3.3.7.2)
127
127
  prism (~> 1.7)
128
128
  rubocop-performance (1.26.1)
@@ -134,7 +134,7 @@ GEM
134
134
  rubocop (~> 1.81)
135
135
  ruby-progressbar (1.13.0)
136
136
  securerandom (0.4.1)
137
- sorbet-runtime (0.6.12997)
137
+ sorbet-runtime (0.6.13023)
138
138
  steep (1.10.0)
139
139
  activesupport (>= 5.1)
140
140
  concurrent-ruby (>= 1.1.10)
@@ -220,9 +220,9 @@ CHECKSUMS
220
220
  fileutils (1.8.0) sha256=8c6b1df54e2540bdb2f39258f08af78853aa70bad52b4d394bbc6424593c6e02
221
221
  i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5
222
222
  io-console (0.8.2) sha256=d6e3ae7a7cc7574f4b8893b4fca2162e57a825b223a177b7afa236c5ef9814cc
223
- json (2.19.0) sha256=bc5202f083618b3af7aba3184146ec9d820f8f6de261838b577173475e499d9a
223
+ json (2.19.1) sha256=dd94fdc59e48bff85913829a32350b3148156bc4fd2a95a2568a78b11344082d
224
224
  json-schema (6.2.0) sha256=e8bff46ed845a22c1ab2bd0d7eccf831c01fe23bb3920caa4c74db4306813666
225
- kreuzberg (4.4.4)
225
+ kreuzberg (4.4.6)
226
226
  language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
227
227
  lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
228
228
  listen (3.10.0) sha256=c6e182db62143aeccc2e1960033bebe7445309c7272061979bb098d03760c9d2
@@ -254,12 +254,12 @@ CHECKSUMS
254
254
  rspec-mocks (3.13.8) sha256=086ad3d3d17533f4237643de0b5c42f04b66348c28bf6b9c2d3f4a3b01af1d47
255
255
  rspec-support (3.13.7) sha256=0640e5570872aafefd79867901deeeeb40b0c9875a36b983d85f54fb7381c47c
256
256
  rubocop (1.85.1) sha256=3dbcf9e961baa4c376eeeb2a03913dca5e3987033b04d38fa538aa1e7406cc77
257
- rubocop-ast (1.49.0) sha256=49c3676d3123a0923d333e20c6c2dbaaae2d2287b475273fddee0c61da9f71fd
257
+ rubocop-ast (1.49.1) sha256=4412f3ee70f6fe4546cc489548e0f6fcf76cafcfa80fa03af67098ffed755035
258
258
  rubocop-performance (1.26.1) sha256=cd19b936ff196df85829d264b522fd4f98b6c89ad271fa52744a8c11b8f71834
259
259
  rubocop-rspec (3.9.0) sha256=8fa70a3619408237d789aeecfb9beef40576acc855173e60939d63332fdb55e2
260
260
  ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33
261
261
  securerandom (0.4.1) sha256=cc5193d414a4341b6e225f0cb4446aceca8e50d5e1888743fac16987638ea0b1
262
- sorbet-runtime (0.6.12997) sha256=5e84f6168c10e15b994fccb808ba64bbb8b3b027ea7bf083a9a3815a8b765c3f
262
+ sorbet-runtime (0.6.13023) sha256=c00d11cc54951efbc0aece994dd6b20b1d1cb2a2606100c24d4ae7f840383073
263
263
  steep (1.10.0) sha256=1b295b55f9aaff1b8d3ee42453ee55bc2a1078fda0268f288edb2dc014f4d7d1
264
264
  strscan (3.1.7) sha256=5f76462b94a3ea50b44973225b7d75b2cb96d4e1bee9ef1319b99ca117b72c8c
265
265
  terminal-table (4.0.0) sha256=f504793203f8251b2ea7c7068333053f0beeea26093ec9962e62ea79f94301d2
data/README.md CHANGED
@@ -22,7 +22,7 @@
22
22
  <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
23
23
  </a>
24
24
  <a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
25
- <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.4.4" alt="Go">
25
+ <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.4.6" alt="Go">
26
26
  </a>
27
27
  <a href="https://www.nuget.org/packages/Kreuzberg/">
28
28
  <img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
@@ -33,12 +33,12 @@
33
33
  <a href="https://rubygems.org/gems/kreuzberg">
34
34
  <img src="https://img.shields.io/gem/v/kreuzberg?label=Ruby&color=007ec6" alt="Ruby">
35
35
  </a>
36
+ <a href="https://kreuzberg-dev.r-universe.dev/kreuzberg">
37
+ <img src="https://img.shields.io/badge/R-kreuzberg-007ec6" alt="R">
38
+ </a>
36
39
  <a href="https://github.com/kreuzberg-dev/kreuzberg/pkgs/container/kreuzberg">
37
40
  <img src="https://img.shields.io/badge/Docker-007ec6?logo=docker&logoColor=white" alt="Docker">
38
41
  </a>
39
- <a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
40
- <img src="https://img.shields.io/badge/C-FFI-007ec6" alt="C">
41
- </a>
42
42
 
43
43
  <!-- Project Info -->
44
44
  <a href="https://github.com/kreuzberg-dev/kreuzberg/blob/main/LICENSE">
@@ -58,7 +58,7 @@
58
58
  </div>
59
59
 
60
60
 
61
- Extract text, tables, images, and metadata from 75+ file formats including PDF, Office documents, and images. Ruby bindings with idiomatic Ruby API and native performance.
61
+ Extract text, tables, images, and metadata from 88+ file formats including PDF, Office documents, and images. Ruby bindings with idiomatic Ruby API and native performance.
62
62
 
63
63
 
64
64
  ## Installation
@@ -91,7 +91,7 @@ gem 'kreuzberg'
91
91
 
92
92
  - **Ruby 3.2.0 or higher** required (including Ruby 4.x)
93
93
  - Ruby 4.0+ is fully supported with no code changes required
94
- - Optional: [ONNX Runtime](https://github.com/microsoft/onnxruntime/releases) version 1.24+ for embeddings support
94
+ - Optional: [ONNX Runtime](https://github.com/microsoft/onnxruntime/releases) version 1.22.x for embeddings support
95
95
  - Optional: [Tesseract OCR](https://github.com/tesseract-ocr/tesseract) for OCR functionality
96
96
 
97
97
  **Ruby 4.0 Compatibility:** Kreuzberg is fully compatible with Ruby 4.0 (released December 25, 2025) and all Ruby 4.x versions. All tests pass with 100% compatibility. The gem compiles without any breaking changes. Key Ruby 4.0 features like Ruby Box, ZJIT compiler, and Ractor improvements work seamlessly with Kreuzberg.
@@ -188,7 +188,7 @@ config = Kreuzberg::Config::Extraction.new(
188
188
  result = Kreuzberg.extract_file_sync('contract.pdf', config: config)
189
189
 
190
190
  puts "Extracted #{result.content.length} characters"
191
- puts "Quality score: #{result.metadata&.dig('quality_score')}"
191
+ puts "Quality score: #{result.quality_score}"
192
192
  puts "Processing time: #{result.metadata&.dig('processing_time')}ms"
193
193
  ```
194
194
 
@@ -208,19 +208,21 @@ puts "Processing time: #{result.metadata&.dig('processing_time')}ms"
208
208
 
209
209
  ## Features
210
210
 
211
- ### Supported File Formats (75+)
211
+ ### Supported File Formats (88+)
212
212
 
213
- 75+ file formats across 8 major categories with intelligent format detection and comprehensive metadata extraction.
213
+ 88+ file formats across 8 major categories with intelligent format detection and comprehensive metadata extraction.
214
214
 
215
215
  #### Office Documents
216
216
 
217
217
  | Category | Formats | Capabilities |
218
218
  |----------|---------|--------------|
219
- | **Word Processing** | `.docx`, `.odt` | Full text, tables, images, metadata, styles |
220
- | **Spreadsheets** | `.xlsx`, `.xlsm`, `.xlsb`, `.xls`, `.xla`, `.xlam`, `.xltm`, `.ods` | Sheet data, formulas, cell metadata, charts |
221
- | **Presentations** | `.pptx`, `.ppt`, `.ppsx` | Slides, speaker notes, images, metadata |
219
+ | **Word Processing** | `.docx`, `.docm`, `.dotx`, `.dotm`, `.dot`, `.odt` | Full text, tables, images, metadata, styles |
220
+ | **Spreadsheets** | `.xlsx`, `.xlsm`, `.xlsb`, `.xls`, `.xla`, `.xlam`, `.xltm`, `.xltx`, `.xlt`, `.ods` | Sheet data, formulas, cell metadata, charts |
221
+ | **Presentations** | `.pptx`, `.pptm`, `.ppsx`, `.potx`, `.potm`, `.pot`, `.ppt` | Slides, speaker notes, images, metadata |
222
222
  | **PDF** | `.pdf` | Text, tables, images, metadata, OCR support |
223
223
  | **eBooks** | `.epub`, `.fb2` | Chapters, metadata, embedded resources |
224
+ | **Database** | `.dbf` | Table data extraction, field type support |
225
+ | **Hangul** | `.hwp`, `.hwpx` | Korean document format, text extraction |
224
226
 
225
227
  #### Images (OCR-Enabled)
226
228
 
@@ -334,7 +336,7 @@ config = Kreuzberg::Config::Extraction.new(
334
336
  result = Kreuzberg.extract_file_sync('contract.pdf', config: config)
335
337
 
336
338
  puts "Extracted #{result.content.length} characters"
337
- puts "Quality score: #{result.metadata&.dig('quality_score')}"
339
+ puts "Quality score: #{result.quality_score}"
338
340
  puts "Processing time: #{result.metadata&.dig('processing_time')}ms"
339
341
  ```
340
342
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Kreuzberg
4
- VERSION = '4.4.4'
4
+ VERSION = '4.4.6'
5
5
  end
data/lib/kreuzberg_rb.so CHANGED
Binary file
data/sig/kreuzberg.rbs CHANGED
@@ -259,6 +259,9 @@ module Kreuzberg
259
259
  attr_reader preset: String?
260
260
  attr_reader embedding: Embedding?
261
261
  attr_reader enabled: bool?
262
+ attr_reader sizing_type: String?
263
+ attr_reader sizing_model: String?
264
+ attr_reader sizing_cache_dir: String?
262
265
 
263
266
  def initialize: (
264
267
  ?max_chars: Integer?,
@@ -267,7 +270,10 @@ module Kreuzberg
267
270
  ?embedding: (Embedding | Hash[Symbol, untyped])?,
268
271
  ?chunk_size: Integer?,
269
272
  ?chunk_overlap: Integer?,
270
- ?enabled: bool
273
+ ?enabled: bool,
274
+ ?sizing_type: String?,
275
+ ?sizing_model: String?,
276
+ ?sizing_cache_dir: String?
271
277
  ) -> void
272
278
  def to_h: () -> Hash[Symbol, untyped]
273
279
  end
@@ -767,6 +773,21 @@ module Kreuzberg
767
773
  def to_h: () -> table_hash
768
774
  end
769
775
 
776
+ # Heading level in the document hierarchy
777
+ class HeadingLevel
778
+ attr_reader level: Integer
779
+ attr_reader text: String
780
+
781
+ def initialize: (level: Integer, text: String) -> void
782
+ end
783
+
784
+ # Heading context for a chunk's section
785
+ class HeadingContext
786
+ attr_reader headings: Array[HeadingLevel]
787
+
788
+ def initialize: (headings: Array[HeadingLevel]) -> void
789
+ end
790
+
770
791
  # Text chunk (Struct from result.rb)
771
792
  class Chunk
772
793
  attr_reader content: String
@@ -778,6 +799,7 @@ module Kreuzberg
778
799
  attr_reader first_page: Integer?
779
800
  attr_reader last_page: Integer?
780
801
  attr_reader embedding: Array[Float]?
802
+ attr_reader heading_context: HeadingContext?
781
803
 
782
804
  def initialize: (
783
805
  content: String,
@@ -788,7 +810,8 @@ module Kreuzberg
788
810
  total_chunks: Integer,
789
811
  first_page: Integer?,
790
812
  last_page: Integer?,
791
- embedding: Array[Float]?
813
+ embedding: Array[Float]?,
814
+ heading_context: HeadingContext?
792
815
  ) -> void
793
816
  def to_h: () -> chunk_hash
794
817
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kreuzberg
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.4.4
4
+ version: 4.4.6
5
5
  platform: aarch64-linux
6
6
  authors:
7
7
  - Na'aman Hirschfeld
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-03-07 00:00:00.000000000 Z
11
+ date: 2026-03-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler