kreuzberg 4.2.3 → 4.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2c6fc44b151014f7e56c82bd191f55244a4294a259b24b95fc494dba6f8eaba6
4
- data.tar.gz: 6e40a732814ff3e2a164e718cdb1c7a6ae838b2b2210a66b232f8675c7f79a80
3
+ metadata.gz: 037c280a2c425e3e2f9f0664d63ec8760ec5751714c7cb3178b1d8d8dd004999
4
+ data.tar.gz: 518f80241e0c7b4515276b37261fc20114f3928740e2809e3a5519a6a3ae11a1
5
5
  SHA512:
6
- metadata.gz: f9c3a45f31c3ad9e3857872d8705b397b40c4317844ef421f4da4c2918e57411f5a626df4f6706d7db4916f33b8644c736e7b41508b398fd0197f1a87170fa3c
7
- data.tar.gz: 8b05a75be261dbe583c4873d9d21079efff97d6c9c0340bbd8a73a43c9d15955431f4de20cd8b4a8b7956872f52e4467c253f5da03177a1e7d3b6a10d202b59d
6
+ metadata.gz: 2f6cdde03849c18c54c587e3a500c250458b2960da207fd351b76c202842189b5eb24bd5eab5d4a5355f0faff8aeebf81fb0c5b8416aed2f75778c820f3ae000
7
+ data.tar.gz: a19437bcb6cf06382456718d1c53b06d065f905c55dd1e75ae5503203e1984417bb6f7468dde31af5b2c695ddc5d119d59204238086067c96cd15f659eaed566
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- kreuzberg (4.2.3)
4
+ kreuzberg (4.2.4)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -97,7 +97,7 @@ GEM
97
97
  rspec-mocks (3.13.7)
98
98
  diff-lcs (>= 1.2.0, < 2.0)
99
99
  rspec-support (~> 3.13.0)
100
- rspec-support (3.13.6)
100
+ rspec-support (3.13.7)
101
101
  rubocop (1.84.0)
102
102
  json (~> 2.3)
103
103
  language_server-protocol (~> 3.17.0.2)
@@ -121,7 +121,7 @@ GEM
121
121
  rubocop (~> 1.81)
122
122
  ruby-progressbar (1.13.0)
123
123
  securerandom (0.4.1)
124
- sorbet-runtime (0.6.12903)
124
+ sorbet-runtime (0.6.12904)
125
125
  steep (1.10.0)
126
126
  activesupport (>= 5.1)
127
127
  concurrent-ruby (>= 1.1.10)
@@ -207,7 +207,7 @@ CHECKSUMS
207
207
  i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5
208
208
  io-console (0.8.2) sha256=d6e3ae7a7cc7574f4b8893b4fca2162e57a825b223a177b7afa236c5ef9814cc
209
209
  json (2.18.0) sha256=b10506aee4183f5cf49e0efc48073d7b75843ce3782c68dbeb763351c08fd505
210
- kreuzberg (4.2.3)
210
+ kreuzberg (4.2.4)
211
211
  language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
212
212
  lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
213
213
  listen (3.10.0) sha256=c6e182db62143aeccc2e1960033bebe7445309c7272061979bb098d03760c9d2
@@ -235,14 +235,14 @@ CHECKSUMS
235
235
  rspec-core (3.13.6) sha256=a8823c6411667b60a8bca135364351dda34cd55e44ff94c4be4633b37d828b2d
236
236
  rspec-expectations (3.13.5) sha256=33a4d3a1d95060aea4c94e9f237030a8f9eae5615e9bd85718fe3a09e4b58836
237
237
  rspec-mocks (3.13.7) sha256=0979034e64b1d7a838aaaddf12bf065ea4dc40ef3d4c39f01f93ae2c66c62b1c
238
- rspec-support (3.13.6) sha256=2e8de3702427eab064c9352fe74488cc12a1bfae887ad8b91cba480ec9f8afb2
238
+ rspec-support (3.13.7) sha256=0640e5570872aafefd79867901deeeeb40b0c9875a36b983d85f54fb7381c47c
239
239
  rubocop (1.84.0) sha256=88dec310153bb685a879f5a7cdb601f6287b8f0ee675d9dc63a17c7204c4190a
240
240
  rubocop-ast (1.49.0) sha256=49c3676d3123a0923d333e20c6c2dbaaae2d2287b475273fddee0c61da9f71fd
241
241
  rubocop-performance (1.26.1) sha256=cd19b936ff196df85829d264b522fd4f98b6c89ad271fa52744a8c11b8f71834
242
242
  rubocop-rspec (3.9.0) sha256=8fa70a3619408237d789aeecfb9beef40576acc855173e60939d63332fdb55e2
243
243
  ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33
244
244
  securerandom (0.4.1) sha256=cc5193d414a4341b6e225f0cb4446aceca8e50d5e1888743fac16987638ea0b1
245
- sorbet-runtime (0.6.12903) sha256=c23968c0dcf5a5db57f32c003fe3db7fb588c168cdd57d92ea4dceaba063118a
245
+ sorbet-runtime (0.6.12904) sha256=0bf2ea0d70de7f3896ec1db3fbdbe89be970ddc1b92406630fd7411a3a8b1bd0
246
246
  steep (1.10.0) sha256=1b295b55f9aaff1b8d3ee42453ee55bc2a1078fda0268f288edb2dc014f4d7d1
247
247
  strscan (3.1.7) sha256=5f76462b94a3ea50b44973225b7d75b2cb96d4e1bee9ef1319b99ca117b72c8c
248
248
  terminal-table (4.0.0) sha256=f504793203f8251b2ea7c7068333053f0beeea26093ec9962e62ea79f94301d2
data/README.md CHANGED
@@ -22,7 +22,7 @@
22
22
  <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
23
23
  </a>
24
24
  <a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
25
- <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.2.3" alt="Go">
25
+ <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.2.4" alt="Go">
26
26
  </a>
27
27
  <a href="https://www.nuget.org/packages/Kreuzberg/">
28
28
  <img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Kreuzberg
4
- VERSION = '4.2.3'
4
+ VERSION = '4.2.4'
5
5
  end
data/vendor/Cargo.toml CHANGED
@@ -3,7 +3,7 @@ members = ["kreuzberg", "kreuzberg-tesseract", "kreuzberg-ffi"]
3
3
  resolver = "2"
4
4
 
5
5
  [workspace.package]
6
- version = "4.2.3"
6
+ version = "4.2.4"
7
7
  edition = "2024"
8
8
  rust-version = "1.91"
9
9
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "kreuzberg"
3
- version = "4.2.3"
3
+ version = "4.2.4"
4
4
  edition = "2024"
5
5
  rust-version = "1.91"
6
6
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
@@ -17,7 +17,7 @@ High-performance document intelligence library for Rust. Extract text, metadata,
17
17
 
18
18
  This is the core Rust library that powers the Python, TypeScript, and Ruby bindings.
19
19
 
20
- > **🚀 Version 4.2.3 Release**
20
+ > **🚀 Version 4.2.4 Release**
21
21
  > This is a pre-release version. We invite you to test the library and [report any issues](https://github.com/kreuzberg-dev/kreuzberg/issues) you encounter.
22
22
  >
23
23
  > **Note**: The Rust crate is not currently published to crates.io for this RC. Use git dependencies or language bindings (Python, TypeScript, Ruby) instead.
@@ -3,6 +3,14 @@
3
3
  use super::types::KeywordAlgorithm;
4
4
  use serde::{Deserialize, Serialize};
5
5
 
6
+ fn default_max_keywords() -> usize {
7
+ 10
8
+ }
9
+
10
+ fn default_ngram_range() -> (usize, usize) {
11
+ (1, 3)
12
+ }
13
+
6
14
  /// YAKE-specific parameters.
7
15
  #[cfg(feature = "keywords-yake")]
8
16
  #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -45,15 +53,18 @@ impl Default for RakeParams {
45
53
  #[derive(Debug, Clone, Serialize, Deserialize)]
46
54
  pub struct KeywordConfig {
47
55
  /// Algorithm to use for extraction.
56
+ #[serde(default)]
48
57
  pub algorithm: KeywordAlgorithm,
49
58
 
50
59
  /// Maximum number of keywords to extract (default: 10).
60
+ #[serde(default = "default_max_keywords")]
51
61
  pub max_keywords: usize,
52
62
 
53
63
  /// Minimum score threshold (0.0-1.0, default: 0.0).
54
64
  ///
55
65
  /// Keywords with scores below this threshold are filtered out.
56
66
  /// Note: Score ranges differ between algorithms.
67
+ #[serde(default)]
57
68
  pub min_score: f32,
58
69
 
59
70
  /// N-gram range for keyword extraction (min, max).
@@ -61,6 +72,7 @@ pub struct KeywordConfig {
61
72
  /// (1, 1) = unigrams only
62
73
  /// (1, 2) = unigrams and bigrams
63
74
  /// (1, 3) = unigrams, bigrams, and trigrams (default)
75
+ #[serde(default = "default_ngram_range")]
64
76
  pub ngram_range: (usize, usize),
65
77
 
66
78
  /// Language code for stopword filtering (e.g., "en", "de", "fr").
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "kreuzberg-tesseract"
3
- version = "4.2.3"
3
+ version = "4.2.4"
4
4
  edition = "2024"
5
5
  rust-version = "1.91"
6
6
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kreuzberg
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.2.3
4
+ version: 4.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Na'aman Hirschfeld
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-01-28 00:00:00.000000000 Z
11
+ date: 2026-01-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler