kreuzberg 4.2.3 → 4.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +6 -6
- data/README.md +1 -1
- data/lib/kreuzberg/version.rb +1 -1
- data/vendor/Cargo.toml +1 -1
- data/vendor/kreuzberg/Cargo.toml +1 -1
- data/vendor/kreuzberg/README.md +1 -1
- data/vendor/kreuzberg/src/keywords/config.rs +12 -0
- data/vendor/kreuzberg-tesseract/Cargo.toml +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 037c280a2c425e3e2f9f0664d63ec8760ec5751714c7cb3178b1d8d8dd004999
|
|
4
|
+
data.tar.gz: 518f80241e0c7b4515276b37261fc20114f3928740e2809e3a5519a6a3ae11a1
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 2f6cdde03849c18c54c587e3a500c250458b2960da207fd351b76c202842189b5eb24bd5eab5d4a5355f0faff8aeebf81fb0c5b8416aed2f75778c820f3ae000
|
|
7
|
+
data.tar.gz: a19437bcb6cf06382456718d1c53b06d065f905c55dd1e75ae5503203e1984417bb6f7468dde31af5b2c695ddc5d119d59204238086067c96cd15f659eaed566
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
kreuzberg (4.2.3)
|
|
4
|
+
kreuzberg (4.2.4)
|
|
5
5
|
|
|
6
6
|
GEM
|
|
7
7
|
remote: https://rubygems.org/
|
|
@@ -97,7 +97,7 @@ GEM
|
|
|
97
97
|
rspec-mocks (3.13.7)
|
|
98
98
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
99
99
|
rspec-support (~> 3.13.0)
|
|
100
|
-
rspec-support (3.13.
|
|
100
|
+
rspec-support (3.13.7)
|
|
101
101
|
rubocop (1.84.0)
|
|
102
102
|
json (~> 2.3)
|
|
103
103
|
language_server-protocol (~> 3.17.0.2)
|
|
@@ -121,7 +121,7 @@ GEM
|
|
|
121
121
|
rubocop (~> 1.81)
|
|
122
122
|
ruby-progressbar (1.13.0)
|
|
123
123
|
securerandom (0.4.1)
|
|
124
|
-
sorbet-runtime (0.6.
|
|
124
|
+
sorbet-runtime (0.6.12904)
|
|
125
125
|
steep (1.10.0)
|
|
126
126
|
activesupport (>= 5.1)
|
|
127
127
|
concurrent-ruby (>= 1.1.10)
|
|
@@ -207,7 +207,7 @@ CHECKSUMS
|
|
|
207
207
|
i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5
|
|
208
208
|
io-console (0.8.2) sha256=d6e3ae7a7cc7574f4b8893b4fca2162e57a825b223a177b7afa236c5ef9814cc
|
|
209
209
|
json (2.18.0) sha256=b10506aee4183f5cf49e0efc48073d7b75843ce3782c68dbeb763351c08fd505
|
|
210
|
-
kreuzberg (4.2.3)
|
|
210
|
+
kreuzberg (4.2.4)
|
|
211
211
|
language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
|
|
212
212
|
lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
|
|
213
213
|
listen (3.10.0) sha256=c6e182db62143aeccc2e1960033bebe7445309c7272061979bb098d03760c9d2
|
|
@@ -235,14 +235,14 @@ CHECKSUMS
|
|
|
235
235
|
rspec-core (3.13.6) sha256=a8823c6411667b60a8bca135364351dda34cd55e44ff94c4be4633b37d828b2d
|
|
236
236
|
rspec-expectations (3.13.5) sha256=33a4d3a1d95060aea4c94e9f237030a8f9eae5615e9bd85718fe3a09e4b58836
|
|
237
237
|
rspec-mocks (3.13.7) sha256=0979034e64b1d7a838aaaddf12bf065ea4dc40ef3d4c39f01f93ae2c66c62b1c
|
|
238
|
-
rspec-support (3.13.
|
|
238
|
+
rspec-support (3.13.7) sha256=0640e5570872aafefd79867901deeeeb40b0c9875a36b983d85f54fb7381c47c
|
|
239
239
|
rubocop (1.84.0) sha256=88dec310153bb685a879f5a7cdb601f6287b8f0ee675d9dc63a17c7204c4190a
|
|
240
240
|
rubocop-ast (1.49.0) sha256=49c3676d3123a0923d333e20c6c2dbaaae2d2287b475273fddee0c61da9f71fd
|
|
241
241
|
rubocop-performance (1.26.1) sha256=cd19b936ff196df85829d264b522fd4f98b6c89ad271fa52744a8c11b8f71834
|
|
242
242
|
rubocop-rspec (3.9.0) sha256=8fa70a3619408237d789aeecfb9beef40576acc855173e60939d63332fdb55e2
|
|
243
243
|
ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33
|
|
244
244
|
securerandom (0.4.1) sha256=cc5193d414a4341b6e225f0cb4446aceca8e50d5e1888743fac16987638ea0b1
|
|
245
|
-
sorbet-runtime (0.6.
|
|
245
|
+
sorbet-runtime (0.6.12904) sha256=0bf2ea0d70de7f3896ec1db3fbdbe89be970ddc1b92406630fd7411a3a8b1bd0
|
|
246
246
|
steep (1.10.0) sha256=1b295b55f9aaff1b8d3ee42453ee55bc2a1078fda0268f288edb2dc014f4d7d1
|
|
247
247
|
strscan (3.1.7) sha256=5f76462b94a3ea50b44973225b7d75b2cb96d4e1bee9ef1319b99ca117b72c8c
|
|
248
248
|
terminal-table (4.0.0) sha256=f504793203f8251b2ea7c7068333053f0beeea26093ec9962e62ea79f94301d2
|
data/README.md
CHANGED
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
|
|
23
23
|
</a>
|
|
24
24
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
|
|
25
|
-
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.2.3" alt="Go">
|
|
25
|
+
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.2.4" alt="Go">
|
|
26
26
|
</a>
|
|
27
27
|
<a href="https://www.nuget.org/packages/Kreuzberg/">
|
|
28
28
|
<img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
|
data/lib/kreuzberg/version.rb
CHANGED
data/vendor/Cargo.toml
CHANGED
data/vendor/kreuzberg/Cargo.toml
CHANGED
data/vendor/kreuzberg/README.md
CHANGED
|
@@ -17,7 +17,7 @@ High-performance document intelligence library for Rust. Extract text, metadata,
|
|
|
17
17
|
|
|
18
18
|
This is the core Rust library that powers the Python, TypeScript, and Ruby bindings.
|
|
19
19
|
|
|
20
|
-
> **🚀 Version 4.2.3 Release**
|
|
20
|
+
> **🚀 Version 4.2.4 Release**
|
|
21
21
|
> This is a pre-release version. We invite you to test the library and [report any issues](https://github.com/kreuzberg-dev/kreuzberg/issues) you encounter.
|
|
22
22
|
>
|
|
23
23
|
> **Note**: The Rust crate is not currently published to crates.io for this RC. Use git dependencies or language bindings (Python, TypeScript, Ruby) instead.
|
|
@@ -3,6 +3,14 @@
|
|
|
3
3
|
use super::types::KeywordAlgorithm;
|
|
4
4
|
use serde::{Deserialize, Serialize};
|
|
5
5
|
|
|
6
|
+
fn default_max_keywords() -> usize {
|
|
7
|
+
10
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
fn default_ngram_range() -> (usize, usize) {
|
|
11
|
+
(1, 3)
|
|
12
|
+
}
|
|
13
|
+
|
|
6
14
|
/// YAKE-specific parameters.
|
|
7
15
|
#[cfg(feature = "keywords-yake")]
|
|
8
16
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
@@ -45,15 +53,18 @@ impl Default for RakeParams {
|
|
|
45
53
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
46
54
|
pub struct KeywordConfig {
|
|
47
55
|
/// Algorithm to use for extraction.
|
|
56
|
+
#[serde(default)]
|
|
48
57
|
pub algorithm: KeywordAlgorithm,
|
|
49
58
|
|
|
50
59
|
/// Maximum number of keywords to extract (default: 10).
|
|
60
|
+
#[serde(default = "default_max_keywords")]
|
|
51
61
|
pub max_keywords: usize,
|
|
52
62
|
|
|
53
63
|
/// Minimum score threshold (0.0-1.0, default: 0.0).
|
|
54
64
|
///
|
|
55
65
|
/// Keywords with scores below this threshold are filtered out.
|
|
56
66
|
/// Note: Score ranges differ between algorithms.
|
|
67
|
+
#[serde(default)]
|
|
57
68
|
pub min_score: f32,
|
|
58
69
|
|
|
59
70
|
/// N-gram range for keyword extraction (min, max).
|
|
@@ -61,6 +72,7 @@ pub struct KeywordConfig {
|
|
|
61
72
|
/// (1, 1) = unigrams only
|
|
62
73
|
/// (1, 2) = unigrams and bigrams
|
|
63
74
|
/// (1, 3) = unigrams, bigrams, and trigrams (default)
|
|
75
|
+
#[serde(default = "default_ngram_range")]
|
|
64
76
|
pub ngram_range: (usize, usize),
|
|
65
77
|
|
|
66
78
|
/// Language code for stopword filtering (e.g., "en", "de", "fr").
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: kreuzberg
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 4.2.3
|
|
4
|
+
version: 4.2.4
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Na'aman Hirschfeld
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-01-
|
|
11
|
+
date: 2026-01-29 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|