kreuzberg 4.2.7 → 4.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +3 -3
- data/README.md +1 -1
- data/ext/kreuzberg_rb/native/Cargo.lock +6 -5
- data/kreuzberg.gemspec +2 -1
- data/lib/kreuzberg/version.rb +1 -1
- data/spec/binding/metadata_types_spec.rb +6 -1
- data/vendor/Cargo.toml +2 -2
- data/vendor/kreuzberg/Cargo.toml +1 -1
- data/vendor/kreuzberg/README.md +1 -1
- data/vendor/kreuzberg-ffi/Cargo.toml +1 -1
- data/vendor/kreuzberg-tesseract/Cargo.toml +1 -1
- metadata +16 -16
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2a1e253ec8ac69b4394b7f68cd0d9d2050e66885139219a664ee4cff4f7e4c2f
|
|
4
|
+
data.tar.gz: f6a6998344418328b89aa4a220a080b2708d0540ceb643a12e815fe5040c088b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c3b982490411f182ee4567e2c692c9cfc9d0f1a670db081d73ea20d707c4e8fb26a12da1e077f405cea1c85f219b3971c00ecee2ffc03bce740cfd0bbe2c65ec
|
|
7
|
+
data.tar.gz: c93cf5fb1319aac459129d5266abd02d46731574ddd741edb357ae30f1bb2a42d75eac0e5ff0dd4af8b26dc0fc0da646a1854438c0467ef299d8156f87e5d4c5
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
kreuzberg (4.2.7)
|
|
4
|
+
kreuzberg (4.2.8)
|
|
5
|
+
rb_sys (~> 0.9.119)
|
|
5
6
|
|
|
6
7
|
GEM
|
|
7
8
|
remote: https://rubygems.org/
|
|
@@ -172,7 +173,6 @@ DEPENDENCIES
|
|
|
172
173
|
pry-byebug (~> 3.10)
|
|
173
174
|
rake (~> 13.0)
|
|
174
175
|
rake-compiler (~> 1.2)
|
|
175
|
-
rb_sys (= 0.9.119)
|
|
176
176
|
rbs (~> 3.0)
|
|
177
177
|
rspec (~> 3.12)
|
|
178
178
|
rubocop (~> 1.66)
|
|
@@ -209,7 +209,7 @@ CHECKSUMS
|
|
|
209
209
|
i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5
|
|
210
210
|
io-console (0.8.2) sha256=d6e3ae7a7cc7574f4b8893b4fca2162e57a825b223a177b7afa236c5ef9814cc
|
|
211
211
|
json (2.18.0) sha256=b10506aee4183f5cf49e0efc48073d7b75843ce3782c68dbeb763351c08fd505
|
|
212
|
-
kreuzberg (4.2.7)
|
|
212
|
+
kreuzberg (4.2.8)
|
|
213
213
|
language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
|
|
214
214
|
lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
|
|
215
215
|
listen (3.10.0) sha256=c6e182db62143aeccc2e1960033bebe7445309c7272061979bb098d03760c9d2
|
data/README.md
CHANGED
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
|
|
23
23
|
</a>
|
|
24
24
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
|
|
25
|
-
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.2.7" alt="Go">
|
|
25
|
+
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.2.8" alt="Go">
|
|
26
26
|
</a>
|
|
27
27
|
<a href="https://www.nuget.org/packages/Kreuzberg/">
|
|
28
28
|
<img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
|
|
data/ext/kreuzberg_rb/native/Cargo.lock
CHANGED
|
@@ -2027,10 +2027,11 @@ dependencies = [
|
|
|
2027
2027
|
|
|
2028
2028
|
[[package]]
|
|
2029
2029
|
name = "html-to-markdown-rs"
|
|
2030
|
-
version = "2.24.
|
|
2030
|
+
version = "2.24.5"
|
|
2031
2031
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2032
|
-
checksum = "
|
|
2032
|
+
checksum = "fd0940e15a42f085d4a4ed3ac20c8278af7f8d06cd44f6b7a18148e9347c68db"
|
|
2033
2033
|
dependencies = [
|
|
2034
|
+
"ahash",
|
|
2034
2035
|
"astral-tl",
|
|
2035
2036
|
"base64 0.22.1",
|
|
2036
2037
|
"html-escape",
|
|
@@ -2627,7 +2628,7 @@ dependencies = [
|
|
|
2627
2628
|
|
|
2628
2629
|
[[package]]
|
|
2629
2630
|
name = "kreuzberg"
|
|
2630
|
-
version = "4.2.
|
|
2631
|
+
version = "4.2.7"
|
|
2631
2632
|
dependencies = [
|
|
2632
2633
|
"ahash",
|
|
2633
2634
|
"async-trait",
|
|
@@ -2713,7 +2714,7 @@ dependencies = [
|
|
|
2713
2714
|
|
|
2714
2715
|
[[package]]
|
|
2715
2716
|
name = "kreuzberg-ffi"
|
|
2716
|
-
version = "4.2.
|
|
2717
|
+
version = "4.2.7"
|
|
2717
2718
|
dependencies = [
|
|
2718
2719
|
"ahash",
|
|
2719
2720
|
"async-trait",
|
|
@@ -2771,7 +2772,7 @@ dependencies = [
|
|
|
2771
2772
|
|
|
2772
2773
|
[[package]]
|
|
2773
2774
|
name = "kreuzberg-tesseract"
|
|
2774
|
-
version = "4.2.
|
|
2775
|
+
version = "4.2.7"
|
|
2775
2776
|
dependencies = [
|
|
2776
2777
|
"cc",
|
|
2777
2778
|
"cmake",
|
data/kreuzberg.gemspec
CHANGED
|
@@ -195,10 +195,11 @@ Gem::Specification.new do |spec|
|
|
|
195
195
|
spec.require_paths = ['lib']
|
|
196
196
|
spec.extensions = ['ext/kreuzberg_rb/extconf.rb']
|
|
197
197
|
|
|
198
|
+
spec.add_dependency 'rb_sys', '~> 0.9.119'
|
|
199
|
+
|
|
198
200
|
spec.add_development_dependency 'bundler', '~> 4.0'
|
|
199
201
|
spec.add_development_dependency 'rake', '~> 13.0'
|
|
200
202
|
spec.add_development_dependency 'rake-compiler', '~> 1.2'
|
|
201
|
-
spec.add_development_dependency 'rb_sys', '0.9.119'
|
|
202
203
|
spec.add_development_dependency 'rspec', '~> 3.12'
|
|
203
204
|
spec.add_development_dependency 'sorbet-runtime', '~> 0.5'
|
|
204
205
|
unless Gem.win_platform?
|
data/lib/kreuzberg/version.rb
CHANGED
data/spec/binding/metadata_types_spec.rb
CHANGED
|
@@ -1174,6 +1174,7 @@ RSpec.describe 'Kreuzberg Metadata Types' do
|
|
|
1174
1174
|
end
|
|
1175
1175
|
|
|
1176
1176
|
def create_concurrent_test_files
|
|
1177
|
+
@concurrent_tempfiles = []
|
|
1177
1178
|
test_files = []
|
|
1178
1179
|
5.times do |i|
|
|
1179
1180
|
html_content = <<~HTML
|
|
@@ -1191,7 +1192,11 @@ RSpec.describe 'Kreuzberg Metadata Types' do
|
|
|
1191
1192
|
</body>
|
|
1192
1193
|
</html>
|
|
1193
1194
|
HTML
|
|
1194
|
-
|
|
1195
|
+
file = Tempfile.new(['test', '.html'])
|
|
1196
|
+
file.write(html_content)
|
|
1197
|
+
file.close
|
|
1198
|
+
@concurrent_tempfiles << file
|
|
1199
|
+
test_files << file.path
|
|
1195
1200
|
end
|
|
1196
1201
|
test_files
|
|
1197
1202
|
end
|
data/vendor/Cargo.toml
CHANGED
|
@@ -3,7 +3,7 @@ members = ["kreuzberg", "kreuzberg-tesseract", "kreuzberg-ffi"]
|
|
|
3
3
|
resolver = "2"
|
|
4
4
|
|
|
5
5
|
[workspace.package]
|
|
6
|
-
version = "4.2.
|
|
6
|
+
version = "4.2.8"
|
|
7
7
|
edition = "2024"
|
|
8
8
|
rust-version = "1.91"
|
|
9
9
|
authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
|
|
@@ -48,7 +48,7 @@ hex = "0.4.3"
|
|
|
48
48
|
toml = "0.9.11"
|
|
49
49
|
num_cpus = "1.17.0"
|
|
50
50
|
once_cell = "1.21.3"
|
|
51
|
-
html-to-markdown-rs = { version = "2.24.
|
|
51
|
+
html-to-markdown-rs = { version = "2.24.5", default-features = false }
|
|
52
52
|
reqwest = { version = "0.13.1", default-features = false, features = ["json", "rustls"] }
|
|
53
53
|
image = { version = "0.25.9", default-features = false }
|
|
54
54
|
lzma-rust2 = { version = "0.15.7" }
|
data/vendor/kreuzberg/Cargo.toml
CHANGED
data/vendor/kreuzberg/README.md
CHANGED
|
@@ -17,7 +17,7 @@ High-performance document intelligence library for Rust. Extract text, metadata,
|
|
|
17
17
|
|
|
18
18
|
This is the core Rust library that powers the Python, TypeScript, and Ruby bindings.
|
|
19
19
|
|
|
20
|
-
> **🚀 Version 4.2.
|
|
20
|
+
> **🚀 Version 4.2.8 Release**
|
|
21
21
|
> This is a pre-release version. We invite you to test the library and [report any issues](https://github.com/kreuzberg-dev/kreuzberg/issues) you encounter.
|
|
22
22
|
>
|
|
23
23
|
> **Note**: The Rust crate is not currently published to crates.io for this RC. Use git dependencies or language bindings (Python, TypeScript, Ruby) instead.
|
|
@@ -28,7 +28,7 @@ serde_json = { workspace = true }
|
|
|
28
28
|
serde = { workspace = true }
|
|
29
29
|
async-trait = { workspace = true }
|
|
30
30
|
tokio = { workspace = true }
|
|
31
|
-
html-to-markdown-rs = { version = "2.24.
|
|
31
|
+
html-to-markdown-rs = { version = "2.24.5", default-features = false }
|
|
32
32
|
rayon = { version = "1.11", optional = true }
|
|
33
33
|
log = "0.4"
|
|
34
34
|
ahash = "0.8"
|
metadata
CHANGED
|
@@ -1,15 +1,29 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: kreuzberg
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 4.2.7
|
|
4
|
+
version: 4.2.8
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Na'aman Hirschfeld
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-02-
|
|
11
|
+
date: 2026-02-02 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: rb_sys
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: 0.9.119
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: 0.9.119
|
|
13
27
|
- !ruby/object:Gem::Dependency
|
|
14
28
|
name: bundler
|
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -52,20 +66,6 @@ dependencies:
|
|
|
52
66
|
- - "~>"
|
|
53
67
|
- !ruby/object:Gem::Version
|
|
54
68
|
version: '1.2'
|
|
55
|
-
- !ruby/object:Gem::Dependency
|
|
56
|
-
name: rb_sys
|
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
|
58
|
-
requirements:
|
|
59
|
-
- - '='
|
|
60
|
-
- !ruby/object:Gem::Version
|
|
61
|
-
version: 0.9.119
|
|
62
|
-
type: :development
|
|
63
|
-
prerelease: false
|
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
-
requirements:
|
|
66
|
-
- - '='
|
|
67
|
-
- !ruby/object:Gem::Version
|
|
68
|
-
version: 0.9.119
|
|
69
69
|
- !ruby/object:Gem::Dependency
|
|
70
70
|
name: rspec
|
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|