kreuzberg 4.0.0 → 4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/README.md +3 -8
- data/lib/kreuzberg/cli.rb +2 -2
- data/lib/kreuzberg/version.rb +1 -1
- data/sig/kreuzberg/internal.rbs +4 -4
- data/vendor/Cargo.toml +1 -1
- data/vendor/kreuzberg/Cargo.toml +5 -5
- data/vendor/kreuzberg-tesseract/Cargo.toml +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 927d90412c1b3d60924b084adf2dd74789817c652d2c766b3c941e3bab26d598
|
|
4
|
+
data.tar.gz: 5d20205ff891669002713d94f7ee3aae478b70ff6dcf0cbff295ed39f6bd723a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0c3893e3c7bfb7ded24d28d19fa450c444e38dbcb030d8ad7e9d8284d1adcea531f11583966494e1ddb7e25d65c3448b1dd339a3b660dc76552f666224a02eaa
|
|
7
|
+
data.tar.gz: c61cfc4aab02a11861ae656496f17e6ab7b8f6f6e37a307067a39f2fcb58d920b8c1b5853b273de1361072f0a683cc56879097507a286f77364e45bf4562ae22
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
kreuzberg (4.0.0)
|
|
4
|
+
kreuzberg (4.0.1)
|
|
5
5
|
|
|
6
6
|
GEM
|
|
7
7
|
remote: https://rubygems.org/
|
|
@@ -198,7 +198,7 @@ CHECKSUMS
|
|
|
198
198
|
fileutils (1.8.0) sha256=8c6b1df54e2540bdb2f39258f08af78853aa70bad52b4d394bbc6424593c6e02
|
|
199
199
|
i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5
|
|
200
200
|
json (2.18.0) sha256=b10506aee4183f5cf49e0efc48073d7b75843ce3782c68dbeb763351c08fd505
|
|
201
|
-
kreuzberg (4.0.0)
|
|
201
|
+
kreuzberg (4.0.1)
|
|
202
202
|
language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
|
|
203
203
|
lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
|
|
204
204
|
listen (3.9.0) sha256=db9e4424e0e5834480385197c139cb6b0ae0ef28cc13310cfd1ca78377d59c67
|
data/README.md
CHANGED
|
@@ -193,8 +193,7 @@ puts "Processing time: #{result.metadata&.dig('processing_time')}ms"
|
|
|
193
193
|
- **[Installation Guide](https://kreuzberg.dev/getting-started/installation/)** - Platform-specific setup
|
|
194
194
|
- **[API Documentation](https://kreuzberg.dev/api/)** - Complete API reference
|
|
195
195
|
- **[Examples & Guides](https://kreuzberg.dev/guides/)** - Full code examples and usage guides
|
|
196
|
-
- **[Configuration Guide](https://kreuzberg.dev/configuration/)** - Advanced configuration options
|
|
197
|
-
- **[Troubleshooting](https://kreuzberg.dev/troubleshooting/)** - Common issues and solutions
|
|
196
|
+
- **[Configuration Guide](https://kreuzberg.dev/guides/configuration/)** - Advanced configuration options
|
|
198
197
|
|
|
199
198
|
|
|
200
199
|
|
|
@@ -335,7 +334,7 @@ puts "Processing time: #{result.metadata&.dig('processing_time')}ms"
|
|
|
335
334
|
|
|
336
335
|
Kreuzberg supports extensible post-processing plugins for custom text transformation and filtering.
|
|
337
336
|
|
|
338
|
-
For detailed plugin documentation, visit [Plugin System Guide](https://kreuzberg.dev/plugins/).
|
|
337
|
+
For detailed plugin documentation, visit [Plugin System Guide](https://kreuzberg.dev/guides/plugins/).
|
|
339
338
|
|
|
340
339
|
|
|
341
340
|
|
|
@@ -369,7 +368,7 @@ puts "Installation verified! Extracted #{result.content.length} characters"
|
|
|
369
368
|
|
|
370
369
|
For advanced configuration options including language detection, table extraction, OCR settings, and more:
|
|
371
370
|
|
|
372
|
-
**[Configuration Guide](https://kreuzberg.dev/configuration/)**
|
|
371
|
+
**[Configuration Guide](https://kreuzberg.dev/guides/configuration/)**
|
|
373
372
|
|
|
374
373
|
## Documentation
|
|
375
374
|
|
|
@@ -377,10 +376,6 @@ For advanced configuration options including language detection, table extractio
|
|
|
377
376
|
- **[API Reference](https://kreuzberg.dev/reference/api-ruby/)**
|
|
378
377
|
- **[Examples & Guides](https://kreuzberg.dev/guides/)**
|
|
379
378
|
|
|
380
|
-
## Troubleshooting
|
|
381
|
-
|
|
382
|
-
For common issues and solutions, visit [Troubleshooting Guide](https://kreuzberg.dev/troubleshooting/).
|
|
383
|
-
|
|
384
379
|
## Contributing
|
|
385
380
|
|
|
386
381
|
Contributions are welcome! See [Contributing Guide](https://github.com/kreuzberg-dev/kreuzberg/blob/main/CONTRIBUTING.md).
|
data/lib/kreuzberg/cli.rb
CHANGED
|
@@ -13,7 +13,7 @@ module Kreuzberg
|
|
|
13
13
|
# @param ocr [Boolean] Enable OCR
|
|
14
14
|
# @return [String] Extracted content
|
|
15
15
|
#
|
|
16
|
-
def extract(path
|
|
16
|
+
def extract(path:, output: 'text', ocr: false)
|
|
17
17
|
args = ['extract', path, '--format', output]
|
|
18
18
|
args.push('--ocr', ocr ? 'true' : 'false')
|
|
19
19
|
CLIProxy.call(args)
|
|
@@ -24,7 +24,7 @@ module Kreuzberg
|
|
|
24
24
|
# @param path [String] Path to the file
|
|
25
25
|
# @return [String] MIME type
|
|
26
26
|
#
|
|
27
|
-
def detect(path)
|
|
27
|
+
def detect(path:)
|
|
28
28
|
CLIProxy.call(['detect', path]).strip
|
|
29
29
|
end
|
|
30
30
|
|
data/lib/kreuzberg/version.rb
CHANGED
data/sig/kreuzberg/internal.rbs
CHANGED
|
@@ -21,10 +21,10 @@ module Kreuzberg
|
|
|
21
21
|
|
|
22
22
|
module CLI
|
|
23
23
|
# All methods are both instance and class methods due to module_function
|
|
24
|
-
def extract: (String
|
|
25
|
-
def self.extract: (String
|
|
26
|
-
def detect: (String
|
|
27
|
-
def self.detect: (String
|
|
24
|
+
def extract: (path: String, ?output: String, ?ocr: bool) -> String
|
|
25
|
+
def self.extract: (path: String, ?output: String, ?ocr: bool) -> String
|
|
26
|
+
def detect: (path: String) -> String
|
|
27
|
+
def self.detect: (path: String) -> String
|
|
28
28
|
def version: () -> String
|
|
29
29
|
def self.version: () -> String
|
|
30
30
|
def help: () -> String
|
data/vendor/Cargo.toml
CHANGED
data/vendor/kreuzberg/Cargo.toml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "kreuzberg"
|
|
3
|
-
version = "4.0.0"
|
|
3
|
+
version = "4.0.1"
|
|
4
4
|
edition = "2024"
|
|
5
5
|
rust-version = "1.91"
|
|
6
6
|
authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
|
|
@@ -159,7 +159,7 @@ html-to-markdown-rs = { workspace = true, features = [
|
|
|
159
159
|
"inline-images",
|
|
160
160
|
"metadata",
|
|
161
161
|
], optional = true }
|
|
162
|
-
quick-xml = { version = "0.
|
|
162
|
+
quick-xml = { version = "0.39.0", features = ["serialize"], optional = true }
|
|
163
163
|
tar = { version = "0.4.44", optional = true }
|
|
164
164
|
sevenz-rust2 = { version = "0.20.1", optional = true }
|
|
165
165
|
lzma-rust2 = { workspace = true, optional = true }
|
|
@@ -185,7 +185,7 @@ image = { workspace = true, default-features = false, features = [
|
|
|
185
185
|
], optional = true }
|
|
186
186
|
tiff = { version = "0.11", optional = true }
|
|
187
187
|
fast_image_resize = { version = "5.6.0", optional = true }
|
|
188
|
-
ndarray = { version = "0.17.
|
|
188
|
+
ndarray = { version = "0.17.2", optional = true }
|
|
189
189
|
kamadak-exif = { version = "0.6.1", optional = true }
|
|
190
190
|
whatlang = { version = "0.18.0", optional = true }
|
|
191
191
|
text-splitter = { version = "0.29.3", features = ["markdown"], optional = true }
|
|
@@ -231,7 +231,7 @@ reqwest = { workspace = true, default-features = false, features = [
|
|
|
231
231
|
"rustls",
|
|
232
232
|
], optional = true }
|
|
233
233
|
# Use rustls-tls for fastembed on non-Windows platforms
|
|
234
|
-
fastembed = { version = "5.
|
|
234
|
+
fastembed = { version = "5.8", default-features = false, features = [
|
|
235
235
|
"hf-hub-rustls-tls",
|
|
236
236
|
"ort-load-dynamic",
|
|
237
237
|
], optional = true }
|
|
@@ -245,7 +245,7 @@ reqwest = { workspace = true, default-features = false, features = [
|
|
|
245
245
|
"native-tls",
|
|
246
246
|
], optional = true }
|
|
247
247
|
# Use native-tls for fastembed on Windows
|
|
248
|
-
fastembed = { version = "5.
|
|
248
|
+
fastembed = { version = "5.8", default-features = false, features = [
|
|
249
249
|
"hf-hub-native-tls",
|
|
250
250
|
"ort-load-dynamic",
|
|
251
251
|
], optional = true }
|