kreuzberg 4.4.1 → 4.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +8 -8
- data/README.md +1 -1
- data/ext/kreuzberg_rb/native/Cargo.lock +79 -1219
- data/ext/kreuzberg_rb/native/Cargo.toml +6 -6
- data/lib/kreuzberg/result.rb +8 -4
- data/lib/kreuzberg/version.rb +1 -1
- data/lib/kreuzberg.rb +2 -0
- data/spec/binding/batch_operations_spec.rb +4 -3
- data/vendor/Cargo.toml +4 -4
- data/vendor/kreuzberg/Cargo.toml +11 -25
- data/vendor/kreuzberg/README.md +1 -1
- data/vendor/kreuzberg/src/extraction/docx/drawing.rs +5 -2
- data/vendor/kreuzberg/src/extraction/docx/math.rs +1706 -0
- data/vendor/kreuzberg/src/extraction/docx/mod.rs +1 -0
- data/vendor/kreuzberg/src/extraction/docx/parser.rs +992 -78
- data/vendor/kreuzberg/src/extraction/docx/table.rs +12 -1
- data/vendor/kreuzberg/src/extraction/email.rs +198 -6
- data/vendor/kreuzberg/src/extraction/html/converter.rs +50 -29
- data/vendor/kreuzberg/src/extraction/image.rs +20 -2
- data/vendor/kreuzberg/src/extraction/markdown.rs +87 -0
- data/vendor/kreuzberg/src/extraction/mod.rs +2 -0
- data/vendor/kreuzberg/src/extraction/pptx/content_builder.rs +59 -36
- data/vendor/kreuzberg/src/extraction/pptx/elements.rs +2 -0
- data/vendor/kreuzberg/src/extraction/pptx/mod.rs +43 -15
- data/vendor/kreuzberg/src/extractors/bibtex.rs +3 -3
- data/vendor/kreuzberg/src/extractors/csv.rs +98 -1
- data/vendor/kreuzberg/src/extractors/docbook.rs +39 -15
- data/vendor/kreuzberg/src/extractors/docx.rs +63 -25
- data/vendor/kreuzberg/src/extractors/epub/content.rs +15 -1
- data/vendor/kreuzberg/src/extractors/fictionbook.rs +637 -65
- data/vendor/kreuzberg/src/extractors/html.rs +12 -1
- data/vendor/kreuzberg/src/extractors/jupyter.rs +24 -8
- data/vendor/kreuzberg/src/extractors/latex/commands.rs +3 -1
- data/vendor/kreuzberg/src/extractors/latex/environments.rs +9 -4
- data/vendor/kreuzberg/src/extractors/latex/parser.rs +55 -82
- data/vendor/kreuzberg/src/extractors/latex/utilities.rs +78 -9
- data/vendor/kreuzberg/src/extractors/odt.rs +257 -36
- data/vendor/kreuzberg/src/extractors/pptx.rs +17 -3
- data/vendor/kreuzberg/src/extractors/rst.rs +31 -15
- data/vendor/kreuzberg/src/extractors/rtf/formatting.rs +24 -1
- data/vendor/kreuzberg/src/extractors/rtf/mod.rs +31 -2
- data/vendor/kreuzberg/src/extractors/rtf/parser.rs +26 -12
- data/vendor/kreuzberg/src/extractors/rtf/tables.rs +9 -4
- data/vendor/kreuzberg/src/extractors/typst.rs +81 -19
- data/vendor/kreuzberg/src/mcp/server.rs +16 -19
- data/vendor/kreuzberg/src/ocr/processor/execution.rs +62 -25
- data/vendor/kreuzberg/src/paddle_ocr/backend.rs +2 -2
- data/vendor/kreuzberg/tests/batch_orchestration.rs +4 -2
- data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +0 -4
- data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +11 -3
- data/vendor/kreuzberg/tests/pdf_markdown_regression.rs +194 -84
- data/vendor/kreuzberg-ffi/Cargo.toml +3 -3
- data/vendor/kreuzberg-ffi/kreuzberg.h +2 -2
- data/vendor/kreuzberg-paddle-ocr/Cargo.toml +1 -1
- data/vendor/kreuzberg-pdfium-render/Cargo.toml +1 -1
- data/vendor/kreuzberg-tesseract/Cargo.toml +2 -2
- data/vendor/kreuzberg-tesseract/build.rs +38 -8
- metadata +3 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b65c17f6cf4332ce366d8da0197077c2e222a262a2da29b0681c1139918527bf
|
|
4
|
+
data.tar.gz: 1082caefd179e5eeaa44b69ad93f02458dae2e6cc2edf5aaf1dfc1eee838276c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: '01496d04b7671527a8e861ac3d155a4c1ed96c75b6898d419ad021edba646e896fbd3fa46920c0a8ebf8d7eba79ab3ad83edce44261b813b16a1e6d28161fc61'
|
|
7
|
+
data.tar.gz: 64aa2e0654a3300c06948c65ed1d944f2c05f08f9989fcd27f5ab38e20772292d5913edea6beed522da4056ba0288f4416bed1ee6f90438155e3ec112d6ef4cb
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
kreuzberg (4.4.1)
|
|
4
|
+
kreuzberg (4.4.2)
|
|
5
5
|
rb_sys (~> 0.9.119)
|
|
6
6
|
sorbet-runtime (~> 0.5)
|
|
7
7
|
|
|
@@ -60,7 +60,7 @@ GEM
|
|
|
60
60
|
rb-fsevent (~> 0.10, >= 0.10.3)
|
|
61
61
|
rb-inotify (~> 0.9, >= 0.9.10)
|
|
62
62
|
logger (1.7.0)
|
|
63
|
-
mcp (0.
|
|
63
|
+
mcp (0.8.0)
|
|
64
64
|
json-schema (>= 4.1)
|
|
65
65
|
method_source (1.1.0)
|
|
66
66
|
minitest (6.0.2)
|
|
@@ -79,7 +79,7 @@ GEM
|
|
|
79
79
|
pry-byebug (3.12.0)
|
|
80
80
|
byebug (~> 13.0)
|
|
81
81
|
pry (>= 0.13, < 0.17)
|
|
82
|
-
public_suffix (7.0.
|
|
82
|
+
public_suffix (7.0.5)
|
|
83
83
|
racc (1.8.1)
|
|
84
84
|
rainbow (3.1.1)
|
|
85
85
|
rake (13.3.1)
|
|
@@ -134,7 +134,7 @@ GEM
|
|
|
134
134
|
rubocop (~> 1.81)
|
|
135
135
|
ruby-progressbar (1.13.0)
|
|
136
136
|
securerandom (0.4.1)
|
|
137
|
-
sorbet-runtime (0.6.
|
|
137
|
+
sorbet-runtime (0.6.12984)
|
|
138
138
|
steep (1.10.0)
|
|
139
139
|
activesupport (>= 5.1)
|
|
140
140
|
concurrent-ruby (>= 1.1.10)
|
|
@@ -222,12 +222,12 @@ CHECKSUMS
|
|
|
222
222
|
io-console (0.8.2) sha256=d6e3ae7a7cc7574f4b8893b4fca2162e57a825b223a177b7afa236c5ef9814cc
|
|
223
223
|
json (2.18.1) sha256=fe112755501b8d0466b5ada6cf50c8c3f41e897fa128ac5d263ec09eedc9f986
|
|
224
224
|
json-schema (6.1.0) sha256=6bf70a2cfb6dfd5a06da28093fa8190f324c88eabd36a7f47097f227321dc702
|
|
225
|
-
kreuzberg (4.4.1)
|
|
225
|
+
kreuzberg (4.4.2)
|
|
226
226
|
language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
|
|
227
227
|
lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
|
|
228
228
|
listen (3.10.0) sha256=c6e182db62143aeccc2e1960033bebe7445309c7272061979bb098d03760c9d2
|
|
229
229
|
logger (1.7.0) sha256=196edec7cc44b66cfb40f9755ce11b392f21f7967696af15d274dde7edff0203
|
|
230
|
-
mcp (0.
|
|
230
|
+
mcp (0.8.0) sha256=ae8bd146bb8e168852866fd26f805f52744f6326afb3211e073f78a95e0c34fb
|
|
231
231
|
method_source (1.1.0) sha256=181301c9c45b731b4769bc81e8860e72f9161ad7d66dd99103c9ab84f560f5c5
|
|
232
232
|
minitest (6.0.2) sha256=db6e57956f6ecc6134683b4c87467d6dd792323c7f0eea7b93f66bd284adbc3d
|
|
233
233
|
mutex_m (0.3.0) sha256=cfcb04ac16b69c4813777022fdceda24e9f798e48092a2b817eb4c0a782b0751
|
|
@@ -236,7 +236,7 @@ CHECKSUMS
|
|
|
236
236
|
prism (1.9.0) sha256=7b530c6a9f92c24300014919c9dcbc055bf4cdf51ec30aed099b06cd6674ef85
|
|
237
237
|
pry (0.16.0) sha256=d76c69065698ed1f85e717bd33d7942c38a50868f6b0673c636192b3d1b6054e
|
|
238
238
|
pry-byebug (3.12.0) sha256=594e094ae8a8390a7ad4c7b36ae36e13304ed02664c67417d108dc5f7213d1b7
|
|
239
|
-
public_suffix (7.0.
|
|
239
|
+
public_suffix (7.0.5) sha256=1a8bb08f1bbea19228d3bed6e5ed908d1cb4f7c2726d18bd9cadf60bc676f623
|
|
240
240
|
racc (1.8.1) sha256=4a7f6929691dbec8b5209a0b373bc2614882b55fc5d2e447a21aaa691303d62f
|
|
241
241
|
rainbow (3.1.1) sha256=039491aa3a89f42efa1d6dec2fc4e62ede96eb6acd95e52f1ad581182b79bc6a
|
|
242
242
|
rake (13.3.1) sha256=8c9e89d09f66a26a01264e7e3480ec0607f0c497a861ef16063604b1b08eb19c
|
|
@@ -259,7 +259,7 @@ CHECKSUMS
|
|
|
259
259
|
rubocop-rspec (3.9.0) sha256=8fa70a3619408237d789aeecfb9beef40576acc855173e60939d63332fdb55e2
|
|
260
260
|
ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33
|
|
261
261
|
securerandom (0.4.1) sha256=cc5193d414a4341b6e225f0cb4446aceca8e50d5e1888743fac16987638ea0b1
|
|
262
|
-
sorbet-runtime (0.6.
|
|
262
|
+
sorbet-runtime (0.6.12984) sha256=3fff20a5b147a2e191210563d61886ac121fc1cd8b5e0faf6bc18873139e0fe4
|
|
263
263
|
steep (1.10.0) sha256=1b295b55f9aaff1b8d3ee42453ee55bc2a1078fda0268f288edb2dc014f4d7d1
|
|
264
264
|
strscan (3.1.7) sha256=5f76462b94a3ea50b44973225b7d75b2cb96d4e1bee9ef1319b99ca117b72c8c
|
|
265
265
|
terminal-table (4.0.0) sha256=f504793203f8251b2ea7c7068333053f0beeea26093ec9962e62ea79f94301d2
|
data/README.md
CHANGED
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
|
|
23
23
|
</a>
|
|
24
24
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
|
|
25
|
-
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.4.1" alt="Go">
|
|
25
|
+
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.4.2" alt="Go">
|
|
26
26
|
</a>
|
|
27
27
|
<a href="https://www.nuget.org/packages/Kreuzberg/">
|
|
28
28
|
<img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
|