kreuzberg 4.3.8 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +24 -10
- data/README.md +4 -1
- data/examples/async_patterns.rb +9 -10
- data/ext/kreuzberg_rb/native/Cargo.lock +253 -719
- data/ext/kreuzberg_rb/native/Cargo.toml +23 -12
- data/ext/kreuzberg_rb/native/src/config/types.rs +1 -209
- data/lib/kreuzberg/result.rb +9 -12
- data/lib/kreuzberg/version.rb +1 -1
- data/vendor/Cargo.toml +8 -3
- data/vendor/kreuzberg/Cargo.toml +18 -23
- data/vendor/kreuzberg/README.md +1 -1
- data/vendor/kreuzberg/src/api/error.rs +1 -1
- data/vendor/kreuzberg/src/api/handlers.rs +9 -1
- data/vendor/kreuzberg/src/core/config/extraction/core.rs +2 -2
- data/vendor/kreuzberg/src/core/extractor/batch.rs +2 -2
- data/vendor/kreuzberg/src/core/extractor/bytes.rs +5 -1
- data/vendor/kreuzberg/src/core/extractor/legacy.rs +5 -1
- data/vendor/kreuzberg/src/core/mime.rs +1 -1
- data/vendor/kreuzberg/src/error.rs +2 -2
- data/vendor/kreuzberg/src/extraction/docx/parser.rs +51 -2
- data/vendor/kreuzberg/src/extraction/email.rs +55 -13
- data/vendor/kreuzberg/src/extraction/excel.rs +114 -62
- data/vendor/kreuzberg/src/extraction/html/converter.rs +10 -1
- data/vendor/kreuzberg/src/extraction/image.rs +50 -49
- data/vendor/kreuzberg/src/extraction/mod.rs +4 -10
- data/vendor/kreuzberg/src/extraction/ppt/mod.rs +3 -2
- data/vendor/kreuzberg/src/extraction/pptx/content_builder.rs +39 -22
- data/vendor/kreuzberg/src/extraction/pptx/image_handling.rs +0 -8
- data/vendor/kreuzberg/src/extraction/pptx/mod.rs +0 -13
- data/vendor/kreuzberg/src/extraction/text.rs +9 -5
- data/vendor/kreuzberg/src/extraction/xml.rs +220 -48
- data/vendor/kreuzberg/src/extractors/archive.rs +88 -4
- data/vendor/kreuzberg/src/extractors/bibtex.rs +2 -1
- data/vendor/kreuzberg/src/extractors/citation.rs +2 -1
- data/vendor/kreuzberg/src/extractors/csv.rs +2 -1
- data/vendor/kreuzberg/src/extractors/djot_format/attributes.rs +0 -33
- data/vendor/kreuzberg/src/extractors/djot_format/extractor.rs +7 -4
- data/vendor/kreuzberg/src/extractors/doc.rs +2 -1
- data/vendor/kreuzberg/src/extractors/docbook.rs +2 -1
- data/vendor/kreuzberg/src/extractors/docx.rs +2 -1
- data/vendor/kreuzberg/src/extractors/email.rs +2 -1
- data/vendor/kreuzberg/src/extractors/epub/content.rs +324 -138
- data/vendor/kreuzberg/src/extractors/epub/mod.rs +2 -1
- data/vendor/kreuzberg/src/extractors/excel.rs +92 -13
- data/vendor/kreuzberg/src/extractors/fictionbook.rs +2 -1
- data/vendor/kreuzberg/src/extractors/html.rs +2 -1
- data/vendor/kreuzberg/src/extractors/image.rs +28 -16
- data/vendor/kreuzberg/src/extractors/jats/mod.rs +2 -1
- data/vendor/kreuzberg/src/extractors/jupyter.rs +11 -68
- data/vendor/kreuzberg/src/extractors/latex/mod.rs +2 -1
- data/vendor/kreuzberg/src/extractors/markdown.rs +5 -3
- data/vendor/kreuzberg/src/extractors/mdx.rs +2 -1
- data/vendor/kreuzberg/src/extractors/mod.rs +8 -8
- data/vendor/kreuzberg/src/extractors/odt.rs +2 -1
- data/vendor/kreuzberg/src/extractors/opml/core.rs +2 -1
- data/vendor/kreuzberg/src/extractors/orgmode.rs +2 -1
- data/vendor/kreuzberg/src/extractors/pdf/extraction.rs +24 -56
- data/vendor/kreuzberg/src/extractors/pdf/mod.rs +16 -4
- data/vendor/kreuzberg/src/extractors/ppt.rs +2 -1
- data/vendor/kreuzberg/src/extractors/pptx.rs +2 -1
- data/vendor/kreuzberg/src/extractors/rst.rs +2 -1
- data/vendor/kreuzberg/src/extractors/rtf/mod.rs +2 -1
- data/vendor/kreuzberg/src/extractors/structured.rs +2 -1
- data/vendor/kreuzberg/src/extractors/text.rs +4 -2
- data/vendor/kreuzberg/src/extractors/typst.rs +2 -1
- data/vendor/kreuzberg/src/extractors/xml.rs +8 -3
- data/vendor/kreuzberg/src/language_detection/mod.rs +1 -1
- data/vendor/kreuzberg/src/lib.rs +4 -0
- data/vendor/kreuzberg/src/ocr/processor/execution.rs +1 -1
- data/vendor/kreuzberg/src/ocr/processor/validation.rs +8 -5
- data/vendor/kreuzberg/src/pdf/hierarchy/clustering.rs +42 -5
- data/vendor/kreuzberg/src/pdf/images.rs +4 -3
- data/vendor/kreuzberg/src/pdf/markdown/bridge.rs +184 -46
- data/vendor/kreuzberg/src/pdf/markdown/classify.rs +29 -12
- data/vendor/kreuzberg/src/pdf/markdown/columns.rs +274 -0
- data/vendor/kreuzberg/src/pdf/markdown/lines.rs +1 -1
- data/vendor/kreuzberg/src/pdf/markdown/pipeline.rs +221 -32
- data/vendor/kreuzberg/src/pdf/rendering.rs +14 -2
- data/vendor/kreuzberg/src/pdf/text.rs +38 -17
- data/vendor/kreuzberg/src/plugins/extractor/registry.rs +2 -0
- data/vendor/kreuzberg/src/plugins/extractor/trait.rs +8 -1
- data/vendor/kreuzberg/src/plugins/mod.rs +8 -2
- data/vendor/kreuzberg/src/plugins/ocr.rs +9 -3
- data/vendor/kreuzberg/src/plugins/registry/ocr.rs +52 -4
- data/vendor/kreuzberg/src/text/quality.rs +7 -3
- data/vendor/kreuzberg/src/text/string_utils.rs +2 -2
- data/vendor/kreuzberg/src/text/token_reduction/filters/general.rs +7 -6
- data/vendor/kreuzberg/src/text/token_reduction/filters.rs +4 -2
- data/vendor/kreuzberg/src/types/extraction.rs +2 -2
- data/vendor/kreuzberg/src/utils/quality/heuristics.rs +2 -1
- data/vendor/kreuzberg/src/utils/quality/scoring.rs +6 -2
- data/vendor/kreuzberg/src/utils/string_utils.rs +8 -8
- data/vendor/kreuzberg/tests/api_tests.rs +87 -3
- data/vendor/kreuzberg/tests/opml_extractor_tests.rs +8 -4
- data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +8 -4
- data/vendor/kreuzberg/tests/pdf_markdown_extraction.rs +66 -0
- data/vendor/kreuzberg-ffi/Cargo.toml +8 -5
- data/vendor/kreuzberg-ffi/README.md +298 -197
- data/vendor/kreuzberg-ffi/benches/result_view_benchmark.rs +1 -1
- data/vendor/kreuzberg-ffi/build.rs +31 -2
- data/vendor/kreuzberg-ffi/cbindgen.toml +20 -0
- data/vendor/kreuzberg-ffi/cmake/kreuzberg-ffi-config-version.cmake +25 -0
- data/vendor/kreuzberg-ffi/cmake/kreuzberg-ffi-config.cmake +105 -0
- data/vendor/kreuzberg-ffi/kreuzberg.h +156 -97
- data/vendor/kreuzberg-ffi/src/plugins/document_extractor.rs +7 -0
- data/vendor/kreuzberg-ffi/src/plugins/ocr_backend.rs +12 -0
- data/vendor/kreuzberg-ffi/src/plugins/post_processor.rs +12 -0
- data/vendor/kreuzberg-ffi/src/plugins/validator.rs +6 -0
- data/vendor/kreuzberg-ffi/tests/c/.gitignore +5 -0
- data/vendor/kreuzberg-ffi/tests/c/Makefile +83 -0
- data/vendor/kreuzberg-ffi/tests/c/run_tests.sh +32 -0
- data/vendor/kreuzberg-ffi/tests/c/test_batch.c +129 -0
- data/vendor/kreuzberg-ffi/tests/c/test_concurrent.c +128 -0
- data/vendor/kreuzberg-ffi/tests/c/test_config.c +77 -0
- data/vendor/kreuzberg-ffi/tests/c/test_config_builder.c +83 -0
- data/vendor/kreuzberg-ffi/tests/c/test_error.c +38 -0
- data/vendor/kreuzberg-ffi/tests/c/test_error_extended.c +174 -0
- data/vendor/kreuzberg-ffi/tests/c/test_extraction.c +70 -0
- data/vendor/kreuzberg-ffi/tests/c/test_html_options.c +124 -0
- data/vendor/kreuzberg-ffi/tests/c/test_mime.c +59 -0
- data/vendor/kreuzberg-ffi/tests/c/test_plugins.c +305 -0
- data/vendor/kreuzberg-ffi/tests/c/test_plugins_errors.c +226 -0
- data/vendor/kreuzberg-ffi/tests/c/test_result_inspect.c +135 -0
- data/vendor/kreuzberg-ffi/tests/c/test_result_pool.c +97 -0
- data/vendor/kreuzberg-ffi/tests/c/test_string_intern.c +58 -0
- data/vendor/kreuzberg-ffi/tests/c/test_validation.c +90 -0
- data/vendor/kreuzberg-ffi/tests/c/test_version.c +21 -0
- data/vendor/kreuzberg-paddle-ocr/Cargo.toml +1 -2
- data/vendor/kreuzberg-pdfium-render/Cargo.toml +4 -2
- data/vendor/kreuzberg-pdfium-render/src/bindings/dynamic_bindings.rs +53 -40
- data/vendor/kreuzberg-pdfium-render/src/bindings/static_bindings.rs +53 -40
- data/vendor/kreuzberg-pdfium-render/src/bindings/wasm_bindings.rs +17 -14
- data/vendor/kreuzberg-pdfium-render/src/lib.rs +2 -2
- data/vendor/kreuzberg-pdfium-render/src/pdf/bitmap.rs +8 -13
- data/vendor/kreuzberg-pdfium-render/src/pdf/document/bookmark.rs +3 -3
- data/vendor/kreuzberg-pdfium-render/src/pdf/document/form.rs +25 -19
- data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/extraction.rs +4 -2
- data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/index_cache.rs +13 -25
- data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/object/image.rs +1 -1
- data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/text/char.rs +2 -1
- data/vendor/kreuzberg-pdfium-render/src/pdf/document/page.rs +2 -2
- data/vendor/kreuzberg-tesseract/Cargo.toml +7 -3
- data/vendor/kreuzberg-tesseract/build.rs +403 -81
- data/vendor/kreuzberg-tesseract/patches/README.md +5 -5
- data/vendor/kreuzberg-tesseract/patches/tesseract.diff +1 -1
- data/vendor/kreuzberg-tesseract/src/api.rs +25 -5
- data/vendor/kreuzberg-tesseract/src/lib.rs +20 -1
- data/vendor/kreuzberg-tesseract/src/result_iterator.rs +1 -1
- metadata +23 -3
- data/vendor/kreuzberg/src/extraction/table.rs +0 -331
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 60064820e029a70308a28ac0f1232b62992511dda4b1f62f5ab9a4c83f3ac8ef
|
|
4
|
+
data.tar.gz: c564b12ca29c17695be86b2da184be0fc7666be15f2fca2dd26972b85cc93c8c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 162f3915a9e8e4cc51f163e053f284b45f2e228cc0cb1b3f2797a3aafdfe9ddafeb3177c842b5b2d232689d9304e3c9465a80adf93085e39776b76d1719adeed
|
|
7
|
+
data.tar.gz: 28802dd0a439b8a1d778143a650bae055dd73597c7edbb9db5aa6085e938da529f46216aa4cabf0f583329c9fadf49e5940b6e6376b267eff2baa56c78faff13
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
kreuzberg (4.
|
|
4
|
+
kreuzberg (4.4.0)
|
|
5
5
|
rb_sys (~> 0.9.119)
|
|
6
6
|
sorbet-runtime (~> 0.5)
|
|
7
7
|
|
|
@@ -21,6 +21,8 @@ GEM
|
|
|
21
21
|
securerandom (>= 0.3)
|
|
22
22
|
tzinfo (~> 2.0, >= 2.0.5)
|
|
23
23
|
uri (>= 0.13.1)
|
|
24
|
+
addressable (2.8.9)
|
|
25
|
+
public_suffix (>= 2.0.2, < 8.0)
|
|
24
26
|
ast (2.4.3)
|
|
25
27
|
base64 (0.3.0)
|
|
26
28
|
bigdecimal (4.0.1)
|
|
@@ -48,6 +50,9 @@ GEM
|
|
|
48
50
|
concurrent-ruby (~> 1.0)
|
|
49
51
|
io-console (0.8.2)
|
|
50
52
|
json (2.18.1)
|
|
53
|
+
json-schema (6.1.0)
|
|
54
|
+
addressable (~> 2.8)
|
|
55
|
+
bigdecimal (>= 3.1, < 5)
|
|
51
56
|
language_server-protocol (3.17.0.5)
|
|
52
57
|
lint_roller (1.1.0)
|
|
53
58
|
listen (3.10.0)
|
|
@@ -55,8 +60,11 @@ GEM
|
|
|
55
60
|
rb-fsevent (~> 0.10, >= 0.10.3)
|
|
56
61
|
rb-inotify (~> 0.9, >= 0.9.10)
|
|
57
62
|
logger (1.7.0)
|
|
63
|
+
mcp (0.7.1)
|
|
64
|
+
json-schema (>= 4.1)
|
|
58
65
|
method_source (1.1.0)
|
|
59
|
-
minitest (6.0.
|
|
66
|
+
minitest (6.0.2)
|
|
67
|
+
drb (~> 2.0)
|
|
60
68
|
prism (~> 1.5)
|
|
61
69
|
mutex_m (0.3.0)
|
|
62
70
|
parallel (1.27.0)
|
|
@@ -71,6 +79,7 @@ GEM
|
|
|
71
79
|
pry-byebug (3.12.0)
|
|
72
80
|
byebug (~> 13.0)
|
|
73
81
|
pry (>= 0.13, < 0.17)
|
|
82
|
+
public_suffix (7.0.2)
|
|
74
83
|
racc (1.8.1)
|
|
75
84
|
rainbow (3.1.1)
|
|
76
85
|
rake (13.3.1)
|
|
@@ -97,14 +106,15 @@ GEM
|
|
|
97
106
|
rspec-expectations (3.13.5)
|
|
98
107
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
99
108
|
rspec-support (~> 3.13.0)
|
|
100
|
-
rspec-mocks (3.13.
|
|
109
|
+
rspec-mocks (3.13.8)
|
|
101
110
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
102
111
|
rspec-support (~> 3.13.0)
|
|
103
112
|
rspec-support (3.13.7)
|
|
104
|
-
rubocop (1.
|
|
113
|
+
rubocop (1.85.0)
|
|
105
114
|
json (~> 2.3)
|
|
106
115
|
language_server-protocol (~> 3.17.0.2)
|
|
107
116
|
lint_roller (~> 1.1.0)
|
|
117
|
+
mcp (~> 0.6)
|
|
108
118
|
parallel (~> 1.10)
|
|
109
119
|
parser (>= 3.3.0.2)
|
|
110
120
|
rainbow (>= 2.2.2, < 4.0)
|
|
@@ -124,7 +134,7 @@ GEM
|
|
|
124
134
|
rubocop (~> 1.81)
|
|
125
135
|
ruby-progressbar (1.13.0)
|
|
126
136
|
securerandom (0.4.1)
|
|
127
|
-
sorbet-runtime (0.6.
|
|
137
|
+
sorbet-runtime (0.6.12971)
|
|
128
138
|
steep (1.10.0)
|
|
129
139
|
activesupport (>= 5.1)
|
|
130
140
|
concurrent-ruby (>= 1.1.10)
|
|
@@ -185,6 +195,7 @@ DEPENDENCIES
|
|
|
185
195
|
|
|
186
196
|
CHECKSUMS
|
|
187
197
|
activesupport (8.1.2) sha256=88842578ccd0d40f658289b0e8c842acfe9af751afee2e0744a7873f50b6fdae
|
|
198
|
+
addressable (2.8.9) sha256=cc154fcbe689711808a43601dee7b980238ce54368d23e127421753e46895485
|
|
188
199
|
ast (2.4.3) sha256=954615157c1d6a382bc27d690d973195e79db7f55e9765ac7c481c60bdb4d383
|
|
189
200
|
base64 (0.3.0) sha256=27337aeabad6ffae05c265c450490628ef3ebd4b67be58257393227588f5a97b
|
|
190
201
|
bigdecimal (4.0.1) sha256=8b07d3d065a9f921c80ceaea7c9d4ae596697295b584c296fe599dd0ad01c4a7
|
|
@@ -210,19 +221,22 @@ CHECKSUMS
|
|
|
210
221
|
i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5
|
|
211
222
|
io-console (0.8.2) sha256=d6e3ae7a7cc7574f4b8893b4fca2162e57a825b223a177b7afa236c5ef9814cc
|
|
212
223
|
json (2.18.1) sha256=fe112755501b8d0466b5ada6cf50c8c3f41e897fa128ac5d263ec09eedc9f986
|
|
213
|
-
|
|
224
|
+
json-schema (6.1.0) sha256=6bf70a2cfb6dfd5a06da28093fa8190f324c88eabd36a7f47097f227321dc702
|
|
225
|
+
kreuzberg (4.4.0)
|
|
214
226
|
language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
|
|
215
227
|
lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
|
|
216
228
|
listen (3.10.0) sha256=c6e182db62143aeccc2e1960033bebe7445309c7272061979bb098d03760c9d2
|
|
217
229
|
logger (1.7.0) sha256=196edec7cc44b66cfb40f9755ce11b392f21f7967696af15d274dde7edff0203
|
|
230
|
+
mcp (0.7.1) sha256=fa967895d6952bad0d981ea907731d8528d2c246d2079d56a9c8bae83d14f1c7
|
|
218
231
|
method_source (1.1.0) sha256=181301c9c45b731b4769bc81e8860e72f9161ad7d66dd99103c9ab84f560f5c5
|
|
219
|
-
minitest (6.0.
|
|
232
|
+
minitest (6.0.2) sha256=db6e57956f6ecc6134683b4c87467d6dd792323c7f0eea7b93f66bd284adbc3d
|
|
220
233
|
mutex_m (0.3.0) sha256=cfcb04ac16b69c4813777022fdceda24e9f798e48092a2b817eb4c0a782b0751
|
|
221
234
|
parallel (1.27.0) sha256=4ac151e1806b755fb4e2dc2332cbf0e54f2e24ba821ff2d3dcf86bf6dc4ae130
|
|
222
235
|
parser (3.3.10.2) sha256=6f60c84aa4bdcedb6d1a2434b738fe8a8136807b6adc8f7f53b97da9bc4e9357
|
|
223
236
|
prism (1.9.0) sha256=7b530c6a9f92c24300014919c9dcbc055bf4cdf51ec30aed099b06cd6674ef85
|
|
224
237
|
pry (0.16.0) sha256=d76c69065698ed1f85e717bd33d7942c38a50868f6b0673c636192b3d1b6054e
|
|
225
238
|
pry-byebug (3.12.0) sha256=594e094ae8a8390a7ad4c7b36ae36e13304ed02664c67417d108dc5f7213d1b7
|
|
239
|
+
public_suffix (7.0.2) sha256=9114090c8e4e7135c1fd0e7acfea33afaab38101884320c65aaa0ffb8e26a857
|
|
226
240
|
racc (1.8.1) sha256=4a7f6929691dbec8b5209a0b373bc2614882b55fc5d2e447a21aaa691303d62f
|
|
227
241
|
rainbow (3.1.1) sha256=039491aa3a89f42efa1d6dec2fc4e62ede96eb6acd95e52f1ad581182b79bc6a
|
|
228
242
|
rake (13.3.1) sha256=8c9e89d09f66a26a01264e7e3480ec0607f0c497a861ef16063604b1b08eb19c
|
|
@@ -237,15 +251,15 @@ CHECKSUMS
|
|
|
237
251
|
rspec (3.13.2) sha256=206284a08ad798e61f86d7ca3e376718d52c0bc944626b2349266f239f820587
|
|
238
252
|
rspec-core (3.13.6) sha256=a8823c6411667b60a8bca135364351dda34cd55e44ff94c4be4633b37d828b2d
|
|
239
253
|
rspec-expectations (3.13.5) sha256=33a4d3a1d95060aea4c94e9f237030a8f9eae5615e9bd85718fe3a09e4b58836
|
|
240
|
-
rspec-mocks (3.13.
|
|
254
|
+
rspec-mocks (3.13.8) sha256=086ad3d3d17533f4237643de0b5c42f04b66348c28bf6b9c2d3f4a3b01af1d47
|
|
241
255
|
rspec-support (3.13.7) sha256=0640e5570872aafefd79867901deeeeb40b0c9875a36b983d85f54fb7381c47c
|
|
242
|
-
rubocop (1.
|
|
256
|
+
rubocop (1.85.0) sha256=317407feb681a07d54f64d2f9e1d6b6af1ce7678e51cd658e3ad8bd66da48c01
|
|
243
257
|
rubocop-ast (1.49.0) sha256=49c3676d3123a0923d333e20c6c2dbaaae2d2287b475273fddee0c61da9f71fd
|
|
244
258
|
rubocop-performance (1.26.1) sha256=cd19b936ff196df85829d264b522fd4f98b6c89ad271fa52744a8c11b8f71834
|
|
245
259
|
rubocop-rspec (3.9.0) sha256=8fa70a3619408237d789aeecfb9beef40576acc855173e60939d63332fdb55e2
|
|
246
260
|
ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33
|
|
247
261
|
securerandom (0.4.1) sha256=cc5193d414a4341b6e225f0cb4446aceca8e50d5e1888743fac16987638ea0b1
|
|
248
|
-
sorbet-runtime (0.6.
|
|
262
|
+
sorbet-runtime (0.6.12971) sha256=1c2c75a262f88c4fbdb36b5617b0b11bfc7c69b11a500b3334bd67d075288a45
|
|
249
263
|
steep (1.10.0) sha256=1b295b55f9aaff1b8d3ee42453ee55bc2a1078fda0268f288edb2dc014f4d7d1
|
|
250
264
|
strscan (3.1.7) sha256=5f76462b94a3ea50b44973225b7d75b2cb96d4e1bee9ef1319b99ca117b72c8c
|
|
251
265
|
terminal-table (4.0.0) sha256=f504793203f8251b2ea7c7068333053f0beeea26093ec9962e62ea79f94301d2
|
data/README.md
CHANGED
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
|
|
23
23
|
</a>
|
|
24
24
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
|
|
25
|
-
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.
|
|
25
|
+
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.4.0" alt="Go">
|
|
26
26
|
</a>
|
|
27
27
|
<a href="https://www.nuget.org/packages/Kreuzberg/">
|
|
28
28
|
<img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
|
|
@@ -36,6 +36,9 @@
|
|
|
36
36
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/pkgs/container/kreuzberg">
|
|
37
37
|
<img src="https://img.shields.io/badge/Docker-007ec6?logo=docker&logoColor=white" alt="Docker">
|
|
38
38
|
</a>
|
|
39
|
+
<a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
|
|
40
|
+
<img src="https://img.shields.io/badge/C-FFI-007ec6" alt="C">
|
|
41
|
+
</a>
|
|
39
42
|
|
|
40
43
|
<!-- Project Info -->
|
|
41
44
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/blob/main/LICENSE">
|
data/examples/async_patterns.rb
CHANGED
|
@@ -215,18 +215,17 @@ end
|
|
|
215
215
|
# ============================================================================
|
|
216
216
|
|
|
217
217
|
# Example OCR backend implementation for custom processing.
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
218
|
+
def register_ocr_backend
|
|
219
|
+
backend = Class.new do
|
|
220
|
+
def process_image(image_bytes, language)
|
|
221
|
+
"Extracted text from #{image_bytes.length} bytes using #{language}"
|
|
222
|
+
end
|
|
222
223
|
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
end
|
|
224
|
+
def supports_language?(lang)
|
|
225
|
+
%w[eng deu fra].include?(lang)
|
|
226
|
+
end
|
|
227
|
+
end.new
|
|
227
228
|
|
|
228
|
-
def register_ocr_backend
|
|
229
|
-
backend = CustomOcrBackend.new
|
|
230
229
|
Kreuzberg.register_ocr_backend('custom', backend)
|
|
231
230
|
|
|
232
231
|
config = {
|