kreuzberg 4.0.0.pre.rc.6 → 4.0.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +0 -6
- data/.rubocop.yaml +534 -1
- data/Gemfile +2 -1
- data/Gemfile.lock +11 -11
- data/README.md +5 -10
- data/examples/async_patterns.rb +0 -1
- data/ext/kreuzberg_rb/extconf.rb +0 -10
- data/ext/kreuzberg_rb/native/Cargo.toml +15 -23
- data/ext/kreuzberg_rb/native/build.rs +2 -0
- data/ext/kreuzberg_rb/native/include/ieeefp.h +1 -1
- data/ext/kreuzberg_rb/native/include/msvc_compat/strings.h +1 -1
- data/ext/kreuzberg_rb/native/include/strings.h +2 -2
- data/ext/kreuzberg_rb/native/include/unistd.h +1 -1
- data/ext/kreuzberg_rb/native/src/lib.rs +16 -75
- data/kreuzberg.gemspec +14 -57
- data/lib/kreuzberg/cache_api.rb +0 -1
- data/lib/kreuzberg/cli.rb +2 -2
- data/lib/kreuzberg/config.rb +2 -9
- data/lib/kreuzberg/errors.rb +7 -75
- data/lib/kreuzberg/extraction_api.rb +0 -1
- data/lib/kreuzberg/setup_lib_path.rb +0 -1
- data/lib/kreuzberg/version.rb +1 -1
- data/lib/kreuzberg.rb +0 -21
- data/pkg/kreuzberg-4.0.0.rc1.gem +0 -0
- data/sig/kreuzberg.rbs +3 -55
- data/spec/binding/cli_proxy_spec.rb +4 -2
- data/spec/binding/cli_spec.rb +11 -12
- data/spec/examples.txt +104 -0
- data/spec/fixtures/config.yaml +1 -0
- data/spec/spec_helper.rb +1 -1
- data/vendor/kreuzberg/Cargo.toml +42 -112
- data/vendor/kreuzberg/README.md +2 -2
- data/vendor/kreuzberg/build.rs +4 -18
- data/vendor/kreuzberg/src/bin/profile_extract.rs +455 -0
- data/vendor/kreuzberg/src/cache/mod.rs +3 -27
- data/vendor/kreuzberg/src/core/batch_mode.rs +0 -60
- data/vendor/kreuzberg/src/core/extractor.rs +81 -202
- data/vendor/kreuzberg/src/core/io.rs +2 -4
- data/vendor/kreuzberg/src/core/mime.rs +12 -2
- data/vendor/kreuzberg/src/core/mod.rs +1 -4
- data/vendor/kreuzberg/src/core/pipeline.rs +33 -111
- data/vendor/kreuzberg/src/embeddings.rs +16 -125
- data/vendor/kreuzberg/src/error.rs +1 -1
- data/vendor/kreuzberg/src/extraction/docx.rs +1 -1
- data/vendor/kreuzberg/src/extraction/image.rs +13 -13
- data/vendor/kreuzberg/src/extraction/libreoffice.rs +1 -0
- data/vendor/kreuzberg/src/extraction/mod.rs +5 -9
- data/vendor/kreuzberg/src/extraction/office_metadata/mod.rs +0 -2
- data/vendor/kreuzberg/src/extraction/pandoc/batch.rs +275 -0
- data/vendor/kreuzberg/src/extraction/pandoc/mime_types.rs +178 -0
- data/vendor/kreuzberg/src/extraction/pandoc/mod.rs +491 -0
- data/vendor/kreuzberg/src/extraction/pandoc/server.rs +496 -0
- data/vendor/kreuzberg/src/extraction/pandoc/subprocess.rs +1188 -0
- data/vendor/kreuzberg/src/extraction/pandoc/version.rs +162 -0
- data/vendor/kreuzberg/src/extractors/archive.rs +0 -21
- data/vendor/kreuzberg/src/extractors/docx.rs +128 -16
- data/vendor/kreuzberg/src/extractors/email.rs +0 -14
- data/vendor/kreuzberg/src/extractors/excel.rs +20 -19
- data/vendor/kreuzberg/src/extractors/html.rs +154 -137
- data/vendor/kreuzberg/src/extractors/image.rs +4 -7
- data/vendor/kreuzberg/src/extractors/mod.rs +9 -106
- data/vendor/kreuzberg/src/extractors/pandoc.rs +201 -0
- data/vendor/kreuzberg/src/extractors/pdf.rs +15 -12
- data/vendor/kreuzberg/src/extractors/pptx.rs +3 -17
- data/vendor/kreuzberg/src/extractors/structured.rs +0 -14
- data/vendor/kreuzberg/src/extractors/text.rs +5 -23
- data/vendor/kreuzberg/src/extractors/xml.rs +0 -7
- data/vendor/kreuzberg/src/keywords/rake.rs +1 -0
- data/vendor/kreuzberg/src/lib.rs +1 -4
- data/vendor/kreuzberg/src/mcp/mod.rs +1 -1
- data/vendor/kreuzberg/src/mcp/server.rs +3 -5
- data/vendor/kreuzberg/src/ocr/processor.rs +2 -18
- data/vendor/kreuzberg/src/pdf/error.rs +1 -1
- data/vendor/kreuzberg/src/pdf/table.rs +44 -17
- data/vendor/kreuzberg/src/pdf/text.rs +3 -0
- data/vendor/kreuzberg/src/plugins/extractor.rs +5 -8
- data/vendor/kreuzberg/src/plugins/ocr.rs +11 -2
- data/vendor/kreuzberg/src/plugins/processor.rs +1 -2
- data/vendor/kreuzberg/src/plugins/registry.rs +0 -13
- data/vendor/kreuzberg/src/plugins/validator.rs +8 -9
- data/vendor/kreuzberg/src/stopwords/mod.rs +2 -2
- data/vendor/kreuzberg/src/types.rs +12 -42
- data/vendor/kreuzberg/tests/batch_orchestration.rs +5 -19
- data/vendor/kreuzberg/tests/batch_processing.rs +3 -15
- data/vendor/kreuzberg/tests/chunking_offset_demo.rs +92 -0
- data/vendor/kreuzberg/tests/concurrency_stress.rs +1 -17
- data/vendor/kreuzberg/tests/config_features.rs +0 -18
- data/vendor/kreuzberg/tests/config_loading_tests.rs +39 -15
- data/vendor/kreuzberg/tests/core_integration.rs +7 -24
- data/vendor/kreuzberg/tests/csv_integration.rs +81 -71
- data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +25 -23
- data/vendor/kreuzberg/tests/pandoc_integration.rs +503 -0
- data/vendor/kreuzberg/tests/pipeline_integration.rs +1 -0
- data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +1 -0
- data/vendor/kreuzberg/tests/registry_integration_tests.rs +22 -1
- data/vendor/kreuzberg/tests/security_validation.rs +1 -12
- metadata +25 -90
- data/.rubocop.yml +0 -538
- data/ext/kreuzberg_rb/native/Cargo.lock +0 -6535
- data/lib/kreuzberg/error_context.rb +0 -32
- data/vendor/kreuzberg/benches/otel_overhead.rs +0 -48
- data/vendor/kreuzberg/src/extraction/markdown.rs +0 -213
- data/vendor/kreuzberg/src/extraction/office_metadata/odt_properties.rs +0 -287
- data/vendor/kreuzberg/src/extractors/bibtex.rs +0 -469
- data/vendor/kreuzberg/src/extractors/docbook.rs +0 -502
- data/vendor/kreuzberg/src/extractors/epub.rs +0 -707
- data/vendor/kreuzberg/src/extractors/fictionbook.rs +0 -491
- data/vendor/kreuzberg/src/extractors/fictionbook.rs.backup2 +0 -738
- data/vendor/kreuzberg/src/extractors/jats.rs +0 -1051
- data/vendor/kreuzberg/src/extractors/jupyter.rs +0 -367
- data/vendor/kreuzberg/src/extractors/latex.rs +0 -652
- data/vendor/kreuzberg/src/extractors/markdown.rs +0 -700
- data/vendor/kreuzberg/src/extractors/odt.rs +0 -628
- data/vendor/kreuzberg/src/extractors/opml.rs +0 -634
- data/vendor/kreuzberg/src/extractors/orgmode.rs +0 -528
- data/vendor/kreuzberg/src/extractors/rst.rs +0 -576
- data/vendor/kreuzberg/src/extractors/rtf.rs +0 -810
- data/vendor/kreuzberg/src/extractors/security.rs +0 -484
- data/vendor/kreuzberg/src/extractors/security_tests.rs +0 -367
- data/vendor/kreuzberg/src/extractors/typst.rs +0 -650
- data/vendor/kreuzberg/src/panic_context.rs +0 -154
- data/vendor/kreuzberg/tests/api_extract_multipart.rs +0 -52
- data/vendor/kreuzberg/tests/bibtex_parity_test.rs +0 -421
- data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +0 -498
- data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +0 -370
- data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +0 -275
- data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +0 -228
- data/vendor/kreuzberg/tests/html_table_test.rs +0 -551
- data/vendor/kreuzberg/tests/instrumentation_test.rs +0 -139
- data/vendor/kreuzberg/tests/jats_extractor_tests.rs +0 -639
- data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +0 -704
- data/vendor/kreuzberg/tests/latex_extractor_tests.rs +0 -496
- data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +0 -490
- data/vendor/kreuzberg/tests/odt_extractor_tests.rs +0 -695
- data/vendor/kreuzberg/tests/opml_extractor_tests.rs +0 -616
- data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +0 -822
- data/vendor/kreuzberg/tests/rst_extractor_tests.rs +0 -692
- data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +0 -776
- data/vendor/kreuzberg/tests/typst_behavioral_tests.rs +0 -1259
- data/vendor/kreuzberg/tests/typst_extractor_tests.rs +0 -647
- data/vendor/rb-sys/.cargo-ok +0 -1
- data/vendor/rb-sys/.cargo_vcs_info.json +0 -6
- data/vendor/rb-sys/Cargo.lock +0 -393
- data/vendor/rb-sys/Cargo.toml +0 -70
- data/vendor/rb-sys/Cargo.toml.orig +0 -57
- data/vendor/rb-sys/LICENSE-APACHE +0 -190
- data/vendor/rb-sys/LICENSE-MIT +0 -21
- data/vendor/rb-sys/bin/release.sh +0 -21
- data/vendor/rb-sys/build/features.rs +0 -108
- data/vendor/rb-sys/build/main.rs +0 -246
- data/vendor/rb-sys/build/stable_api_config.rs +0 -153
- data/vendor/rb-sys/build/version.rs +0 -48
- data/vendor/rb-sys/readme.md +0 -36
- data/vendor/rb-sys/src/bindings.rs +0 -21
- data/vendor/rb-sys/src/hidden.rs +0 -11
- data/vendor/rb-sys/src/lib.rs +0 -34
- data/vendor/rb-sys/src/macros.rs +0 -371
- data/vendor/rb-sys/src/memory.rs +0 -53
- data/vendor/rb-sys/src/ruby_abi_version.rs +0 -38
- data/vendor/rb-sys/src/special_consts.rs +0 -31
- data/vendor/rb-sys/src/stable_api/compiled.c +0 -179
- data/vendor/rb-sys/src/stable_api/compiled.rs +0 -257
- data/vendor/rb-sys/src/stable_api/ruby_2_6.rs +0 -316
- data/vendor/rb-sys/src/stable_api/ruby_2_7.rs +0 -316
- data/vendor/rb-sys/src/stable_api/ruby_3_0.rs +0 -324
- data/vendor/rb-sys/src/stable_api/ruby_3_1.rs +0 -317
- data/vendor/rb-sys/src/stable_api/ruby_3_2.rs +0 -315
- data/vendor/rb-sys/src/stable_api/ruby_3_3.rs +0 -326
- data/vendor/rb-sys/src/stable_api/ruby_3_4.rs +0 -327
- data/vendor/rb-sys/src/stable_api.rs +0 -261
- data/vendor/rb-sys/src/symbol.rs +0 -31
- data/vendor/rb-sys/src/tracking_allocator.rs +0 -332
- data/vendor/rb-sys/src/utils.rs +0 -89
- data/vendor/rb-sys/src/value_type.rs +0 -7
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: kreuzberg
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 4.0.0.pre.rc.6
|
|
4
|
+
version: 4.0.0.rc1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Na'aman Hirschfeld
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2025-
|
|
11
|
+
date: 2025-11-23 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -16,14 +16,14 @@ dependencies:
|
|
|
16
16
|
requirements:
|
|
17
17
|
- - "~>"
|
|
18
18
|
- !ruby/object:Gem::Version
|
|
19
|
-
version: '
|
|
19
|
+
version: '2.0'
|
|
20
20
|
type: :development
|
|
21
21
|
prerelease: false
|
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
23
|
requirements:
|
|
24
24
|
- - "~>"
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
|
-
version: '
|
|
26
|
+
version: '2.0'
|
|
27
27
|
- !ruby/object:Gem::Dependency
|
|
28
28
|
name: rake
|
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -58,14 +58,14 @@ dependencies:
|
|
|
58
58
|
requirements:
|
|
59
59
|
- - "~>"
|
|
60
60
|
- !ruby/object:Gem::Version
|
|
61
|
-
version: 0.9
|
|
61
|
+
version: '0.9'
|
|
62
62
|
type: :development
|
|
63
63
|
prerelease: false
|
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
|
65
65
|
requirements:
|
|
66
66
|
- - "~>"
|
|
67
67
|
- !ruby/object:Gem::Version
|
|
68
|
-
version: 0.9
|
|
68
|
+
version: '0.9'
|
|
69
69
|
- !ruby/object:Gem::Dependency
|
|
70
70
|
name: rspec
|
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -178,7 +178,6 @@ files:
|
|
|
178
178
|
- ".gitignore"
|
|
179
179
|
- ".rspec"
|
|
180
180
|
- ".rubocop.yaml"
|
|
181
|
-
- ".rubocop.yml"
|
|
182
181
|
- Gemfile
|
|
183
182
|
- Gemfile.lock
|
|
184
183
|
- README.md
|
|
@@ -186,7 +185,6 @@ files:
|
|
|
186
185
|
- Steepfile
|
|
187
186
|
- examples/async_patterns.rb
|
|
188
187
|
- ext/kreuzberg_rb/extconf.rb
|
|
189
|
-
- ext/kreuzberg_rb/native/Cargo.lock
|
|
190
188
|
- ext/kreuzberg_rb/native/Cargo.toml
|
|
191
189
|
- ext/kreuzberg_rb/native/README.md
|
|
192
190
|
- ext/kreuzberg_rb/native/build.rs
|
|
@@ -203,7 +201,6 @@ files:
|
|
|
203
201
|
- lib/kreuzberg/cli.rb
|
|
204
202
|
- lib/kreuzberg/cli_proxy.rb
|
|
205
203
|
- lib/kreuzberg/config.rb
|
|
206
|
-
- lib/kreuzberg/error_context.rb
|
|
207
204
|
- lib/kreuzberg/errors.rb
|
|
208
205
|
- lib/kreuzberg/extraction_api.rb
|
|
209
206
|
- lib/kreuzberg/mcp_proxy.rb
|
|
@@ -213,6 +210,7 @@ files:
|
|
|
213
210
|
- lib/kreuzberg/setup_lib_path.rb
|
|
214
211
|
- lib/kreuzberg/validator_protocol.rb
|
|
215
212
|
- lib/kreuzberg/version.rb
|
|
213
|
+
- pkg/kreuzberg-4.0.0.rc1.gem
|
|
216
214
|
- sig/kreuzberg.rbs
|
|
217
215
|
- sig/kreuzberg/internal.rbs
|
|
218
216
|
- spec/binding/cache_spec.rb
|
|
@@ -225,6 +223,7 @@ files:
|
|
|
225
223
|
- spec/binding/plugins/ocr_backend_spec.rb
|
|
226
224
|
- spec/binding/plugins/postprocessor_spec.rb
|
|
227
225
|
- spec/binding/plugins/validator_spec.rb
|
|
226
|
+
- spec/examples.txt
|
|
228
227
|
- spec/fixtures/config.toml
|
|
229
228
|
- spec/fixtures/config.yaml
|
|
230
229
|
- spec/fixtures/invalid_config.toml
|
|
@@ -232,13 +231,13 @@ files:
|
|
|
232
231
|
- spec/spec_helper.rb
|
|
233
232
|
- vendor/kreuzberg/Cargo.toml
|
|
234
233
|
- vendor/kreuzberg/README.md
|
|
235
|
-
- vendor/kreuzberg/benches/otel_overhead.rs
|
|
236
234
|
- vendor/kreuzberg/build.rs
|
|
237
235
|
- vendor/kreuzberg/src/api/error.rs
|
|
238
236
|
- vendor/kreuzberg/src/api/handlers.rs
|
|
239
237
|
- vendor/kreuzberg/src/api/mod.rs
|
|
240
238
|
- vendor/kreuzberg/src/api/server.rs
|
|
241
239
|
- vendor/kreuzberg/src/api/types.rs
|
|
240
|
+
- vendor/kreuzberg/src/bin/profile_extract.rs
|
|
242
241
|
- vendor/kreuzberg/src/cache/mod.rs
|
|
243
242
|
- vendor/kreuzberg/src/chunking/mod.rs
|
|
244
243
|
- vendor/kreuzberg/src/core/batch_mode.rs
|
|
@@ -257,46 +256,34 @@ files:
|
|
|
257
256
|
- vendor/kreuzberg/src/extraction/html.rs
|
|
258
257
|
- vendor/kreuzberg/src/extraction/image.rs
|
|
259
258
|
- vendor/kreuzberg/src/extraction/libreoffice.rs
|
|
260
|
-
- vendor/kreuzberg/src/extraction/markdown.rs
|
|
261
259
|
- vendor/kreuzberg/src/extraction/mod.rs
|
|
262
260
|
- vendor/kreuzberg/src/extraction/office_metadata/app_properties.rs
|
|
263
261
|
- vendor/kreuzberg/src/extraction/office_metadata/core_properties.rs
|
|
264
262
|
- vendor/kreuzberg/src/extraction/office_metadata/custom_properties.rs
|
|
265
263
|
- vendor/kreuzberg/src/extraction/office_metadata/mod.rs
|
|
266
|
-
- vendor/kreuzberg/src/extraction/office_metadata/odt_properties.rs
|
|
264
|
+
- vendor/kreuzberg/src/extraction/pandoc/batch.rs
|
|
265
|
+
- vendor/kreuzberg/src/extraction/pandoc/mime_types.rs
|
|
266
|
+
- vendor/kreuzberg/src/extraction/pandoc/mod.rs
|
|
267
|
+
- vendor/kreuzberg/src/extraction/pandoc/server.rs
|
|
268
|
+
- vendor/kreuzberg/src/extraction/pandoc/subprocess.rs
|
|
269
|
+
- vendor/kreuzberg/src/extraction/pandoc/version.rs
|
|
267
270
|
- vendor/kreuzberg/src/extraction/pptx.rs
|
|
268
271
|
- vendor/kreuzberg/src/extraction/structured.rs
|
|
269
272
|
- vendor/kreuzberg/src/extraction/table.rs
|
|
270
273
|
- vendor/kreuzberg/src/extraction/text.rs
|
|
271
274
|
- vendor/kreuzberg/src/extraction/xml.rs
|
|
272
275
|
- vendor/kreuzberg/src/extractors/archive.rs
|
|
273
|
-
- vendor/kreuzberg/src/extractors/bibtex.rs
|
|
274
|
-
- vendor/kreuzberg/src/extractors/docbook.rs
|
|
275
276
|
- vendor/kreuzberg/src/extractors/docx.rs
|
|
276
277
|
- vendor/kreuzberg/src/extractors/email.rs
|
|
277
|
-
- vendor/kreuzberg/src/extractors/epub.rs
|
|
278
278
|
- vendor/kreuzberg/src/extractors/excel.rs
|
|
279
|
-
- vendor/kreuzberg/src/extractors/fictionbook.rs
|
|
280
|
-
- vendor/kreuzberg/src/extractors/fictionbook.rs.backup2
|
|
281
279
|
- vendor/kreuzberg/src/extractors/html.rs
|
|
282
280
|
- vendor/kreuzberg/src/extractors/image.rs
|
|
283
|
-
- vendor/kreuzberg/src/extractors/jats.rs
|
|
284
|
-
- vendor/kreuzberg/src/extractors/jupyter.rs
|
|
285
|
-
- vendor/kreuzberg/src/extractors/latex.rs
|
|
286
|
-
- vendor/kreuzberg/src/extractors/markdown.rs
|
|
287
281
|
- vendor/kreuzberg/src/extractors/mod.rs
|
|
288
|
-
- vendor/kreuzberg/src/extractors/odt.rs
|
|
289
|
-
- vendor/kreuzberg/src/extractors/opml.rs
|
|
290
|
-
- vendor/kreuzberg/src/extractors/orgmode.rs
|
|
282
|
+
- vendor/kreuzberg/src/extractors/pandoc.rs
|
|
291
283
|
- vendor/kreuzberg/src/extractors/pdf.rs
|
|
292
284
|
- vendor/kreuzberg/src/extractors/pptx.rs
|
|
293
|
-
- vendor/kreuzberg/src/extractors/rst.rs
|
|
294
|
-
- vendor/kreuzberg/src/extractors/rtf.rs
|
|
295
|
-
- vendor/kreuzberg/src/extractors/security.rs
|
|
296
|
-
- vendor/kreuzberg/src/extractors/security_tests.rs
|
|
297
285
|
- vendor/kreuzberg/src/extractors/structured.rs
|
|
298
286
|
- vendor/kreuzberg/src/extractors/text.rs
|
|
299
|
-
- vendor/kreuzberg/src/extractors/typst.rs
|
|
300
287
|
- vendor/kreuzberg/src/extractors/xml.rs
|
|
301
288
|
- vendor/kreuzberg/src/image/dpi.rs
|
|
302
289
|
- vendor/kreuzberg/src/image/mod.rs
|
|
@@ -323,7 +310,6 @@ files:
|
|
|
323
310
|
- vendor/kreuzberg/src/ocr/types.rs
|
|
324
311
|
- vendor/kreuzberg/src/ocr/utils.rs
|
|
325
312
|
- vendor/kreuzberg/src/ocr/validation.rs
|
|
326
|
-
- vendor/kreuzberg/src/panic_context.rs
|
|
327
313
|
- vendor/kreuzberg/src/pdf/error.rs
|
|
328
314
|
- vendor/kreuzberg/src/pdf/images.rs
|
|
329
315
|
- vendor/kreuzberg/src/pdf/metadata.rs
|
|
@@ -417,43 +403,30 @@ files:
|
|
|
417
403
|
- vendor/kreuzberg/stopwords/yo_stopwords.json
|
|
418
404
|
- vendor/kreuzberg/stopwords/zh_stopwords.json
|
|
419
405
|
- vendor/kreuzberg/stopwords/zu_stopwords.json
|
|
420
|
-
- vendor/kreuzberg/tests/api_extract_multipart.rs
|
|
421
406
|
- vendor/kreuzberg/tests/api_tests.rs
|
|
422
407
|
- vendor/kreuzberg/tests/archive_integration.rs
|
|
423
408
|
- vendor/kreuzberg/tests/batch_orchestration.rs
|
|
424
409
|
- vendor/kreuzberg/tests/batch_processing.rs
|
|
425
|
-
- vendor/kreuzberg/tests/bibtex_parity_test.rs
|
|
410
|
+
- vendor/kreuzberg/tests/chunking_offset_demo.rs
|
|
426
411
|
- vendor/kreuzberg/tests/concurrency_stress.rs
|
|
427
412
|
- vendor/kreuzberg/tests/config_features.rs
|
|
428
413
|
- vendor/kreuzberg/tests/config_loading_tests.rs
|
|
429
414
|
- vendor/kreuzberg/tests/core_integration.rs
|
|
430
415
|
- vendor/kreuzberg/tests/csv_integration.rs
|
|
431
|
-
- vendor/kreuzberg/tests/docbook_extractor_tests.rs
|
|
432
416
|
- vendor/kreuzberg/tests/docx_metadata_extraction_test.rs
|
|
433
|
-
- vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs
|
|
434
417
|
- vendor/kreuzberg/tests/email_integration.rs
|
|
435
|
-
- vendor/kreuzberg/tests/epub_native_extractor_tests.rs
|
|
436
418
|
- vendor/kreuzberg/tests/error_handling.rs
|
|
437
|
-
- vendor/kreuzberg/tests/fictionbook_extractor_tests.rs
|
|
438
419
|
- vendor/kreuzberg/tests/format_integration.rs
|
|
439
420
|
- vendor/kreuzberg/tests/helpers/mod.rs
|
|
440
|
-
- vendor/kreuzberg/tests/html_table_test.rs
|
|
441
421
|
- vendor/kreuzberg/tests/image_integration.rs
|
|
442
|
-
- vendor/kreuzberg/tests/instrumentation_test.rs
|
|
443
|
-
- vendor/kreuzberg/tests/jats_extractor_tests.rs
|
|
444
|
-
- vendor/kreuzberg/tests/jupyter_extractor_tests.rs
|
|
445
422
|
- vendor/kreuzberg/tests/keywords_integration.rs
|
|
446
423
|
- vendor/kreuzberg/tests/keywords_quality.rs
|
|
447
|
-
- vendor/kreuzberg/tests/latex_extractor_tests.rs
|
|
448
|
-
- vendor/kreuzberg/tests/markdown_extractor_tests.rs
|
|
449
424
|
- vendor/kreuzberg/tests/mime_detection.rs
|
|
450
425
|
- vendor/kreuzberg/tests/ocr_configuration.rs
|
|
451
426
|
- vendor/kreuzberg/tests/ocr_errors.rs
|
|
452
427
|
- vendor/kreuzberg/tests/ocr_quality.rs
|
|
453
428
|
- vendor/kreuzberg/tests/ocr_stress.rs
|
|
454
|
-
- vendor/kreuzberg/tests/odt_extractor_tests.rs
|
|
455
|
-
- vendor/kreuzberg/tests/opml_extractor_tests.rs
|
|
456
|
-
- vendor/kreuzberg/tests/orgmode_extractor_tests.rs
|
|
429
|
+
- vendor/kreuzberg/tests/pandoc_integration.rs
|
|
457
430
|
- vendor/kreuzberg/tests/pdf_integration.rs
|
|
458
431
|
- vendor/kreuzberg/tests/pipeline_integration.rs
|
|
459
432
|
- vendor/kreuzberg/tests/plugin_ocr_backend_test.rs
|
|
@@ -461,59 +434,21 @@ files:
|
|
|
461
434
|
- vendor/kreuzberg/tests/plugin_system.rs
|
|
462
435
|
- vendor/kreuzberg/tests/plugin_validator_test.rs
|
|
463
436
|
- vendor/kreuzberg/tests/registry_integration_tests.rs
|
|
464
|
-
- vendor/kreuzberg/tests/rst_extractor_tests.rs
|
|
465
|
-
- vendor/kreuzberg/tests/rtf_extractor_tests.rs
|
|
466
437
|
- vendor/kreuzberg/tests/security_validation.rs
|
|
467
438
|
- vendor/kreuzberg/tests/stopwords_integration_test.rs
|
|
468
439
|
- vendor/kreuzberg/tests/test_fastembed.rs
|
|
469
|
-
- vendor/kreuzberg/tests/typst_behavioral_tests.rs
|
|
470
|
-
- vendor/kreuzberg/tests/typst_extractor_tests.rs
|
|
471
440
|
- vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs
|
|
472
|
-
- vendor/rb-sys/.cargo-ok
|
|
473
|
-
- vendor/rb-sys/.cargo_vcs_info.json
|
|
474
|
-
- vendor/rb-sys/Cargo.lock
|
|
475
|
-
- vendor/rb-sys/Cargo.toml
|
|
476
|
-
- vendor/rb-sys/Cargo.toml.orig
|
|
477
|
-
- vendor/rb-sys/LICENSE-APACHE
|
|
478
|
-
- vendor/rb-sys/LICENSE-MIT
|
|
479
|
-
- vendor/rb-sys/bin/release.sh
|
|
480
|
-
- vendor/rb-sys/build/features.rs
|
|
481
|
-
- vendor/rb-sys/build/main.rs
|
|
482
|
-
- vendor/rb-sys/build/stable_api_config.rs
|
|
483
|
-
- vendor/rb-sys/build/version.rs
|
|
484
|
-
- vendor/rb-sys/readme.md
|
|
485
|
-
- vendor/rb-sys/src/bindings.rs
|
|
486
|
-
- vendor/rb-sys/src/hidden.rs
|
|
487
|
-
- vendor/rb-sys/src/lib.rs
|
|
488
|
-
- vendor/rb-sys/src/macros.rs
|
|
489
|
-
- vendor/rb-sys/src/memory.rs
|
|
490
|
-
- vendor/rb-sys/src/ruby_abi_version.rs
|
|
491
|
-
- vendor/rb-sys/src/special_consts.rs
|
|
492
|
-
- vendor/rb-sys/src/stable_api.rs
|
|
493
|
-
- vendor/rb-sys/src/stable_api/compiled.c
|
|
494
|
-
- vendor/rb-sys/src/stable_api/compiled.rs
|
|
495
|
-
- vendor/rb-sys/src/stable_api/ruby_2_6.rs
|
|
496
|
-
- vendor/rb-sys/src/stable_api/ruby_2_7.rs
|
|
497
|
-
- vendor/rb-sys/src/stable_api/ruby_3_0.rs
|
|
498
|
-
- vendor/rb-sys/src/stable_api/ruby_3_1.rs
|
|
499
|
-
- vendor/rb-sys/src/stable_api/ruby_3_2.rs
|
|
500
|
-
- vendor/rb-sys/src/stable_api/ruby_3_3.rs
|
|
501
|
-
- vendor/rb-sys/src/stable_api/ruby_3_4.rs
|
|
502
|
-
- vendor/rb-sys/src/symbol.rs
|
|
503
|
-
- vendor/rb-sys/src/tracking_allocator.rs
|
|
504
|
-
- vendor/rb-sys/src/utils.rs
|
|
505
|
-
- vendor/rb-sys/src/value_type.rs
|
|
506
|
-
homepage: https://github.com/kreuzberg-dev/kreuzberg
|
|
441
|
+
homepage: https://github.com/Goldziher/kreuzberg
|
|
507
442
|
licenses:
|
|
508
443
|
- MIT
|
|
509
444
|
metadata:
|
|
510
|
-
|
|
511
|
-
|
|
445
|
+
homepage_uri: https://github.com/Goldziher/kreuzberg
|
|
446
|
+
source_code_uri: https://github.com/Goldziher/kreuzberg
|
|
447
|
+
changelog_uri: https://github.com/Goldziher/kreuzberg/blob/main/CHANGELOG.md
|
|
512
448
|
documentation_uri: https://docs.kreuzberg.dev
|
|
513
|
-
|
|
514
|
-
keywords: document-intelligence,document-extraction,ocr,rust,bindings
|
|
449
|
+
bug_tracker_uri: https://github.com/Goldziher/kreuzberg/issues
|
|
515
450
|
rubygems_mfa_required: 'true'
|
|
516
|
-
|
|
451
|
+
keywords: document-intelligence,document-extraction,ocr,rust,bindings
|
|
517
452
|
post_install_message:
|
|
518
453
|
rdoc_options: []
|
|
519
454
|
require_paths:
|
|
@@ -529,7 +464,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
529
464
|
- !ruby/object:Gem::Version
|
|
530
465
|
version: '0'
|
|
531
466
|
requirements: []
|
|
532
|
-
rubygems_version: 3.
|
|
467
|
+
rubygems_version: 3.5.22
|
|
533
468
|
signing_key:
|
|
534
469
|
specification_version: 4
|
|
535
470
|
summary: High-performance document intelligence framework
|