kreuzberg 4.0.0.pre.rc.14 → 4.0.0.pre.rc.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +5 -0
- data/Gemfile.lock +2 -2
- data/ext/kreuzberg_rb/native/Cargo.lock +25 -215
- data/ext/kreuzberg_rb/native/Cargo.toml +1 -2
- data/ext/kreuzberg_rb/native/build.rs +38 -1
- data/ext/kreuzberg_rb/native/src/lib.rs +682 -9
- data/lib/kreuzberg/config.rb +111 -8
- data/lib/kreuzberg/error_context.rb +76 -0
- data/lib/kreuzberg/result.rb +79 -1
- data/lib/kreuzberg/version.rb +1 -1
- data/lib/{libpdfium.dylib → libpdfium.so} +0 -0
- data/sig/kreuzberg.rbs +8 -0
- data/spec/binding/batch_spec.rb +374 -0
- data/spec/binding/config_result_spec.rb +377 -0
- data/spec/binding/config_validation_spec.rb +98 -0
- data/vendor/Cargo.toml +2 -2
- data/vendor/kreuzberg/Cargo.toml +15 -2
- data/vendor/kreuzberg/benches/token_reduction.rs +135 -0
- data/vendor/kreuzberg/build.rs +54 -10
- data/vendor/kreuzberg/src/api/mod.rs +8 -0
- data/vendor/kreuzberg/src/chunking/mod.rs +464 -28
- data/vendor/kreuzberg/src/core/batch_optimizations.rs +304 -0
- data/vendor/kreuzberg/src/core/config_validation.rs +662 -0
- data/vendor/kreuzberg/src/core/extractor.rs +19 -2
- data/vendor/kreuzberg/src/core/formats.rs +251 -0
- data/vendor/kreuzberg/src/core/mod.rs +12 -0
- data/vendor/kreuzberg/src/core/pipeline.rs +103 -32
- data/vendor/kreuzberg/src/extraction/archive.rs +18 -6
- data/vendor/kreuzberg/src/extraction/docx.rs +7 -3
- data/vendor/kreuzberg/src/extraction/email.rs +15 -11
- data/vendor/kreuzberg/src/extraction/excel.rs +24 -5
- data/vendor/kreuzberg/src/extraction/html.rs +49 -8
- data/vendor/kreuzberg/src/extraction/markdown.rs +5 -2
- data/vendor/kreuzberg/src/extraction/pptx.rs +8 -6
- data/vendor/kreuzberg/src/extraction/structured.rs +5 -4
- data/vendor/kreuzberg/src/extraction/table.rs +3 -1
- data/vendor/kreuzberg/src/extraction/text.rs +27 -10
- data/vendor/kreuzberg/src/extractors/html.rs +2 -1
- data/vendor/kreuzberg/src/extractors/pdf.rs +74 -42
- data/vendor/kreuzberg/src/lib.rs +2 -2
- data/vendor/kreuzberg/src/ocr/language_registry.rs +526 -0
- data/vendor/kreuzberg/src/ocr/mod.rs +2 -0
- data/vendor/kreuzberg/src/pdf/bindings.rs +202 -19
- data/vendor/kreuzberg/src/pdf/bundled.rs +124 -9
- data/vendor/kreuzberg/src/pdf/metadata.rs +8 -0
- data/vendor/kreuzberg/src/pdf/rendering.rs +4 -0
- data/vendor/kreuzberg/src/pdf/text.rs +164 -30
- data/vendor/kreuzberg/src/text/mod.rs +2 -0
- data/vendor/kreuzberg/src/text/quality_processor.rs +37 -12
- data/vendor/kreuzberg/src/text/string_utils.rs +27 -10
- data/vendor/kreuzberg/src/text/token_reduction/core.rs +37 -5
- data/vendor/kreuzberg/src/text/token_reduction/filters.rs +24 -10
- data/vendor/kreuzberg/src/text/token_reduction/simd_text.rs +2 -1
- data/vendor/kreuzberg/src/text/utf8_validation.rs +197 -0
- data/vendor/kreuzberg/src/types.rs +380 -6
- data/vendor/kreuzberg/src/utils/mod.rs +11 -0
- data/vendor/kreuzberg/src/utils/pool.rs +364 -0
- data/vendor/kreuzberg/src/utils/quality.rs +12 -3
- data/vendor/kreuzberg/src/utils/string_pool.rs +424 -0
- data/vendor/kreuzberg/tests/batch_pooling_benchmark.rs +169 -0
- data/vendor/kreuzberg/tests/format_integration.rs +1 -0
- data/vendor/kreuzberg/tests/ocr_language_registry.rs +207 -0
- data/vendor/kreuzberg/tests/pipeline_integration.rs +3 -1
- data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +17 -0
- data/vendor/kreuzberg-tesseract/Cargo.toml +1 -1
- metadata +23 -21
- data/vendor/kreuzberg-ffi/Cargo.toml +0 -63
- data/vendor/kreuzberg-ffi/README.md +0 -851
- data/vendor/kreuzberg-ffi/build.rs +0 -176
- data/vendor/kreuzberg-ffi/cbindgen.toml +0 -27
- data/vendor/kreuzberg-ffi/kreuzberg-ffi.pc.in +0 -12
- data/vendor/kreuzberg-ffi/kreuzberg.h +0 -1087
- data/vendor/kreuzberg-ffi/src/lib.rs +0 -3616
- data/vendor/kreuzberg-ffi/src/panic_shield.rs +0 -247
- data/vendor/kreuzberg-ffi/tests.disabled/README.md +0 -48
- data/vendor/kreuzberg-ffi/tests.disabled/config_loading_tests.rs +0 -299
- data/vendor/kreuzberg-ffi/tests.disabled/config_tests.rs +0 -346
- data/vendor/kreuzberg-ffi/tests.disabled/extractor_tests.rs +0 -232
- data/vendor/kreuzberg-ffi/tests.disabled/plugin_registration_tests.rs +0 -470
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: fe3add89c26722e26baf090f7b9a0c32671c449be6a34ea4285a5f6d15548b72
|
|
4
|
+
data.tar.gz: 49147ceab3fddc3161ff0df55f7c535134d63da7ce2577aad905c91179e875f3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5f2e0ab3d3dd4c975a99dcbf4a2e81347673eb74687034f8ef72cc3ece6561fbbed70811edc7363c911385c2d7c2eb0be2d8fa990872845458a3d3f5f019422c
|
|
7
|
+
data.tar.gz: 530bf825eb92e9a3df838ab14ec68277b17e575833ecdf0af11e32d8749e101e4fe68195841524c5e6b41c31a2330076e9da4507f5edf047d8670ad26c9dd928
|
data/.rubocop.yml
CHANGED
|
@@ -52,6 +52,7 @@ Metrics/AbcSize:
|
|
|
52
52
|
Exclude:
|
|
53
53
|
- 'spec/**/*'
|
|
54
54
|
- 'examples/**/*'
|
|
55
|
+
- 'lib/kreuzberg/config.rb'
|
|
55
56
|
|
|
56
57
|
Naming/FileName:
|
|
57
58
|
Enabled: true
|
|
@@ -99,6 +100,10 @@ Metrics/PerceivedComplexity:
|
|
|
99
100
|
Exclude:
|
|
100
101
|
- 'lib/kreuzberg/config.rb'
|
|
101
102
|
|
|
103
|
+
Metrics/ClassLength:
|
|
104
|
+
Exclude:
|
|
105
|
+
- 'lib/kreuzberg/config.rb'
|
|
106
|
+
|
|
102
107
|
RSpec/RepeatedExampleGroupBody:
|
|
103
108
|
Enabled: false
|
|
104
109
|
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
kreuzberg (4.0.0.pre.rc.
|
|
4
|
+
kreuzberg (4.0.0.pre.rc.16)
|
|
5
5
|
|
|
6
6
|
GEM
|
|
7
7
|
remote: https://rubygems.org/
|
|
@@ -136,8 +136,8 @@ GEM
|
|
|
136
136
|
yard (0.9.38)
|
|
137
137
|
|
|
138
138
|
PLATFORMS
|
|
139
|
-
arm64-darwin-23
|
|
140
139
|
arm64-darwin-24
|
|
140
|
+
arm64-darwin-25
|
|
141
141
|
x86_64-linux
|
|
142
142
|
|
|
143
143
|
DEPENDENCIES
|
|
@@ -75,56 +75,6 @@ dependencies = [
|
|
|
75
75
|
"libc",
|
|
76
76
|
]
|
|
77
77
|
|
|
78
|
-
[[package]]
|
|
79
|
-
name = "anstream"
|
|
80
|
-
version = "0.6.21"
|
|
81
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
82
|
-
checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a"
|
|
83
|
-
dependencies = [
|
|
84
|
-
"anstyle",
|
|
85
|
-
"anstyle-parse",
|
|
86
|
-
"anstyle-query",
|
|
87
|
-
"anstyle-wincon",
|
|
88
|
-
"colorchoice",
|
|
89
|
-
"is_terminal_polyfill",
|
|
90
|
-
"utf8parse",
|
|
91
|
-
]
|
|
92
|
-
|
|
93
|
-
[[package]]
|
|
94
|
-
name = "anstyle"
|
|
95
|
-
version = "1.0.13"
|
|
96
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
97
|
-
checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78"
|
|
98
|
-
|
|
99
|
-
[[package]]
|
|
100
|
-
name = "anstyle-parse"
|
|
101
|
-
version = "0.2.7"
|
|
102
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
103
|
-
checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
|
|
104
|
-
dependencies = [
|
|
105
|
-
"utf8parse",
|
|
106
|
-
]
|
|
107
|
-
|
|
108
|
-
[[package]]
|
|
109
|
-
name = "anstyle-query"
|
|
110
|
-
version = "1.1.5"
|
|
111
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
112
|
-
checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
|
|
113
|
-
dependencies = [
|
|
114
|
-
"windows-sys 0.61.2",
|
|
115
|
-
]
|
|
116
|
-
|
|
117
|
-
[[package]]
|
|
118
|
-
name = "anstyle-wincon"
|
|
119
|
-
version = "3.0.11"
|
|
120
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
121
|
-
checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
|
|
122
|
-
dependencies = [
|
|
123
|
-
"anstyle",
|
|
124
|
-
"once_cell_polyfill",
|
|
125
|
-
"windows-sys 0.61.2",
|
|
126
|
-
]
|
|
127
|
-
|
|
128
78
|
[[package]]
|
|
129
79
|
name = "anyhow"
|
|
130
80
|
version = "1.0.100"
|
|
@@ -466,30 +416,15 @@ dependencies = [
|
|
|
466
416
|
"syn",
|
|
467
417
|
]
|
|
468
418
|
|
|
469
|
-
[[package]]
|
|
470
|
-
name = "bit-set"
|
|
471
|
-
version = "0.6.0"
|
|
472
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
473
|
-
checksum = "f0481a0e032742109b1133a095184ee93d88f3dc9e0d28a5d033dc77a073f44f"
|
|
474
|
-
dependencies = [
|
|
475
|
-
"bit-vec 0.7.0",
|
|
476
|
-
]
|
|
477
|
-
|
|
478
419
|
[[package]]
|
|
479
420
|
name = "bit-set"
|
|
480
421
|
version = "0.8.0"
|
|
481
422
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
482
423
|
checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
|
|
483
424
|
dependencies = [
|
|
484
|
-
"bit-vec
|
|
425
|
+
"bit-vec",
|
|
485
426
|
]
|
|
486
427
|
|
|
487
|
-
[[package]]
|
|
488
|
-
name = "bit-vec"
|
|
489
|
-
version = "0.7.0"
|
|
490
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
491
|
-
checksum = "d2c54ff287cfc0a34f38a6b832ea1bd8e448a330b3e40a50859e6488bee07f22"
|
|
492
|
-
|
|
493
428
|
[[package]]
|
|
494
429
|
name = "bit-vec"
|
|
495
430
|
version = "0.8.0"
|
|
@@ -661,25 +596,6 @@ dependencies = [
|
|
|
661
596
|
"cipher",
|
|
662
597
|
]
|
|
663
598
|
|
|
664
|
-
[[package]]
|
|
665
|
-
name = "cbindgen"
|
|
666
|
-
version = "0.29.2"
|
|
667
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
668
|
-
checksum = "befbfd072a8e81c02f8c507aefce431fe5e7d051f83d48a23ffc9b9fe5a11799"
|
|
669
|
-
dependencies = [
|
|
670
|
-
"clap",
|
|
671
|
-
"heck",
|
|
672
|
-
"indexmap",
|
|
673
|
-
"log",
|
|
674
|
-
"proc-macro2",
|
|
675
|
-
"quote",
|
|
676
|
-
"serde",
|
|
677
|
-
"serde_json",
|
|
678
|
-
"syn",
|
|
679
|
-
"tempfile",
|
|
680
|
-
"toml 0.9.10+spec-1.1.0",
|
|
681
|
-
]
|
|
682
|
-
|
|
683
599
|
[[package]]
|
|
684
600
|
name = "cc"
|
|
685
601
|
version = "1.2.50"
|
|
@@ -780,33 +696,6 @@ dependencies = [
|
|
|
780
696
|
"libloading 0.8.9",
|
|
781
697
|
]
|
|
782
698
|
|
|
783
|
-
[[package]]
|
|
784
|
-
name = "clap"
|
|
785
|
-
version = "4.5.53"
|
|
786
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
787
|
-
checksum = "c9e340e012a1bf4935f5282ed1436d1489548e8f72308207ea5df0e23d2d03f8"
|
|
788
|
-
dependencies = [
|
|
789
|
-
"clap_builder",
|
|
790
|
-
]
|
|
791
|
-
|
|
792
|
-
[[package]]
|
|
793
|
-
name = "clap_builder"
|
|
794
|
-
version = "4.5.53"
|
|
795
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
796
|
-
checksum = "d76b5d13eaa18c901fd2f7fca939fefe3a0727a953561fefdf3b2922b8569d00"
|
|
797
|
-
dependencies = [
|
|
798
|
-
"anstream",
|
|
799
|
-
"anstyle",
|
|
800
|
-
"clap_lex",
|
|
801
|
-
"strsim",
|
|
802
|
-
]
|
|
803
|
-
|
|
804
|
-
[[package]]
|
|
805
|
-
name = "clap_lex"
|
|
806
|
-
version = "0.7.6"
|
|
807
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
808
|
-
checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
|
|
809
|
-
|
|
810
699
|
[[package]]
|
|
811
700
|
name = "cmake"
|
|
812
701
|
version = "0.1.57"
|
|
@@ -831,12 +720,6 @@ version = "1.1.0"
|
|
|
831
720
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
832
721
|
checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b"
|
|
833
722
|
|
|
834
|
-
[[package]]
|
|
835
|
-
name = "colorchoice"
|
|
836
|
-
version = "1.0.4"
|
|
837
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
838
|
-
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
|
|
839
|
-
|
|
840
723
|
[[package]]
|
|
841
724
|
name = "compact_str"
|
|
842
725
|
version = "0.9.0"
|
|
@@ -945,9 +828,9 @@ dependencies = [
|
|
|
945
828
|
|
|
946
829
|
[[package]]
|
|
947
830
|
name = "crc"
|
|
948
|
-
version = "3.
|
|
831
|
+
version = "3.3.0"
|
|
949
832
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
950
|
-
checksum = "
|
|
833
|
+
checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675"
|
|
951
834
|
dependencies = [
|
|
952
835
|
"crc-catalog",
|
|
953
836
|
]
|
|
@@ -1401,7 +1284,7 @@ version = "0.14.0"
|
|
|
1401
1284
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1402
1285
|
checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298"
|
|
1403
1286
|
dependencies = [
|
|
1404
|
-
"bit-set
|
|
1287
|
+
"bit-set",
|
|
1405
1288
|
"regex-automata",
|
|
1406
1289
|
"regex-syntax",
|
|
1407
1290
|
]
|
|
@@ -1497,17 +1380,6 @@ dependencies = [
|
|
|
1497
1380
|
"windows-sys 0.60.2",
|
|
1498
1381
|
]
|
|
1499
1382
|
|
|
1500
|
-
[[package]]
|
|
1501
|
-
name = "filetime_creation"
|
|
1502
|
-
version = "0.2.0"
|
|
1503
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1504
|
-
checksum = "c25b5d475550e559de5b0c0084761c65325444e3b6c9e298af9cefe7a9ef3a5f"
|
|
1505
|
-
dependencies = [
|
|
1506
|
-
"cfg-if",
|
|
1507
|
-
"filetime",
|
|
1508
|
-
"windows-sys 0.52.0",
|
|
1509
|
-
]
|
|
1510
|
-
|
|
1511
1383
|
[[package]]
|
|
1512
1384
|
name = "find-msvc-tools"
|
|
1513
1385
|
version = "0.1.5"
|
|
@@ -2312,12 +2184,6 @@ dependencies = [
|
|
|
2312
2184
|
"serde",
|
|
2313
2185
|
]
|
|
2314
2186
|
|
|
2315
|
-
[[package]]
|
|
2316
|
-
name = "is_terminal_polyfill"
|
|
2317
|
-
version = "1.70.2"
|
|
2318
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2319
|
-
checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
|
|
2320
|
-
|
|
2321
2187
|
[[package]]
|
|
2322
2188
|
name = "itertools"
|
|
2323
2189
|
version = "0.12.1"
|
|
@@ -2414,7 +2280,7 @@ dependencies = [
|
|
|
2414
2280
|
|
|
2415
2281
|
[[package]]
|
|
2416
2282
|
name = "kreuzberg"
|
|
2417
|
-
version = "4.0.0-rc.
|
|
2283
|
+
version = "4.0.0-rc.14"
|
|
2418
2284
|
dependencies = [
|
|
2419
2285
|
"ahash",
|
|
2420
2286
|
"async-trait",
|
|
@@ -2467,7 +2333,7 @@ dependencies = [
|
|
|
2467
2333
|
"serde",
|
|
2468
2334
|
"serde_json",
|
|
2469
2335
|
"serde_yaml_ng",
|
|
2470
|
-
"sevenz-
|
|
2336
|
+
"sevenz-rust2",
|
|
2471
2337
|
"tar",
|
|
2472
2338
|
"text-splitter",
|
|
2473
2339
|
"thiserror 2.0.17",
|
|
@@ -2483,30 +2349,16 @@ dependencies = [
|
|
|
2483
2349
|
"uuid",
|
|
2484
2350
|
"whatlang",
|
|
2485
2351
|
"yake-rust",
|
|
2486
|
-
"zip
|
|
2487
|
-
]
|
|
2488
|
-
|
|
2489
|
-
[[package]]
|
|
2490
|
-
name = "kreuzberg-ffi"
|
|
2491
|
-
version = "4.0.0-rc.13"
|
|
2492
|
-
dependencies = [
|
|
2493
|
-
"async-trait",
|
|
2494
|
-
"cbindgen",
|
|
2495
|
-
"html-to-markdown-rs",
|
|
2496
|
-
"kreuzberg",
|
|
2497
|
-
"serde",
|
|
2498
|
-
"serde_json",
|
|
2499
|
-
"tokio",
|
|
2352
|
+
"zip 7.0.0",
|
|
2500
2353
|
]
|
|
2501
2354
|
|
|
2502
2355
|
[[package]]
|
|
2503
2356
|
name = "kreuzberg-rb"
|
|
2504
|
-
version = "4.0.0-rc.
|
|
2357
|
+
version = "4.0.0-rc.16"
|
|
2505
2358
|
dependencies = [
|
|
2506
2359
|
"async-trait",
|
|
2507
2360
|
"html-to-markdown-rs",
|
|
2508
2361
|
"kreuzberg",
|
|
2509
|
-
"kreuzberg-ffi",
|
|
2510
2362
|
"magnus",
|
|
2511
2363
|
"pretty_assertions",
|
|
2512
2364
|
"rb-sys",
|
|
@@ -2516,14 +2368,14 @@ dependencies = [
|
|
|
2516
2368
|
|
|
2517
2369
|
[[package]]
|
|
2518
2370
|
name = "kreuzberg-tesseract"
|
|
2519
|
-
version = "4.0.0-rc.
|
|
2371
|
+
version = "4.0.0-rc.14"
|
|
2520
2372
|
dependencies = [
|
|
2521
2373
|
"cc",
|
|
2522
2374
|
"cmake",
|
|
2523
2375
|
"libc",
|
|
2524
2376
|
"reqwest",
|
|
2525
2377
|
"thiserror 2.0.17",
|
|
2526
|
-
"zip
|
|
2378
|
+
"zip 7.0.0",
|
|
2527
2379
|
]
|
|
2528
2380
|
|
|
2529
2381
|
[[package]]
|
|
@@ -2726,20 +2578,11 @@ dependencies = [
|
|
|
2726
2578
|
"libc",
|
|
2727
2579
|
]
|
|
2728
2580
|
|
|
2729
|
-
[[package]]
|
|
2730
|
-
name = "lzma-rust"
|
|
2731
|
-
version = "0.1.7"
|
|
2732
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2733
|
-
checksum = "5baab2bbbd7d75a144d671e9ff79270e903957d92fb7386fd39034c709bd2661"
|
|
2734
|
-
dependencies = [
|
|
2735
|
-
"byteorder",
|
|
2736
|
-
]
|
|
2737
|
-
|
|
2738
2581
|
[[package]]
|
|
2739
2582
|
name = "lzma-rust2"
|
|
2740
|
-
version = "0.
|
|
2583
|
+
version = "0.15.4"
|
|
2741
2584
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2742
|
-
checksum = "
|
|
2585
|
+
checksum = "48172246aa7c3ea28e423295dd1ca2589a24617cc4e588bb8cfe177cb2c54d95"
|
|
2743
2586
|
dependencies = [
|
|
2744
2587
|
"crc",
|
|
2745
2588
|
"sha2",
|
|
@@ -3076,16 +2919,6 @@ dependencies = [
|
|
|
3076
2919
|
"chrono",
|
|
3077
2920
|
]
|
|
3078
2921
|
|
|
3079
|
-
[[package]]
|
|
3080
|
-
name = "nt-time"
|
|
3081
|
-
version = "0.8.1"
|
|
3082
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3083
|
-
checksum = "2de419e64947cd8830e66beb584acc3fb42ed411d103e3c794dda355d1b374b5"
|
|
3084
|
-
dependencies = [
|
|
3085
|
-
"chrono",
|
|
3086
|
-
"time",
|
|
3087
|
-
]
|
|
3088
|
-
|
|
3089
2922
|
[[package]]
|
|
3090
2923
|
name = "num-bigint"
|
|
3091
2924
|
version = "0.4.6"
|
|
@@ -3218,12 +3051,6 @@ version = "1.21.3"
|
|
|
3218
3051
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3219
3052
|
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
|
|
3220
3053
|
|
|
3221
|
-
[[package]]
|
|
3222
|
-
name = "once_cell_polyfill"
|
|
3223
|
-
version = "1.70.2"
|
|
3224
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3225
|
-
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
|
|
3226
|
-
|
|
3227
3054
|
[[package]]
|
|
3228
3055
|
name = "onig"
|
|
3229
3056
|
version = "6.5.1"
|
|
@@ -5042,18 +4869,19 @@ dependencies = [
|
|
|
5042
4869
|
]
|
|
5043
4870
|
|
|
5044
4871
|
[[package]]
|
|
5045
|
-
name = "sevenz-
|
|
5046
|
-
version = "0.
|
|
4872
|
+
name = "sevenz-rust2"
|
|
4873
|
+
version = "0.20.0"
|
|
5047
4874
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
5048
|
-
checksum = "
|
|
4875
|
+
checksum = "611081ec4fc67633b979fc0c24385de90fa60acd18126d796c8758a24294a950"
|
|
5049
4876
|
dependencies = [
|
|
5050
|
-
"
|
|
5051
|
-
"
|
|
5052
|
-
"
|
|
5053
|
-
"
|
|
4877
|
+
"aes",
|
|
4878
|
+
"bzip2",
|
|
4879
|
+
"cbc",
|
|
4880
|
+
"crc32fast",
|
|
4881
|
+
"getrandom 0.3.4",
|
|
5054
4882
|
"js-sys",
|
|
5055
|
-
"lzma-
|
|
5056
|
-
"
|
|
4883
|
+
"lzma-rust2",
|
|
4884
|
+
"ppmd-rust",
|
|
5057
4885
|
"sha2",
|
|
5058
4886
|
"wasm-bindgen",
|
|
5059
4887
|
]
|
|
@@ -5401,19 +5229,6 @@ dependencies = [
|
|
|
5401
5229
|
"xattr",
|
|
5402
5230
|
]
|
|
5403
5231
|
|
|
5404
|
-
[[package]]
|
|
5405
|
-
name = "tempfile"
|
|
5406
|
-
version = "3.23.0"
|
|
5407
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
5408
|
-
checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16"
|
|
5409
|
-
dependencies = [
|
|
5410
|
-
"fastrand",
|
|
5411
|
-
"getrandom 0.3.4",
|
|
5412
|
-
"once_cell",
|
|
5413
|
-
"rustix",
|
|
5414
|
-
"windows-sys 0.61.2",
|
|
5415
|
-
]
|
|
5416
|
-
|
|
5417
5232
|
[[package]]
|
|
5418
5233
|
name = "tendril"
|
|
5419
5234
|
version = "0.4.3"
|
|
@@ -6122,12 +5937,6 @@ version = "1.0.4"
|
|
|
6122
5937
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
6123
5938
|
checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
|
|
6124
5939
|
|
|
6125
|
-
[[package]]
|
|
6126
|
-
name = "utf8parse"
|
|
6127
|
-
version = "0.2.2"
|
|
6128
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
6129
|
-
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
|
6130
|
-
|
|
6131
5940
|
[[package]]
|
|
6132
5941
|
name = "uuid"
|
|
6133
5942
|
version = "1.19.0"
|
|
@@ -6829,9 +6638,9 @@ dependencies = [
|
|
|
6829
6638
|
|
|
6830
6639
|
[[package]]
|
|
6831
6640
|
name = "zip"
|
|
6832
|
-
version = "
|
|
6641
|
+
version = "7.0.0"
|
|
6833
6642
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
6834
|
-
checksum = "
|
|
6643
|
+
checksum = "bdd8a47718a4ee5fe78e07667cd36f3de80e7c2bfe727c7074245ffc7303c037"
|
|
6835
6644
|
dependencies = [
|
|
6836
6645
|
"aes",
|
|
6837
6646
|
"arbitrary",
|
|
@@ -6840,6 +6649,7 @@ dependencies = [
|
|
|
6840
6649
|
"crc32fast",
|
|
6841
6650
|
"deflate64",
|
|
6842
6651
|
"flate2",
|
|
6652
|
+
"generic-array",
|
|
6843
6653
|
"getrandom 0.3.4",
|
|
6844
6654
|
"hmac",
|
|
6845
6655
|
"indexmap",
|
|
@@ -7,7 +7,7 @@ rb-sys = { path = "../../../vendor/rb-sys" }
|
|
|
7
7
|
|
|
8
8
|
[package]
|
|
9
9
|
name = "kreuzberg-rb"
|
|
10
|
-
version = "4.0.0-rc.
|
|
10
|
+
version = "4.0.0-rc.16"
|
|
11
11
|
edition = "2024"
|
|
12
12
|
rust-version = "1.91"
|
|
13
13
|
authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
|
|
@@ -30,7 +30,6 @@ default = []
|
|
|
30
30
|
[dependencies]
|
|
31
31
|
async-trait = "0.1.89"
|
|
32
32
|
kreuzberg = { path = "../../../vendor/kreuzberg", features = ["full"] }
|
|
33
|
-
kreuzberg-ffi = { path = "../../../vendor/kreuzberg-ffi", features = ["embeddings"] }
|
|
34
33
|
magnus = { git = "https://github.com/matsadler/magnus", rev = "f6db11769efb517427bf7f121f9c32e18b059b38", features = [
|
|
35
34
|
"rb-sys",
|
|
36
35
|
] }
|
|
@@ -1,5 +1,16 @@
|
|
|
1
1
|
#[cfg(target_os = "macos")]
|
|
2
2
|
fn main() {
|
|
3
|
+
if let Ok(cargo_manifest_dir) = std::env::var("CARGO_MANIFEST_DIR") {
|
|
4
|
+
let lib_path = std::path::Path::new(&cargo_manifest_dir)
|
|
5
|
+
.parent()
|
|
6
|
+
.and_then(|p| p.parent())
|
|
7
|
+
.and_then(|p| p.parent())
|
|
8
|
+
.and_then(|p| p.parent())
|
|
9
|
+
.and_then(|p| p.parent())
|
|
10
|
+
.map(|p| p.join("target/release"))
|
|
11
|
+
.expect("Failed to construct lib path");
|
|
12
|
+
println!("cargo:rustc-link-search={}", lib_path.display());
|
|
13
|
+
}
|
|
3
14
|
println!("cargo:rustc-link-arg=-Wl,-undefined,dynamic_lookup");
|
|
4
15
|
println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path");
|
|
5
16
|
println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path/.");
|
|
@@ -7,9 +18,35 @@ fn main() {
|
|
|
7
18
|
|
|
8
19
|
#[cfg(target_os = "linux")]
|
|
9
20
|
fn main() {
|
|
21
|
+
if let Ok(cargo_manifest_dir) = std::env::var("CARGO_MANIFEST_DIR") {
|
|
22
|
+
let lib_path = std::path::Path::new(&cargo_manifest_dir)
|
|
23
|
+
.parent()
|
|
24
|
+
.and_then(|p| p.parent())
|
|
25
|
+
.and_then(|p| p.parent())
|
|
26
|
+
.and_then(|p| p.parent())
|
|
27
|
+
.and_then(|p| p.parent())
|
|
28
|
+
.map(|p| p.join("target/release"))
|
|
29
|
+
.expect("Failed to construct lib path");
|
|
30
|
+
println!("cargo:rustc-link-search={}", lib_path.display());
|
|
31
|
+
}
|
|
10
32
|
println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN");
|
|
11
33
|
println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN/.");
|
|
12
34
|
}
|
|
13
35
|
|
|
14
|
-
#[cfg(
|
|
36
|
+
#[cfg(target_os = "windows")]
|
|
37
|
+
fn main() {
|
|
38
|
+
if let Ok(cargo_manifest_dir) = std::env::var("CARGO_MANIFEST_DIR") {
|
|
39
|
+
let lib_path = std::path::Path::new(&cargo_manifest_dir)
|
|
40
|
+
.parent()
|
|
41
|
+
.and_then(|p| p.parent())
|
|
42
|
+
.and_then(|p| p.parent())
|
|
43
|
+
.and_then(|p| p.parent())
|
|
44
|
+
.and_then(|p| p.parent())
|
|
45
|
+
.map(|p| p.join("target/release"))
|
|
46
|
+
.expect("Failed to construct lib path");
|
|
47
|
+
println!("cargo:rustc-link-search={}", lib_path.display());
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
#[cfg(not(any(target_os = "macos", target_os = "linux", target_os = "windows")))]
|
|
15
52
|
fn main() {}
|