kreuzberg 4.1.2 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/README.md +1 -1
- data/ext/kreuzberg_rb/native/libpdfium.so +0 -0
- data/kreuzberg.gemspec +13 -1
- data/lib/kreuzberg/config.rb +70 -35
- data/lib/kreuzberg/version.rb +1 -1
- data/sig/kreuzberg.rbs +5 -1
- data/spec/binding/batch_operations_spec.rb +80 -0
- data/spec/binding/metadata_types_spec.rb +77 -57
- data/spec/serialization_spec.rb +134 -0
- data/spec/unit/config/output_format_spec.rb +380 -0
- data/vendor/Cargo.toml +1 -1
- data/vendor/kreuzberg/Cargo.toml +1 -1
- data/vendor/kreuzberg/README.md +1 -1
- data/vendor/kreuzberg/src/embeddings.rs +4 -4
- data/vendor/kreuzberg/src/mcp/format.rs +237 -39
- data/vendor/kreuzberg/src/mcp/params.rs +26 -33
- data/vendor/kreuzberg/src/mcp/server.rs +6 -3
- data/vendor/kreuzberg/src/mcp/tools/extraction.rs +16 -23
- data/vendor/kreuzberg/tests/api_chunk.rs +40 -30
- data/vendor/kreuzberg/tests/api_consistency.rs +349 -0
- data/vendor/kreuzberg/tests/api_embed.rs +84 -50
- data/vendor/kreuzberg/tests/api_large_pdf_extraction_diagnostics.rs +8 -2
- data/vendor/kreuzberg/tests/api_tests.rs +298 -139
- data/vendor/kreuzberg/tests/archive_integration.rs +63 -56
- data/vendor/kreuzberg/tests/batch_orchestration.rs +22 -14
- data/vendor/kreuzberg/tests/batch_pooling_benchmark.rs +13 -13
- data/vendor/kreuzberg/tests/batch_processing.rs +13 -9
- data/vendor/kreuzberg/tests/bibtex_parity_test.rs +10 -10
- data/vendor/kreuzberg/tests/concurrency_stress.rs +10 -6
- data/vendor/kreuzberg/tests/config_behavioral.rs +414 -0
- data/vendor/kreuzberg/tests/config_features.rs +19 -15
- data/vendor/kreuzberg/tests/config_integration_test.rs +68 -68
- data/vendor/kreuzberg/tests/config_loading_tests.rs +71 -62
- data/vendor/kreuzberg/tests/contract_mcp.rs +314 -0
- data/vendor/kreuzberg/tests/core_integration.rs +55 -53
- data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +23 -23
- data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +15 -14
- data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +4 -4
- data/vendor/kreuzberg/tests/email_integration.rs +7 -7
- data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/error_handling.rs +13 -11
- data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/html_table_test.rs +11 -11
- data/vendor/kreuzberg/tests/instrumentation_test.rs +18 -13
- data/vendor/kreuzberg/tests/jats_extractor_tests.rs +17 -17
- data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +9 -9
- data/vendor/kreuzberg/tests/keywords_integration.rs +25 -25
- data/vendor/kreuzberg/tests/keywords_quality.rs +9 -9
- data/vendor/kreuzberg/tests/latex_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +1 -1
- data/vendor/kreuzberg/tests/mcp_integration.rs +849 -0
- data/vendor/kreuzberg/tests/mime_detection.rs +72 -41
- data/vendor/kreuzberg/tests/ocr_errors.rs +10 -4
- data/vendor/kreuzberg/tests/ocr_language_registry.rs +1 -1
- data/vendor/kreuzberg/tests/ocr_stress.rs +3 -3
- data/vendor/kreuzberg/tests/odt_extractor_tests.rs +6 -6
- data/vendor/kreuzberg/tests/opml_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/page_markers.rs +1 -1
- data/vendor/kreuzberg/tests/pdf_hierarchy_detection.rs +6 -6
- data/vendor/kreuzberg/tests/pdf_text_merging.rs +2 -2
- data/vendor/kreuzberg/tests/pipeline_integration.rs +77 -61
- data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +97 -77
- data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +78 -61
- data/vendor/kreuzberg/tests/plugin_system.rs +49 -46
- data/vendor/kreuzberg/tests/plugin_validator_test.rs +109 -97
- data/vendor/kreuzberg/tests/pptx_regression_tests.rs +40 -30
- data/vendor/kreuzberg/tests/registry_integration_tests.rs +26 -23
- data/vendor/kreuzberg/tests/rst_extractor_tests.rs +1 -1
- data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +18 -18
- data/vendor/kreuzberg/tests/security_validation.rs +20 -19
- data/vendor/kreuzberg/tests/serialization_integration.rs +112 -0
- data/vendor/kreuzberg/tests/stopwords_integration_test.rs +36 -36
- data/vendor/kreuzberg/tests/test_fastembed.rs +8 -8
- data/vendor/kreuzberg/tests/typst_extractor_tests.rs +9 -9
- data/vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs +12 -9
- data/vendor/kreuzberg-tesseract/Cargo.toml +1 -1
- metadata +10 -2
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# rubocop:disable RSpec/RepeatedExample
|
|
4
|
+
RSpec.describe 'Output Format and Result Format Configuration' do
|
|
5
|
+
describe Kreuzberg::Config::Extraction do
|
|
6
|
+
describe 'output_format' do
|
|
7
|
+
it 'accepts output_format as initialization parameter' do
|
|
8
|
+
config = described_class.new(output_format: 'markdown')
|
|
9
|
+
|
|
10
|
+
expect(config.output_format).to eq 'markdown'
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
it 'defaults to nil when not specified' do
|
|
14
|
+
config = described_class.new
|
|
15
|
+
|
|
16
|
+
expect(config.output_format).to be_nil
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
it 'accepts plain format' do
|
|
20
|
+
config = described_class.new(output_format: 'plain')
|
|
21
|
+
|
|
22
|
+
expect(config.output_format).to eq 'plain'
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
it 'accepts markdown format' do
|
|
26
|
+
config = described_class.new(output_format: 'markdown')
|
|
27
|
+
|
|
28
|
+
expect(config.output_format).to eq 'markdown'
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
it 'accepts djot format' do
|
|
32
|
+
config = described_class.new(output_format: 'djot')
|
|
33
|
+
|
|
34
|
+
expect(config.output_format).to eq 'djot'
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
it 'accepts html format' do
|
|
38
|
+
config = described_class.new(output_format: 'html')
|
|
39
|
+
|
|
40
|
+
expect(config.output_format).to eq 'html'
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
it 'converts output_format to string' do
|
|
44
|
+
config = described_class.new(output_format: :markdown)
|
|
45
|
+
|
|
46
|
+
expect(config.output_format).to eq 'markdown'
|
|
47
|
+
expect(config.output_format).to be_a String
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
it 'includes output_format in to_h' do
|
|
51
|
+
config = described_class.new(output_format: 'markdown')
|
|
52
|
+
hash = config.to_h
|
|
53
|
+
|
|
54
|
+
expect(hash[:output_format]).to eq 'markdown'
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
it 'excludes nil output_format from to_h' do
|
|
58
|
+
config = described_class.new(output_format: nil)
|
|
59
|
+
hash = config.to_h
|
|
60
|
+
|
|
61
|
+
expect(hash.key?(:output_format)).to be false
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
it 'includes output_format in JSON' do
|
|
65
|
+
config = described_class.new(output_format: 'markdown')
|
|
66
|
+
json = config.to_json
|
|
67
|
+
parsed = JSON.parse(json)
|
|
68
|
+
|
|
69
|
+
expect(parsed['output_format']).to eq 'markdown'
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
it 'retrieves output_format with get_field' do
|
|
73
|
+
config = described_class.new(output_format: 'djot')
|
|
74
|
+
|
|
75
|
+
expect(config.get_field('output_format')).to eq 'djot'
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
it 'can be set with []=' do
|
|
79
|
+
config = described_class.new
|
|
80
|
+
config[:output_format] = 'html'
|
|
81
|
+
|
|
82
|
+
expect(config.output_format).to eq 'html'
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
it 'can be set with []= using symbol' do
|
|
86
|
+
config = described_class.new
|
|
87
|
+
config[:output_format] = :plain
|
|
88
|
+
|
|
89
|
+
expect(config.output_format).to eq 'plain'
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
it 'can be retrieved with []' do
|
|
93
|
+
config = described_class.new(output_format: 'markdown')
|
|
94
|
+
|
|
95
|
+
expect(config[:output_format]).to eq 'markdown'
|
|
96
|
+
end
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
describe 'result_format' do
|
|
100
|
+
it 'accepts result_format as initialization parameter' do
|
|
101
|
+
config = described_class.new(result_format: 'unified')
|
|
102
|
+
|
|
103
|
+
expect(config.result_format).to eq 'unified'
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
it 'defaults to nil when not specified' do
|
|
107
|
+
config = described_class.new
|
|
108
|
+
|
|
109
|
+
expect(config.result_format).to be_nil
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
it 'accepts unified format' do
|
|
113
|
+
config = described_class.new(result_format: 'unified')
|
|
114
|
+
|
|
115
|
+
expect(config.result_format).to eq 'unified'
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
it 'accepts element_based format' do
|
|
119
|
+
config = described_class.new(result_format: 'element_based')
|
|
120
|
+
|
|
121
|
+
expect(config.result_format).to eq 'element_based'
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
it 'converts result_format to string' do
|
|
125
|
+
config = described_class.new(result_format: :unified)
|
|
126
|
+
|
|
127
|
+
expect(config.result_format).to eq 'unified'
|
|
128
|
+
expect(config.result_format).to be_a String
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
it 'includes result_format in to_h' do
|
|
132
|
+
config = described_class.new(result_format: 'element_based')
|
|
133
|
+
hash = config.to_h
|
|
134
|
+
|
|
135
|
+
expect(hash[:result_format]).to eq 'element_based'
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
it 'excludes nil result_format from to_h' do
|
|
139
|
+
config = described_class.new(result_format: nil)
|
|
140
|
+
hash = config.to_h
|
|
141
|
+
|
|
142
|
+
expect(hash.key?(:result_format)).to be false
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
it 'includes result_format in JSON' do
|
|
146
|
+
config = described_class.new(result_format: 'element_based')
|
|
147
|
+
json = config.to_json
|
|
148
|
+
parsed = JSON.parse(json)
|
|
149
|
+
|
|
150
|
+
expect(parsed['result_format']).to eq 'element_based'
|
|
151
|
+
end
|
|
152
|
+
|
|
153
|
+
it 'retrieves result_format with get_field' do
|
|
154
|
+
config = described_class.new(result_format: 'unified')
|
|
155
|
+
|
|
156
|
+
expect(config.get_field('result_format')).to eq 'unified'
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
it 'can be set with []=' do
|
|
160
|
+
config = described_class.new
|
|
161
|
+
config[:result_format] = 'unified'
|
|
162
|
+
|
|
163
|
+
expect(config.result_format).to eq 'unified'
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
it 'can be set with []= using symbol' do
|
|
167
|
+
config = described_class.new
|
|
168
|
+
config[:result_format] = :element_based
|
|
169
|
+
|
|
170
|
+
expect(config.result_format).to eq 'element_based'
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
it 'can be retrieved with []' do
|
|
174
|
+
config = described_class.new(result_format: 'element_based')
|
|
175
|
+
|
|
176
|
+
expect(config[:result_format]).to eq 'element_based'
|
|
177
|
+
end
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
describe 'combined output and result formats' do
|
|
181
|
+
it 'accepts both output_format and result_format' do
|
|
182
|
+
config = described_class.new(
|
|
183
|
+
output_format: 'markdown',
|
|
184
|
+
result_format: 'unified'
|
|
185
|
+
)
|
|
186
|
+
|
|
187
|
+
expect(config.output_format).to eq 'markdown'
|
|
188
|
+
expect(config.result_format).to eq 'unified'
|
|
189
|
+
end
|
|
190
|
+
|
|
191
|
+
it 'serializes both formats in to_h' do
|
|
192
|
+
config = described_class.new(
|
|
193
|
+
output_format: 'djot',
|
|
194
|
+
result_format: 'element_based'
|
|
195
|
+
)
|
|
196
|
+
hash = config.to_h
|
|
197
|
+
|
|
198
|
+
expect(hash[:output_format]).to eq 'djot'
|
|
199
|
+
expect(hash[:result_format]).to eq 'element_based'
|
|
200
|
+
end
|
|
201
|
+
|
|
202
|
+
it 'serializes both formats in JSON' do
|
|
203
|
+
config = described_class.new(
|
|
204
|
+
output_format: 'html',
|
|
205
|
+
result_format: 'unified'
|
|
206
|
+
)
|
|
207
|
+
json = config.to_json
|
|
208
|
+
parsed = JSON.parse(json)
|
|
209
|
+
|
|
210
|
+
expect(parsed['output_format']).to eq 'html'
|
|
211
|
+
expect(parsed['result_format']).to eq 'unified'
|
|
212
|
+
end
|
|
213
|
+
|
|
214
|
+
it 'merges both formats correctly' do
|
|
215
|
+
base = described_class.new(
|
|
216
|
+
output_format: 'markdown',
|
|
217
|
+
result_format: 'unified'
|
|
218
|
+
)
|
|
219
|
+
override = described_class.new(output_format: 'html')
|
|
220
|
+
merged = base.merge(override)
|
|
221
|
+
|
|
222
|
+
expect(merged.output_format).to eq 'html'
|
|
223
|
+
expect(merged.result_format).to eq 'unified'
|
|
224
|
+
end
|
|
225
|
+
|
|
226
|
+
it 'merges both formats with merge!' do
|
|
227
|
+
config = described_class.new(
|
|
228
|
+
output_format: 'markdown',
|
|
229
|
+
result_format: 'unified'
|
|
230
|
+
)
|
|
231
|
+
override = described_class.new(
|
|
232
|
+
output_format: 'djot',
|
|
233
|
+
result_format: 'element_based'
|
|
234
|
+
)
|
|
235
|
+
config.merge!(override)
|
|
236
|
+
|
|
237
|
+
expect(config.output_format).to eq 'djot'
|
|
238
|
+
expect(config.result_format).to eq 'element_based'
|
|
239
|
+
end
|
|
240
|
+
|
|
241
|
+
it 'handles merge with hash containing both formats' do
|
|
242
|
+
config = described_class.new(
|
|
243
|
+
output_format: 'plain',
|
|
244
|
+
result_format: 'unified'
|
|
245
|
+
)
|
|
246
|
+
merged = config.merge({ output_format: 'markdown' })
|
|
247
|
+
|
|
248
|
+
expect(merged.output_format).to eq 'markdown'
|
|
249
|
+
expect(merged.result_format).to eq 'unified'
|
|
250
|
+
end
|
|
251
|
+
end
|
|
252
|
+
|
|
253
|
+
describe 'format persistence across operations' do
|
|
254
|
+
it 'persists output_format through multiple conversions' do
|
|
255
|
+
config = described_class.new(output_format: 'markdown')
|
|
256
|
+
hash = config.to_h
|
|
257
|
+
new_config = described_class.new(**hash)
|
|
258
|
+
|
|
259
|
+
expect(new_config.output_format).to eq 'markdown'
|
|
260
|
+
end
|
|
261
|
+
|
|
262
|
+
it 'persists result_format through multiple conversions' do
|
|
263
|
+
config = described_class.new(result_format: 'element_based')
|
|
264
|
+
hash = config.to_h
|
|
265
|
+
new_config = described_class.new(**hash)
|
|
266
|
+
|
|
267
|
+
expect(new_config.result_format).to eq 'element_based'
|
|
268
|
+
end
|
|
269
|
+
|
|
270
|
+
it 'round-trips through JSON' do
|
|
271
|
+
config = described_class.new(
|
|
272
|
+
output_format: 'djot',
|
|
273
|
+
result_format: 'unified'
|
|
274
|
+
)
|
|
275
|
+
json = config.to_json
|
|
276
|
+
parsed = JSON.parse(json)
|
|
277
|
+
new_config = described_class.new(**parsed.transform_keys(&:to_sym))
|
|
278
|
+
|
|
279
|
+
expect(new_config.output_format).to eq 'djot'
|
|
280
|
+
expect(new_config.result_format).to eq 'unified'
|
|
281
|
+
end
|
|
282
|
+
end
|
|
283
|
+
|
|
284
|
+
describe 'format validation and edge cases' do
|
|
285
|
+
it 'handles empty string output_format' do
|
|
286
|
+
config = described_class.new(output_format: '')
|
|
287
|
+
|
|
288
|
+
expect(config.output_format).to eq ''
|
|
289
|
+
end
|
|
290
|
+
|
|
291
|
+
it 'handles empty string result_format' do
|
|
292
|
+
config = described_class.new(result_format: '')
|
|
293
|
+
|
|
294
|
+
expect(config.result_format).to eq ''
|
|
295
|
+
end
|
|
296
|
+
|
|
297
|
+
it 'handles whitespace in output_format' do
|
|
298
|
+
config = described_class.new(output_format: ' plain ')
|
|
299
|
+
|
|
300
|
+
expect(config.output_format).to eq ' plain '
|
|
301
|
+
end
|
|
302
|
+
|
|
303
|
+
it 'handles case sensitivity in output_format' do
|
|
304
|
+
config = described_class.new(output_format: 'MarkDown')
|
|
305
|
+
|
|
306
|
+
expect(config.output_format).to eq 'MarkDown'
|
|
307
|
+
end
|
|
308
|
+
|
|
309
|
+
it 'handles custom string in result_format' do
|
|
310
|
+
config = described_class.new(result_format: 'custom_format')
|
|
311
|
+
|
|
312
|
+
expect(config.result_format).to eq 'custom_format'
|
|
313
|
+
end
|
|
314
|
+
end
|
|
315
|
+
|
|
316
|
+
describe 'integration with other config fields' do
|
|
317
|
+
it 'works with output_format and chunking together' do
|
|
318
|
+
config = described_class.new(
|
|
319
|
+
output_format: 'markdown',
|
|
320
|
+
chunking: { max_chars: 500 }
|
|
321
|
+
)
|
|
322
|
+
|
|
323
|
+
expect(config.output_format).to eq 'markdown'
|
|
324
|
+
expect(config.chunking.max_chars).to eq 500
|
|
325
|
+
end
|
|
326
|
+
|
|
327
|
+
it 'works with result_format and OCR together' do
|
|
328
|
+
config = described_class.new(
|
|
329
|
+
result_format: 'element_based',
|
|
330
|
+
ocr: { backend: 'tesseract' }
|
|
331
|
+
)
|
|
332
|
+
|
|
333
|
+
expect(config.result_format).to eq 'element_based'
|
|
334
|
+
expect(config.ocr.backend).to eq 'tesseract'
|
|
335
|
+
end
|
|
336
|
+
|
|
337
|
+
it 'works with both formats and language detection' do
|
|
338
|
+
config = described_class.new(
|
|
339
|
+
output_format: 'html',
|
|
340
|
+
result_format: 'unified',
|
|
341
|
+
language_detection: { enabled: true }
|
|
342
|
+
)
|
|
343
|
+
|
|
344
|
+
expect(config.output_format).to eq 'html'
|
|
345
|
+
expect(config.result_format).to eq 'unified'
|
|
346
|
+
expect(config.language_detection.enabled).to be true
|
|
347
|
+
end
|
|
348
|
+
|
|
349
|
+
it 'preserves formats in complex config merge' do
|
|
350
|
+
base = described_class.new(
|
|
351
|
+
output_format: 'markdown',
|
|
352
|
+
result_format: 'unified',
|
|
353
|
+
chunking: { max_chars: 500 },
|
|
354
|
+
ocr: { backend: 'tesseract' }
|
|
355
|
+
)
|
|
356
|
+
override = described_class.new(
|
|
357
|
+
output_format: 'djot',
|
|
358
|
+
chunking: { max_chars: 750 }
|
|
359
|
+
)
|
|
360
|
+
merged = base.merge(override)
|
|
361
|
+
|
|
362
|
+
expect(merged.output_format).to eq 'djot'
|
|
363
|
+
expect(merged.result_format).to eq 'unified'
|
|
364
|
+
expect(merged.chunking.max_chars).to eq 750
|
|
365
|
+
expect(merged.ocr.backend).to eq 'tesseract'
|
|
366
|
+
end
|
|
367
|
+
end
|
|
368
|
+
|
|
369
|
+
describe 'allowed keys integration' do
|
|
370
|
+
it 'includes output_format in ALLOWED_KEYS' do
|
|
371
|
+
expect(Kreuzberg::Config::Extraction::ALLOWED_KEYS).to include(:output_format)
|
|
372
|
+
end
|
|
373
|
+
|
|
374
|
+
it 'includes result_format in ALLOWED_KEYS' do
|
|
375
|
+
expect(Kreuzberg::Config::Extraction::ALLOWED_KEYS).to include(:result_format)
|
|
376
|
+
end
|
|
377
|
+
end
|
|
378
|
+
end
|
|
379
|
+
end
|
|
380
|
+
# rubocop:enable RSpec/RepeatedExample
|
data/vendor/Cargo.toml
CHANGED
data/vendor/kreuzberg/Cargo.toml
CHANGED
data/vendor/kreuzberg/README.md
CHANGED
|
@@ -17,7 +17,7 @@ High-performance document intelligence library for Rust. Extract text, metadata,
|
|
|
17
17
|
|
|
18
18
|
This is the core Rust library that powers the Python, TypeScript, and Ruby bindings.
|
|
19
19
|
|
|
20
|
-
> **🚀 Version 4.1.2 Release**
|
|
20
|
+
> **🚀 Version 4.2.0 Release**
|
|
21
21
|
> This is a pre-release version. We invite you to test the library and [report any issues](https://github.com/kreuzberg-dev/kreuzberg/issues) you encounter.
|
|
22
22
|
>
|
|
23
23
|
> **Note**: The Rust crate is not currently published to crates.io for this RC. Use git dependencies or language bindings (Python, TypeScript, Ruby) instead.
|
|
@@ -219,10 +219,10 @@ pub fn get_or_init_model(
|
|
|
219
219
|
// This prevents panics that cannot unwind through FFI boundaries
|
|
220
220
|
fn ensure_onnx_available() -> Result<(), String> {
|
|
221
221
|
// Check if ORT_DYLIB_PATH is already set and valid
|
|
222
|
-
if let Ok(path) = std::env::var("ORT_DYLIB_PATH") {
|
|
223
|
-
if std::path::Path::new(&path).exists() {
|
|
224
|
-
return Ok(());
|
|
225
|
-
}
|
|
222
|
+
if let Ok(path) = std::env::var("ORT_DYLIB_PATH")
|
|
223
|
+
&& std::path::Path::new(&path).exists()
|
|
224
|
+
{
|
|
225
|
+
return Ok(());
|
|
226
226
|
}
|
|
227
227
|
|
|
228
228
|
// Check common installation paths and set ORT_DYLIB_PATH if found
|