kreuzberg 4.0.0.pre.rc.6 → 4.0.0.pre.rc.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +5 -5
- data/README.md +15 -9
- data/ext/kreuzberg_rb/native/.cargo/config.toml +2 -0
- data/ext/kreuzberg_rb/native/Cargo.lock +511 -325
- data/ext/kreuzberg_rb/native/Cargo.toml +13 -3
- data/ext/kreuzberg_rb/native/src/lib.rs +139 -2
- data/kreuzberg.gemspec +38 -4
- data/lib/kreuzberg/config.rb +34 -1
- data/lib/kreuzberg/result.rb +77 -14
- data/lib/kreuzberg/version.rb +1 -1
- data/sig/kreuzberg.rbs +23 -6
- data/vendor/kreuzberg/Cargo.toml +32 -11
- data/vendor/kreuzberg/README.md +54 -8
- data/vendor/kreuzberg/build.rs +549 -132
- data/vendor/kreuzberg/src/chunking/mod.rs +1279 -79
- data/vendor/kreuzberg/src/chunking/processor.rs +220 -0
- data/vendor/kreuzberg/src/core/config.rs +49 -1
- data/vendor/kreuzberg/src/core/extractor.rs +134 -2
- data/vendor/kreuzberg/src/core/mod.rs +4 -2
- data/vendor/kreuzberg/src/core/pipeline.rs +188 -1
- data/vendor/kreuzberg/src/extraction/docx.rs +358 -0
- data/vendor/kreuzberg/src/extraction/html.rs +24 -8
- data/vendor/kreuzberg/src/extraction/image.rs +124 -1
- data/vendor/kreuzberg/src/extraction/libreoffice.rs +1 -2
- data/vendor/kreuzberg/src/extraction/office_metadata/odt_properties.rs +0 -3
- data/vendor/kreuzberg/src/extraction/pptx.rs +187 -87
- data/vendor/kreuzberg/src/extractors/archive.rs +1 -0
- data/vendor/kreuzberg/src/extractors/bibtex.rs +1 -0
- data/vendor/kreuzberg/src/extractors/docbook.rs +2 -0
- data/vendor/kreuzberg/src/extractors/docx.rs +50 -17
- data/vendor/kreuzberg/src/extractors/email.rs +29 -15
- data/vendor/kreuzberg/src/extractors/epub.rs +1 -0
- data/vendor/kreuzberg/src/extractors/excel.rs +2 -0
- data/vendor/kreuzberg/src/extractors/fictionbook.rs +1 -0
- data/vendor/kreuzberg/src/extractors/html.rs +29 -15
- data/vendor/kreuzberg/src/extractors/image.rs +25 -4
- data/vendor/kreuzberg/src/extractors/jats.rs +3 -0
- data/vendor/kreuzberg/src/extractors/jupyter.rs +1 -0
- data/vendor/kreuzberg/src/extractors/latex.rs +1 -0
- data/vendor/kreuzberg/src/extractors/markdown.rs +1 -0
- data/vendor/kreuzberg/src/extractors/mod.rs +78 -14
- data/vendor/kreuzberg/src/extractors/odt.rs +3 -3
- data/vendor/kreuzberg/src/extractors/opml.rs +1 -0
- data/vendor/kreuzberg/src/extractors/orgmode.rs +1 -0
- data/vendor/kreuzberg/src/extractors/pdf.rs +197 -17
- data/vendor/kreuzberg/src/extractors/pptx.rs +32 -13
- data/vendor/kreuzberg/src/extractors/rst.rs +1 -0
- data/vendor/kreuzberg/src/extractors/rtf.rs +3 -4
- data/vendor/kreuzberg/src/extractors/structured.rs +2 -0
- data/vendor/kreuzberg/src/extractors/text.rs +7 -2
- data/vendor/kreuzberg/src/extractors/typst.rs +1 -0
- data/vendor/kreuzberg/src/extractors/xml.rs +27 -15
- data/vendor/kreuzberg/src/keywords/processor.rs +9 -1
- data/vendor/kreuzberg/src/language_detection/mod.rs +43 -0
- data/vendor/kreuzberg/src/language_detection/processor.rs +219 -0
- data/vendor/kreuzberg/src/lib.rs +10 -2
- data/vendor/kreuzberg/src/mcp/mod.rs +3 -0
- data/vendor/kreuzberg/src/mcp/server.rs +120 -12
- data/vendor/kreuzberg/src/ocr/tesseract_backend.rs +2 -0
- data/vendor/kreuzberg/src/pdf/bundled.rs +328 -0
- data/vendor/kreuzberg/src/pdf/error.rs +8 -0
- data/vendor/kreuzberg/src/pdf/metadata.rs +238 -95
- data/vendor/kreuzberg/src/pdf/mod.rs +18 -2
- data/vendor/kreuzberg/src/pdf/rendering.rs +1 -2
- data/vendor/kreuzberg/src/pdf/table.rs +26 -2
- data/vendor/kreuzberg/src/pdf/text.rs +89 -7
- data/vendor/kreuzberg/src/plugins/extractor.rs +34 -3
- data/vendor/kreuzberg/src/plugins/mod.rs +3 -0
- data/vendor/kreuzberg/src/plugins/ocr.rs +22 -3
- data/vendor/kreuzberg/src/plugins/processor.rs +8 -0
- data/vendor/kreuzberg/src/plugins/registry.rs +2 -0
- data/vendor/kreuzberg/src/plugins/validator.rs +11 -0
- data/vendor/kreuzberg/src/text/mod.rs +6 -0
- data/vendor/kreuzberg/src/text/quality_processor.rs +219 -0
- data/vendor/kreuzberg/src/types.rs +173 -21
- data/vendor/kreuzberg/tests/archive_integration.rs +2 -0
- data/vendor/kreuzberg/tests/batch_processing.rs +5 -3
- data/vendor/kreuzberg/tests/concurrency_stress.rs +14 -6
- data/vendor/kreuzberg/tests/config_features.rs +15 -1
- data/vendor/kreuzberg/tests/config_loading_tests.rs +1 -0
- data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +2 -0
- data/vendor/kreuzberg/tests/email_integration.rs +2 -0
- data/vendor/kreuzberg/tests/error_handling.rs +43 -34
- data/vendor/kreuzberg/tests/format_integration.rs +2 -0
- data/vendor/kreuzberg/tests/image_integration.rs +2 -0
- data/vendor/kreuzberg/tests/mime_detection.rs +17 -16
- data/vendor/kreuzberg/tests/ocr_configuration.rs +4 -0
- data/vendor/kreuzberg/tests/ocr_errors.rs +22 -0
- data/vendor/kreuzberg/tests/ocr_quality.rs +2 -0
- data/vendor/kreuzberg/tests/odt_extractor_tests.rs +0 -21
- data/vendor/kreuzberg/tests/pdf_integration.rs +2 -0
- data/vendor/kreuzberg/tests/pdfium_linking.rs +374 -0
- data/vendor/kreuzberg/tests/pipeline_integration.rs +25 -0
- data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +5 -0
- data/vendor/kreuzberg/tests/plugin_system.rs +6 -0
- data/vendor/kreuzberg/tests/registry_integration_tests.rs +1 -0
- data/vendor/kreuzberg/tests/rst_extractor_tests.rs +2 -0
- data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +0 -1
- data/vendor/kreuzberg/tests/security_validation.rs +1 -0
- data/vendor/kreuzberg/tests/test_fastembed.rs +45 -23
- data/vendor/kreuzberg/tests/typst_behavioral_tests.rs +1 -0
- data/vendor/kreuzberg/tests/typst_extractor_tests.rs +3 -2
- data/vendor/rb-sys/.cargo_vcs_info.json +2 -2
- data/vendor/rb-sys/Cargo.lock +15 -15
- data/vendor/rb-sys/Cargo.toml +4 -4
- data/vendor/rb-sys/Cargo.toml.orig +4 -4
- data/vendor/rb-sys/build/features.rs +5 -2
- data/vendor/rb-sys/build/main.rs +55 -15
- data/vendor/rb-sys/build/stable_api_config.rs +4 -2
- data/vendor/rb-sys/build/version.rs +3 -1
- data/vendor/rb-sys/src/lib.rs +1 -0
- data/vendor/rb-sys/src/macros.rs +2 -2
- data/vendor/rb-sys/src/special_consts.rs +1 -1
- data/vendor/rb-sys/src/stable_api/compiled.rs +1 -1
- data/vendor/rb-sys/src/stable_api/ruby_2_7.rs +12 -4
- data/vendor/rb-sys/src/stable_api/ruby_3_0.rs +12 -4
- data/vendor/rb-sys/src/stable_api/ruby_3_1.rs +12 -4
- data/vendor/rb-sys/src/stable_api/ruby_3_2.rs +12 -4
- data/vendor/rb-sys/src/stable_api/ruby_3_3.rs +19 -6
- data/vendor/rb-sys/src/stable_api/ruby_3_4.rs +17 -5
- data/vendor/rb-sys/src/stable_api.rs +0 -1
- data/vendor/rb-sys/src/tracking_allocator.rs +1 -3
- metadata +13 -10
- data/vendor/kreuzberg/src/extractors/fictionbook.rs.backup2 +0 -738
- data/vendor/rb-sys/.cargo-ok +0 -1
- data/vendor/rb-sys/src/stable_api/ruby_2_6.rs +0 -316
|
@@ -144,33 +144,39 @@ async fn test_generate_embeddings_for_chunks_basic() {
|
|
|
144
144
|
content: "Hello world, this is the first chunk.".to_string(),
|
|
145
145
|
embedding: None,
|
|
146
146
|
metadata: ChunkMetadata {
|
|
147
|
-
|
|
148
|
-
|
|
147
|
+
byte_start: 0,
|
|
148
|
+
byte_end: 38,
|
|
149
149
|
chunk_index: 0,
|
|
150
150
|
total_chunks: 1,
|
|
151
151
|
token_count: None,
|
|
152
|
+
first_page: None,
|
|
153
|
+
last_page: None,
|
|
152
154
|
},
|
|
153
155
|
},
|
|
154
156
|
Chunk {
|
|
155
157
|
content: "This is the second chunk with different content.".to_string(),
|
|
156
158
|
embedding: None,
|
|
157
159
|
metadata: ChunkMetadata {
|
|
158
|
-
|
|
159
|
-
|
|
160
|
+
byte_start: 39,
|
|
161
|
+
byte_end: 87,
|
|
160
162
|
chunk_index: 1,
|
|
161
163
|
total_chunks: 1,
|
|
162
164
|
token_count: None,
|
|
165
|
+
first_page: None,
|
|
166
|
+
last_page: None,
|
|
163
167
|
},
|
|
164
168
|
},
|
|
165
169
|
Chunk {
|
|
166
170
|
content: "And this is the third and final chunk.".to_string(),
|
|
167
171
|
embedding: None,
|
|
168
172
|
metadata: ChunkMetadata {
|
|
169
|
-
|
|
170
|
-
|
|
173
|
+
byte_start: 88,
|
|
174
|
+
byte_end: 126,
|
|
171
175
|
chunk_index: 2,
|
|
172
176
|
total_chunks: 1,
|
|
173
177
|
token_count: None,
|
|
178
|
+
first_page: None,
|
|
179
|
+
last_page: None,
|
|
174
180
|
},
|
|
175
181
|
},
|
|
176
182
|
];
|
|
@@ -214,11 +220,13 @@ async fn test_generate_embeddings_for_chunks_normalization() {
|
|
|
214
220
|
content: test_text.to_string(),
|
|
215
221
|
embedding: None,
|
|
216
222
|
metadata: ChunkMetadata {
|
|
217
|
-
|
|
218
|
-
|
|
223
|
+
byte_start: 0,
|
|
224
|
+
byte_end: test_text.len(),
|
|
219
225
|
chunk_index: 0,
|
|
220
226
|
total_chunks: 1,
|
|
221
227
|
token_count: None,
|
|
228
|
+
first_page: None,
|
|
229
|
+
last_page: None,
|
|
222
230
|
},
|
|
223
231
|
}];
|
|
224
232
|
|
|
@@ -239,11 +247,13 @@ async fn test_generate_embeddings_for_chunks_normalization() {
|
|
|
239
247
|
content: test_text.to_string(),
|
|
240
248
|
embedding: None,
|
|
241
249
|
metadata: ChunkMetadata {
|
|
242
|
-
|
|
243
|
-
|
|
250
|
+
byte_start: 0,
|
|
251
|
+
byte_end: test_text.len(),
|
|
244
252
|
chunk_index: 0,
|
|
245
253
|
total_chunks: 1,
|
|
246
254
|
token_count: None,
|
|
255
|
+
first_page: None,
|
|
256
|
+
last_page: None,
|
|
247
257
|
},
|
|
248
258
|
}];
|
|
249
259
|
|
|
@@ -319,11 +329,13 @@ async fn test_generate_embeddings_for_chunks_model_caching() {
|
|
|
319
329
|
content: "First batch of text.".to_string(),
|
|
320
330
|
embedding: None,
|
|
321
331
|
metadata: ChunkMetadata {
|
|
322
|
-
|
|
323
|
-
|
|
332
|
+
byte_start: 0,
|
|
333
|
+
byte_end: 20,
|
|
324
334
|
chunk_index: 0,
|
|
325
335
|
total_chunks: 1,
|
|
326
336
|
token_count: None,
|
|
337
|
+
first_page: None,
|
|
338
|
+
last_page: None,
|
|
327
339
|
},
|
|
328
340
|
}];
|
|
329
341
|
|
|
@@ -345,11 +357,13 @@ async fn test_generate_embeddings_for_chunks_model_caching() {
|
|
|
345
357
|
content: "Second batch of text.".to_string(),
|
|
346
358
|
embedding: None,
|
|
347
359
|
metadata: ChunkMetadata {
|
|
348
|
-
|
|
349
|
-
|
|
360
|
+
byte_start: 0,
|
|
361
|
+
byte_end: 21,
|
|
350
362
|
chunk_index: 0,
|
|
351
363
|
total_chunks: 1,
|
|
352
364
|
token_count: None,
|
|
365
|
+
first_page: None,
|
|
366
|
+
last_page: None,
|
|
353
367
|
},
|
|
354
368
|
}];
|
|
355
369
|
|
|
@@ -377,11 +391,13 @@ async fn test_generate_embeddings_for_chunks_invalid_preset() {
|
|
|
377
391
|
content: "Test content".to_string(),
|
|
378
392
|
embedding: None,
|
|
379
393
|
metadata: ChunkMetadata {
|
|
380
|
-
|
|
381
|
-
|
|
394
|
+
byte_start: 0,
|
|
395
|
+
byte_end: 12,
|
|
382
396
|
chunk_index: 0,
|
|
383
397
|
total_chunks: 1,
|
|
384
398
|
token_count: None,
|
|
399
|
+
first_page: None,
|
|
400
|
+
last_page: None,
|
|
385
401
|
},
|
|
386
402
|
}];
|
|
387
403
|
|
|
@@ -420,11 +436,13 @@ async fn test_generate_embeddings_for_chunks_unknown_model() {
|
|
|
420
436
|
content: "Test content".to_string(),
|
|
421
437
|
embedding: None,
|
|
422
438
|
metadata: ChunkMetadata {
|
|
423
|
-
|
|
424
|
-
|
|
439
|
+
byte_start: 0,
|
|
440
|
+
byte_end: 12,
|
|
425
441
|
chunk_index: 0,
|
|
426
442
|
total_chunks: 1,
|
|
427
443
|
token_count: None,
|
|
444
|
+
first_page: None,
|
|
445
|
+
last_page: None,
|
|
428
446
|
},
|
|
429
447
|
}];
|
|
430
448
|
|
|
@@ -464,11 +482,13 @@ async fn test_generate_embeddings_for_chunks_custom_model_not_supported() {
|
|
|
464
482
|
content: "Test content".to_string(),
|
|
465
483
|
embedding: None,
|
|
466
484
|
metadata: ChunkMetadata {
|
|
467
|
-
|
|
468
|
-
|
|
485
|
+
byte_start: 0,
|
|
486
|
+
byte_end: 12,
|
|
469
487
|
chunk_index: 0,
|
|
470
488
|
total_chunks: 1,
|
|
471
489
|
token_count: None,
|
|
490
|
+
first_page: None,
|
|
491
|
+
last_page: None,
|
|
472
492
|
},
|
|
473
493
|
}];
|
|
474
494
|
|
|
@@ -509,11 +529,13 @@ async fn test_generate_embeddings_for_chunks_batch_size() {
|
|
|
509
529
|
content: format!("This is test chunk number {}.", i),
|
|
510
530
|
embedding: None,
|
|
511
531
|
metadata: ChunkMetadata {
|
|
512
|
-
|
|
513
|
-
|
|
532
|
+
byte_start: i * 30,
|
|
533
|
+
byte_end: (i + 1) * 30,
|
|
514
534
|
chunk_index: i,
|
|
515
535
|
total_chunks: 10,
|
|
516
536
|
token_count: None,
|
|
537
|
+
first_page: None,
|
|
538
|
+
last_page: None,
|
|
517
539
|
},
|
|
518
540
|
})
|
|
519
541
|
.collect();
|
|
@@ -566,7 +588,7 @@ async fn test_generate_embeddings_chunking_integration() {
|
|
|
566
588
|
..Default::default()
|
|
567
589
|
};
|
|
568
590
|
|
|
569
|
-
let mut chunking_result = chunk_text(text, &chunking_config).expect("Chunking failed");
|
|
591
|
+
let mut chunking_result = chunk_text(text, &chunking_config, None).expect("Chunking failed");
|
|
570
592
|
|
|
571
593
|
assert!(
|
|
572
594
|
chunking_result.chunks.len() > 1,
|
|
@@ -12,6 +12,8 @@
|
|
|
12
12
|
//!
|
|
13
13
|
//! Each test document is extracted and validated for correct content extraction.
|
|
14
14
|
|
|
15
|
+
#![cfg(feature = "office")]
|
|
16
|
+
|
|
15
17
|
use kreuzberg::core::config::ExtractionConfig;
|
|
16
18
|
use kreuzberg::core::extractor::extract_bytes;
|
|
17
19
|
use std::{fs, path::PathBuf};
|
|
@@ -495,8 +497,7 @@ async fn test_typst_mime_type_variants() {
|
|
|
495
497
|
for mime_type in mime_types {
|
|
496
498
|
let result = extract_bytes(&content, mime_type, &config).await;
|
|
497
499
|
|
|
498
|
-
if
|
|
499
|
-
let extraction = result.unwrap();
|
|
500
|
+
if let Ok(extraction) = result {
|
|
500
501
|
assert!(
|
|
501
502
|
!extraction.content.is_empty(),
|
|
502
503
|
"Should extract content with MIME type: {}",
|
data/vendor/rb-sys/Cargo.lock
CHANGED
|
@@ -164,9 +164,9 @@ dependencies = [
|
|
|
164
164
|
|
|
165
165
|
[[package]]
|
|
166
166
|
name = "proc-macro2"
|
|
167
|
-
version = "1.0.
|
|
167
|
+
version = "1.0.103"
|
|
168
168
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
169
|
-
checksum = "
|
|
169
|
+
checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8"
|
|
170
170
|
dependencies = [
|
|
171
171
|
"unicode-ident",
|
|
172
172
|
]
|
|
@@ -179,16 +179,16 @@ checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
|
|
|
179
179
|
|
|
180
180
|
[[package]]
|
|
181
181
|
name = "quote"
|
|
182
|
-
version = "1.0.
|
|
182
|
+
version = "1.0.42"
|
|
183
183
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
184
|
-
checksum = "
|
|
184
|
+
checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f"
|
|
185
185
|
dependencies = [
|
|
186
186
|
"proc-macro2",
|
|
187
187
|
]
|
|
188
188
|
|
|
189
189
|
[[package]]
|
|
190
190
|
name = "rb-sys"
|
|
191
|
-
version = "0.9.
|
|
191
|
+
version = "0.9.119"
|
|
192
192
|
dependencies = [
|
|
193
193
|
"rb-sys-build",
|
|
194
194
|
"rusty-fork",
|
|
@@ -196,9 +196,9 @@ dependencies = [
|
|
|
196
196
|
|
|
197
197
|
[[package]]
|
|
198
198
|
name = "rb-sys-build"
|
|
199
|
-
version = "0.9.
|
|
199
|
+
version = "0.9.119"
|
|
200
200
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
201
|
-
checksum = "
|
|
201
|
+
checksum = "2e0109499e06c85f56df4abad7d9c642ea8a2dd821d1d7132b4d1b69534677f3"
|
|
202
202
|
dependencies = [
|
|
203
203
|
"bindgen",
|
|
204
204
|
"lazy_static",
|
|
@@ -211,9 +211,9 @@ dependencies = [
|
|
|
211
211
|
|
|
212
212
|
[[package]]
|
|
213
213
|
name = "regex"
|
|
214
|
-
version = "1.
|
|
214
|
+
version = "1.12.2"
|
|
215
215
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
216
|
-
checksum = "
|
|
216
|
+
checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
|
|
217
217
|
dependencies = [
|
|
218
218
|
"aho-corasick",
|
|
219
219
|
"memchr",
|
|
@@ -223,9 +223,9 @@ dependencies = [
|
|
|
223
223
|
|
|
224
224
|
[[package]]
|
|
225
225
|
name = "regex-automata"
|
|
226
|
-
version = "0.4.
|
|
226
|
+
version = "0.4.13"
|
|
227
227
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
228
|
-
checksum = "
|
|
228
|
+
checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
|
|
229
229
|
dependencies = [
|
|
230
230
|
"aho-corasick",
|
|
231
231
|
"memchr",
|
|
@@ -259,9 +259,9 @@ dependencies = [
|
|
|
259
259
|
|
|
260
260
|
[[package]]
|
|
261
261
|
name = "rusty-fork"
|
|
262
|
-
version = "0.3.
|
|
262
|
+
version = "0.3.1"
|
|
263
263
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
264
|
-
checksum = "
|
|
264
|
+
checksum = "cc6bf79ff24e648f6da1f8d1f011e9cac26491b619e6b9280f2b47f1774e6ee2"
|
|
265
265
|
dependencies = [
|
|
266
266
|
"fnv",
|
|
267
267
|
"quick-error",
|
|
@@ -283,9 +283,9 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
|
|
283
283
|
|
|
284
284
|
[[package]]
|
|
285
285
|
name = "syn"
|
|
286
|
-
version = "2.0.
|
|
286
|
+
version = "2.0.111"
|
|
287
287
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
288
|
-
checksum = "
|
|
288
|
+
checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87"
|
|
289
289
|
dependencies = [
|
|
290
290
|
"proc-macro2",
|
|
291
291
|
"quote",
|
data/vendor/rb-sys/Cargo.toml
CHANGED
|
@@ -11,9 +11,9 @@
|
|
|
11
11
|
|
|
12
12
|
[package]
|
|
13
13
|
edition = "2018"
|
|
14
|
-
rust-version = "1.
|
|
14
|
+
rust-version = "1.71"
|
|
15
15
|
name = "rb-sys"
|
|
16
|
-
version = "
|
|
16
|
+
version = "0.9.119"
|
|
17
17
|
build = "build/main.rs"
|
|
18
18
|
links = "rb"
|
|
19
19
|
autolib = false
|
|
@@ -55,10 +55,10 @@ path = "src/lib.rs"
|
|
|
55
55
|
doctest = false
|
|
56
56
|
|
|
57
57
|
[dev-dependencies.rusty-fork]
|
|
58
|
-
version = "0.3.
|
|
58
|
+
version = "0.3.1"
|
|
59
59
|
|
|
60
60
|
[build-dependencies.rb-sys-build]
|
|
61
|
-
version = "0.9.
|
|
61
|
+
version = "0.9.119"
|
|
62
62
|
|
|
63
63
|
[lints.rust.unexpected_cfgs]
|
|
64
64
|
level = "warn"
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
build = "build/main.rs"
|
|
3
3
|
name = "rb-sys"
|
|
4
|
-
version = "0.9.
|
|
4
|
+
version = "0.9.119"
|
|
5
5
|
edition = "2018"
|
|
6
6
|
readme = "readme.md"
|
|
7
7
|
categories = ["external-ffi-bindings"]
|
|
@@ -11,14 +11,14 @@ homepage = "https://github.com/oxidize-rb/rb-sys"
|
|
|
11
11
|
license = "MIT OR Apache-2.0"
|
|
12
12
|
links = "rb"
|
|
13
13
|
repository = "https://github.com/oxidize-rb/rb-sys"
|
|
14
|
-
rust-version = "1.
|
|
14
|
+
rust-version = "1.71"
|
|
15
15
|
|
|
16
16
|
[build-dependencies]
|
|
17
|
-
rb-sys-build = { version = "0.9.
|
|
17
|
+
rb-sys-build = { version = "0.9.119", path = "../rb-sys-build" }
|
|
18
18
|
|
|
19
19
|
[dev-dependencies]
|
|
20
20
|
rb-sys = { path = ".", features = ["link-ruby"] }
|
|
21
|
-
rusty-fork = "0.3.
|
|
21
|
+
rusty-fork = "0.3.1"
|
|
22
22
|
|
|
23
23
|
[features]
|
|
24
24
|
default = ["stable-api-compiled-fallback"]
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
use rb_sys_build::{
|
|
1
|
+
use rb_sys_build::{utils::is_mswin_or_mingw, RbConfig};
|
|
2
2
|
|
|
3
3
|
use crate::version::Version;
|
|
4
4
|
|
|
@@ -45,7 +45,10 @@ pub(crate) fn is_ruby_static_enabled(rbconfig: &RbConfig) -> bool {
|
|
|
45
45
|
Ok(val) => val == "true" || val == "1",
|
|
46
46
|
_ => {
|
|
47
47
|
is_env_variable_defined("CARGO_FEATURE_RUBY_STATIC")
|
|
48
|
-
|| rbconfig
|
|
48
|
+
|| rbconfig
|
|
49
|
+
.get("ENABLE_SHARED")
|
|
50
|
+
.map(|v| v == "no")
|
|
51
|
+
.unwrap_or(false)
|
|
49
52
|
}
|
|
50
53
|
}
|
|
51
54
|
}
|
data/vendor/rb-sys/build/main.rs
CHANGED
|
@@ -4,7 +4,7 @@ mod stable_api_config;
|
|
|
4
4
|
mod version;
|
|
5
5
|
|
|
6
6
|
use features::*;
|
|
7
|
-
use rb_sys_build::{RbConfig, RubyEngine
|
|
7
|
+
use rb_sys_build::{bindings, RbConfig, RubyEngine};
|
|
8
8
|
use std::io::Write;
|
|
9
9
|
use std::{
|
|
10
10
|
env,
|
|
@@ -42,10 +42,17 @@ fn main() {
|
|
|
42
42
|
println!("cargo:rerun-if-changed={}", file.unwrap().path().display());
|
|
43
43
|
}
|
|
44
44
|
|
|
45
|
-
let bindings_path = bindings::generate(
|
|
46
|
-
|
|
45
|
+
let bindings_path = bindings::generate(
|
|
46
|
+
&rbconfig,
|
|
47
|
+
is_ruby_static_enabled(&rbconfig),
|
|
48
|
+
&mut cfg_capture_file,
|
|
49
|
+
)
|
|
50
|
+
.expect("generate bindings");
|
|
47
51
|
println!("Bindings generated at: {}", bindings_path.display());
|
|
48
|
-
println!(
|
|
52
|
+
println!(
|
|
53
|
+
"cargo:rustc-env=RB_SYS_BINDINGS_PATH={}",
|
|
54
|
+
bindings_path.display()
|
|
55
|
+
);
|
|
49
56
|
export_cargo_cfg(&mut rbconfig, &mut cfg_capture_file);
|
|
50
57
|
|
|
51
58
|
#[cfg(feature = "stable-api")]
|
|
@@ -147,7 +154,11 @@ fn export_cargo_cfg(rbconfig: &mut RbConfig, cap: &mut File) {
|
|
|
147
154
|
for v in SUPPORTED_RUBY_VERSIONS.iter() {
|
|
148
155
|
let v = v.to_owned();
|
|
149
156
|
|
|
150
|
-
println!(
|
|
157
|
+
println!(
|
|
158
|
+
"cargo:rustc-check-cfg=cfg(ruby_lt_{}_{})",
|
|
159
|
+
v.major(),
|
|
160
|
+
v.minor()
|
|
161
|
+
);
|
|
151
162
|
if version < v {
|
|
152
163
|
println!(r#"cargo:rustc-cfg=ruby_lt_{}_{}"#, v.major(), v.minor());
|
|
153
164
|
cfg_capture!(cap, r#"cargo:version_lt_{}_{}=true"#, v.major(), v.minor());
|
|
@@ -155,15 +166,28 @@ fn export_cargo_cfg(rbconfig: &mut RbConfig, cap: &mut File) {
|
|
|
155
166
|
cfg_capture!(cap, r#"cargo:version_lt_{}_{}=false"#, v.major(), v.minor());
|
|
156
167
|
}
|
|
157
168
|
|
|
158
|
-
println!(
|
|
169
|
+
println!(
|
|
170
|
+
"cargo:rustc-check-cfg=cfg(ruby_lte_{}_{})",
|
|
171
|
+
v.major(),
|
|
172
|
+
v.minor()
|
|
173
|
+
);
|
|
159
174
|
if version <= v {
|
|
160
175
|
println!(r#"cargo:rustc-cfg=ruby_lte_{}_{}"#, v.major(), v.minor());
|
|
161
176
|
cfg_capture!(cap, r#"cargo:version_lte_{}_{}=true"#, v.major(), v.minor());
|
|
162
177
|
} else {
|
|
163
|
-
cfg_capture!(
|
|
178
|
+
cfg_capture!(
|
|
179
|
+
cap,
|
|
180
|
+
r#"cargo:version_lte_{}_{}=false"#,
|
|
181
|
+
v.major(),
|
|
182
|
+
v.minor()
|
|
183
|
+
);
|
|
164
184
|
}
|
|
165
185
|
|
|
166
|
-
println!(
|
|
186
|
+
println!(
|
|
187
|
+
"cargo:rustc-check-cfg=cfg(ruby_eq_{}_{})",
|
|
188
|
+
v.major(),
|
|
189
|
+
v.minor()
|
|
190
|
+
);
|
|
167
191
|
if version == v {
|
|
168
192
|
println!(r#"cargo:rustc-cfg=ruby_eq_{}_{}"#, v.major(), v.minor());
|
|
169
193
|
cfg_capture!(cap, r#"cargo:version_eq_{}_{}=true"#, v.major(), v.minor());
|
|
@@ -171,15 +195,28 @@ fn export_cargo_cfg(rbconfig: &mut RbConfig, cap: &mut File) {
|
|
|
171
195
|
cfg_capture!(cap, r#"cargo:version_eq_{}_{}=false"#, v.major(), v.minor());
|
|
172
196
|
}
|
|
173
197
|
|
|
174
|
-
println!(
|
|
198
|
+
println!(
|
|
199
|
+
"cargo:rustc-check-cfg=cfg(ruby_gte_{}_{})",
|
|
200
|
+
v.major(),
|
|
201
|
+
v.minor()
|
|
202
|
+
);
|
|
175
203
|
if version >= v {
|
|
176
204
|
println!(r#"cargo:rustc-cfg=ruby_gte_{}_{}"#, v.major(), v.minor());
|
|
177
205
|
cfg_capture!(cap, r#"cargo:version_gte_{}_{}=true"#, v.major(), v.minor());
|
|
178
206
|
} else {
|
|
179
|
-
cfg_capture!(
|
|
207
|
+
cfg_capture!(
|
|
208
|
+
cap,
|
|
209
|
+
r#"cargo:version_gte_{}_{}=false"#,
|
|
210
|
+
v.major(),
|
|
211
|
+
v.minor()
|
|
212
|
+
);
|
|
180
213
|
}
|
|
181
214
|
|
|
182
|
-
println!(
|
|
215
|
+
println!(
|
|
216
|
+
"cargo:rustc-check-cfg=cfg(ruby_gt_{}_{})",
|
|
217
|
+
v.major(),
|
|
218
|
+
v.minor()
|
|
219
|
+
);
|
|
183
220
|
if version > v {
|
|
184
221
|
println!(r#"cargo:rustc-cfg=ruby_gt_{}_{}"#, v.major(), v.minor());
|
|
185
222
|
cfg_capture!(cap, r#"cargo:version_gt_{}_{}=true"#, v.major(), v.minor());
|
|
@@ -200,7 +237,12 @@ fn export_cargo_cfg(rbconfig: &mut RbConfig, cap: &mut File) {
|
|
|
200
237
|
cfg_capture!(cap, "cargo:engine={}", rbconfig.ruby_engine());
|
|
201
238
|
|
|
202
239
|
for key in rbconfig.all_keys() {
|
|
203
|
-
cfg_capture!(
|
|
240
|
+
cfg_capture!(
|
|
241
|
+
cap,
|
|
242
|
+
"cargo:rbconfig_{}={}",
|
|
243
|
+
key,
|
|
244
|
+
rbconfig.get(key).expect("key")
|
|
245
|
+
);
|
|
204
246
|
}
|
|
205
247
|
|
|
206
248
|
if is_ruby_static_enabled(rbconfig) {
|
|
@@ -239,8 +281,6 @@ fn expose_cargo_features(cap: &mut File) {
|
|
|
239
281
|
|
|
240
282
|
fn warn_deprecated_feature_flags() {
|
|
241
283
|
if cfg!(feature = "ruby-macros") {
|
|
242
|
-
println!(
|
|
243
|
-
"cargo:warning=The \"ruby-macros\" feature flag is deprecated and will be removed in a future release. Please use \"stable-api\" instead."
|
|
244
|
-
);
|
|
284
|
+
println!("cargo:warning=The \"ruby-macros\" feature flag is deprecated and will be removed in a future release. Please use \"stable-api\" instead.");
|
|
245
285
|
}
|
|
246
286
|
}
|
|
@@ -2,7 +2,7 @@ use rb_sys_build::{RbConfig, RubyEngine};
|
|
|
2
2
|
|
|
3
3
|
use crate::{
|
|
4
4
|
features::is_env_variable_defined,
|
|
5
|
-
version::{
|
|
5
|
+
version::{Version, MIN_SUPPORTED_STABLE_VERSION},
|
|
6
6
|
};
|
|
7
7
|
use std::{convert::TryFrom, error::Error, path::Path};
|
|
8
8
|
|
|
@@ -27,7 +27,9 @@ enum Strategy {
|
|
|
27
27
|
impl TryFrom<(RubyEngine, Version)> for Strategy {
|
|
28
28
|
type Error = Box<dyn Error>;
|
|
29
29
|
|
|
30
|
-
fn try_from(
|
|
30
|
+
fn try_from(
|
|
31
|
+
(engine, current_ruby_version): (RubyEngine, Version),
|
|
32
|
+
) -> Result<Self, Self::Error> {
|
|
31
33
|
let mut strategy = None;
|
|
32
34
|
|
|
33
35
|
match engine {
|
|
@@ -23,7 +23,9 @@ impl Version {
|
|
|
23
23
|
|
|
24
24
|
pub fn current(rbconfig: &RbConfig) -> Version {
|
|
25
25
|
match (rbconfig.get("MAJOR"), rbconfig.get("MINOR")) {
|
|
26
|
-
(Some(major), Some(minor)) =>
|
|
26
|
+
(Some(major), Some(minor)) => {
|
|
27
|
+
Version::new(major.parse::<u32>().unwrap(), minor.parse::<u32>().unwrap())
|
|
28
|
+
}
|
|
27
29
|
_ => {
|
|
28
30
|
// Try to parse out the first 3 components of the version string (for truffleruby)
|
|
29
31
|
let version_string = rbconfig.get("ruby_version").expect("ruby_version");
|
data/vendor/rb-sys/src/lib.rs
CHANGED
data/vendor/rb-sys/src/macros.rs
CHANGED
|
@@ -13,11 +13,11 @@
|
|
|
13
13
|
#![allow(non_upper_case_globals)]
|
|
14
14
|
#![allow(non_snake_case)]
|
|
15
15
|
|
|
16
|
-
use crate::StableApiDefinition;
|
|
17
|
-
use crate::VALUE;
|
|
18
16
|
use crate::rb_data_type_t;
|
|
19
17
|
use crate::ruby_value_type;
|
|
20
18
|
use crate::stable_api::get_default as api;
|
|
19
|
+
use crate::StableApiDefinition;
|
|
20
|
+
use crate::VALUE;
|
|
21
21
|
use std::ffi::c_void;
|
|
22
22
|
use std::os::raw::{c_char, c_long};
|
|
23
23
|
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
use std::ffi::c_long;
|
|
11
11
|
|
|
12
|
-
use crate::{
|
|
12
|
+
use crate::{ruby_special_consts, VALUE};
|
|
13
13
|
|
|
14
14
|
pub const Qfalse: ruby_special_consts = ruby_special_consts::RUBY_Qfalse;
|
|
15
15
|
pub const Qtrue: ruby_special_consts = ruby_special_consts::RUBY_Qtrue;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
use super::StableApiDefinition;
|
|
2
|
-
use crate::{
|
|
2
|
+
use crate::{ruby_value_type, timeval, RUBY_API_VERSION_MAJOR, RUBY_API_VERSION_MINOR, VALUE};
|
|
3
3
|
use std::{
|
|
4
4
|
ffi::c_void,
|
|
5
5
|
os::raw::{c_char, c_long},
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
use super::StableApiDefinition;
|
|
2
2
|
use crate::{
|
|
3
|
-
|
|
3
|
+
debug_ruby_assert_type,
|
|
4
4
|
internal::{RArray, RString, RTypedData},
|
|
5
5
|
ruby_value_type::RUBY_T_DATA,
|
|
6
|
-
value_type,
|
|
6
|
+
value_type, VALUE,
|
|
7
7
|
};
|
|
8
8
|
use std::{
|
|
9
9
|
ffi::c_void,
|
|
@@ -299,7 +299,11 @@ impl StableApiDefinition for Definition {
|
|
|
299
299
|
|
|
300
300
|
#[inline]
|
|
301
301
|
unsafe fn rtypeddata_type(&self, obj: VALUE) -> *const crate::rb_data_type_t {
|
|
302
|
-
debug_ruby_assert_type!(
|
|
302
|
+
debug_ruby_assert_type!(
|
|
303
|
+
obj,
|
|
304
|
+
RUBY_T_DATA,
|
|
305
|
+
"rtypeddata_type called on non-T_DATA object"
|
|
306
|
+
);
|
|
303
307
|
|
|
304
308
|
let rdata = obj as *const RTypedData;
|
|
305
309
|
(*rdata).type_
|
|
@@ -307,7 +311,11 @@ impl StableApiDefinition for Definition {
|
|
|
307
311
|
|
|
308
312
|
#[inline]
|
|
309
313
|
unsafe fn rtypeddata_get_data(&self, obj: VALUE) -> *mut c_void {
|
|
310
|
-
debug_ruby_assert_type!(
|
|
314
|
+
debug_ruby_assert_type!(
|
|
315
|
+
obj,
|
|
316
|
+
RUBY_T_DATA,
|
|
317
|
+
"rtypeddata_get_data called on non-T_DATA object"
|
|
318
|
+
);
|
|
311
319
|
|
|
312
320
|
// For Ruby 2.7, simply return the data field
|
|
313
321
|
let rdata = obj as *const RTypedData;
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
use super::StableApiDefinition;
|
|
2
2
|
use crate::{
|
|
3
|
-
|
|
3
|
+
debug_ruby_assert_type,
|
|
4
4
|
internal::{RArray, RString, RTypedData},
|
|
5
5
|
ruby_value_type::RUBY_T_DATA,
|
|
6
|
-
value_type,
|
|
6
|
+
value_type, VALUE,
|
|
7
7
|
};
|
|
8
8
|
use std::{
|
|
9
9
|
ffi::c_void,
|
|
@@ -307,7 +307,11 @@ impl StableApiDefinition for Definition {
|
|
|
307
307
|
|
|
308
308
|
#[inline]
|
|
309
309
|
unsafe fn rtypeddata_type(&self, obj: VALUE) -> *const crate::rb_data_type_t {
|
|
310
|
-
debug_ruby_assert_type!(
|
|
310
|
+
debug_ruby_assert_type!(
|
|
311
|
+
obj,
|
|
312
|
+
RUBY_T_DATA,
|
|
313
|
+
"rtypeddata_type called on non-T_DATA object"
|
|
314
|
+
);
|
|
311
315
|
|
|
312
316
|
let rdata = obj as *const RTypedData;
|
|
313
317
|
(*rdata).type_
|
|
@@ -315,7 +319,11 @@ impl StableApiDefinition for Definition {
|
|
|
315
319
|
|
|
316
320
|
#[inline]
|
|
317
321
|
unsafe fn rtypeddata_get_data(&self, obj: VALUE) -> *mut c_void {
|
|
318
|
-
debug_ruby_assert_type!(
|
|
322
|
+
debug_ruby_assert_type!(
|
|
323
|
+
obj,
|
|
324
|
+
RUBY_T_DATA,
|
|
325
|
+
"rtypeddata_get_data called on non-T_DATA object"
|
|
326
|
+
);
|
|
319
327
|
|
|
320
328
|
// For Ruby 3.0 and lower, simply return the data field
|
|
321
329
|
let rdata = obj as *const RTypedData;
|