kreuzberg 4.0.0.pre.rc.13 → 4.0.0.pre.rc.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +14 -14
- data/.rspec +3 -3
- data/.rubocop.yaml +1 -1
- data/.rubocop.yml +538 -538
- data/Gemfile +8 -8
- data/Gemfile.lock +104 -2
- data/README.md +454 -454
- data/Rakefile +33 -25
- data/Steepfile +47 -47
- data/examples/async_patterns.rb +341 -341
- data/ext/kreuzberg_rb/extconf.rb +45 -45
- data/ext/kreuzberg_rb/native/.cargo/config.toml +2 -2
- data/ext/kreuzberg_rb/native/Cargo.lock +6750 -6941
- data/ext/kreuzberg_rb/native/Cargo.toml +53 -54
- data/ext/kreuzberg_rb/native/README.md +425 -425
- data/ext/kreuzberg_rb/native/build.rs +52 -15
- data/ext/kreuzberg_rb/native/include/ieeefp.h +11 -11
- data/ext/kreuzberg_rb/native/include/msvc_compat/strings.h +14 -14
- data/ext/kreuzberg_rb/native/include/strings.h +20 -20
- data/ext/kreuzberg_rb/native/include/unistd.h +47 -47
- data/ext/kreuzberg_rb/native/src/lib.rs +3158 -3158
- data/extconf.rb +28 -28
- data/kreuzberg.gemspec +214 -214
- data/lib/kreuzberg/api_proxy.rb +142 -142
- data/lib/kreuzberg/cache_api.rb +81 -81
- data/lib/kreuzberg/cli.rb +55 -55
- data/lib/kreuzberg/cli_proxy.rb +127 -127
- data/lib/kreuzberg/config.rb +724 -724
- data/lib/kreuzberg/error_context.rb +80 -80
- data/lib/kreuzberg/errors.rb +118 -118
- data/lib/kreuzberg/extraction_api.rb +340 -340
- data/lib/kreuzberg/mcp_proxy.rb +186 -186
- data/lib/kreuzberg/ocr_backend_protocol.rb +113 -113
- data/lib/kreuzberg/post_processor_protocol.rb +86 -86
- data/lib/kreuzberg/result.rb +279 -279
- data/lib/kreuzberg/setup_lib_path.rb +80 -80
- data/lib/kreuzberg/validator_protocol.rb +89 -89
- data/lib/kreuzberg/version.rb +5 -5
- data/lib/kreuzberg.rb +109 -109
- data/lib/{pdfium.dll → libpdfium.so} +0 -0
- data/sig/kreuzberg/internal.rbs +184 -184
- data/sig/kreuzberg.rbs +546 -546
- data/spec/binding/cache_spec.rb +227 -227
- data/spec/binding/cli_proxy_spec.rb +85 -85
- data/spec/binding/cli_spec.rb +55 -55
- data/spec/binding/config_spec.rb +345 -345
- data/spec/binding/config_validation_spec.rb +283 -283
- data/spec/binding/error_handling_spec.rb +213 -213
- data/spec/binding/errors_spec.rb +66 -66
- data/spec/binding/plugins/ocr_backend_spec.rb +307 -307
- data/spec/binding/plugins/postprocessor_spec.rb +269 -269
- data/spec/binding/plugins/validator_spec.rb +274 -274
- data/spec/fixtures/config.toml +39 -39
- data/spec/fixtures/config.yaml +41 -41
- data/spec/fixtures/invalid_config.toml +4 -4
- data/spec/smoke/package_spec.rb +178 -178
- data/spec/spec_helper.rb +42 -42
- data/vendor/Cargo.toml +2 -2
- data/vendor/kreuzberg/Cargo.toml +5 -5
- data/vendor/kreuzberg/README.md +230 -230
- data/vendor/kreuzberg/benches/otel_overhead.rs +48 -48
- data/vendor/kreuzberg/build.rs +887 -843
- data/vendor/kreuzberg/src/api/error.rs +81 -81
- data/vendor/kreuzberg/src/api/handlers.rs +199 -199
- data/vendor/kreuzberg/src/api/mod.rs +87 -79
- data/vendor/kreuzberg/src/api/server.rs +353 -353
- data/vendor/kreuzberg/src/api/types.rs +170 -170
- data/vendor/kreuzberg/src/cache/mod.rs +1167 -1167
- data/vendor/kreuzberg/src/chunking/mod.rs +1877 -1877
- data/vendor/kreuzberg/src/chunking/processor.rs +220 -220
- data/vendor/kreuzberg/src/core/batch_mode.rs +95 -95
- data/vendor/kreuzberg/src/core/config.rs +1080 -1080
- data/vendor/kreuzberg/src/core/extractor.rs +1156 -1156
- data/vendor/kreuzberg/src/core/io.rs +329 -329
- data/vendor/kreuzberg/src/core/mime.rs +605 -605
- data/vendor/kreuzberg/src/core/mod.rs +47 -47
- data/vendor/kreuzberg/src/core/pipeline.rs +1184 -1184
- data/vendor/kreuzberg/src/embeddings.rs +500 -500
- data/vendor/kreuzberg/src/error.rs +431 -431
- data/vendor/kreuzberg/src/extraction/archive.rs +954 -954
- data/vendor/kreuzberg/src/extraction/docx.rs +398 -398
- data/vendor/kreuzberg/src/extraction/email.rs +854 -854
- data/vendor/kreuzberg/src/extraction/excel.rs +688 -688
- data/vendor/kreuzberg/src/extraction/html.rs +634 -601
- data/vendor/kreuzberg/src/extraction/image.rs +491 -491
- data/vendor/kreuzberg/src/extraction/libreoffice.rs +574 -574
- data/vendor/kreuzberg/src/extraction/markdown.rs +213 -213
- data/vendor/kreuzberg/src/extraction/mod.rs +81 -81
- data/vendor/kreuzberg/src/extraction/office_metadata/app_properties.rs +398 -398
- data/vendor/kreuzberg/src/extraction/office_metadata/core_properties.rs +247 -247
- data/vendor/kreuzberg/src/extraction/office_metadata/custom_properties.rs +240 -240
- data/vendor/kreuzberg/src/extraction/office_metadata/mod.rs +130 -130
- data/vendor/kreuzberg/src/extraction/office_metadata/odt_properties.rs +284 -284
- data/vendor/kreuzberg/src/extraction/pptx.rs +3100 -3100
- data/vendor/kreuzberg/src/extraction/structured.rs +490 -490
- data/vendor/kreuzberg/src/extraction/table.rs +328 -328
- data/vendor/kreuzberg/src/extraction/text.rs +269 -269
- data/vendor/kreuzberg/src/extraction/xml.rs +333 -333
- data/vendor/kreuzberg/src/extractors/archive.rs +447 -447
- data/vendor/kreuzberg/src/extractors/bibtex.rs +470 -470
- data/vendor/kreuzberg/src/extractors/docbook.rs +504 -504
- data/vendor/kreuzberg/src/extractors/docx.rs +400 -400
- data/vendor/kreuzberg/src/extractors/email.rs +157 -157
- data/vendor/kreuzberg/src/extractors/epub.rs +708 -708
- data/vendor/kreuzberg/src/extractors/excel.rs +345 -345
- data/vendor/kreuzberg/src/extractors/fictionbook.rs +492 -492
- data/vendor/kreuzberg/src/extractors/html.rs +407 -407
- data/vendor/kreuzberg/src/extractors/image.rs +219 -219
- data/vendor/kreuzberg/src/extractors/jats.rs +1054 -1054
- data/vendor/kreuzberg/src/extractors/jupyter.rs +368 -368
- data/vendor/kreuzberg/src/extractors/latex.rs +653 -653
- data/vendor/kreuzberg/src/extractors/markdown.rs +701 -701
- data/vendor/kreuzberg/src/extractors/mod.rs +429 -429
- data/vendor/kreuzberg/src/extractors/odt.rs +628 -628
- data/vendor/kreuzberg/src/extractors/opml.rs +635 -635
- data/vendor/kreuzberg/src/extractors/orgmode.rs +529 -529
- data/vendor/kreuzberg/src/extractors/pdf.rs +749 -749
- data/vendor/kreuzberg/src/extractors/pptx.rs +267 -267
- data/vendor/kreuzberg/src/extractors/rst.rs +577 -577
- data/vendor/kreuzberg/src/extractors/rtf.rs +809 -809
- data/vendor/kreuzberg/src/extractors/security.rs +484 -484
- data/vendor/kreuzberg/src/extractors/security_tests.rs +367 -367
- data/vendor/kreuzberg/src/extractors/structured.rs +142 -142
- data/vendor/kreuzberg/src/extractors/text.rs +265 -265
- data/vendor/kreuzberg/src/extractors/typst.rs +651 -651
- data/vendor/kreuzberg/src/extractors/xml.rs +147 -147
- data/vendor/kreuzberg/src/image/dpi.rs +164 -164
- data/vendor/kreuzberg/src/image/mod.rs +6 -6
- data/vendor/kreuzberg/src/image/preprocessing.rs +417 -417
- data/vendor/kreuzberg/src/image/resize.rs +89 -89
- data/vendor/kreuzberg/src/keywords/config.rs +154 -154
- data/vendor/kreuzberg/src/keywords/mod.rs +237 -237
- data/vendor/kreuzberg/src/keywords/processor.rs +275 -275
- data/vendor/kreuzberg/src/keywords/rake.rs +293 -293
- data/vendor/kreuzberg/src/keywords/types.rs +68 -68
- data/vendor/kreuzberg/src/keywords/yake.rs +163 -163
- data/vendor/kreuzberg/src/language_detection/mod.rs +985 -985
- data/vendor/kreuzberg/src/language_detection/processor.rs +219 -219
- data/vendor/kreuzberg/src/lib.rs +113 -113
- data/vendor/kreuzberg/src/mcp/mod.rs +35 -35
- data/vendor/kreuzberg/src/mcp/server.rs +2076 -2076
- data/vendor/kreuzberg/src/ocr/cache.rs +469 -469
- data/vendor/kreuzberg/src/ocr/error.rs +37 -37
- data/vendor/kreuzberg/src/ocr/hocr.rs +216 -216
- data/vendor/kreuzberg/src/ocr/mod.rs +58 -58
- data/vendor/kreuzberg/src/ocr/processor.rs +863 -863
- data/vendor/kreuzberg/src/ocr/table/mod.rs +4 -4
- data/vendor/kreuzberg/src/ocr/table/tsv_parser.rs +144 -144
- data/vendor/kreuzberg/src/ocr/tesseract_backend.rs +452 -452
- data/vendor/kreuzberg/src/ocr/types.rs +393 -393
- data/vendor/kreuzberg/src/ocr/utils.rs +47 -47
- data/vendor/kreuzberg/src/ocr/validation.rs +206 -206
- data/vendor/kreuzberg/src/panic_context.rs +154 -154
- data/vendor/kreuzberg/src/pdf/bindings.rs +44 -44
- data/vendor/kreuzberg/src/pdf/bundled.rs +452 -346
- data/vendor/kreuzberg/src/pdf/error.rs +130 -130
- data/vendor/kreuzberg/src/pdf/images.rs +139 -139
- data/vendor/kreuzberg/src/pdf/metadata.rs +489 -489
- data/vendor/kreuzberg/src/pdf/mod.rs +68 -68
- data/vendor/kreuzberg/src/pdf/rendering.rs +368 -368
- data/vendor/kreuzberg/src/pdf/table.rs +420 -420
- data/vendor/kreuzberg/src/pdf/text.rs +240 -240
- data/vendor/kreuzberg/src/plugins/extractor.rs +1044 -1044
- data/vendor/kreuzberg/src/plugins/mod.rs +212 -212
- data/vendor/kreuzberg/src/plugins/ocr.rs +639 -639
- data/vendor/kreuzberg/src/plugins/processor.rs +650 -650
- data/vendor/kreuzberg/src/plugins/registry.rs +1339 -1339
- data/vendor/kreuzberg/src/plugins/traits.rs +258 -258
- data/vendor/kreuzberg/src/plugins/validator.rs +967 -967
- data/vendor/kreuzberg/src/stopwords/mod.rs +1470 -1470
- data/vendor/kreuzberg/src/text/mod.rs +25 -25
- data/vendor/kreuzberg/src/text/quality.rs +697 -697
- data/vendor/kreuzberg/src/text/quality_processor.rs +219 -219
- data/vendor/kreuzberg/src/text/string_utils.rs +217 -217
- data/vendor/kreuzberg/src/text/token_reduction/cjk_utils.rs +164 -164
- data/vendor/kreuzberg/src/text/token_reduction/config.rs +100 -100
- data/vendor/kreuzberg/src/text/token_reduction/core.rs +796 -796
- data/vendor/kreuzberg/src/text/token_reduction/filters.rs +902 -902
- data/vendor/kreuzberg/src/text/token_reduction/mod.rs +160 -160
- data/vendor/kreuzberg/src/text/token_reduction/semantic.rs +619 -619
- data/vendor/kreuzberg/src/text/token_reduction/simd_text.rs +147 -147
- data/vendor/kreuzberg/src/types.rs +1055 -1055
- data/vendor/kreuzberg/src/utils/mod.rs +17 -17
- data/vendor/kreuzberg/src/utils/quality.rs +959 -959
- data/vendor/kreuzberg/src/utils/string_utils.rs +381 -381
- data/vendor/kreuzberg/stopwords/af_stopwords.json +53 -53
- data/vendor/kreuzberg/stopwords/ar_stopwords.json +482 -482
- data/vendor/kreuzberg/stopwords/bg_stopwords.json +261 -261
- data/vendor/kreuzberg/stopwords/bn_stopwords.json +400 -400
- data/vendor/kreuzberg/stopwords/br_stopwords.json +1205 -1205
- data/vendor/kreuzberg/stopwords/ca_stopwords.json +280 -280
- data/vendor/kreuzberg/stopwords/cs_stopwords.json +425 -425
- data/vendor/kreuzberg/stopwords/da_stopwords.json +172 -172
- data/vendor/kreuzberg/stopwords/de_stopwords.json +622 -622
- data/vendor/kreuzberg/stopwords/el_stopwords.json +849 -849
- data/vendor/kreuzberg/stopwords/en_stopwords.json +1300 -1300
- data/vendor/kreuzberg/stopwords/eo_stopwords.json +175 -175
- data/vendor/kreuzberg/stopwords/es_stopwords.json +734 -734
- data/vendor/kreuzberg/stopwords/et_stopwords.json +37 -37
- data/vendor/kreuzberg/stopwords/eu_stopwords.json +100 -100
- data/vendor/kreuzberg/stopwords/fa_stopwords.json +801 -801
- data/vendor/kreuzberg/stopwords/fi_stopwords.json +849 -849
- data/vendor/kreuzberg/stopwords/fr_stopwords.json +693 -693
- data/vendor/kreuzberg/stopwords/ga_stopwords.json +111 -111
- data/vendor/kreuzberg/stopwords/gl_stopwords.json +162 -162
- data/vendor/kreuzberg/stopwords/gu_stopwords.json +226 -226
- data/vendor/kreuzberg/stopwords/ha_stopwords.json +41 -41
- data/vendor/kreuzberg/stopwords/he_stopwords.json +196 -196
- data/vendor/kreuzberg/stopwords/hi_stopwords.json +227 -227
- data/vendor/kreuzberg/stopwords/hr_stopwords.json +181 -181
- data/vendor/kreuzberg/stopwords/hu_stopwords.json +791 -791
- data/vendor/kreuzberg/stopwords/hy_stopwords.json +47 -47
- data/vendor/kreuzberg/stopwords/id_stopwords.json +760 -760
- data/vendor/kreuzberg/stopwords/it_stopwords.json +634 -634
- data/vendor/kreuzberg/stopwords/ja_stopwords.json +136 -136
- data/vendor/kreuzberg/stopwords/kn_stopwords.json +84 -84
- data/vendor/kreuzberg/stopwords/ko_stopwords.json +681 -681
- data/vendor/kreuzberg/stopwords/ku_stopwords.json +64 -64
- data/vendor/kreuzberg/stopwords/la_stopwords.json +51 -51
- data/vendor/kreuzberg/stopwords/lt_stopwords.json +476 -476
- data/vendor/kreuzberg/stopwords/lv_stopwords.json +163 -163
- data/vendor/kreuzberg/stopwords/ml_stopwords.json +1 -1
- data/vendor/kreuzberg/stopwords/mr_stopwords.json +101 -101
- data/vendor/kreuzberg/stopwords/ms_stopwords.json +477 -477
- data/vendor/kreuzberg/stopwords/ne_stopwords.json +490 -490
- data/vendor/kreuzberg/stopwords/nl_stopwords.json +415 -415
- data/vendor/kreuzberg/stopwords/no_stopwords.json +223 -223
- data/vendor/kreuzberg/stopwords/pl_stopwords.json +331 -331
- data/vendor/kreuzberg/stopwords/pt_stopwords.json +562 -562
- data/vendor/kreuzberg/stopwords/ro_stopwords.json +436 -436
- data/vendor/kreuzberg/stopwords/ru_stopwords.json +561 -561
- data/vendor/kreuzberg/stopwords/si_stopwords.json +193 -193
- data/vendor/kreuzberg/stopwords/sk_stopwords.json +420 -420
- data/vendor/kreuzberg/stopwords/sl_stopwords.json +448 -448
- data/vendor/kreuzberg/stopwords/so_stopwords.json +32 -32
- data/vendor/kreuzberg/stopwords/st_stopwords.json +33 -33
- data/vendor/kreuzberg/stopwords/sv_stopwords.json +420 -420
- data/vendor/kreuzberg/stopwords/sw_stopwords.json +76 -76
- data/vendor/kreuzberg/stopwords/ta_stopwords.json +129 -129
- data/vendor/kreuzberg/stopwords/te_stopwords.json +54 -54
- data/vendor/kreuzberg/stopwords/th_stopwords.json +118 -118
- data/vendor/kreuzberg/stopwords/tl_stopwords.json +149 -149
- data/vendor/kreuzberg/stopwords/tr_stopwords.json +506 -506
- data/vendor/kreuzberg/stopwords/uk_stopwords.json +75 -75
- data/vendor/kreuzberg/stopwords/ur_stopwords.json +519 -519
- data/vendor/kreuzberg/stopwords/vi_stopwords.json +647 -647
- data/vendor/kreuzberg/stopwords/yo_stopwords.json +62 -62
- data/vendor/kreuzberg/stopwords/zh_stopwords.json +796 -796
- data/vendor/kreuzberg/stopwords/zu_stopwords.json +31 -31
- data/vendor/kreuzberg/tests/api_extract_multipart.rs +52 -52
- data/vendor/kreuzberg/tests/api_tests.rs +966 -966
- data/vendor/kreuzberg/tests/archive_integration.rs +545 -545
- data/vendor/kreuzberg/tests/batch_orchestration.rs +556 -556
- data/vendor/kreuzberg/tests/batch_processing.rs +318 -318
- data/vendor/kreuzberg/tests/bibtex_parity_test.rs +421 -421
- data/vendor/kreuzberg/tests/concurrency_stress.rs +533 -533
- data/vendor/kreuzberg/tests/config_features.rs +612 -612
- data/vendor/kreuzberg/tests/config_loading_tests.rs +416 -416
- data/vendor/kreuzberg/tests/core_integration.rs +510 -510
- data/vendor/kreuzberg/tests/csv_integration.rs +414 -414
- data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +500 -500
- data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +122 -122
- data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +370 -370
- data/vendor/kreuzberg/tests/email_integration.rs +327 -327
- data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +275 -275
- data/vendor/kreuzberg/tests/error_handling.rs +402 -402
- data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +228 -228
- data/vendor/kreuzberg/tests/format_integration.rs +165 -164
- data/vendor/kreuzberg/tests/helpers/mod.rs +142 -142
- data/vendor/kreuzberg/tests/html_table_test.rs +551 -551
- data/vendor/kreuzberg/tests/image_integration.rs +255 -255
- data/vendor/kreuzberg/tests/instrumentation_test.rs +139 -139
- data/vendor/kreuzberg/tests/jats_extractor_tests.rs +639 -639
- data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +704 -704
- data/vendor/kreuzberg/tests/keywords_integration.rs +479 -479
- data/vendor/kreuzberg/tests/keywords_quality.rs +509 -509
- data/vendor/kreuzberg/tests/latex_extractor_tests.rs +496 -496
- data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +490 -490
- data/vendor/kreuzberg/tests/mime_detection.rs +429 -429
- data/vendor/kreuzberg/tests/ocr_configuration.rs +514 -514
- data/vendor/kreuzberg/tests/ocr_errors.rs +698 -698
- data/vendor/kreuzberg/tests/ocr_quality.rs +629 -629
- data/vendor/kreuzberg/tests/ocr_stress.rs +469 -469
- data/vendor/kreuzberg/tests/odt_extractor_tests.rs +674 -674
- data/vendor/kreuzberg/tests/opml_extractor_tests.rs +616 -616
- data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +822 -822
- data/vendor/kreuzberg/tests/pdf_integration.rs +45 -45
- data/vendor/kreuzberg/tests/pdfium_linking.rs +374 -374
- data/vendor/kreuzberg/tests/pipeline_integration.rs +1436 -1436
- data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +776 -776
- data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +560 -560
- data/vendor/kreuzberg/tests/plugin_system.rs +927 -927
- data/vendor/kreuzberg/tests/plugin_validator_test.rs +783 -783
- data/vendor/kreuzberg/tests/registry_integration_tests.rs +587 -587
- data/vendor/kreuzberg/tests/rst_extractor_tests.rs +694 -694
- data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +775 -775
- data/vendor/kreuzberg/tests/security_validation.rs +416 -416
- data/vendor/kreuzberg/tests/stopwords_integration_test.rs +888 -888
- data/vendor/kreuzberg/tests/test_fastembed.rs +631 -631
- data/vendor/kreuzberg/tests/typst_behavioral_tests.rs +1260 -1260
- data/vendor/kreuzberg/tests/typst_extractor_tests.rs +648 -648
- data/vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs +87 -87
- data/vendor/kreuzberg-tesseract/.commitlintrc.json +13 -13
- data/vendor/kreuzberg-tesseract/.crate-ignore +2 -2
- data/vendor/kreuzberg-tesseract/Cargo.lock +2933 -2933
- data/vendor/kreuzberg-tesseract/Cargo.toml +2 -2
- data/vendor/kreuzberg-tesseract/LICENSE +22 -22
- data/vendor/kreuzberg-tesseract/README.md +399 -399
- data/vendor/kreuzberg-tesseract/build.rs +1354 -1354
- data/vendor/kreuzberg-tesseract/patches/README.md +71 -71
- data/vendor/kreuzberg-tesseract/patches/tesseract.diff +199 -199
- data/vendor/kreuzberg-tesseract/src/api.rs +1371 -1371
- data/vendor/kreuzberg-tesseract/src/choice_iterator.rs +77 -77
- data/vendor/kreuzberg-tesseract/src/enums.rs +297 -297
- data/vendor/kreuzberg-tesseract/src/error.rs +81 -81
- data/vendor/kreuzberg-tesseract/src/lib.rs +145 -145
- data/vendor/kreuzberg-tesseract/src/monitor.rs +57 -57
- data/vendor/kreuzberg-tesseract/src/mutable_iterator.rs +197 -197
- data/vendor/kreuzberg-tesseract/src/page_iterator.rs +253 -253
- data/vendor/kreuzberg-tesseract/src/result_iterator.rs +286 -286
- data/vendor/kreuzberg-tesseract/src/result_renderer.rs +183 -183
- data/vendor/kreuzberg-tesseract/tests/integration_test.rs +211 -211
- data/vendor/rb-sys/.cargo_vcs_info.json +5 -5
- data/vendor/rb-sys/Cargo.lock +393 -393
- data/vendor/rb-sys/Cargo.toml +70 -70
- data/vendor/rb-sys/Cargo.toml.orig +57 -57
- data/vendor/rb-sys/LICENSE-APACHE +190 -190
- data/vendor/rb-sys/LICENSE-MIT +21 -21
- data/vendor/rb-sys/build/features.rs +111 -111
- data/vendor/rb-sys/build/main.rs +286 -286
- data/vendor/rb-sys/build/stable_api_config.rs +155 -155
- data/vendor/rb-sys/build/version.rs +50 -50
- data/vendor/rb-sys/readme.md +36 -36
- data/vendor/rb-sys/src/bindings.rs +21 -21
- data/vendor/rb-sys/src/hidden.rs +11 -11
- data/vendor/rb-sys/src/lib.rs +35 -35
- data/vendor/rb-sys/src/macros.rs +371 -371
- data/vendor/rb-sys/src/memory.rs +53 -53
- data/vendor/rb-sys/src/ruby_abi_version.rs +38 -38
- data/vendor/rb-sys/src/special_consts.rs +31 -31
- data/vendor/rb-sys/src/stable_api/compiled.c +179 -179
- data/vendor/rb-sys/src/stable_api/compiled.rs +257 -257
- data/vendor/rb-sys/src/stable_api/ruby_2_7.rs +324 -324
- data/vendor/rb-sys/src/stable_api/ruby_3_0.rs +332 -332
- data/vendor/rb-sys/src/stable_api/ruby_3_1.rs +325 -325
- data/vendor/rb-sys/src/stable_api/ruby_3_2.rs +323 -323
- data/vendor/rb-sys/src/stable_api/ruby_3_3.rs +339 -339
- data/vendor/rb-sys/src/stable_api/ruby_3_4.rs +339 -339
- data/vendor/rb-sys/src/stable_api.rs +260 -260
- data/vendor/rb-sys/src/symbol.rs +31 -31
- data/vendor/rb-sys/src/tracking_allocator.rs +330 -330
- data/vendor/rb-sys/src/utils.rs +89 -89
- data/vendor/rb-sys/src/value_type.rs +7 -7
- metadata +81 -22
- data/vendor/kreuzberg-ffi/Cargo.toml +0 -63
- data/vendor/kreuzberg-ffi/README.md +0 -851
- data/vendor/kreuzberg-ffi/build.rs +0 -176
- data/vendor/kreuzberg-ffi/cbindgen.toml +0 -27
- data/vendor/kreuzberg-ffi/kreuzberg-ffi-install.pc +0 -12
- data/vendor/kreuzberg-ffi/kreuzberg-ffi.pc.in +0 -12
- data/vendor/kreuzberg-ffi/kreuzberg.h +0 -1087
- data/vendor/kreuzberg-ffi/src/lib.rs +0 -3616
- data/vendor/kreuzberg-ffi/src/panic_shield.rs +0 -247
- data/vendor/kreuzberg-ffi/tests.disabled/README.md +0 -48
- data/vendor/kreuzberg-ffi/tests.disabled/config_loading_tests.rs +0 -299
- data/vendor/kreuzberg-ffi/tests.disabled/config_tests.rs +0 -346
- data/vendor/kreuzberg-ffi/tests.disabled/extractor_tests.rs +0 -232
- data/vendor/kreuzberg-ffi/tests.disabled/plugin_registration_tests.rs +0 -470
|
@@ -1,330 +1,330 @@
|
|
|
1
|
-
//! Support for reporting Rust memory usage to the Ruby GC.
|
|
2
|
-
|
|
3
|
-
use std::{
|
|
4
|
-
fmt::Formatter,
|
|
5
|
-
sync::{
|
|
6
|
-
atomic::{AtomicIsize, Ordering},
|
|
7
|
-
Arc,
|
|
8
|
-
},
|
|
9
|
-
};
|
|
10
|
-
|
|
11
|
-
#[cfg(ruby_engine = "mri")]
|
|
12
|
-
mod mri {
|
|
13
|
-
use crate::{rb_gc_adjust_memory_usage, utils::is_ruby_vm_started};
|
|
14
|
-
use std::alloc::{GlobalAlloc, Layout, System};
|
|
15
|
-
|
|
16
|
-
/// A simple wrapper over [`System`] which reports memory usage to
|
|
17
|
-
/// the Ruby GC. This gives the GC a more accurate picture of the process'
|
|
18
|
-
/// memory usage so it can make better decisions about when to run.
|
|
19
|
-
#[derive(Debug)]
|
|
20
|
-
pub struct TrackingAllocator;
|
|
21
|
-
|
|
22
|
-
impl TrackingAllocator {
|
|
23
|
-
/// Create a new [`TrackingAllocator`].
|
|
24
|
-
#[allow(clippy::new_without_default)]
|
|
25
|
-
pub const fn new() -> Self {
|
|
26
|
-
Self
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
/// Create a new [`TrackingAllocator`] with default values.
|
|
30
|
-
pub const fn default() -> Self {
|
|
31
|
-
Self::new()
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
/// Adjust the memory usage reported to the Ruby GC by `delta`. Useful for
|
|
35
|
-
/// tracking allocations invisible to the Rust allocator, such as `mmap` or
|
|
36
|
-
/// direct `malloc` calls.
|
|
37
|
-
///
|
|
38
|
-
/// # Example
|
|
39
|
-
/// ```
|
|
40
|
-
/// use rb_sys::TrackingAllocator;
|
|
41
|
-
///
|
|
42
|
-
/// // Allocate 1024 bytes of memory using `mmap` or `malloc`...
|
|
43
|
-
/// TrackingAllocator::adjust_memory_usage(1024);
|
|
44
|
-
///
|
|
45
|
-
/// // ...and then after the memory is freed, adjust the memory usage again.
|
|
46
|
-
/// TrackingAllocator::adjust_memory_usage(-1024);
|
|
47
|
-
/// ```
|
|
48
|
-
#[inline]
|
|
49
|
-
pub fn adjust_memory_usage(delta: isize) -> isize {
|
|
50
|
-
if delta == 0 {
|
|
51
|
-
return 0;
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
#[cfg(target_pointer_width = "32")]
|
|
55
|
-
let delta = delta as i32;
|
|
56
|
-
|
|
57
|
-
#[cfg(target_pointer_width = "64")]
|
|
58
|
-
let delta = delta as i64;
|
|
59
|
-
|
|
60
|
-
unsafe {
|
|
61
|
-
if is_ruby_vm_started() {
|
|
62
|
-
#[cfg(all(target_pointer_width = "64", target_os = "windows"))]
|
|
63
|
-
rb_gc_adjust_memory_usage(delta as i32);
|
|
64
|
-
|
|
65
|
-
#[cfg(not(all(target_pointer_width = "64", target_os = "windows")))]
|
|
66
|
-
rb_gc_adjust_memory_usage(delta);
|
|
67
|
-
delta as isize
|
|
68
|
-
} else {
|
|
69
|
-
0
|
|
70
|
-
}
|
|
71
|
-
}
|
|
72
|
-
}
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
unsafe impl GlobalAlloc for TrackingAllocator {
|
|
76
|
-
#[inline]
|
|
77
|
-
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
|
|
78
|
-
let ret = System.alloc(layout);
|
|
79
|
-
let delta = layout.size() as isize;
|
|
80
|
-
|
|
81
|
-
if !ret.is_null() && delta != 0 {
|
|
82
|
-
Self::adjust_memory_usage(delta);
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
ret
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
#[inline]
|
|
89
|
-
unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
|
|
90
|
-
let ret = System.alloc_zeroed(layout);
|
|
91
|
-
let delta = layout.size() as isize;
|
|
92
|
-
|
|
93
|
-
if !ret.is_null() && delta != 0 {
|
|
94
|
-
Self::adjust_memory_usage(delta);
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
ret
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
#[inline]
|
|
101
|
-
unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
|
|
102
|
-
System.dealloc(ptr, layout);
|
|
103
|
-
let delta = -(layout.size() as isize);
|
|
104
|
-
|
|
105
|
-
if delta != 0 {
|
|
106
|
-
Self::adjust_memory_usage(delta);
|
|
107
|
-
}
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
#[inline]
|
|
111
|
-
unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
|
|
112
|
-
let ret = System.realloc(ptr, layout, new_size);
|
|
113
|
-
let delta = new_size as isize - layout.size() as isize;
|
|
114
|
-
|
|
115
|
-
if !ret.is_null() && delta != 0 {
|
|
116
|
-
Self::adjust_memory_usage(delta);
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
ret
|
|
120
|
-
}
|
|
121
|
-
}
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
#[cfg(not(ruby_engine = "mri"))]
|
|
125
|
-
mod non_mri {
|
|
126
|
-
use std::alloc::{GlobalAlloc, Layout, System};
|
|
127
|
-
|
|
128
|
-
/// A simple wrapper over [`System`] as a fallback for non-MRI Ruby engines.
|
|
129
|
-
pub struct TrackingAllocator;
|
|
130
|
-
|
|
131
|
-
impl TrackingAllocator {
|
|
132
|
-
#[allow(clippy::new_without_default)]
|
|
133
|
-
pub const fn new() -> Self {
|
|
134
|
-
Self
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
pub const fn default() -> Self {
|
|
138
|
-
Self::new()
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
pub fn adjust_memory_usage(_delta: isize) -> isize {
|
|
142
|
-
0
|
|
143
|
-
}
|
|
144
|
-
}
|
|
145
|
-
|
|
146
|
-
unsafe impl GlobalAlloc for TrackingAllocator {
|
|
147
|
-
#[inline]
|
|
148
|
-
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
|
|
149
|
-
System.alloc(layout)
|
|
150
|
-
}
|
|
151
|
-
|
|
152
|
-
#[inline]
|
|
153
|
-
unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
|
|
154
|
-
System.alloc_zeroed(layout)
|
|
155
|
-
}
|
|
156
|
-
|
|
157
|
-
#[inline]
|
|
158
|
-
unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
|
|
159
|
-
System.dealloc(ptr, layout)
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
#[inline]
|
|
163
|
-
unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
|
|
164
|
-
System.realloc(ptr, layout, new_size)
|
|
165
|
-
}
|
|
166
|
-
}
|
|
167
|
-
}
|
|
168
|
-
|
|
169
|
-
#[cfg(ruby_engine = "mri")]
|
|
170
|
-
pub use mri::*;
|
|
171
|
-
|
|
172
|
-
#[cfg(not(ruby_engine = "mri"))]
|
|
173
|
-
pub use non_mri::*;
|
|
174
|
-
|
|
175
|
-
/// Set the global allocator to [`TrackingAllocator`].
|
|
176
|
-
///
|
|
177
|
-
/// # Example
|
|
178
|
-
/// ```
|
|
179
|
-
/// // File: ext/my_gem/src/lib.rs
|
|
180
|
-
/// use rb_sys::set_global_tracking_allocator;
|
|
181
|
-
///
|
|
182
|
-
/// set_global_tracking_allocator!();
|
|
183
|
-
/// ```
|
|
184
|
-
#[macro_export]
|
|
185
|
-
macro_rules! set_global_tracking_allocator {
|
|
186
|
-
() => {
|
|
187
|
-
#[global_allocator]
|
|
188
|
-
static RUBY_GLOBAL_TRACKING_ALLOCATOR: $crate::tracking_allocator::TrackingAllocator =
|
|
189
|
-
$crate::tracking_allocator::TrackingAllocator;
|
|
190
|
-
};
|
|
191
|
-
}
|
|
192
|
-
|
|
193
|
-
#[derive(Debug)]
|
|
194
|
-
#[repr(transparent)]
|
|
195
|
-
struct MemsizeDelta(Arc<AtomicIsize>);
|
|
196
|
-
|
|
197
|
-
impl MemsizeDelta {
|
|
198
|
-
fn new(delta: isize) -> Self {
|
|
199
|
-
let delta = TrackingAllocator::adjust_memory_usage(delta);
|
|
200
|
-
Self(Arc::new(AtomicIsize::new(delta)))
|
|
201
|
-
}
|
|
202
|
-
|
|
203
|
-
fn add(&self, delta: usize) {
|
|
204
|
-
if delta == 0 {
|
|
205
|
-
return;
|
|
206
|
-
}
|
|
207
|
-
|
|
208
|
-
let delta = TrackingAllocator::adjust_memory_usage(delta as _);
|
|
209
|
-
self.0.fetch_add(delta as _, Ordering::SeqCst);
|
|
210
|
-
}
|
|
211
|
-
|
|
212
|
-
fn sub(&self, delta: usize) {
|
|
213
|
-
if delta == 0 {
|
|
214
|
-
return;
|
|
215
|
-
}
|
|
216
|
-
|
|
217
|
-
let delta = TrackingAllocator::adjust_memory_usage(-(delta as isize));
|
|
218
|
-
self.0.fetch_add(delta, Ordering::SeqCst);
|
|
219
|
-
}
|
|
220
|
-
|
|
221
|
-
fn get(&self) -> isize {
|
|
222
|
-
self.0.load(Ordering::SeqCst)
|
|
223
|
-
}
|
|
224
|
-
}
|
|
225
|
-
|
|
226
|
-
impl Clone for MemsizeDelta {
|
|
227
|
-
fn clone(&self) -> Self {
|
|
228
|
-
Self(Arc::clone(&self.0))
|
|
229
|
-
}
|
|
230
|
-
}
|
|
231
|
-
|
|
232
|
-
impl Drop for MemsizeDelta {
|
|
233
|
-
fn drop(&mut self) {
|
|
234
|
-
let memsize = self.0.swap(0, Ordering::SeqCst);
|
|
235
|
-
TrackingAllocator::adjust_memory_usage(0 - memsize);
|
|
236
|
-
}
|
|
237
|
-
}
|
|
238
|
-
|
|
239
|
-
/// A guard which adjusts the memory usage reported to the Ruby GC by `delta`.
|
|
240
|
-
/// This allows you to track resources which are invisible to the Rust
|
|
241
|
-
/// allocator, such as items that are known to internally use `mmap` or direct
|
|
242
|
-
/// `malloc` in their implementation.
|
|
243
|
-
///
|
|
244
|
-
/// Internally, it uses an [`Arc<AtomicIsize>`] to track the memory usage delta,
|
|
245
|
-
/// and is safe to clone when `T` is [`Clone`].
|
|
246
|
-
///
|
|
247
|
-
/// # Example
|
|
248
|
-
/// ```
|
|
249
|
-
/// use rb_sys::tracking_allocator::ManuallyTracked;
|
|
250
|
-
///
|
|
251
|
-
/// type SomethingThatUsedMmap = ();
|
|
252
|
-
///
|
|
253
|
-
/// // Will tell the Ruby GC that 1024 bytes were allocated.
|
|
254
|
-
/// let item = ManuallyTracked::new(SomethingThatUsedMmap, 1024);
|
|
255
|
-
///
|
|
256
|
-
/// // Will tell the Ruby GC that 1024 bytes were freed.
|
|
257
|
-
/// std::mem::drop(item);
|
|
258
|
-
/// ```
|
|
259
|
-
pub struct ManuallyTracked<T> {
|
|
260
|
-
item: T,
|
|
261
|
-
memsize_delta: MemsizeDelta,
|
|
262
|
-
}
|
|
263
|
-
|
|
264
|
-
impl<T> ManuallyTracked<T> {
|
|
265
|
-
/// Create a new `ManuallyTracked<T>`, and immediately report that `memsize`
|
|
266
|
-
/// bytes were allocated.
|
|
267
|
-
pub fn wrap(item: T, memsize: usize) -> Self {
|
|
268
|
-
Self {
|
|
269
|
-
item,
|
|
270
|
-
memsize_delta: MemsizeDelta::new(memsize as _),
|
|
271
|
-
}
|
|
272
|
-
}
|
|
273
|
-
|
|
274
|
-
/// Increase the memory usage reported to the Ruby GC by `memsize` bytes.
|
|
275
|
-
pub fn increase_memory_usage(&self, memsize: usize) {
|
|
276
|
-
self.memsize_delta.add(memsize);
|
|
277
|
-
}
|
|
278
|
-
|
|
279
|
-
/// Decrease the memory usage reported to the Ruby GC by `memsize` bytes.
|
|
280
|
-
pub fn decrease_memory_usage(&self, memsize: usize) {
|
|
281
|
-
self.memsize_delta.sub(memsize);
|
|
282
|
-
}
|
|
283
|
-
|
|
284
|
-
/// Get the current memory usage delta.
|
|
285
|
-
pub fn memsize_delta(&self) -> isize {
|
|
286
|
-
self.memsize_delta.get()
|
|
287
|
-
}
|
|
288
|
-
|
|
289
|
-
/// Get a shared reference to the inner `T`.
|
|
290
|
-
pub fn get(&self) -> &T {
|
|
291
|
-
&self.item
|
|
292
|
-
}
|
|
293
|
-
|
|
294
|
-
/// Get a mutable reference to the inner `T`.
|
|
295
|
-
pub fn get_mut(&mut self) -> &mut T {
|
|
296
|
-
&mut self.item
|
|
297
|
-
}
|
|
298
|
-
}
|
|
299
|
-
|
|
300
|
-
impl ManuallyTracked<()> {
|
|
301
|
-
/// Create a new `ManuallyTracked<()>`, and immediately report that
|
|
302
|
-
/// `memsize` bytes were allocated.
|
|
303
|
-
pub fn new(memsize: usize) -> Self {
|
|
304
|
-
Self::wrap((), memsize)
|
|
305
|
-
}
|
|
306
|
-
}
|
|
307
|
-
|
|
308
|
-
impl Default for ManuallyTracked<()> {
|
|
309
|
-
fn default() -> Self {
|
|
310
|
-
Self::wrap((), 0)
|
|
311
|
-
}
|
|
312
|
-
}
|
|
313
|
-
|
|
314
|
-
impl<T: Clone> Clone for ManuallyTracked<T> {
|
|
315
|
-
fn clone(&self) -> Self {
|
|
316
|
-
Self {
|
|
317
|
-
item: self.item.clone(),
|
|
318
|
-
memsize_delta: self.memsize_delta.clone(),
|
|
319
|
-
}
|
|
320
|
-
}
|
|
321
|
-
}
|
|
322
|
-
|
|
323
|
-
impl<T: std::fmt::Debug> std::fmt::Debug for ManuallyTracked<T> {
|
|
324
|
-
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
|
325
|
-
f.debug_struct("ManuallyTracked")
|
|
326
|
-
.field("item", &self.item)
|
|
327
|
-
.field("memsize_delta", &self.memsize_delta)
|
|
328
|
-
.finish()
|
|
329
|
-
}
|
|
330
|
-
}
|
|
1
|
+
//! Support for reporting Rust memory usage to the Ruby GC.
|
|
2
|
+
|
|
3
|
+
use std::{
|
|
4
|
+
fmt::Formatter,
|
|
5
|
+
sync::{
|
|
6
|
+
atomic::{AtomicIsize, Ordering},
|
|
7
|
+
Arc,
|
|
8
|
+
},
|
|
9
|
+
};
|
|
10
|
+
|
|
11
|
+
#[cfg(ruby_engine = "mri")]
mod mri {
    use crate::{rb_gc_adjust_memory_usage, utils::is_ruby_vm_started};
    use std::alloc::{GlobalAlloc, Layout, System};

    /// Thin layer on top of the [`System`] allocator that forwards every size
    /// change to the Ruby GC, giving the GC a more accurate view of the
    /// process' memory usage when it decides when to run.
    #[derive(Debug)]
    pub struct TrackingAllocator;

    impl TrackingAllocator {
        /// Construct a [`TrackingAllocator`].
        #[allow(clippy::new_without_default)]
        pub const fn new() -> Self {
            Self
        }

        /// Construct a [`TrackingAllocator`]; same as [`TrackingAllocator::new`].
        pub const fn default() -> Self {
            Self::new()
        }

        /// Tell the Ruby GC that memory usage changed by `delta` bytes. Handy
        /// for allocations the Rust allocator never sees, such as `mmap` or
        /// direct `malloc` calls.
        ///
        /// Returns the delta that was reported, or `0` when `delta` is zero or
        /// the Ruby VM has not been started.
        ///
        /// # Example
        /// ```
        /// use rb_sys::TrackingAllocator;
        ///
        /// // Allocate 1024 bytes of memory using `mmap` or `malloc`...
        /// TrackingAllocator::adjust_memory_usage(1024);
        ///
        /// // ...and then after the memory is freed, adjust the memory usage again.
        /// TrackingAllocator::adjust_memory_usage(-1024);
        /// ```
        #[inline]
        pub fn adjust_memory_usage(delta: isize) -> isize {
            if delta == 0 {
                return 0;
            }

            // Shadow `delta` with the fixed-width integer matching the pointer width.
            #[cfg(target_pointer_width = "32")]
            let delta = delta as i32;

            #[cfg(target_pointer_width = "64")]
            let delta = delta as i64;

            // NOTE(review): presumably sound because the GC is only touched once
            // the VM is confirmed to be running — confirm against the FFI contract.
            unsafe {
                if !is_ruby_vm_started() {
                    return 0;
                }

                // 64-bit Windows passes the delta narrowed to `i32`.
                #[cfg(all(target_pointer_width = "64", target_os = "windows"))]
                rb_gc_adjust_memory_usage(delta as i32);

                #[cfg(not(all(target_pointer_width = "64", target_os = "windows")))]
                rb_gc_adjust_memory_usage(delta);
            }

            delta as isize
        }
    }

    unsafe impl GlobalAlloc for TrackingAllocator {
        /// Allocate through [`System`], reporting the size on success.
        #[inline]
        unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
            let ptr = System.alloc(layout);
            let grown = layout.size() as isize;

            // Failed or zero-sized allocations are not reported.
            if ptr.is_null() || grown == 0 {
                return ptr;
            }

            Self::adjust_memory_usage(grown);
            ptr
        }

        /// Allocate zeroed memory through [`System`], reporting the size on success.
        #[inline]
        unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
            let ptr = System.alloc_zeroed(layout);
            let grown = layout.size() as isize;

            if ptr.is_null() || grown == 0 {
                return ptr;
            }

            Self::adjust_memory_usage(grown);
            ptr
        }

        /// Free through [`System`], then report the released size.
        #[inline]
        unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
            System.dealloc(ptr, layout);

            let freed = layout.size() as isize;
            if freed != 0 {
                Self::adjust_memory_usage(-freed);
            }
        }

        /// Resize through [`System`], reporting the size difference on success.
        #[inline]
        unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
            let resized = System.realloc(ptr, layout, new_size);
            let change = new_size as isize - layout.size() as isize;

            if resized.is_null() || change == 0 {
                return resized;
            }

            Self::adjust_memory_usage(change);
            resized
        }
    }
}
|
|
123
|
+
|
|
124
|
+
#[cfg(not(ruby_engine = "mri"))]
mod non_mri {
    use std::alloc::{GlobalAlloc, Layout, System};

    /// A simple wrapper over [`System`] as a fallback for non-MRI Ruby engines.
    ///
    /// Every allocator call is forwarded to [`System`] unchanged and nothing is
    /// reported anywhere. `Debug` is derived so the type offers the same trait
    /// surface as the MRI variant.
    #[derive(Debug)]
    pub struct TrackingAllocator;

    impl TrackingAllocator {
        /// Create a new [`TrackingAllocator`].
        #[allow(clippy::new_without_default)]
        pub const fn new() -> Self {
            Self
        }

        /// Create a new [`TrackingAllocator`] with default values.
        pub const fn default() -> Self {
            Self::new()
        }

        /// No-op counterpart of the MRI `adjust_memory_usage`: `_delta` is
        /// ignored and `0` is always returned.
        pub fn adjust_memory_usage(_delta: isize) -> isize {
            0
        }
    }

    unsafe impl GlobalAlloc for TrackingAllocator {
        /// Forward to [`System`]'s `alloc`.
        #[inline]
        unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
            System.alloc(layout)
        }

        /// Forward to [`System`]'s `alloc_zeroed`.
        #[inline]
        unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
            System.alloc_zeroed(layout)
        }

        /// Forward to [`System`]'s `dealloc`.
        #[inline]
        unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
            System.dealloc(ptr, layout)
        }

        /// Forward to [`System`]'s `realloc`.
        #[inline]
        unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
            System.realloc(ptr, layout, new_size)
        }
    }
}
|
|
168
|
+
|
|
169
|
+
#[cfg(ruby_engine = "mri")]
|
|
170
|
+
pub use mri::*;
|
|
171
|
+
|
|
172
|
+
#[cfg(not(ruby_engine = "mri"))]
|
|
173
|
+
pub use non_mri::*;
|
|
174
|
+
|
|
175
|
+
/// Install [`TrackingAllocator`] as the global allocator of the calling crate.
///
/// # Example
/// ```
/// // File: ext/my_gem/src/lib.rs
/// use rb_sys::set_global_tracking_allocator;
///
/// set_global_tracking_allocator!();
/// ```
#[macro_export]
macro_rules! set_global_tracking_allocator {
    () => {
        #[global_allocator]
        static RUBY_GLOBAL_TRACKING_ALLOCATOR: $crate::tracking_allocator::TrackingAllocator =
            $crate::tracking_allocator::TrackingAllocator::new();
    };
}
|
|
192
|
+
|
|
193
|
+
/// Atomic byte counter behind an [`Arc`], used to remember how much memory
/// usage has been reported on behalf of a tracked value.
#[repr(transparent)]
#[derive(Debug)]
struct MemsizeDelta(Arc<AtomicIsize>);
|
|
196
|
+
|
|
197
|
+
impl MemsizeDelta {
|
|
198
|
+
fn new(delta: isize) -> Self {
|
|
199
|
+
let delta = TrackingAllocator::adjust_memory_usage(delta);
|
|
200
|
+
Self(Arc::new(AtomicIsize::new(delta)))
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
fn add(&self, delta: usize) {
|
|
204
|
+
if delta == 0 {
|
|
205
|
+
return;
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
let delta = TrackingAllocator::adjust_memory_usage(delta as _);
|
|
209
|
+
self.0.fetch_add(delta as _, Ordering::SeqCst);
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
fn sub(&self, delta: usize) {
|
|
213
|
+
if delta == 0 {
|
|
214
|
+
return;
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
let delta = TrackingAllocator::adjust_memory_usage(-(delta as isize));
|
|
218
|
+
self.0.fetch_add(delta, Ordering::SeqCst);
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
fn get(&self) -> isize {
|
|
222
|
+
self.0.load(Ordering::SeqCst)
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
impl Clone for MemsizeDelta {
|
|
227
|
+
fn clone(&self) -> Self {
|
|
228
|
+
Self(Arc::clone(&self.0))
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
impl Drop for MemsizeDelta {
|
|
233
|
+
fn drop(&mut self) {
|
|
234
|
+
let memsize = self.0.swap(0, Ordering::SeqCst);
|
|
235
|
+
TrackingAllocator::adjust_memory_usage(0 - memsize);
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
/// A guard which adjusts the memory usage reported to the Ruby GC by `delta`.
|
|
240
|
+
/// This allows you to track resources which are invisible to the Rust
|
|
241
|
+
/// allocator, such as items that are known to internally use `mmap` or direct
|
|
242
|
+
/// `malloc` in their implementation.
|
|
243
|
+
///
|
|
244
|
+
/// Internally, it uses an [`Arc<AtomicIsize>`] to track the memory usage delta,
|
|
245
|
+
/// and is safe to clone when `T` is [`Clone`].
|
|
246
|
+
///
|
|
247
|
+
/// # Example
|
|
248
|
+
/// ```
|
|
249
|
+
/// use rb_sys::tracking_allocator::ManuallyTracked;
|
|
250
|
+
///
|
|
251
|
+
/// type SomethingThatUsedMmap = ();
|
|
252
|
+
///
|
|
253
|
+
/// // Will tell the Ruby GC that 1024 bytes were allocated.
|
|
254
|
+
/// let item = ManuallyTracked::new(SomethingThatUsedMmap, 1024);
|
|
255
|
+
///
|
|
256
|
+
/// // Will tell the Ruby GC that 1024 bytes were freed.
|
|
257
|
+
/// std::mem::drop(item);
|
|
258
|
+
/// ```
|
|
259
|
+
pub struct ManuallyTracked<T> {
|
|
260
|
+
item: T,
|
|
261
|
+
memsize_delta: MemsizeDelta,
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
impl<T> ManuallyTracked<T> {
|
|
265
|
+
/// Create a new `ManuallyTracked<T>`, and immediately report that `memsize`
|
|
266
|
+
/// bytes were allocated.
|
|
267
|
+
pub fn wrap(item: T, memsize: usize) -> Self {
|
|
268
|
+
Self {
|
|
269
|
+
item,
|
|
270
|
+
memsize_delta: MemsizeDelta::new(memsize as _),
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
/// Increase the memory usage reported to the Ruby GC by `memsize` bytes.
|
|
275
|
+
pub fn increase_memory_usage(&self, memsize: usize) {
|
|
276
|
+
self.memsize_delta.add(memsize);
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
/// Decrease the memory usage reported to the Ruby GC by `memsize` bytes.
|
|
280
|
+
pub fn decrease_memory_usage(&self, memsize: usize) {
|
|
281
|
+
self.memsize_delta.sub(memsize);
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
/// Get the current memory usage delta.
|
|
285
|
+
pub fn memsize_delta(&self) -> isize {
|
|
286
|
+
self.memsize_delta.get()
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
/// Get a shared reference to the inner `T`.
|
|
290
|
+
pub fn get(&self) -> &T {
|
|
291
|
+
&self.item
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
/// Get a mutable reference to the inner `T`.
|
|
295
|
+
pub fn get_mut(&mut self) -> &mut T {
|
|
296
|
+
&mut self.item
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
impl ManuallyTracked<()> {
|
|
301
|
+
/// Create a new `ManuallyTracked<()>`, and immediately report that
|
|
302
|
+
/// `memsize` bytes were allocated.
|
|
303
|
+
pub fn new(memsize: usize) -> Self {
|
|
304
|
+
Self::wrap((), memsize)
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
impl Default for ManuallyTracked<()> {
|
|
309
|
+
fn default() -> Self {
|
|
310
|
+
Self::wrap((), 0)
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
impl<T: Clone> Clone for ManuallyTracked<T> {
|
|
315
|
+
fn clone(&self) -> Self {
|
|
316
|
+
Self {
|
|
317
|
+
item: self.item.clone(),
|
|
318
|
+
memsize_delta: self.memsize_delta.clone(),
|
|
319
|
+
}
|
|
320
|
+
}
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
impl<T: std::fmt::Debug> std::fmt::Debug for ManuallyTracked<T> {
|
|
324
|
+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
|
325
|
+
f.debug_struct("ManuallyTracked")
|
|
326
|
+
.field("item", &self.item)
|
|
327
|
+
.field("memsize_delta", &self.memsize_delta)
|
|
328
|
+
.finish()
|
|
329
|
+
}
|
|
330
|
+
}
|