osv 0.3.15 → 0.3.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/Cargo.lock +11 -1
 - data/README.md +27 -27
 - data/ext/osv/Cargo.toml +1 -0
 - data/ext/osv/src/csv/builder.rs +92 -85
 - data/ext/osv/src/csv/header_cache.rs +105 -26
 - data/ext/osv/src/csv/mod.rs +2 -2
 - data/ext/osv/src/csv/parser.rs +22 -85
 - data/ext/osv/src/csv/record.rs +25 -8
 - data/ext/osv/src/csv/record_reader.rs +53 -118
 - data/ext/osv/src/csv/ruby_integration.rs +10 -21
 - data/ext/osv/src/csv/ruby_reader.rs +9 -4
 - data/ext/osv/src/reader.rs +64 -46
 - data/ext/osv/src/utils.rs +4 -12
 - data/lib/osv/version.rb +1 -1
 - metadata +2 -2
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA256:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 4469c67b2a39d9ffa23923e36cd894eac415ca004a432e700102a334af11efd8
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 8dee3117fe6511b9c5b6005ae37d991891e0f314508986743b659080c7885855
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: d8c94dc1c576cca0043c7501752bdd6dee0c8bf0523d9c99a0e8ab4d614a0eb4e6f087fa62be97bb5816f9998f2c414758ffcab260e90889afada8379fb03aec
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: c51ece65a713af0b351a183415816302fcdc35ad598d0e5ee9e5b693c1ef66826c5dfc3dab90f04499c90e994e590e6dd7121999b5dfe54ce20997e41df0ac02
         
     | 
    
        data/Cargo.lock
    CHANGED
    
    | 
         @@ -45,7 +45,7 @@ dependencies = [ 
     | 
|
| 
       45 
45 
     | 
    
         
             
             "bitflags",
         
     | 
| 
       46 
46 
     | 
    
         
             
             "cexpr",
         
     | 
| 
       47 
47 
     | 
    
         
             
             "clang-sys",
         
     | 
| 
       48 
     | 
    
         
            -
             "itertools",
         
     | 
| 
      
 48 
     | 
    
         
            +
             "itertools 0.12.1",
         
     | 
| 
       49 
49 
     | 
    
         
             
             "lazy_static",
         
     | 
| 
       50 
50 
     | 
    
         
             
             "lazycell",
         
     | 
| 
       51 
51 
     | 
    
         
             
             "proc-macro2",
         
     | 
| 
         @@ -175,6 +175,15 @@ dependencies = [ 
     | 
|
| 
       175 
175 
     | 
    
         
             
             "either",
         
     | 
| 
       176 
176 
     | 
    
         
             
            ]
         
     | 
| 
       177 
177 
     | 
    
         | 
| 
      
 178 
     | 
    
         
            +
            [[package]]
         
     | 
| 
      
 179 
     | 
    
         
            +
            name = "itertools"
         
     | 
| 
      
 180 
     | 
    
         
            +
            version = "0.14.0"
         
     | 
| 
      
 181 
     | 
    
         
            +
            source = "registry+https://github.com/rust-lang/crates.io-index"
         
     | 
| 
      
 182 
     | 
    
         
            +
            checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285"
         
     | 
| 
      
 183 
     | 
    
         
            +
            dependencies = [
         
     | 
| 
      
 184 
     | 
    
         
            +
             "either",
         
     | 
| 
      
 185 
     | 
    
         
            +
            ]
         
     | 
| 
      
 186 
     | 
    
         
            +
             
     | 
| 
       178 
187 
     | 
    
         
             
            [[package]]
         
     | 
| 
       179 
188 
     | 
    
         
             
            name = "itoa"
         
     | 
| 
       180 
189 
     | 
    
         
             
            version = "1.0.14"
         
     | 
| 
         @@ -347,6 +356,7 @@ dependencies = [ 
     | 
|
| 
       347 
356 
     | 
    
         
             
             "ahash",
         
     | 
| 
       348 
357 
     | 
    
         
             
             "csv",
         
     | 
| 
       349 
358 
     | 
    
         
             
             "flate2",
         
     | 
| 
      
 359 
     | 
    
         
            +
             "itertools 0.14.0",
         
     | 
| 
       350 
360 
     | 
    
         
             
             "jemallocator",
         
     | 
| 
       351 
361 
     | 
    
         
             
             "kanal",
         
     | 
| 
       352 
362 
     | 
    
         
             
             "magnus 0.7.1",
         
     | 
    
        data/README.md
    CHANGED
    
    | 
         @@ -121,7 +121,7 @@ Here's some unscientific benchmarks. You can find the code in the [benchmark/com 
     | 
|
| 
       121 
121 
     | 
    
         
             
            ### 1,000,000 records
         
     | 
| 
       122 
122 
     | 
    
         | 
| 
       123 
123 
     | 
    
         
             
            ```
         
     | 
| 
       124 
     | 
    
         
            -
            🏃 
     | 
| 
      
 124 
     | 
    
         
            +
            🏃 Running benchmarks...
         
     | 
| 
       125 
125 
     | 
    
         
             
            Benchmarking with 3000001 lines of data
         
     | 
| 
       126 
126 
     | 
    
         | 
| 
       127 
127 
     | 
    
         
             
            ruby 3.3.6 (2024-11-05 revision 75015d4c1f) +YJIT [arm64-darwin24]
         
     | 
| 
         @@ -142,34 +142,34 @@ OSV - Gzipped Direct     1.000 i/100ms 
     | 
|
| 
       142 
142 
     | 
    
         
             
               FastCSV - Gzipped     1.000 i/100ms
         
     | 
| 
       143 
143 
     | 
    
         
             
                   CSV - Gzipped     1.000 i/100ms
         
     | 
| 
       144 
144 
     | 
    
         
             
            Calculating -------------------------------------
         
     | 
| 
       145 
     | 
    
         
            -
                  CSV - StringIO      0. 
     | 
| 
       146 
     | 
    
         
            -
              FastCSV - StringIO      0. 
     | 
| 
       147 
     | 
    
         
            -
                  OSV - StringIO      0. 
     | 
| 
       148 
     | 
    
         
            -
               CSV - Hash output      0. 
     | 
| 
       149 
     | 
    
         
            -
               OSV - Hash output      0. 
     | 
| 
       150 
     | 
    
         
            -
              CSV - Array output      0.066 (± 0.0%) i/s    (15. 
     | 
| 
       151 
     | 
    
         
            -
              OSV - Array output      0. 
     | 
| 
      
 145 
     | 
    
         
            +
                  CSV - StringIO      0.080 (± 0.0%) i/s    (12.43 s/i) -      3.000 in  37.301114s
         
     | 
| 
      
 146 
     | 
    
         
            +
              FastCSV - StringIO      0.368 (± 0.0%) i/s     (2.72 s/i) -     12.000 in  32.619020s
         
     | 
| 
      
 147 
     | 
    
         
            +
                  OSV - StringIO      0.699 (± 0.0%) i/s     (1.43 s/i) -     21.000 in  30.091225s
         
     | 
| 
      
 148 
     | 
    
         
            +
               CSV - Hash output      0.059 (± 0.0%) i/s    (16.95 s/i) -      2.000 in  33.908533s
         
     | 
| 
      
 149 
     | 
    
         
            +
               OSV - Hash output      0.329 (± 0.0%) i/s     (3.04 s/i) -     10.000 in  30.551275s
         
     | 
| 
      
 150 
     | 
    
         
            +
              CSV - Array output      0.066 (± 0.0%) i/s    (15.18 s/i) -      2.000 in  30.357327s
         
     | 
| 
      
 151 
     | 
    
         
            +
              OSV - Array output      0.632 (± 0.0%) i/s     (1.58 s/i) -     19.000 in  30.150113s
         
     | 
| 
       152 
152 
     | 
    
         
             
            FastCSV - Array output
         
     | 
| 
       153 
     | 
    
         
            -
                                      0. 
     | 
| 
      
 153 
     | 
    
         
            +
                                      0.350 (± 0.0%) i/s     (2.86 s/i) -     11.000 in  31.477268s
         
     | 
| 
       154 
154 
     | 
    
         
             
            OSV - Direct Open Array output
         
     | 
| 
       155 
     | 
    
         
            -
                                      0. 
     | 
| 
       156 
     | 
    
         
            -
                   OSV - Gzipped      0. 
     | 
| 
       157 
     | 
    
         
            -
            OSV - Gzipped Direct      0. 
     | 
| 
       158 
     | 
    
         
            -
               FastCSV - Gzipped      0. 
     | 
| 
       159 
     | 
    
         
            -
                   CSV - Gzipped      0. 
     | 
| 
      
 155 
     | 
    
         
            +
                                      0.641 (± 0.0%) i/s     (1.56 s/i) -     20.000 in  31.275201s
         
     | 
| 
      
 156 
     | 
    
         
            +
                   OSV - Gzipped      0.530 (± 0.0%) i/s     (1.89 s/i) -     16.000 in  30.183753s
         
     | 
| 
      
 157 
     | 
    
         
            +
            OSV - Gzipped Direct      0.727 (± 0.0%) i/s     (1.37 s/i) -     22.000 in  30.283991s
         
     | 
| 
      
 158 
     | 
    
         
            +
               FastCSV - Gzipped      0.323 (± 0.0%) i/s     (3.09 s/i) -     10.000 in  30.949600s
         
     | 
| 
      
 159 
     | 
    
         
            +
                   CSV - Gzipped      0.056 (± 0.0%) i/s    (17.72 s/i) -      2.000 in  35.440473s
         
     | 
| 
       160 
160 
     | 
    
         | 
| 
       161 
161 
     | 
    
         
             
            Comparison:
         
     | 
| 
       162 
     | 
    
         
            -
            OSV - Direct 
     | 
| 
       163 
     | 
    
         
            -
            OSV -  
     | 
| 
       164 
     | 
    
         
            -
             
     | 
| 
       165 
     | 
    
         
            -
             
     | 
| 
       166 
     | 
    
         
            -
                   OSV - Gzipped:        0.5 i/s - 1. 
     | 
| 
       167 
     | 
    
         
            -
              FastCSV - StringIO:        0.4 i/s - 1. 
     | 
| 
       168 
     | 
    
         
            -
            FastCSV - Array output:        0. 
     | 
| 
       169 
     | 
    
         
            -
                
     | 
| 
       170 
     | 
    
         
            -
                
     | 
| 
       171 
     | 
    
         
            -
                  CSV - StringIO:        0.1 i/s - 9. 
     | 
| 
       172 
     | 
    
         
            -
              CSV - Array output:        0.1 i/s -  
     | 
| 
       173 
     | 
    
         
            -
               CSV - Hash output:        0.1 i/s - 12. 
     | 
| 
       174 
     | 
    
         
            -
                   CSV - Gzipped:        0.1 i/s - 12. 
     | 
| 
      
 162 
     | 
    
         
            +
            OSV - Gzipped Direct:        0.7 i/s
         
     | 
| 
      
 163 
     | 
    
         
            +
                  OSV - StringIO:        0.7 i/s - 1.04x  slower
         
     | 
| 
      
 164 
     | 
    
         
            +
            OSV - Direct Open Array output:        0.6 i/s - 1.14x  slower
         
     | 
| 
      
 165 
     | 
    
         
            +
              OSV - Array output:        0.6 i/s - 1.15x  slower
         
     | 
| 
      
 166 
     | 
    
         
            +
                   OSV - Gzipped:        0.5 i/s - 1.37x  slower
         
     | 
| 
      
 167 
     | 
    
         
            +
              FastCSV - StringIO:        0.4 i/s - 1.98x  slower
         
     | 
| 
      
 168 
     | 
    
         
            +
            FastCSV - Array output:        0.3 i/s - 2.08x  slower
         
     | 
| 
      
 169 
     | 
    
         
            +
               OSV - Hash output:        0.3 i/s - 2.21x  slower
         
     | 
| 
      
 170 
     | 
    
         
            +
               FastCSV - Gzipped:        0.3 i/s - 2.25x  slower
         
     | 
| 
      
 171 
     | 
    
         
            +
                  CSV - StringIO:        0.1 i/s - 9.04x  slower
         
     | 
| 
      
 172 
     | 
    
         
            +
              CSV - Array output:        0.1 i/s - 11.04x  slower
         
     | 
| 
      
 173 
     | 
    
         
            +
               CSV - Hash output:        0.1 i/s - 12.33x  slower
         
     | 
| 
      
 174 
     | 
    
         
            +
                   CSV - Gzipped:        0.1 i/s - 12.89x  slower
         
     | 
| 
       175 
175 
     | 
    
         
             
            ```
         
     | 
    
        data/ext/osv/Cargo.toml
    CHANGED
    
    | 
         @@ -16,6 +16,7 @@ rb-sys = "^0.9" 
     | 
|
| 
       16 
16 
     | 
    
         
             
            serde = { version = "1.0", features = ["derive"] }
         
     | 
| 
       17 
17 
     | 
    
         
             
            serde_magnus = "0.8.1"
         
     | 
| 
       18 
18 
     | 
    
         
             
            thiserror = "2.0"
         
     | 
| 
      
 19 
     | 
    
         
            +
            itertools = "^0.14"
         
     | 
| 
       19 
20 
     | 
    
         | 
| 
       20 
21 
     | 
    
         
             
            [target.'cfg(target_os = "linux")'.dependencies]
         
     | 
| 
       21 
22 
     | 
    
         
             
            jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] }
         
     | 
    
        data/ext/osv/src/csv/builder.rs
    CHANGED
    
    | 
         @@ -6,8 +6,10 @@ use super::{ 
     | 
|
| 
       6 
6 
     | 
    
         
             
                ForgottenFileHandle,
         
     | 
| 
       7 
7 
     | 
    
         
             
            };
         
     | 
| 
       8 
8 
     | 
    
         
             
            use flate2::read::GzDecoder;
         
     | 
| 
       9 
     | 
    
         
            -
            use magnus::{rb_sys::AsRawValue, value::ReprValue, Error as MagnusError, Ruby, Value};
         
     | 
| 
      
 9 
     | 
    
         
            +
            use magnus::{rb_sys::AsRawValue, value::ReprValue, Error as MagnusError, RString, Ruby, Value};
         
     | 
| 
       10 
10 
     | 
    
         
             
            use std::{
         
     | 
| 
      
 11 
     | 
    
         
            +
                borrow::Cow,
         
     | 
| 
      
 12 
     | 
    
         
            +
                fmt::Debug,
         
     | 
| 
       11 
13 
     | 
    
         
             
                fs::File,
         
     | 
| 
       12 
14 
     | 
    
         
             
                io::{self, BufReader, Read},
         
     | 
| 
       13 
15 
     | 
    
         
             
                marker::PhantomData,
         
     | 
| 
         @@ -17,18 +19,21 @@ use std::{ 
     | 
|
| 
       17 
19 
     | 
    
         | 
| 
       18 
20 
     | 
    
         
             
            use thiserror::Error;
         
     | 
| 
       19 
21 
     | 
    
         | 
| 
       20 
     | 
    
         
            -
             
     | 
| 
       21 
     | 
    
         
            -
             
     | 
| 
      
 22 
     | 
    
         
            +
            /// Errors that can occur when building a RecordReader
         
     | 
| 
       22 
23 
     | 
    
         
             
            #[derive(Error, Debug)]
         
     | 
| 
       23 
24 
     | 
    
         
             
            pub enum ReaderError {
         
     | 
| 
       24 
25 
     | 
    
         
             
                #[error("Failed to get file descriptor: {0}")]
         
     | 
| 
       25 
26 
     | 
    
         
             
                FileDescriptor(String),
         
     | 
| 
       26 
     | 
    
         
            -
                #[error("Invalid file descriptor")]
         
     | 
| 
       27 
     | 
    
         
            -
                InvalidFileDescriptor,
         
     | 
| 
      
 27 
     | 
    
         
            +
                #[error("Invalid file descriptor: {0}")]
         
     | 
| 
      
 28 
     | 
    
         
            +
                InvalidFileDescriptor(i32),
         
     | 
| 
       28 
29 
     | 
    
         
             
                #[error("Failed to open file: {0}")]
         
     | 
| 
       29 
30 
     | 
    
         
             
                FileOpen(#[from] io::Error),
         
     | 
| 
       30 
31 
     | 
    
         
             
                #[error("Failed to intern headers: {0}")]
         
     | 
| 
       31 
32 
     | 
    
         
             
                HeaderIntern(#[from] CacheError),
         
     | 
| 
      
 33 
     | 
    
         
            +
                #[error("Invalid flexible default value: {0}")]
         
     | 
| 
      
 34 
     | 
    
         
            +
                InvalidFlexibleDefault(String),
         
     | 
| 
      
 35 
     | 
    
         
            +
                #[error("Invalid null string value: {0}")]
         
     | 
| 
      
 36 
     | 
    
         
            +
                InvalidNullString(String),
         
     | 
| 
       32 
37 
     | 
    
         
             
                #[error("Ruby error: {0}")]
         
     | 
| 
       33 
38 
     | 
    
         
             
                Ruby(String),
         
     | 
| 
       34 
39 
     | 
    
         
             
            }
         
     | 
| 
         @@ -48,63 +53,27 @@ impl From<ReaderError> for MagnusError { 
     | 
|
| 
       48 
53 
     | 
    
         
             
                }
         
     | 
| 
       49 
54 
     | 
    
         
             
            }
         
     | 
| 
       50 
55 
     | 
    
         | 
| 
       51 
     | 
    
         
            -
             
     | 
| 
       52 
     | 
    
         
            -
             
     | 
| 
      
 56 
     | 
    
         
            +
            /// Builder for configuring and creating a RecordReader instance.
         
     | 
| 
      
 57 
     | 
    
         
            +
            ///
         
     | 
| 
      
 58 
     | 
    
         
            +
            /// This struct provides a fluent interface for setting up CSV parsing options
         
     | 
| 
      
 59 
     | 
    
         
            +
            /// and creating a RecordReader with the specified configuration.
         
     | 
| 
      
 60 
     | 
    
         
            +
            pub struct RecordReaderBuilder<'a, T: RecordParser<'a>> {
         
     | 
| 
      
 61 
     | 
    
         
            +
                ruby: Ruby,
         
     | 
| 
       53 
62 
     | 
    
         
             
                to_read: Value,
         
     | 
| 
       54 
63 
     | 
    
         
             
                has_headers: bool,
         
     | 
| 
       55 
64 
     | 
    
         
             
                delimiter: u8,
         
     | 
| 
       56 
65 
     | 
    
         
             
                quote_char: u8,
         
     | 
| 
       57 
66 
     | 
    
         
             
                null_string: Option<String>,
         
     | 
| 
       58 
     | 
    
         
            -
                buffer: usize,
         
     | 
| 
       59 
67 
     | 
    
         
             
                flexible: bool,
         
     | 
| 
       60 
     | 
    
         
            -
                flexible_default: Option 
     | 
| 
      
 68 
     | 
    
         
            +
                flexible_default: Option<String>,
         
     | 
| 
       61 
69 
     | 
    
         
             
                trim: csv::Trim,
         
     | 
| 
       62 
70 
     | 
    
         
             
                _phantom: PhantomData<T>,
         
     | 
| 
      
 71 
     | 
    
         
            +
                _phantom_a: PhantomData<&'a ()>,
         
     | 
| 
       63 
72 
     | 
    
         
             
            }
         
     | 
| 
       64 
73 
     | 
    
         | 
| 
       65 
     | 
    
         
            -
            impl<T: RecordParser<' 
     | 
| 
       66 
     | 
    
         
            -
                 
     | 
| 
       67 
     | 
    
         
            -
             
     | 
| 
       68 
     | 
    
         
            -
                    readable: Box<dyn Read + Send + 'static>,
         
     | 
| 
       69 
     | 
    
         
            -
                ) -> Result<RecordReader<'static, T>, ReaderError> {
         
     | 
| 
       70 
     | 
    
         
            -
                    let flexible = self.flexible || self.flexible_default.is_some();
         
     | 
| 
       71 
     | 
    
         
            -
                    let mut reader = csv::ReaderBuilder::new()
         
     | 
| 
       72 
     | 
    
         
            -
                        .has_headers(self.has_headers)
         
     | 
| 
       73 
     | 
    
         
            -
                        .delimiter(self.delimiter)
         
     | 
| 
       74 
     | 
    
         
            -
                        .quote(self.quote_char)
         
     | 
| 
       75 
     | 
    
         
            -
                        .flexible(flexible)
         
     | 
| 
       76 
     | 
    
         
            -
                        .trim(self.trim)
         
     | 
| 
       77 
     | 
    
         
            -
                        .from_reader(readable);
         
     | 
| 
       78 
     | 
    
         
            -
             
     | 
| 
       79 
     | 
    
         
            -
                    let headers = RecordReader::<T>::get_headers(self.ruby, &mut reader, self.has_headers)?;
         
     | 
| 
       80 
     | 
    
         
            -
                    let static_headers = StringCache::intern_many(&headers)?;
         
     | 
| 
       81 
     | 
    
         
            -
             
     | 
| 
       82 
     | 
    
         
            -
                    Ok(RecordReader::new_multi_threaded(
         
     | 
| 
       83 
     | 
    
         
            -
                        reader,
         
     | 
| 
       84 
     | 
    
         
            -
                        static_headers,
         
     | 
| 
       85 
     | 
    
         
            -
                        self.buffer,
         
     | 
| 
       86 
     | 
    
         
            -
                        self.null_string,
         
     | 
| 
       87 
     | 
    
         
            -
                        self.flexible_default,
         
     | 
| 
       88 
     | 
    
         
            -
                    ))
         
     | 
| 
       89 
     | 
    
         
            -
                }
         
     | 
| 
       90 
     | 
    
         
            -
             
     | 
| 
       91 
     | 
    
         
            -
                pub fn build_threaded(self) -> Result<RecordReader<'static, T>, ReaderError> {
         
     | 
| 
       92 
     | 
    
         
            -
                    if self.to_read.is_kind_of(self.ruby.class_io()) {
         
     | 
| 
       93 
     | 
    
         
            -
                        let readable = self.handle_file_descriptor()?;
         
     | 
| 
       94 
     | 
    
         
            -
                        self.build_multi_threaded(readable)
         
     | 
| 
       95 
     | 
    
         
            -
                    } else if self.to_read.is_kind_of(self.ruby.class_string()) {
         
     | 
| 
       96 
     | 
    
         
            -
                        let readable = self.handle_file_path()?;
         
     | 
| 
       97 
     | 
    
         
            -
                        self.build_multi_threaded(readable)
         
     | 
| 
       98 
     | 
    
         
            -
                    } else {
         
     | 
| 
       99 
     | 
    
         
            -
                        let readable = build_ruby_reader(self.ruby, self.to_read)?;
         
     | 
| 
       100 
     | 
    
         
            -
                        let buffered_reader = BufReader::with_capacity(READ_BUFFER_SIZE, readable);
         
     | 
| 
       101 
     | 
    
         
            -
                        self.build_single_threaded(buffered_reader)
         
     | 
| 
       102 
     | 
    
         
            -
                    }
         
     | 
| 
       103 
     | 
    
         
            -
                }
         
     | 
| 
       104 
     | 
    
         
            -
            }
         
     | 
| 
       105 
     | 
    
         
            -
             
     | 
| 
       106 
     | 
    
         
            -
            impl<'a, T: RecordParser<'a> + Send> RecordReaderBuilder<'a, T> {
         
     | 
| 
       107 
     | 
    
         
            -
                pub fn new(ruby: &'a Ruby, to_read: Value) -> Self {
         
     | 
| 
      
 74 
     | 
    
         
            +
            impl<'a, T: RecordParser<'a>> RecordReaderBuilder<'a, T> {
         
     | 
| 
      
 75 
     | 
    
         
            +
                /// Creates a new builder instance with default settings.
         
     | 
| 
      
 76 
     | 
    
         
            +
                pub fn new(ruby: Ruby, to_read: Value) -> Self {
         
     | 
| 
       108 
77 
     | 
    
         
             
                    Self {
         
     | 
| 
       109 
78 
     | 
    
         
             
                        ruby,
         
     | 
| 
       110 
79 
     | 
    
         
             
                        to_read,
         
     | 
| 
         @@ -112,92 +81,107 @@ impl<'a, T: RecordParser<'a> + Send> RecordReaderBuilder<'a, T> { 
     | 
|
| 
       112 
81 
     | 
    
         
             
                        delimiter: b',',
         
     | 
| 
       113 
82 
     | 
    
         
             
                        quote_char: b'"',
         
     | 
| 
       114 
83 
     | 
    
         
             
                        null_string: None,
         
     | 
| 
       115 
     | 
    
         
            -
                        buffer: BUFFER_CHANNEL_SIZE,
         
     | 
| 
       116 
84 
     | 
    
         
             
                        flexible: false,
         
     | 
| 
       117 
85 
     | 
    
         
             
                        flexible_default: None,
         
     | 
| 
       118 
86 
     | 
    
         
             
                        trim: csv::Trim::None,
         
     | 
| 
       119 
87 
     | 
    
         
             
                        _phantom: PhantomData,
         
     | 
| 
      
 88 
     | 
    
         
            +
                        _phantom_a: PhantomData,
         
     | 
| 
       120 
89 
     | 
    
         
             
                    }
         
     | 
| 
       121 
90 
     | 
    
         
             
                }
         
     | 
| 
       122 
91 
     | 
    
         | 
| 
      
 92 
     | 
    
         
            +
                /// Sets whether the CSV file has headers.
         
     | 
| 
      
 93 
     | 
    
         
            +
                #[must_use]
         
     | 
| 
       123 
94 
     | 
    
         
             
                pub fn has_headers(mut self, has_headers: bool) -> Self {
         
     | 
| 
       124 
95 
     | 
    
         
             
                    self.has_headers = has_headers;
         
     | 
| 
       125 
96 
     | 
    
         
             
                    self
         
     | 
| 
       126 
97 
     | 
    
         
             
                }
         
     | 
| 
       127 
98 
     | 
    
         | 
| 
      
 99 
     | 
    
         
            +
                /// Sets the delimiter character for the CSV.
         
     | 
| 
      
 100 
     | 
    
         
            +
                #[must_use]
         
     | 
| 
       128 
101 
     | 
    
         
             
                pub fn delimiter(mut self, delimiter: u8) -> Self {
         
     | 
| 
       129 
102 
     | 
    
         
             
                    self.delimiter = delimiter;
         
     | 
| 
       130 
103 
     | 
    
         
             
                    self
         
     | 
| 
       131 
104 
     | 
    
         
             
                }
         
     | 
| 
       132 
105 
     | 
    
         | 
| 
      
 106 
     | 
    
         
            +
                /// Sets the quote character for the CSV.
         
     | 
| 
      
 107 
     | 
    
         
            +
                #[must_use]
         
     | 
| 
       133 
108 
     | 
    
         
             
                pub fn quote_char(mut self, quote_char: u8) -> Self {
         
     | 
| 
       134 
109 
     | 
    
         
             
                    self.quote_char = quote_char;
         
     | 
| 
       135 
110 
     | 
    
         
             
                    self
         
     | 
| 
       136 
111 
     | 
    
         
             
                }
         
     | 
| 
       137 
112 
     | 
    
         | 
| 
      
 113 
     | 
    
         
            +
                /// Sets the string that should be interpreted as null.
         
     | 
| 
      
 114 
     | 
    
         
            +
                #[must_use]
         
     | 
| 
       138 
115 
     | 
    
         
             
                pub fn null_string(mut self, null_string: Option<String>) -> Self {
         
     | 
| 
       139 
116 
     | 
    
         
             
                    self.null_string = null_string;
         
     | 
| 
       140 
117 
     | 
    
         
             
                    self
         
     | 
| 
       141 
118 
     | 
    
         
             
                }
         
     | 
| 
       142 
119 
     | 
    
         | 
| 
       143 
     | 
    
         
            -
                 
     | 
| 
       144 
     | 
    
         
            -
             
     | 
| 
       145 
     | 
    
         
            -
                    self
         
     | 
| 
       146 
     | 
    
         
            -
                }
         
     | 
| 
       147 
     | 
    
         
            -
             
     | 
| 
      
 120 
     | 
    
         
            +
                /// Sets whether the reader should be flexible with field counts.
         
     | 
| 
      
 121 
     | 
    
         
            +
                #[must_use]
         
     | 
| 
       148 
122 
     | 
    
         
             
                pub fn flexible(mut self, flexible: bool) -> Self {
         
     | 
| 
       149 
123 
     | 
    
         
             
                    self.flexible = flexible;
         
     | 
| 
       150 
124 
     | 
    
         
             
                    self
         
     | 
| 
       151 
125 
     | 
    
         
             
                }
         
     | 
| 
       152 
126 
     | 
    
         | 
| 
       153 
     | 
    
         
            -
                 
     | 
| 
      
 127 
     | 
    
         
            +
                /// Sets the default value for missing fields when in flexible mode.
         
     | 
| 
      
 128 
     | 
    
         
            +
                #[must_use]
         
     | 
| 
      
 129 
     | 
    
         
            +
                pub fn flexible_default(mut self, flexible_default: Option<String>) -> Self {
         
     | 
| 
       154 
130 
     | 
    
         
             
                    self.flexible_default = flexible_default;
         
     | 
| 
       155 
131 
     | 
    
         
             
                    self
         
     | 
| 
       156 
132 
     | 
    
         
             
                }
         
     | 
| 
       157 
133 
     | 
    
         | 
| 
      
 134 
     | 
    
         
            +
                /// Sets the trimming mode for fields.
         
     | 
| 
      
 135 
     | 
    
         
            +
                #[must_use]
         
     | 
| 
       158 
136 
     | 
    
         
             
                pub fn trim(mut self, trim: csv::Trim) -> Self {
         
     | 
| 
       159 
137 
     | 
    
         
             
                    self.trim = trim;
         
     | 
| 
       160 
138 
     | 
    
         
             
                    self
         
     | 
| 
       161 
139 
     | 
    
         
             
                }
         
     | 
| 
       162 
140 
     | 
    
         | 
| 
       163 
     | 
    
         
            -
                 
     | 
| 
      
 141 
     | 
    
         
            +
                /// Handles reading from a file descriptor.
         
     | 
| 
      
 142 
     | 
    
         
            +
                fn handle_file_descriptor(&self) -> Result<Box<dyn SeekableRead>, ReaderError> {
         
     | 
| 
       164 
143 
     | 
    
         
             
                    let raw_value = self.to_read.as_raw();
         
     | 
| 
       165 
144 
     | 
    
         
             
                    let fd = std::panic::catch_unwind(|| unsafe { rb_sys::rb_io_descriptor(raw_value) })
         
     | 
| 
       166 
     | 
    
         
            -
                        .map_err(| 
     | 
| 
       167 
     | 
    
         
            -
                            ReaderError::FileDescriptor("Failed to get file descriptor".to_string())
         
     | 
| 
       168 
     | 
    
         
            -
                        })?;
         
     | 
| 
      
 145 
     | 
    
         
            +
                        .map_err(|e| ReaderError::FileDescriptor(format!("{:?}", e)))?;
         
     | 
| 
       169 
146 
     | 
    
         | 
| 
       170 
147 
     | 
    
         
             
                    if fd < 0 {
         
     | 
| 
       171 
     | 
    
         
            -
                        return Err(ReaderError::InvalidFileDescriptor);
         
     | 
| 
      
 148 
     | 
    
         
            +
                        return Err(ReaderError::InvalidFileDescriptor(fd));
         
     | 
| 
       172 
149 
     | 
    
         
             
                    }
         
     | 
| 
       173 
150 
     | 
    
         | 
| 
       174 
151 
     | 
    
         
             
                    let file = unsafe { File::from_raw_fd(fd) };
         
     | 
| 
       175 
152 
     | 
    
         
             
                    let forgotten = ForgottenFileHandle(ManuallyDrop::new(file));
         
     | 
| 
       176 
     | 
    
         
            -
                    Ok(Box::new( 
     | 
| 
       177 
     | 
    
         
            -
                        READ_BUFFER_SIZE,
         
     | 
| 
       178 
     | 
    
         
            -
                        forgotten,
         
     | 
| 
       179 
     | 
    
         
            -
                    )))
         
     | 
| 
      
 153 
     | 
    
         
            +
                    Ok(Box::new(forgotten))
         
     | 
| 
       180 
154 
     | 
    
         
             
                }
         
     | 
| 
       181 
155 
     | 
    
         | 
| 
       182 
     | 
    
         
            -
                 
     | 
| 
      
 156 
     | 
    
         
            +
                /// Handles reading from a file path.
         
     | 
| 
      
 157 
     | 
    
         
            +
                fn handle_file_path(&self) -> Result<Box<dyn SeekableRead>, ReaderError> {
         
     | 
| 
       183 
158 
     | 
    
         
             
                    let path = self.to_read.to_r_string()?.to_string()?;
         
     | 
| 
       184 
159 
     | 
    
         
             
                    let file = File::open(&path)?;
         
     | 
| 
       185 
160 
     | 
    
         | 
| 
       186 
     | 
    
         
            -
                     
     | 
| 
       187 
     | 
    
         
            -
                         
     | 
| 
       188 
     | 
    
         
            -
             
     | 
| 
       189 
     | 
    
         
            -
             
     | 
| 
       190 
     | 
    
         
            -
                        ) 
     | 
| 
      
 161 
     | 
    
         
            +
                    if path.ends_with(".gz") {
         
     | 
| 
      
 162 
     | 
    
         
            +
                        // For gzipped files, we need to decompress them into memory first
         
     | 
| 
      
 163 
     | 
    
         
            +
                        // since GzDecoder doesn't support seeking
         
     | 
| 
      
 164 
     | 
    
         
            +
                        let mut decoder = GzDecoder::new(BufReader::with_capacity(READ_BUFFER_SIZE, file));
         
     | 
| 
      
 165 
     | 
    
         
            +
                        let mut contents = Vec::new();
         
     | 
| 
      
 166 
     | 
    
         
            +
                        decoder.read_to_end(&mut contents)?;
         
     | 
| 
      
 167 
     | 
    
         
            +
                        Ok(Box::new(std::io::Cursor::new(contents)))
         
     | 
| 
       191 
168 
     | 
    
         
             
                    } else {
         
     | 
| 
       192 
     | 
    
         
            -
                        Box::new( 
     | 
| 
       193 
     | 
    
         
            -
                    } 
     | 
| 
      
 169 
     | 
    
         
            +
                        Ok(Box::new(file))
         
     | 
| 
      
 170 
     | 
    
         
            +
                    }
         
     | 
| 
       194 
171 
     | 
    
         
             
                }
         
     | 
| 
       195 
172 
     | 
    
         | 
| 
       196 
     | 
    
         
            -
                 
     | 
| 
       197 
     | 
    
         
            -
             
     | 
| 
       198 
     | 
    
         
            -
                    readable 
     | 
| 
       199 
     | 
    
         
            -
             
     | 
| 
      
 173 
     | 
    
         
            +
                /// Builds the RecordReader with the configured options.
         
     | 
| 
      
 174 
     | 
    
         
            +
                pub fn build(self) -> Result<RecordReader<'a, T>, ReaderError> {
         
     | 
| 
      
 175 
     | 
    
         
            +
                    let readable = if self.to_read.is_kind_of(self.ruby.class_io()) {
         
     | 
| 
      
 176 
     | 
    
         
            +
                        self.handle_file_descriptor()?
         
     | 
| 
      
 177 
     | 
    
         
            +
                    } else if self.to_read.is_kind_of(self.ruby.class_string()) {
         
     | 
| 
      
 178 
     | 
    
         
            +
                        self.handle_file_path()?
         
     | 
| 
      
 179 
     | 
    
         
            +
                    } else {
         
     | 
| 
      
 180 
     | 
    
         
            +
                        build_ruby_reader(&self.ruby, self.to_read)?
         
     | 
| 
      
 181 
     | 
    
         
            +
                    };
         
     | 
| 
      
 182 
     | 
    
         
            +
             
     | 
| 
       200 
183 
     | 
    
         
             
                    let flexible = self.flexible || self.flexible_default.is_some();
         
     | 
| 
      
 184 
     | 
    
         
            +
                    let reader = BufReader::with_capacity(READ_BUFFER_SIZE, readable);
         
     | 
| 
       201 
185 
     | 
    
         | 
| 
       202 
186 
     | 
    
         
             
                    let mut reader = csv::ReaderBuilder::new()
         
     | 
| 
       203 
187 
     | 
    
         
             
                        .has_headers(self.has_headers)
         
     | 
| 
         @@ -205,16 +189,39 @@ impl<'a, T: RecordParser<'a> + Send> RecordReaderBuilder<'a, T> { 
     | 
|
| 
       205 
189 
     | 
    
         
             
                        .quote(self.quote_char)
         
     | 
| 
       206 
190 
     | 
    
         
             
                        .flexible(flexible)
         
     | 
| 
       207 
191 
     | 
    
         
             
                        .trim(self.trim)
         
     | 
| 
       208 
     | 
    
         
            -
                        .from_reader( 
     | 
| 
      
 192 
     | 
    
         
            +
                        .from_reader(reader);
         
     | 
| 
       209 
193 
     | 
    
         | 
| 
       210 
     | 
    
         
            -
                    let headers = RecordReader::<T>::get_headers(self.ruby, &mut reader, self.has_headers)?;
         
     | 
| 
      
 194 
     | 
    
         
            +
                    let headers = RecordReader::<T>::get_headers(&self.ruby, &mut reader, self.has_headers)?;
         
     | 
| 
       211 
195 
     | 
    
         
             
                    let static_headers = StringCache::intern_many(&headers)?;
         
     | 
| 
       212 
196 
     | 
    
         | 
| 
       213 
     | 
    
         
            -
                     
     | 
| 
      
 197 
     | 
    
         
            +
                    // We intern both of these to get static string references we can reuse throughout the parser.
         
     | 
| 
      
 198 
     | 
    
         
            +
                    let flexible_default = self
         
     | 
| 
      
 199 
     | 
    
         
            +
                        .flexible_default
         
     | 
| 
      
 200 
     | 
    
         
            +
                        .map(|s| {
         
     | 
| 
      
 201 
     | 
    
         
            +
                            RString::new(&s)
         
     | 
| 
      
 202 
     | 
    
         
            +
                                .to_interned_str()
         
     | 
| 
      
 203 
     | 
    
         
            +
                                .as_str()
         
     | 
| 
      
 204 
     | 
    
         
            +
                                .map_err(|e| ReaderError::InvalidFlexibleDefault(format!("{:?}", e)))
         
     | 
| 
      
 205 
     | 
    
         
            +
                        })
         
     | 
| 
      
 206 
     | 
    
         
            +
                        .transpose()?
         
     | 
| 
      
 207 
     | 
    
         
            +
                        .map(|s| Cow::Borrowed(s));
         
     | 
| 
      
 208 
     | 
    
         
            +
             
     | 
| 
      
 209 
     | 
    
         
            +
                    let null_string = self
         
     | 
| 
      
 210 
     | 
    
         
            +
                        .null_string
         
     | 
| 
      
 211 
     | 
    
         
            +
                        .map(|s| {
         
     | 
| 
      
 212 
     | 
    
         
            +
                            RString::new(&s)
         
     | 
| 
      
 213 
     | 
    
         
            +
                                .to_interned_str()
         
     | 
| 
      
 214 
     | 
    
         
            +
                                .as_str()
         
     | 
| 
      
 215 
     | 
    
         
            +
                                .map_err(|e| ReaderError::InvalidNullString(format!("{:?}", e)))
         
     | 
| 
      
 216 
     | 
    
         
            +
                        })
         
     | 
| 
      
 217 
     | 
    
         
            +
                        .transpose()?
         
     | 
| 
      
 218 
     | 
    
         
            +
                        .map(|s| Cow::Borrowed(s));
         
     | 
| 
      
 219 
     | 
    
         
            +
             
     | 
| 
      
 220 
     | 
    
         
            +
                    Ok(RecordReader::new(
         
     | 
| 
       214 
221 
     | 
    
         
             
                        reader,
         
     | 
| 
       215 
222 
     | 
    
         
             
                        static_headers,
         
     | 
| 
       216 
     | 
    
         
            -
                         
     | 
| 
       217 
     | 
    
         
            -
                         
     | 
| 
      
 223 
     | 
    
         
            +
                        null_string,
         
     | 
| 
      
 224 
     | 
    
         
            +
                        flexible_default,
         
     | 
| 
       218 
225 
     | 
    
         
             
                    ))
         
     | 
| 
       219 
226 
     | 
    
         
             
                }
         
     | 
| 
       220 
227 
     | 
    
         
             
            }
         
     | 
| 
         @@ -6,8 +6,14 @@ 
     | 
|
| 
       6 
6 
     | 
    
         
             
            /// so this optimization could be removed if any issues arise.
         
     | 
| 
       7 
7 
     | 
    
         
             
            use std::{
         
     | 
| 
       8 
8 
     | 
    
         
             
                collections::HashMap,
         
     | 
| 
       9 
     | 
    
         
            -
                sync::{ 
     | 
| 
      
 9 
     | 
    
         
            +
                sync::{
         
     | 
| 
      
 10 
     | 
    
         
            +
                    atomic::{AtomicU32, Ordering},
         
     | 
| 
      
 11 
     | 
    
         
            +
                    LazyLock, Mutex, OnceLock,
         
     | 
| 
      
 12 
     | 
    
         
            +
                },
         
     | 
| 
       10 
13 
     | 
    
         
             
            };
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
            use magnus::{r_string::FString, value::Opaque, IntoValue, RString, Ruby, Value};
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
       11 
17 
     | 
    
         
             
            use thiserror::Error;
         
     | 
| 
       12 
18 
     | 
    
         | 
| 
       13 
19 
     | 
    
         
             
            #[derive(Debug, Error)]
         
     | 
| 
         @@ -16,66 +22,139 @@ pub enum CacheError { 
     | 
|
| 
       16 
22 
     | 
    
         
             
                LockError(String),
         
     | 
| 
       17 
23 
     | 
    
         
             
            }
         
     | 
| 
       18 
24 
     | 
    
         | 
| 
       19 
     | 
    
         
            -
            static STRING_CACHE: LazyLock<Mutex<HashMap<&'static str, AtomicU32>>> =
         
     | 
| 
      
 25 
     | 
    
         
            +
            static STRING_CACHE: LazyLock<Mutex<HashMap<&'static str, (StringCacheKey, AtomicU32)>>> =
         
     | 
| 
       20 
26 
     | 
    
         
             
                LazyLock::new(|| Mutex::new(HashMap::with_capacity(100)));
         
     | 
| 
       21 
27 
     | 
    
         | 
| 
       22 
28 
     | 
    
         
             
            /// Unit type whose associated functions (`intern`, `intern_many`, `clear`) operate on the
            /// global `STRING_CACHE` table.
            pub struct StringCache;
         
     | 
| 
       23 
29 
     | 
    
         | 
| 
      
 30 
     | 
    
         
            +
            #[derive(Copy, Clone)]
         
     | 
| 
      
 31 
     | 
    
         
            +
            pub struct StringCacheKey(Opaque<FString>, &'static str);
         
     | 
| 
      
 32 
     | 
    
         
            +
             
     | 
| 
      
 33 
     | 
    
         
            +
            impl StringCacheKey {
         
     | 
| 
      
 34 
     | 
    
         
            +
                pub fn new(string: &str) -> Self {
         
     | 
| 
      
 35 
     | 
    
         
            +
                    let rstr = RString::new(string);
         
     | 
| 
      
 36 
     | 
    
         
            +
                    let fstr = rstr.to_interned_str();
         
     | 
| 
      
 37 
     | 
    
         
            +
                    Self(Opaque::from(fstr), fstr.as_str().unwrap())
         
     | 
| 
      
 38 
     | 
    
         
            +
                }
         
     | 
| 
      
 39 
     | 
    
         
            +
            }
         
     | 
| 
      
 40 
     | 
    
         
            +
             
     | 
| 
      
 41 
     | 
    
         
            +
            impl AsRef<str> for StringCacheKey {
         
     | 
| 
      
 42 
     | 
    
         
            +
                fn as_ref(&self) -> &'static str {
         
     | 
| 
      
 43 
     | 
    
         
            +
                    self.1
         
     | 
| 
      
 44 
     | 
    
         
            +
                }
         
     | 
| 
      
 45 
     | 
    
         
            +
            }
         
     | 
| 
      
 46 
     | 
    
         
            +
             
     | 
| 
      
 47 
     | 
    
         
            +
            impl IntoValue for StringCacheKey {
         
     | 
| 
      
 48 
     | 
    
         
            +
                fn into_value_with(self, handle: &Ruby) -> Value {
         
     | 
| 
      
 49 
     | 
    
         
            +
                    handle.into_value(self.0)
         
     | 
| 
      
 50 
     | 
    
         
            +
                }
         
     | 
| 
      
 51 
     | 
    
         
            +
            }
         
     | 
| 
      
 52 
     | 
    
         
            +
             
     | 
| 
      
 53 
     | 
    
         
            +
            impl std::fmt::Debug for StringCacheKey {
         
     | 
| 
      
 54 
     | 
    
         
            +
                fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         
     | 
| 
      
 55 
     | 
    
         
            +
                    self.1.fmt(f)
         
     | 
| 
      
 56 
     | 
    
         
            +
                }
         
     | 
| 
      
 57 
     | 
    
         
            +
            }
         
     | 
| 
      
 58 
     | 
    
         
            +
             
     | 
| 
      
 59 
     | 
    
         
            +
            impl PartialEq for StringCacheKey {
         
     | 
| 
      
 60 
     | 
    
         
            +
                fn eq(&self, other: &Self) -> bool {
         
     | 
| 
      
 61 
     | 
    
         
            +
                    self.1 == other.1
         
     | 
| 
      
 62 
     | 
    
         
            +
                }
         
     | 
| 
      
 63 
     | 
    
         
            +
            }
         
     | 
| 
      
 64 
     | 
    
         
            +
             
     | 
| 
      
 65 
     | 
    
         
            +
            // Marker impl: `PartialEq` compares the `&'static str` field only, and string equality
            // is a full equivalence relation.
            impl std::cmp::Eq for StringCacheKey {}
         
     | 
| 
      
 66 
     | 
    
         
            +
             
     | 
| 
      
 67 
     | 
    
         
            +
            impl std::hash::Hash for StringCacheKey {
         
     | 
| 
      
 68 
     | 
    
         
            +
                fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
         
     | 
| 
      
 69 
     | 
    
         
            +
                    self.1.hash(state);
         
     | 
| 
      
 70 
     | 
    
         
            +
                }
         
     | 
| 
      
 71 
     | 
    
         
            +
            }
         
     | 
| 
      
 72 
     | 
    
         
            +
             
     | 
| 
       24 
73 
     | 
    
         
             
            impl StringCache {
         
     | 
| 
       25 
74 
     | 
    
         
             
                #[allow(dead_code)]
         
     | 
| 
       26 
     | 
    
         
            -
                pub fn intern(string: String) -> Result 
     | 
| 
      
 75 
     | 
    
         
            +
                pub fn intern(string: String) -> Result<StringCacheKey, CacheError> {
         
     | 
| 
       27 
76 
     | 
    
         
             
                    let mut cache = STRING_CACHE
         
     | 
| 
       28 
77 
     | 
    
         
             
                        .lock()
         
     | 
| 
       29 
78 
     | 
    
         
             
                        .map_err(|e| CacheError::LockError(e.to_string()))?;
         
     | 
| 
       30 
79 
     | 
    
         | 
| 
       31 
     | 
    
         
            -
                    if let Some(( 
     | 
| 
       32 
     | 
    
         
            -
                         
     | 
| 
       33 
     | 
    
         
            -
                        Ok( 
     | 
| 
      
 80 
     | 
    
         
            +
                    if let Some((_, (interned_string, counter))) = cache.get_key_value(string.as_str()) {
         
     | 
| 
      
 81 
     | 
    
         
            +
                        counter.fetch_add(1, Ordering::Relaxed);
         
     | 
| 
      
 82 
     | 
    
         
            +
                        Ok(*interned_string)
         
     | 
| 
       34 
83 
     | 
    
         
             
                    } else {
         
     | 
| 
      
 84 
     | 
    
         
            +
                        let interned = StringCacheKey::new(string.as_str());
         
     | 
| 
       35 
85 
     | 
    
         
             
                        let leaked = Box::leak(string.into_boxed_str());
         
     | 
| 
       36 
     | 
    
         
            -
                        cache.insert(leaked, AtomicU32::new(1));
         
     | 
| 
       37 
     | 
    
         
            -
                        Ok( 
     | 
| 
      
 86 
     | 
    
         
            +
                        cache.insert(leaked, (interned, AtomicU32::new(1)));
         
     | 
| 
      
 87 
     | 
    
         
            +
                        Ok(interned)
         
     | 
| 
       38 
88 
     | 
    
         
             
                    }
         
     | 
| 
       39 
89 
     | 
    
         
             
                }
         
     | 
| 
       40 
90 
     | 
    
         | 
| 
       41 
     | 
    
         
            -
                pub fn intern_many(strings: &[String]) -> Result<Vec 
     | 
| 
      
 91 
     | 
    
         
            +
                pub fn intern_many(strings: &[String]) -> Result<Vec<StringCacheKey>, CacheError> {
         
     | 
| 
       42 
92 
     | 
    
         
             
                    let mut cache = STRING_CACHE
         
     | 
| 
       43 
93 
     | 
    
         
             
                        .lock()
         
     | 
| 
       44 
94 
     | 
    
         
             
                        .map_err(|e| CacheError::LockError(e.to_string()))?;
         
     | 
| 
       45 
95 
     | 
    
         | 
| 
       46 
     | 
    
         
            -
                    let mut result = Vec::with_capacity(strings.len());
         
     | 
| 
      
 96 
     | 
    
         
            +
                    let mut result: Vec<StringCacheKey> = Vec::with_capacity(strings.len());
         
     | 
| 
       47 
97 
     | 
    
         
             
                    for string in strings {
         
     | 
| 
       48 
     | 
    
         
            -
                        if let Some(( 
     | 
| 
       49 
     | 
    
         
            -
                             
     | 
| 
       50 
     | 
    
         
            -
                            result.push( 
     | 
| 
      
 98 
     | 
    
         
            +
                        if let Some((_, (interned_string, counter))) = cache.get_key_value(string.as_str()) {
         
     | 
| 
      
 99 
     | 
    
         
            +
                            counter.fetch_add(1, Ordering::Relaxed);
         
     | 
| 
      
 100 
     | 
    
         
            +
                            result.push(*interned_string);
         
     | 
| 
       51 
101 
     | 
    
         
             
                        } else {
         
     | 
| 
      
 102 
     | 
    
         
            +
                            let interned = StringCacheKey::new(&string);
         
     | 
| 
       52 
103 
     | 
    
         
             
                            let leaked = Box::leak(string.clone().into_boxed_str());
         
     | 
| 
       53 
     | 
    
         
            -
                            cache.insert(leaked, AtomicU32::new(1));
         
     | 
| 
       54 
     | 
    
         
            -
                            result.push( 
     | 
| 
      
 104 
     | 
    
         
            +
                            cache.insert(leaked, (interned, AtomicU32::new(1)));
         
     | 
| 
      
 105 
     | 
    
         
            +
                            result.push(interned);
         
     | 
| 
       55 
106 
     | 
    
         
             
                        }
         
     | 
| 
       56 
107 
     | 
    
         
             
                    }
         
     | 
| 
       57 
108 
     | 
    
         
             
                    Ok(result)
         
     | 
| 
       58 
109 
     | 
    
         
             
                }
         
     | 
| 
       59 
110 
     | 
    
         | 
| 
       60 
     | 
    
         
            -
                pub fn clear(headers: &[ 
     | 
| 
      
 111 
     | 
    
         
            +
                pub fn clear(headers: &[StringCacheKey]) -> Result<(), CacheError> {
         
     | 
| 
       61 
112 
     | 
    
         
             
                    let mut cache = STRING_CACHE
         
     | 
| 
       62 
113 
     | 
    
         
             
                        .lock()
         
     | 
| 
       63 
114 
     | 
    
         
             
                        .map_err(|e| CacheError::LockError(e.to_string()))?;
         
     | 
| 
       64 
115 
     | 
    
         | 
| 
       65 
     | 
    
         
            -
                     
     | 
| 
       66 
     | 
    
         
            -
                         
     | 
| 
       67 
     | 
    
         
            -
             
     | 
| 
       68 
     | 
    
         
            -
                            let  
     | 
| 
       69 
     | 
    
         
            -
                            if  
     | 
| 
       70 
     | 
    
         
            -
                                 
     | 
| 
       71 
     | 
    
         
            -
                                 
     | 
| 
       72 
     | 
    
         
            -
             
     | 
| 
       73 
     | 
    
         
            -
             
     | 
| 
      
 116 
     | 
    
         
            +
                    let to_remove: Vec<_> = headers
         
     | 
| 
      
 117 
     | 
    
         
            +
                        .iter()
         
     | 
| 
      
 118 
     | 
    
         
            +
                        .filter_map(|header| {
         
     | 
| 
      
 119 
     | 
    
         
            +
                            let key = header.as_ref();
         
     | 
| 
      
 120 
     | 
    
         
            +
                            if let Some((_, (_, counter))) = cache.get_key_value(key) {
         
     | 
| 
      
 121 
     | 
    
         
            +
                                let prev_count = counter.fetch_sub(1, Ordering::Relaxed);
         
     | 
| 
      
 122 
     | 
    
         
            +
                                if prev_count == 1 {
         
     | 
| 
      
 123 
     | 
    
         
            +
                                    Some(key)
         
     | 
| 
      
 124 
     | 
    
         
            +
                                } else {
         
     | 
| 
      
 125 
     | 
    
         
            +
                                    None
         
     | 
| 
       74 
126 
     | 
    
         
             
                                }
         
     | 
| 
      
 127 
     | 
    
         
            +
                            } else {
         
     | 
| 
      
 128 
     | 
    
         
            +
                                None
         
     | 
| 
       75 
129 
     | 
    
         
             
                            }
         
     | 
| 
       76 
     | 
    
         
            -
                        }
         
     | 
| 
      
 130 
     | 
    
         
            +
                        })
         
     | 
| 
      
 131 
     | 
    
         
            +
                        .collect();
         
     | 
| 
      
 132 
     | 
    
         
            +
             
     | 
| 
      
 133 
     | 
    
         
            +
                    for key in to_remove {
         
     | 
| 
      
 134 
     | 
    
         
            +
                        cache.remove(key);
         
     | 
| 
       77 
135 
     | 
    
         
             
                    }
         
     | 
| 
       78 
136 
     | 
    
         | 
| 
       79 
137 
     | 
    
         
             
                    Ok(())
         
     | 
| 
       80 
138 
     | 
    
         
             
                }
         
     | 
| 
       81 
139 
     | 
    
         
             
            }
         
     | 
| 
      
 140 
     | 
    
         
            +
             
     | 
| 
      
 141 
     | 
    
         
            +
            pub struct HeaderCacheCleanupIter<I> {
         
     | 
| 
      
 142 
     | 
    
         
            +
                pub inner: I,
         
     | 
| 
      
 143 
     | 
    
         
            +
                pub headers: OnceLock<Vec<StringCacheKey>>,
         
     | 
| 
      
 144 
     | 
    
         
            +
            }
         
     | 
| 
      
 145 
     | 
    
         
            +
             
     | 
| 
      
 146 
     | 
    
         
            +
            impl<I: Iterator> Iterator for HeaderCacheCleanupIter<I> {
         
     | 
| 
      
 147 
     | 
    
         
            +
                type Item = I::Item;
         
     | 
| 
      
 148 
     | 
    
         
            +
             
     | 
| 
      
 149 
     | 
    
         
            +
                fn next(&mut self) -> Option<Self::Item> {
         
     | 
| 
      
 150 
     | 
    
         
            +
                    self.inner.next()
         
     | 
| 
      
 151 
     | 
    
         
            +
                }
         
     | 
| 
      
 152 
     | 
    
         
            +
            }
         
     | 
| 
      
 153 
     | 
    
         
            +
             
     | 
| 
      
 154 
     | 
    
         
            +
            impl<I> Drop for HeaderCacheCleanupIter<I> {
         
     | 
| 
      
 155 
     | 
    
         
            +
                fn drop(&mut self) {
         
     | 
| 
      
 156 
     | 
    
         
            +
                    if let Some(headers) = self.headers.get() {
         
     | 
| 
      
 157 
     | 
    
         
            +
                        StringCache::clear(&headers).unwrap();
         
     | 
| 
      
 158 
     | 
    
         
            +
                    }
         
     | 
| 
      
 159 
     | 
    
         
            +
                }
         
     | 
| 
      
 160 
     | 
    
         
            +
            }
         
     | 
    
        data/ext/osv/src/csv/mod.rs
    CHANGED
    
    | 
         @@ -7,7 +7,7 @@ mod ruby_integration; 
     | 
|
| 
       7 
7 
     | 
    
         
             
            mod ruby_reader;
         
     | 
| 
       8 
8 
     | 
    
         | 
| 
       9 
9 
     | 
    
         
             
            pub use builder::RecordReaderBuilder;
         
     | 
| 
       10 
     | 
    
         
            -
            pub 
     | 
| 
       11 
     | 
    
         
            -
            pub use record:: 
     | 
| 
      
 10 
     | 
    
         
            +
            pub use header_cache::StringCacheKey;
         
     | 
| 
      
 11 
     | 
    
         
            +
            pub use record::CowStr;
         
     | 
| 
       12 
12 
     | 
    
         
             
            pub use record::CsvRecord;
         
     | 
| 
       13 
13 
     | 
    
         
             
            pub use ruby_integration::*;
         
     |