osv 0.3.15 → 0.3.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/Cargo.lock +11 -1
 - data/README.md +27 -27
 - data/ext/osv/Cargo.toml +1 -0
 - data/ext/osv/src/csv/builder.rs +92 -85
 - data/ext/osv/src/csv/header_cache.rs +105 -26
 - data/ext/osv/src/csv/mod.rs +2 -2
 - data/ext/osv/src/csv/parser.rs +22 -85
 - data/ext/osv/src/csv/record.rs +25 -8
 - data/ext/osv/src/csv/record_reader.rs +53 -118
 - data/ext/osv/src/csv/ruby_integration.rs +10 -21
 - data/ext/osv/src/csv/ruby_reader.rs +9 -4
 - data/ext/osv/src/reader.rs +64 -46
 - data/ext/osv/src/utils.rs +4 -12
 - data/lib/osv/version.rb +1 -1
 - metadata +2 -2
 
    
        data/ext/osv/src/csv/parser.rs
    CHANGED
    
    | 
         @@ -2,77 +2,78 @@ use std::borrow::Cow; 
     | 
|
| 
       2 
2 
     | 
    
         
             
            use std::collections::HashMap;
         
     | 
| 
       3 
3 
     | 
    
         
             
            use std::hash::BuildHasher;
         
     | 
| 
       4 
4 
     | 
    
         | 
| 
       5 
     | 
    
         
            -
            use super:: 
     | 
| 
      
 5 
     | 
    
         
            +
            use super::header_cache::StringCacheKey;
         
     | 
| 
      
 6 
     | 
    
         
            +
            use super::CowStr;
         
     | 
| 
       6 
7 
     | 
    
         | 
| 
       7 
8 
     | 
    
         
             
            pub trait RecordParser<'a> {
         
     | 
| 
       8 
     | 
    
         
            -
                type Output 
     | 
| 
      
 9 
     | 
    
         
            +
                type Output;
         
     | 
| 
       9 
10 
     | 
    
         | 
| 
       10 
11 
     | 
    
         
             
                fn parse(
         
     | 
| 
       11 
     | 
    
         
            -
                    headers: &[ 
     | 
| 
      
 12 
     | 
    
         
            +
                    headers: &[StringCacheKey],
         
     | 
| 
       12 
13 
     | 
    
         
             
                    record: &csv::StringRecord,
         
     | 
| 
       13 
     | 
    
         
            -
                    null_string: Option 
     | 
| 
      
 14 
     | 
    
         
            +
                    null_string: Option<Cow<'a, str>>,
         
     | 
| 
       14 
15 
     | 
    
         
             
                    flexible_default: Option<Cow<'a, str>>,
         
     | 
| 
       15 
16 
     | 
    
         
             
                ) -> Self::Output;
         
     | 
| 
       16 
17 
     | 
    
         
             
            }
         
     | 
| 
       17 
18 
     | 
    
         | 
| 
       18 
     | 
    
         
            -
            impl<'a, S: BuildHasher + Default 
     | 
| 
       19 
     | 
    
         
            -
                for HashMap 
     | 
| 
      
 19 
     | 
    
         
            +
            impl<'a, S: BuildHasher + Default> RecordParser<'a>
         
     | 
| 
      
 20 
     | 
    
         
            +
                for HashMap<StringCacheKey, Option<CowStr<'a>>, S>
         
     | 
| 
       20 
21 
     | 
    
         
             
            {
         
     | 
| 
       21 
22 
     | 
    
         
             
                type Output = Self;
         
     | 
| 
       22 
23 
     | 
    
         | 
| 
       23 
24 
     | 
    
         
             
                #[inline]
         
     | 
| 
       24 
25 
     | 
    
         
             
                fn parse(
         
     | 
| 
       25 
     | 
    
         
            -
                    headers: &[ 
     | 
| 
      
 26 
     | 
    
         
            +
                    headers: &[StringCacheKey],
         
     | 
| 
       26 
27 
     | 
    
         
             
                    record: &csv::StringRecord,
         
     | 
| 
       27 
     | 
    
         
            -
                    null_string: Option 
     | 
| 
      
 28 
     | 
    
         
            +
                    null_string: Option<Cow<'a, str>>,
         
     | 
| 
       28 
29 
     | 
    
         
             
                    flexible_default: Option<Cow<'a, str>>,
         
     | 
| 
       29 
30 
     | 
    
         
             
                ) -> Self::Output {
         
     | 
| 
       30 
31 
     | 
    
         
             
                    let mut map = HashMap::with_capacity_and_hasher(headers.len(), S::default());
         
     | 
| 
       31 
32 
     | 
    
         | 
| 
       32 
33 
     | 
    
         
             
                    let shared_empty = Cow::Borrowed("");
         
     | 
| 
       33 
     | 
    
         
            -
                    let shared_default = flexible_default.map( 
     | 
| 
       34 
     | 
    
         
            -
                    headers.iter().enumerate().for_each(|(i,  
     | 
| 
      
 34 
     | 
    
         
            +
                    let shared_default = flexible_default.map(CowStr);
         
     | 
| 
      
 35 
     | 
    
         
            +
                    headers.iter().enumerate().for_each(|(i, ref header)| {
         
     | 
| 
       35 
36 
     | 
    
         
             
                        let value = record.get(i).map_or_else(
         
     | 
| 
       36 
37 
     | 
    
         
             
                            || shared_default.clone(),
         
     | 
| 
       37 
38 
     | 
    
         
             
                            |field| {
         
     | 
| 
       38 
     | 
    
         
            -
                                if null_string == Some(field) {
         
     | 
| 
      
 39 
     | 
    
         
            +
                                if null_string.as_deref() == Some(field) {
         
     | 
| 
       39 
40 
     | 
    
         
             
                                    None
         
     | 
| 
       40 
41 
     | 
    
         
             
                                } else if field.is_empty() {
         
     | 
| 
       41 
     | 
    
         
            -
                                    Some( 
     | 
| 
      
 42 
     | 
    
         
            +
                                    Some(CowStr(shared_empty.clone()))
         
     | 
| 
       42 
43 
     | 
    
         
             
                                } else {
         
     | 
| 
       43 
     | 
    
         
            -
                                    Some( 
     | 
| 
      
 44 
     | 
    
         
            +
                                    Some(CowStr(Cow::Owned(field.to_string())))
         
     | 
| 
       44 
45 
     | 
    
         
             
                                }
         
     | 
| 
       45 
46 
     | 
    
         
             
                            },
         
     | 
| 
       46 
47 
     | 
    
         
             
                        );
         
     | 
| 
       47 
     | 
    
         
            -
                        map.insert(header, value);
         
     | 
| 
      
 48 
     | 
    
         
            +
                        map.insert((*header).clone(), value);
         
     | 
| 
       48 
49 
     | 
    
         
             
                    });
         
     | 
| 
       49 
50 
     | 
    
         
             
                    map
         
     | 
| 
       50 
51 
     | 
    
         
             
                }
         
     | 
| 
       51 
52 
     | 
    
         
             
            }
         
     | 
| 
       52 
53 
     | 
    
         | 
| 
       53 
     | 
    
         
            -
            impl<'a> RecordParser<'a> for Vec<Option< 
     | 
| 
      
 54 
     | 
    
         
            +
            impl<'a> RecordParser<'a> for Vec<Option<CowStr<'a>>> {
         
     | 
| 
       54 
55 
     | 
    
         
             
                type Output = Self;
         
     | 
| 
       55 
56 
     | 
    
         | 
| 
       56 
57 
     | 
    
         
             
                #[inline]
         
     | 
| 
       57 
58 
     | 
    
         
             
                fn parse(
         
     | 
| 
       58 
     | 
    
         
            -
                    headers: &[ 
     | 
| 
      
 59 
     | 
    
         
            +
                    headers: &[StringCacheKey],
         
     | 
| 
       59 
60 
     | 
    
         
             
                    record: &csv::StringRecord,
         
     | 
| 
       60 
     | 
    
         
            -
                    null_string: Option 
     | 
| 
      
 61 
     | 
    
         
            +
                    null_string: Option<Cow<'a, str>>,
         
     | 
| 
       61 
62 
     | 
    
         
             
                    flexible_default: Option<Cow<'a, str>>,
         
     | 
| 
       62 
63 
     | 
    
         
             
                ) -> Self::Output {
         
     | 
| 
       63 
64 
     | 
    
         
             
                    let target_len = headers.len();
         
     | 
| 
       64 
65 
     | 
    
         
             
                    let mut vec = Vec::with_capacity(target_len);
         
     | 
| 
       65 
66 
     | 
    
         | 
| 
       66 
67 
     | 
    
         
             
                    let shared_empty = Cow::Borrowed("");
         
     | 
| 
       67 
     | 
    
         
            -
                    let shared_default = flexible_default.map( 
     | 
| 
      
 68 
     | 
    
         
            +
                    let shared_default = flexible_default.map(CowStr);
         
     | 
| 
       68 
69 
     | 
    
         | 
| 
       69 
70 
     | 
    
         
             
                    for field in record.iter() {
         
     | 
| 
       70 
     | 
    
         
            -
                        let value = if Some(field) == null_string {
         
     | 
| 
      
 71 
     | 
    
         
            +
                        let value = if Some(field) == null_string.as_deref() {
         
     | 
| 
       71 
72 
     | 
    
         
             
                            None
         
     | 
| 
       72 
73 
     | 
    
         
             
                        } else if field.is_empty() {
         
     | 
| 
       73 
     | 
    
         
            -
                            Some( 
     | 
| 
      
 74 
     | 
    
         
            +
                            Some(CowStr(shared_empty.clone()))
         
     | 
| 
       74 
75 
     | 
    
         
             
                        } else {
         
     | 
| 
       75 
     | 
    
         
            -
                            Some( 
     | 
| 
      
 76 
     | 
    
         
            +
                            Some(CowStr(Cow::Owned(field.to_string())))
         
     | 
| 
       76 
77 
     | 
    
         
             
                        };
         
     | 
| 
       77 
78 
     | 
    
         
             
                        vec.push(value);
         
     | 
| 
       78 
79 
     | 
    
         
             
                    }
         
     | 
| 
         @@ -85,67 +86,3 @@ impl<'a> RecordParser<'a> for Vec<Option<CowValue<'a>>> { 
     | 
|
| 
       85 
86 
     | 
    
         
             
                    vec
         
     | 
| 
       86 
87 
     | 
    
         
             
                }
         
     | 
| 
       87 
88 
     | 
    
         
             
            }
         
     | 
| 
       88 
     | 
    
         
            -
             
     | 
| 
       89 
     | 
    
         
            -
            // impl<'a, S: BuildHasher + Default + 'a> RecordParser<'a>
         
     | 
| 
       90 
     | 
    
         
            -
            //     for HashMap<&'static str, Option<String>, S>
         
     | 
| 
       91 
     | 
    
         
            -
            // {
         
     | 
| 
       92 
     | 
    
         
            -
            //     type Output = Self;
         
     | 
| 
       93 
     | 
    
         
            -
             
     | 
| 
       94 
     | 
    
         
            -
            //     #[inline]
         
     | 
| 
       95 
     | 
    
         
            -
            //     fn parse(
         
     | 
| 
       96 
     | 
    
         
            -
            //         headers: &[&'static str],
         
     | 
| 
       97 
     | 
    
         
            -
            //         record: &csv::StringRecord,
         
     | 
| 
       98 
     | 
    
         
            -
            //         null_string: Option<&str>,
         
     | 
| 
       99 
     | 
    
         
            -
            //         flexible_default: Option<Cow<'a, str>>,
         
     | 
| 
       100 
     | 
    
         
            -
            //     ) -> Self::Output {
         
     | 
| 
       101 
     | 
    
         
            -
            //         let mut map = HashMap::with_capacity_and_hasher(headers.len(), S::default());
         
     | 
| 
       102 
     | 
    
         
            -
            //         headers.iter().enumerate().for_each(|(i, &header)| {
         
     | 
| 
       103 
     | 
    
         
            -
            //             let value = record.get(i).map_or_else(
         
     | 
| 
       104 
     | 
    
         
            -
            //                 || flexible_default.clone(),
         
     | 
| 
       105 
     | 
    
         
            -
            //                 |field| {
         
     | 
| 
       106 
     | 
    
         
            -
            //                     if null_string == Some(field) {
         
     | 
| 
       107 
     | 
    
         
            -
            //                         None
         
     | 
| 
       108 
     | 
    
         
            -
            //                     } else if field.is_empty() {
         
     | 
| 
       109 
     | 
    
         
            -
            //                         Some(String::new())
         
     | 
| 
       110 
     | 
    
         
            -
            //                     } else {
         
     | 
| 
       111 
     | 
    
         
            -
            //                         Some(field.into())
         
     | 
| 
       112 
     | 
    
         
            -
            //                     }
         
     | 
| 
       113 
     | 
    
         
            -
            //                 },
         
     | 
| 
       114 
     | 
    
         
            -
            //             );
         
     | 
| 
       115 
     | 
    
         
            -
            //             map.insert(header, value);
         
     | 
| 
       116 
     | 
    
         
            -
            //         });
         
     | 
| 
       117 
     | 
    
         
            -
            //         map
         
     | 
| 
       118 
     | 
    
         
            -
            //     }
         
     | 
| 
       119 
     | 
    
         
            -
            // }
         
     | 
| 
       120 
     | 
    
         
            -
             
     | 
| 
       121 
     | 
    
         
            -
            // impl<'a> RecordParser<'a> for Vec<Option<String>> {
         
     | 
| 
       122 
     | 
    
         
            -
            //     type Output = Self;
         
     | 
| 
       123 
     | 
    
         
            -
             
     | 
| 
       124 
     | 
    
         
            -
            //     #[inline]
         
     | 
| 
       125 
     | 
    
         
            -
            //     fn parse(
         
     | 
| 
       126 
     | 
    
         
            -
            //         headers: &[&'static str],
         
     | 
| 
       127 
     | 
    
         
            -
            //         record: &csv::StringRecord,
         
     | 
| 
       128 
     | 
    
         
            -
            //         null_string: Option<&str>,
         
     | 
| 
       129 
     | 
    
         
            -
            //         flexible_default: Option<Cow<'a, str>>,
         
     | 
| 
       130 
     | 
    
         
            -
            //     ) -> Self::Output {
         
     | 
| 
       131 
     | 
    
         
            -
            //         let target_len = headers.len();
         
     | 
| 
       132 
     | 
    
         
            -
            //         let mut vec = Vec::with_capacity(target_len);
         
     | 
| 
       133 
     | 
    
         
            -
            //         for field in record.iter() {
         
     | 
| 
       134 
     | 
    
         
            -
            //             let value = if Some(field) == null_string {
         
     | 
| 
       135 
     | 
    
         
            -
            //                 None
         
     | 
| 
       136 
     | 
    
         
            -
            //             } else if field.is_empty() {
         
     | 
| 
       137 
     | 
    
         
            -
            //                 Some(String::new())
         
     | 
| 
       138 
     | 
    
         
            -
            //             } else {
         
     | 
| 
       139 
     | 
    
         
            -
            //                 Some(field.into())
         
     | 
| 
       140 
     | 
    
         
            -
            //             };
         
     | 
| 
       141 
     | 
    
         
            -
            //             vec.push(value);
         
     | 
| 
       142 
     | 
    
         
            -
            //         }
         
     | 
| 
       143 
     | 
    
         
            -
             
     | 
| 
       144 
     | 
    
         
            -
            //         if vec.len() < target_len {
         
     | 
| 
       145 
     | 
    
         
            -
            //             if let Some(default) = flexible_default {
         
     | 
| 
       146 
     | 
    
         
            -
            //                 vec.resize_with(target_len, || Some(default.to_string()));
         
     | 
| 
       147 
     | 
    
         
            -
            //             }
         
     | 
| 
       148 
     | 
    
         
            -
            //         }
         
     | 
| 
       149 
     | 
    
         
            -
            //         vec
         
     | 
| 
       150 
     | 
    
         
            -
            //     }
         
     | 
| 
       151 
     | 
    
         
            -
            // }
         
     | 
    
        data/ext/osv/src/csv/record.rs
    CHANGED
    
    | 
         @@ -1,10 +1,13 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            use  
     | 
| 
      
 1 
     | 
    
         
            +
            use itertools::Itertools;
         
     | 
| 
      
 2 
     | 
    
         
            +
            use magnus::{value::ReprValue, IntoValue, Ruby, Value};
         
     | 
| 
       2 
3 
     | 
    
         
             
            use std::{borrow::Cow, collections::HashMap, hash::BuildHasher};
         
     | 
| 
       3 
4 
     | 
    
         | 
| 
      
 5 
     | 
    
         
            +
            use super::StringCacheKey;
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
       4 
7 
     | 
    
         
             
            #[derive(Debug)]
         
     | 
| 
       5 
8 
     | 
    
         
             
            pub enum CsvRecord<'a, S: BuildHasher + Default> {
         
     | 
| 
       6 
     | 
    
         
            -
                Vec(Vec<Option< 
     | 
| 
       7 
     | 
    
         
            -
                Map(HashMap 
     | 
| 
      
 9 
     | 
    
         
            +
                Vec(Vec<Option<CowStr<'a>>>),
         
     | 
| 
      
 10 
     | 
    
         
            +
                Map(HashMap<StringCacheKey, Option<CowStr<'a>>, S>),
         
     | 
| 
       8 
11 
     | 
    
         
             
            }
         
     | 
| 
       9 
12 
     | 
    
         | 
| 
       10 
13 
     | 
    
         
             
            impl<S: BuildHasher + Default> IntoValue for CsvRecord<'_, S> {
         
     | 
| 
         @@ -19,9 +22,23 @@ impl<S: BuildHasher + Default> IntoValue for CsvRecord<'_, S> { 
     | 
|
| 
       19 
22 
     | 
    
         
             
                        CsvRecord::Map(map) => {
         
     | 
| 
       20 
23 
     | 
    
         
             
                            // Pre-allocate the hash with the known size
         
     | 
| 
       21 
24 
     | 
    
         
             
                            let hash = handle.hash_new_capa(map.len());
         
     | 
| 
       22 
     | 
    
         
            -
             
     | 
| 
       23 
     | 
    
         
            -
             
     | 
| 
       24 
     | 
    
         
            -
             
     | 
| 
      
 25 
     | 
    
         
            +
             
     | 
| 
      
 26 
     | 
    
         
            +
                            let mut values: [Value; 128] = [handle.qnil().as_value(); 128];
         
     | 
| 
      
 27 
     | 
    
         
            +
                            let mut i = 0;
         
     | 
| 
      
 28 
     | 
    
         
            +
             
     | 
| 
      
 29 
     | 
    
         
            +
                            for chunk in &map.into_iter().chunks(128) {
         
     | 
| 
      
 30 
     | 
    
         
            +
                                for (k, v) in chunk {
         
     | 
| 
      
 31 
     | 
    
         
            +
                                    values[i] = handle.into_value(k);
         
     | 
| 
      
 32 
     | 
    
         
            +
                                    values[i + 1] = handle.into_value(v);
         
     | 
| 
      
 33 
     | 
    
         
            +
                                    i += 2;
         
     | 
| 
      
 34 
     | 
    
         
            +
                                }
         
     | 
| 
      
 35 
     | 
    
         
            +
                                hash.bulk_insert(&values[..i]).unwrap();
         
     | 
| 
      
 36 
     | 
    
         
            +
             
     | 
| 
      
 37 
     | 
    
         
            +
                                // Zero out used values
         
     | 
| 
      
 38 
     | 
    
         
            +
                                values[..i].fill(handle.qnil().as_value());
         
     | 
| 
      
 39 
     | 
    
         
            +
                                i = 0;
         
     | 
| 
      
 40 
     | 
    
         
            +
                            }
         
     | 
| 
      
 41 
     | 
    
         
            +
             
     | 
| 
       25 
42 
     | 
    
         
             
                            hash.into_value_with(handle)
         
     | 
| 
       26 
43 
     | 
    
         
             
                        }
         
     | 
| 
       27 
44 
     | 
    
         
             
                    }
         
     | 
| 
         @@ -29,9 +46,9 @@ impl<S: BuildHasher + Default> IntoValue for CsvRecord<'_, S> { 
     | 
|
| 
       29 
46 
     | 
    
         
             
            }
         
     | 
| 
       30 
47 
     | 
    
         | 
| 
       31 
48 
     | 
    
         
             
            #[derive(Debug, Clone)]
         
     | 
| 
       32 
     | 
    
         
            -
            pub struct  
     | 
| 
      
 49 
     | 
    
         
            +
            pub struct CowStr<'a>(pub Cow<'a, str>);
         
     | 
| 
       33 
50 
     | 
    
         | 
| 
       34 
     | 
    
         
            -
            impl IntoValue for  
     | 
| 
      
 51 
     | 
    
         
            +
            impl IntoValue for CowStr<'_> {
         
     | 
| 
       35 
52 
     | 
    
         
             
                fn into_value_with(self, handle: &Ruby) -> Value {
         
     | 
| 
       36 
53 
     | 
    
         
             
                    self.0.into_value_with(handle)
         
     | 
| 
       37 
54 
     | 
    
         
             
                }
         
     | 
| 
        data/ext/osv/src/csv/record_reader.rs
    CHANGED
    
    | 
         @@ -1,32 +1,35 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            use super::header_cache::StringCacheKey;
         
     | 
| 
       1 
2 
     | 
    
         
             
            use super::parser::RecordParser;
         
     | 
| 
       2 
3 
     | 
    
         
             
            use super::{header_cache::StringCache, ruby_reader::SeekableRead};
         
     | 
| 
       3 
4 
     | 
    
         
             
            use magnus::{Error, Ruby};
         
     | 
| 
       4 
     | 
    
         
            -
            use std:: 
     | 
| 
       5 
     | 
    
         
            -
            use std::{ 
     | 
| 
      
 5 
     | 
    
         
            +
            use std::borrow::Cow;
         
     | 
| 
      
 6 
     | 
    
         
            +
            use std::io::{BufReader, Read};
         
     | 
| 
       6 
7 
     | 
    
         | 
| 
      
 8 
     | 
    
         
            +
            /// Size of the internal buffer used for reading CSV records
         
     | 
| 
       7 
9 
     | 
    
         
             
            pub(crate) const READ_BUFFER_SIZE: usize = 16384;
         
     | 
| 
       8 
10 
     | 
    
         | 
| 
      
 11 
     | 
    
         
            +
            /// A reader that processes CSV records using a specified parser.
         
     | 
| 
      
 12 
     | 
    
         
            +
            ///
         
     | 
| 
      
 13 
     | 
    
         
            +
            /// This struct implements Iterator to provide a streaming interface for CSV records.
         
     | 
| 
       9 
14 
     | 
    
         
             
            pub struct RecordReader<'a, T: RecordParser<'a>> {
         
     | 
| 
       10 
     | 
    
         
            -
                 
     | 
| 
       11 
     | 
    
         
            -
             
     | 
| 
       12 
     | 
    
         
            -
             
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
       15 
     | 
    
         
            -
                 
     | 
| 
       16 
     | 
    
         
            -
                    reader: csv::Reader<BufReader<Box<dyn SeekableRead>>>,
         
     | 
| 
       17 
     | 
    
         
            -
                    headers: Vec<&'static str>,
         
     | 
| 
       18 
     | 
    
         
            -
                    null_string: Option<String>,
         
     | 
| 
       19 
     | 
    
         
            -
                    flexible_default: Option<Cow<'a, str>>,
         
     | 
| 
       20 
     | 
    
         
            -
                    string_record: csv::StringRecord,
         
     | 
| 
       21 
     | 
    
         
            -
                },
         
     | 
| 
       22 
     | 
    
         
            -
                MultiThreaded {
         
     | 
| 
       23 
     | 
    
         
            -
                    headers: Vec<&'static str>,
         
     | 
| 
       24 
     | 
    
         
            -
                    receiver: kanal::Receiver<T::Output>,
         
     | 
| 
       25 
     | 
    
         
            -
                    handle: Option<thread::JoinHandle<()>>,
         
     | 
| 
       26 
     | 
    
         
            -
                },
         
     | 
| 
      
 15 
     | 
    
         
            +
                reader: csv::Reader<BufReader<Box<dyn SeekableRead>>>,
         
     | 
| 
      
 16 
     | 
    
         
            +
                headers: Vec<StringCacheKey>,
         
     | 
| 
      
 17 
     | 
    
         
            +
                null_string: Option<Cow<'a, str>>,
         
     | 
| 
      
 18 
     | 
    
         
            +
                flexible_default: Option<Cow<'a, str>>,
         
     | 
| 
      
 19 
     | 
    
         
            +
                string_record: csv::StringRecord,
         
     | 
| 
      
 20 
     | 
    
         
            +
                parser: std::marker::PhantomData<T>,
         
     | 
| 
       27 
21 
     | 
    
         
             
            }
         
     | 
| 
       28 
22 
     | 
    
         | 
| 
       29 
23 
     | 
    
         
             
            impl<'a, T: RecordParser<'a>> RecordReader<'a, T> {
         
     | 
| 
      
 24 
     | 
    
         
            +
                /// Reads and processes headers from a CSV reader.
         
     | 
| 
      
 25 
     | 
    
         
            +
                ///
         
     | 
| 
      
 26 
     | 
    
         
            +
                /// # Arguments
         
     | 
| 
      
 27 
     | 
    
         
            +
                /// * `ruby` - Ruby VM context for error handling
         
     | 
| 
      
 28 
     | 
    
         
            +
                /// * `reader` - CSV reader instance
         
     | 
| 
      
 29 
     | 
    
         
            +
                /// * `has_headers` - Whether the CSV file contains headers
         
     | 
| 
      
 30 
     | 
    
         
            +
                ///
         
     | 
| 
      
 31 
     | 
    
         
            +
                /// # Returns
         
     | 
| 
      
 32 
     | 
    
         
            +
                /// A vector of header strings or generated column names if `has_headers` is false
         
     | 
| 
       30 
33 
     | 
    
         
             
                #[inline]
         
     | 
| 
       31 
34 
     | 
    
         
             
                pub(crate) fn get_headers(
         
     | 
| 
       32 
35 
     | 
    
         
             
                    ruby: &Ruby,
         
     | 
| 
         @@ -40,67 +43,41 @@ impl<'a, T: RecordParser<'a>> RecordReader<'a, T> { 
     | 
|
| 
       40 
43 
     | 
    
         
             
                        )
         
     | 
| 
       41 
44 
     | 
    
         
             
                    })?;
         
     | 
| 
       42 
45 
     | 
    
         | 
| 
       43 
     | 
    
         
            -
                     
     | 
| 
       44 
     | 
    
         
            -
             
     | 
| 
       45 
     | 
    
         
            -
                        headers.extend(first_row.iter().map(String::from));
         
     | 
| 
      
 46 
     | 
    
         
            +
                    Ok(if has_headers {
         
     | 
| 
      
 47 
     | 
    
         
            +
                        first_row.iter().map(String::from).collect()
         
     | 
| 
       46 
48 
     | 
    
         
             
                    } else {
         
     | 
| 
       47 
     | 
    
         
            -
                         
     | 
| 
       48 
     | 
    
         
            -
                    }
         
     | 
| 
       49 
     | 
    
         
            -
                    Ok(headers)
         
     | 
| 
      
 49 
     | 
    
         
            +
                        (0..first_row.len()).map(|i| format!("c{i}")).collect()
         
     | 
| 
      
 50 
     | 
    
         
            +
                    })
         
     | 
| 
       50 
51 
     | 
    
         
             
                }
         
     | 
| 
       51 
52 
     | 
    
         | 
| 
       52 
     | 
    
         
            -
                 
     | 
| 
      
 53 
     | 
    
         
            +
                /// Creates a new RecordReader instance.
         
     | 
| 
      
 54 
     | 
    
         
            +
                pub(crate) fn new(
         
     | 
| 
       53 
55 
     | 
    
         
             
                    reader: csv::Reader<BufReader<Box<dyn SeekableRead>>>,
         
     | 
| 
       54 
     | 
    
         
            -
                    headers: Vec 
     | 
| 
       55 
     | 
    
         
            -
                    null_string: Option< 
     | 
| 
       56 
     | 
    
         
            -
                    flexible_default: Option 
     | 
| 
      
 56 
     | 
    
         
            +
                    headers: Vec<StringCacheKey>,
         
     | 
| 
      
 57 
     | 
    
         
            +
                    null_string: Option<Cow<'a, str>>,
         
     | 
| 
      
 58 
     | 
    
         
            +
                    flexible_default: Option<Cow<'a, str>>,
         
     | 
| 
       57 
59 
     | 
    
         
             
                ) -> Self {
         
     | 
| 
       58 
60 
     | 
    
         
             
                    let headers_len = headers.len();
         
     | 
| 
       59 
61 
     | 
    
         
             
                    Self {
         
     | 
| 
       60 
     | 
    
         
            -
                         
     | 
| 
       61 
     | 
    
         
            -
             
     | 
| 
       62 
     | 
    
         
            -
             
     | 
| 
       63 
     | 
    
         
            -
             
     | 
| 
       64 
     | 
    
         
            -
             
     | 
| 
       65 
     | 
    
         
            -
             
     | 
| 
       66 
     | 
    
         
            -
                        },
         
     | 
| 
      
 62 
     | 
    
         
            +
                        reader,
         
     | 
| 
      
 63 
     | 
    
         
            +
                        headers,
         
     | 
| 
      
 64 
     | 
    
         
            +
                        null_string,
         
     | 
| 
      
 65 
     | 
    
         
            +
                        flexible_default,
         
     | 
| 
      
 66 
     | 
    
         
            +
                        string_record: csv::StringRecord::with_capacity(READ_BUFFER_SIZE, headers_len),
         
     | 
| 
      
 67 
     | 
    
         
            +
                        parser: std::marker::PhantomData,
         
     | 
| 
       67 
68 
     | 
    
         
             
                    }
         
     | 
| 
       68 
69 
     | 
    
         
             
                }
         
     | 
| 
       69 
     | 
    
         
            -
            }
         
     | 
| 
       70 
70 
     | 
    
         | 
| 
       71 
     | 
    
         
            -
             
     | 
| 
       72 
     | 
    
         
            -
                 
     | 
| 
       73 
     | 
    
         
            -
                     
     | 
| 
       74 
     | 
    
         
            -
             
     | 
| 
       75 
     | 
    
         
            -
             
     | 
| 
       76 
     | 
    
         
            -
             
     | 
| 
       77 
     | 
    
         
            -
             
     | 
| 
       78 
     | 
    
         
            -
             
     | 
| 
       79 
     | 
    
         
            -
             
     | 
| 
       80 
     | 
    
         
            -
             
     | 
| 
       81 
     | 
    
         
            -
             
     | 
| 
       82 
     | 
    
         
            -
                    let handle = thread::spawn(move || {
         
     | 
| 
       83 
     | 
    
         
            -
                        let mut record =
         
     | 
| 
       84 
     | 
    
         
            -
                            csv::StringRecord::with_capacity(READ_BUFFER_SIZE, headers_for_thread.len());
         
     | 
| 
       85 
     | 
    
         
            -
                        while let Ok(true) = reader.read_record(&mut record) {
         
     | 
| 
       86 
     | 
    
         
            -
                            let row = T::parse(
         
     | 
| 
       87 
     | 
    
         
            -
                                &headers_for_thread,
         
     | 
| 
       88 
     | 
    
         
            -
                                &record,
         
     | 
| 
       89 
     | 
    
         
            -
                                null_string.as_deref(),
         
     | 
| 
       90 
     | 
    
         
            -
                                flexible_default.map(Cow::Borrowed),
         
     | 
| 
       91 
     | 
    
         
            -
                            );
         
     | 
| 
       92 
     | 
    
         
            -
                            if sender.send(row).is_err() {
         
     | 
| 
       93 
     | 
    
         
            -
                                break;
         
     | 
| 
       94 
     | 
    
         
            -
                            }
         
     | 
| 
       95 
     | 
    
         
            -
                        }
         
     | 
| 
       96 
     | 
    
         
            -
                    });
         
     | 
| 
       97 
     | 
    
         
            -
             
     | 
| 
       98 
     | 
    
         
            -
                    Self {
         
     | 
| 
       99 
     | 
    
         
            -
                        inner: ReaderImpl::MultiThreaded {
         
     | 
| 
       100 
     | 
    
         
            -
                            headers,
         
     | 
| 
       101 
     | 
    
         
            -
                            receiver,
         
     | 
| 
       102 
     | 
    
         
            -
                            handle: Some(handle),
         
     | 
| 
       103 
     | 
    
         
            -
                        },
         
     | 
| 
      
 71 
     | 
    
         
            +
                /// Attempts to read the next record, returning any errors encountered.
         
     | 
| 
      
 72 
     | 
    
         
            +
                fn try_next(&mut self) -> csv::Result<Option<T::Output>> {
         
     | 
| 
      
 73 
     | 
    
         
            +
                    match self.reader.read_record(&mut self.string_record)? {
         
     | 
| 
      
 74 
     | 
    
         
            +
                        true => Ok(Some(T::parse(
         
     | 
| 
      
 75 
     | 
    
         
            +
                            &self.headers,
         
     | 
| 
      
 76 
     | 
    
         
            +
                            &self.string_record,
         
     | 
| 
      
 77 
     | 
    
         
            +
                            self.null_string.clone(),
         
     | 
| 
      
 78 
     | 
    
         
            +
                            self.flexible_default.clone(),
         
     | 
| 
      
 79 
     | 
    
         
            +
                        ))),
         
     | 
| 
      
 80 
     | 
    
         
            +
                        false => Ok(None),
         
     | 
| 
       104 
81 
     | 
    
         
             
                    }
         
     | 
| 
       105 
82 
     | 
    
         
             
                }
         
     | 
| 
       106 
83 
     | 
    
         
             
            }
         
     | 
| 
         @@ -110,63 +87,21 @@ impl<'a, T: RecordParser<'a>> Iterator for RecordReader<'a, T> { 
     | 
|
| 
       110 
87 
     | 
    
         | 
| 
       111 
88 
     | 
    
         
             
                #[inline]
         
     | 
| 
       112 
89 
     | 
    
         
             
                fn next(&mut self) -> Option<Self::Item> {
         
     | 
| 
       113 
     | 
    
         
            -
                     
     | 
| 
       114 
     | 
    
         
            -
             
     | 
| 
       115 
     | 
    
         
            -
             
     | 
| 
       116 
     | 
    
         
            -
                        } => match receiver.recv() {
         
     | 
| 
       117 
     | 
    
         
            -
                            Ok(record) => Some(record),
         
     | 
| 
       118 
     | 
    
         
            -
                            Err(_) => {
         
     | 
| 
       119 
     | 
    
         
            -
                                if let Some(handle) = handle.take() {
         
     | 
| 
       120 
     | 
    
         
            -
                                    let _ = handle.join();
         
     | 
| 
       121 
     | 
    
         
            -
                                }
         
     | 
| 
       122 
     | 
    
         
            -
                                None
         
     | 
| 
       123 
     | 
    
         
            -
                            }
         
     | 
| 
       124 
     | 
    
         
            -
                        },
         
     | 
| 
       125 
     | 
    
         
            -
                        ReaderImpl::SingleThreaded {
         
     | 
| 
       126 
     | 
    
         
            -
                            reader,
         
     | 
| 
       127 
     | 
    
         
            -
                            headers,
         
     | 
| 
       128 
     | 
    
         
            -
                            null_string,
         
     | 
| 
       129 
     | 
    
         
            -
                            flexible_default,
         
     | 
| 
       130 
     | 
    
         
            -
                            ref mut string_record,
         
     | 
| 
       131 
     | 
    
         
            -
                        } => match reader.read_record(string_record) {
         
     | 
| 
       132 
     | 
    
         
            -
                            Ok(true) => Some(T::parse(
         
     | 
| 
       133 
     | 
    
         
            -
                                headers,
         
     | 
| 
       134 
     | 
    
         
            -
                                string_record,
         
     | 
| 
       135 
     | 
    
         
            -
                                null_string.as_deref(),
         
     | 
| 
       136 
     | 
    
         
            -
                                flexible_default.clone(),
         
     | 
| 
       137 
     | 
    
         
            -
                            )),
         
     | 
| 
       138 
     | 
    
         
            -
                            Ok(false) => None,
         
     | 
| 
       139 
     | 
    
         
            -
                            Err(_e) => None,
         
     | 
| 
       140 
     | 
    
         
            -
                        },
         
     | 
| 
       141 
     | 
    
         
            -
                    }
         
     | 
| 
      
 90 
     | 
    
         
            +
                    // Note: We intentionally swallow errors here to maintain Iterator contract.
         
     | 
| 
      
 91 
     | 
    
         
            +
                    // Errors can be handled by using try_next() directly if needed.
         
     | 
| 
      
 92 
     | 
    
         
            +
                    self.try_next().ok().flatten()
         
     | 
| 
       142 
93 
     | 
    
         
             
                }
         
     | 
| 
       143 
94 
     | 
    
         | 
| 
       144 
95 
     | 
    
         
             
                #[inline]
         
     | 
| 
       145 
96 
     | 
    
         
             
                fn size_hint(&self) -> (usize, Option<usize>) {
         
     | 
| 
       146 
     | 
    
         
            -
                     
     | 
| 
       147 
     | 
    
         
            -
                    (0, None)
         
     | 
| 
      
 97 
     | 
    
         
            +
                    (0, None) // Cannot determine size without reading entire file
         
     | 
| 
       148 
98 
     | 
    
         
             
                }
         
     | 
| 
       149 
99 
     | 
    
         
             
            }
         
     | 
| 
       150 
100 
     | 
    
         | 
| 
       151 
101 
     | 
    
         
             
            impl<'a, T: RecordParser<'a>> Drop for RecordReader<'a, T> {
         
     | 
| 
       152 
102 
     | 
    
         
             
                #[inline]
         
     | 
| 
       153 
103 
     | 
    
         
             
                fn drop(&mut self) {
         
     | 
| 
       154 
     | 
    
         
            -
                     
     | 
| 
       155 
     | 
    
         
            -
             
     | 
| 
       156 
     | 
    
         
            -
                            receiver,
         
     | 
| 
       157 
     | 
    
         
            -
                            handle,
         
     | 
| 
       158 
     | 
    
         
            -
                            headers,
         
     | 
| 
       159 
     | 
    
         
            -
                            ..
         
     | 
| 
       160 
     | 
    
         
            -
                        } => {
         
     | 
| 
       161 
     | 
    
         
            -
                            receiver.close();
         
     | 
| 
       162 
     | 
    
         
            -
                            if let Some(handle) = handle.take() {
         
     | 
| 
       163 
     | 
    
         
            -
                                let _ = handle.join();
         
     | 
| 
       164 
     | 
    
         
            -
                            }
         
     | 
| 
       165 
     | 
    
         
            -
                            let _ = StringCache::clear(headers);
         
     | 
| 
       166 
     | 
    
         
            -
                        }
         
     | 
| 
       167 
     | 
    
         
            -
                        ReaderImpl::SingleThreaded { headers, .. } => {
         
     | 
| 
       168 
     | 
    
         
            -
                            let _ = StringCache::clear(headers);
         
     | 
| 
       169 
     | 
    
         
            -
                        }
         
     | 
| 
       170 
     | 
    
         
            -
                    }
         
     | 
| 
      
 104 
     | 
    
         
            +
                    // Intentionally ignore errors during cleanup as there's no meaningful way to handle them
         
     | 
| 
      
 105 
     | 
    
         
            +
                    let _ = StringCache::clear(&self.headers);
         
     | 
| 
       171 
106 
     | 
    
         
             
                }
         
     | 
| 
       172 
107 
     | 
    
         
             
            }
         
     | 
| 
         @@ -1,30 +1,19 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            use std::{ 
     | 
| 
      
 1 
     | 
    
         
            +
            use std::{
         
     | 
| 
      
 2 
     | 
    
         
            +
                fs::File,
         
     | 
| 
      
 3 
     | 
    
         
            +
                io::{self, Read, Seek, SeekFrom},
         
     | 
| 
      
 4 
     | 
    
         
            +
                mem::ManuallyDrop,
         
     | 
| 
      
 5 
     | 
    
         
            +
            };
         
     | 
| 
       2 
6 
     | 
    
         | 
| 
       3 
7 
     | 
    
         
             
            pub struct ForgottenFileHandle(pub ManuallyDrop<File>);
         
     | 
| 
       4 
8 
     | 
    
         | 
| 
       5 
     | 
    
         
            -
            impl  
     | 
| 
      
 9 
     | 
    
         
            +
            impl Read for ForgottenFileHandle {
         
     | 
| 
       6 
10 
     | 
    
         
             
                fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
         
     | 
| 
       7 
11 
     | 
    
         
             
                    self.0.read(buf)
         
     | 
| 
       8 
12 
     | 
    
         
             
                }
         
     | 
| 
      
 13 
     | 
    
         
            +
            }
         
     | 
| 
       9 
14 
     | 
    
         | 
| 
       10 
     | 
    
         
            -
             
     | 
| 
       11 
     | 
    
         
            -
             
     | 
| 
       12 
     | 
    
         
            -
             
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
                // fn read_buf(&mut self, cursor: BorrowedCursor<'_>) -> io::Result<()> {
         
     | 
| 
       15 
     | 
    
         
            -
                //     self.0.read_buf(cursor)
         
     | 
| 
       16 
     | 
    
         
            -
                // }
         
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
     | 
    
         
            -
                // #[inline]
         
     | 
| 
       19 
     | 
    
         
            -
                // fn is_read_vectored(&self) -> bool {
         
     | 
| 
       20 
     | 
    
         
            -
                //     self.0.is_read_vectored()
         
     | 
| 
       21 
     | 
    
         
            -
                // }
         
     | 
| 
       22 
     | 
    
         
            -
             
     | 
| 
       23 
     | 
    
         
            -
                fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
         
     | 
| 
       24 
     | 
    
         
            -
                    self.0.read_to_end(buf)
         
     | 
| 
       25 
     | 
    
         
            -
                }
         
     | 
| 
       26 
     | 
    
         
            -
             
     | 
| 
       27 
     | 
    
         
            -
                fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
         
     | 
| 
       28 
     | 
    
         
            -
                    self.0.read_to_string(buf)
         
     | 
| 
      
 15 
     | 
    
         
            +
            impl Seek for ForgottenFileHandle {
         
     | 
| 
      
 16 
     | 
    
         
            +
                fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
         
     | 
| 
      
 17 
     | 
    
         
            +
                    self.0.seek(pos)
         
     | 
| 
       29 
18 
     | 
    
         
             
                }
         
     | 
| 
       30 
19 
     | 
    
         
             
            }
         
     | 
| 
         @@ -2,9 +2,12 @@ use magnus::{ 
     | 
|
| 
       2 
2 
     | 
    
         
             
                value::{Opaque, ReprValue},
         
     | 
| 
       3 
3 
     | 
    
         
             
                RClass, RString, Ruby, Value,
         
     | 
| 
       4 
4 
     | 
    
         
             
            };
         
     | 
| 
       5 
     | 
    
         
            -
            use std:: 
     | 
| 
      
 5 
     | 
    
         
            +
            use std::fs::File;
         
     | 
| 
      
 6 
     | 
    
         
            +
            use std::io::{self, BufReader, Read, Seek, SeekFrom, Write};
         
     | 
| 
       6 
7 
     | 
    
         
             
            use std::sync::OnceLock;
         
     | 
| 
       7 
8 
     | 
    
         | 
| 
      
 9 
     | 
    
         
            +
            use super::ForgottenFileHandle;
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
       8 
11 
     | 
    
         
             
            static STRING_IO_CLASS: OnceLock<Opaque<RClass>> = OnceLock::new();
         
     | 
| 
       9 
12 
     | 
    
         | 
| 
       10 
13 
     | 
    
         
             
            /// A reader that can handle various Ruby input types (String, StringIO, IO-like objects)
         
     | 
| 
         @@ -17,6 +20,10 @@ pub struct RubyReader<T> { 
     | 
|
| 
       17 
20 
     | 
    
         
             
            pub trait SeekableRead: std::io::Read + Seek {}
         
     | 
| 
       18 
21 
     | 
    
         
             
            impl SeekableRead for RubyReader<Value> {}
         
     | 
| 
       19 
22 
     | 
    
         
             
            impl SeekableRead for RubyReader<RString> {}
         
     | 
| 
      
 23 
     | 
    
         
            +
            impl SeekableRead for File {}
         
     | 
| 
      
 24 
     | 
    
         
            +
            impl<T: Read + Seek> SeekableRead for BufReader<T> {}
         
     | 
| 
      
 25 
     | 
    
         
            +
            impl SeekableRead for std::io::Cursor<Vec<u8>> {}
         
     | 
| 
      
 26 
     | 
    
         
            +
            impl SeekableRead for ForgottenFileHandle {}
         
     | 
| 
       20 
27 
     | 
    
         | 
| 
       21 
28 
     | 
    
         
             
            pub fn build_ruby_reader(
         
     | 
| 
       22 
29 
     | 
    
         
             
                ruby: &Ruby,
         
     | 
| 
         @@ -74,9 +81,7 @@ impl Seek for RubyReader<RString> { 
     | 
|
| 
       74 
81 
     | 
    
         
             
                    match pos {
         
     | 
| 
       75 
82 
     | 
    
         
             
                        io::SeekFrom::Start(offset) => self.offset = offset as usize,
         
     | 
| 
       76 
83 
     | 
    
         
             
                        io::SeekFrom::Current(offset) => self.offset = (self.offset as i64 + offset) as usize,
         
     | 
| 
       77 
     | 
    
         
            -
                        io::SeekFrom::End(offset) =>  
     | 
| 
       78 
     | 
    
         
            -
                            self.offset = self.inner.len() - offset as usize
         
     | 
| 
       79 
     | 
    
         
            -
                        }
         
     | 
| 
      
 84 
     | 
    
         
            +
                        io::SeekFrom::End(offset) => self.offset = self.inner.len() - offset as usize,
         
     | 
| 
       80 
85 
     | 
    
         
             
                    }
         
     | 
| 
       81 
86 
     | 
    
         
             
                    Ok(self.offset as u64)
         
     | 
| 
       82 
87 
     | 
    
         
             
                }
         
     |