patchwork_csv_utils 0.1.7-x86_64-linux → 0.1.9-x86_64-linux
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/ext/csv_utils/src/utils/csv.rs +31 -10
- data/ext/csv_utils/src/utils/mod.rs +4 -0
- data/ext/csv_utils/src/utils/xls.rs +9 -2
- data/lib/csv_utils/2.7/csv_utils.so +0 -0
- data/lib/csv_utils/3.0/csv_utils.so +0 -0
- data/lib/csv_utils/3.1/csv_utils.so +0 -0
- data/lib/csv_utils/3.2/csv_utils.so +0 -0
- data/lib/csv_utils/version.rb +1 -1
- metadata +2 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: e62c8af2ccfe8ef8dbdde1f1e38159e9a21938b13f702db03c88b02821498adf
         | 
| 4 | 
            +
              data.tar.gz: 521fbb4faaf539e3c9b0bc7f9592ff78337a0681d4ac5172f180030fcaee7dd7
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 3f149641716466188ef4e3b7ec51d409dcf33bfb5258ae7787e5ce0594a34306fa051817f88a307b611149390721bff41cfd5732515b8f86e51e96481eed7d41
         | 
| 7 | 
            +
              data.tar.gz: feb32457cce07efc5d3e2b0d0b64496049499b5370d33a3aa1ee5f62333f2058c1b5913ee1625e981e50cde7810a5d070258ed8ca9345a0dc8bb800d9ff3ed94
         | 
    
        data/Gemfile.lock
    CHANGED
    
    
| @@ -3,9 +3,9 @@ use std::fs::File; | |
| 3 3 |  | 
| 4 4 | 
             
            use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Utc};
         | 
| 5 5 | 
             
            use csv::{StringRecord, Writer};
         | 
| 6 | 
            -
            use magnus::{RArray, Ruby};
         | 
| 6 | 
            +
            use magnus::{Error, RArray, Ruby};
         | 
| 7 7 |  | 
| 8 | 
            -
            use crate::utils::{FileExtension, magnus_err, missing_header};
         | 
| 8 | 
            +
            use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error};
         | 
| 9 9 |  | 
| 10 10 | 
             
            pub fn transform_csv(ruby: &Ruby, csv_path: String, target_path: String, exclusions: RArray) -> magnus::error::Result<()> {
         | 
| 11 11 | 
             
                if !csv_path.has_extension(&["csv"]) {
         | 
| @@ -19,6 +19,7 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String, target_path: String, exclusi | |
| 19 19 | 
             
                let mut wtr = Writer::from_path(target_path).map_err(|e| magnus_err(ruby, e, "target_path"))?;
         | 
| 20 20 | 
             
                let headers = csv.headers().map_err(|e| magnus_err(ruby, e, "csv_path headers"))?;
         | 
| 21 21 | 
             
                let header_map: HashMap<String, usize> = headers.iter().enumerate().map(|(i, h)| (h.to_string(), i)).collect();
         | 
| 22 | 
            +
                let inverse_header_map: HashMap<usize, String> = headers.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
         | 
| 22 23 |  | 
| 23 24 | 
             
                wtr.write_byte_record(headers.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
         | 
| 24 25 |  | 
| @@ -41,13 +42,13 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String, target_path: String, exclusi | |
| 41 42 | 
             
                    let record = record.iter().enumerate().map(|(i, c)| {
         | 
| 42 43 | 
             
                        let c = c.trim_end();
         | 
| 43 44 | 
             
                        if i == *date {
         | 
| 44 | 
            -
                            let current = string_to_datetime(c).ok_or(to_datetime_error(ruby, c, ri,  | 
| 45 | 
            +
                            let current = string_to_datetime(c).ok_or(to_datetime_error(ruby, c, ri, "Date"))?;
         | 
| 45 46 | 
             
                            date_value = current;
         | 
| 46 47 | 
             
                            Ok(current.to_string())
         | 
| 47 48 | 
             
                        } else if i == *start || i == *end || i == *actual_start || i == *actual_end {
         | 
| 48 | 
            -
                             | 
| 49 | 
            -
                            let  | 
| 50 | 
            -
                             | 
| 49 | 
            +
                            if c.is_empty() { return Ok(c.to_string()); }
         | 
| 50 | 
            +
                            let column_name = get_column_name(&inverse_header_map, &i);
         | 
| 51 | 
            +
                            process_datetime(ruby, ri, date_value, c, &column_name)
         | 
| 51 52 | 
             
                        } else {
         | 
| 52 53 | 
             
                            Ok(c.to_string())
         | 
| 53 54 | 
             
                        }
         | 
| @@ -62,15 +63,39 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String, target_path: String, exclusi | |
| 62 63 | 
             
                Ok(())
         | 
| 63 64 | 
             
            }
         | 
| 64 65 |  | 
| 66 | 
            +
            fn process_datetime(ruby: &Ruby, ri: usize, date_value: NaiveDateTime, c: &str, column_name: &String) -> magnus::error::Result<String> {
         | 
| 67 | 
            +
                let maybe_correct = correct_datetime(c);
         | 
| 68 | 
            +
                if let Some(correct) = maybe_correct {
         | 
| 69 | 
            +
                    return Ok(correct.to_string());
         | 
| 70 | 
            +
                }
         | 
| 71 | 
            +
             | 
| 72 | 
            +
                let current_time = string_to_time(c).ok_or(to_datetime_error(ruby, c, ri, column_name))?;
         | 
| 73 | 
            +
                let datetime = transform_time_to_datetime(date_value, current_time);
         | 
| 74 | 
            +
                Ok(datetime.to_string())
         | 
| 75 | 
            +
            }
         | 
| 76 | 
            +
             | 
| 77 | 
            +
            fn get_column_name(inverse_header_map: &HashMap<usize, String>, i: &usize) -> String {
         | 
| 78 | 
            +
                let unknown = "Unknown".to_string();
         | 
| 79 | 
            +
                let column_name = inverse_header_map.get(&i).unwrap_or(&unknown);
         | 
| 80 | 
            +
                column_name.to_string()
         | 
| 81 | 
            +
            }
         | 
| 82 | 
            +
             | 
| 65 83 | 
             
            fn skip_excluded_rows(request_id: &usize, r: &StringRecord, exclusions: &Vec<String>) -> bool {
         | 
| 66 84 | 
             
                let value = r.get(*request_id).unwrap_or_default();
         | 
| 67 85 | 
             
                exclusions.contains(&value.to_string())
         | 
| 68 86 | 
             
            }
         | 
| 69 87 |  | 
| 70 88 | 
             
            fn string_to_datetime(s: &str) -> Option<NaiveDateTime> {
         | 
| 89 | 
            +
                let maybe_correct = correct_datetime(s);
         | 
| 90 | 
            +
                if maybe_correct.is_some() { return maybe_correct; }
         | 
| 91 | 
            +
             | 
| 71 92 | 
             
                NaiveDate::parse_from_str(s, "%d-%b-%y").ok().map(|d| d.and_hms_opt(0, 0, 0)).flatten()
         | 
| 72 93 | 
             
            }
         | 
| 73 94 |  | 
| 95 | 
            +
            fn correct_datetime(s: &str) -> Option<NaiveDateTime> {
         | 
| 96 | 
            +
                NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S").ok()
         | 
| 97 | 
            +
            }
         | 
| 98 | 
            +
             | 
| 74 99 | 
             
            fn string_to_time(s: &str) -> Option<NaiveTime> {
         | 
| 75 100 | 
             
                NaiveTime::parse_from_str(s, "%H:%M").ok()
         | 
| 76 101 | 
             
            }
         | 
| @@ -79,10 +104,6 @@ fn transform_time_to_datetime(t1: NaiveDateTime, t2: NaiveTime) -> NaiveDateTime | |
| 79 104 | 
             
                NaiveDateTime::new(t1.date(), t2)
         | 
| 80 105 | 
             
            }
         | 
| 81 106 |  | 
| 82 | 
            -
            fn to_datetime_error(ruby: &Ruby, value: &str, row: usize, col: usize) -> magnus::Error {
         | 
| 83 | 
            -
                magnus::Error::new(ruby.exception_standard_error(), format!("Could not parse datetime '{}', row: {}, col: {}", value, row, col))
         | 
| 84 | 
            -
            }
         | 
| 85 | 
            -
             | 
| 86 107 | 
             
            fn has_empty_first_col_skip_row(record: &StringRecord) -> bool {
         | 
| 87 108 | 
             
                record[0].is_empty()
         | 
| 88 109 | 
             
            }
         | 
| @@ -15,6 +15,10 @@ fn magnus_err<E: Error>(ruby: &Ruby, e: E, msg: &str) -> magnus::Error { | |
| 15 15 | 
             
                magnus::Error::new(ruby.exception_standard_error(), format!("{}: {}", msg, e.to_string()))
         | 
| 16 16 | 
             
            }
         | 
| 17 17 |  | 
| 18 | 
            +
            fn to_datetime_error(ruby: &Ruby, value: &str, row: usize, col: &str) -> magnus::Error {
         | 
| 19 | 
            +
                magnus::Error::new(ruby.exception_standard_error(), format!("Could not parse datetime '{}', row: {}, col: {}", value, row, col))
         | 
| 20 | 
            +
            }
         | 
| 21 | 
            +
             | 
| 18 22 | 
             
            pub trait FileExtension {
         | 
| 19 23 | 
             
                fn has_extension<S: AsRef<str>>(&self, extensions: &[S]) -> bool;
         | 
| 20 24 | 
             
            }
         | 
| @@ -6,7 +6,7 @@ use calamine::{Data, open_workbook, Range, Reader, Xls}; | |
| 6 6 | 
             
            use chrono::{NaiveDateTime, Utc};
         | 
| 7 7 | 
             
            use magnus::{RArray, Ruby};
         | 
| 8 8 |  | 
| 9 | 
            -
            use crate::utils::{FileExtension, magnus_err, missing_header};
         | 
| 9 | 
            +
            use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error};
         | 
| 10 10 |  | 
| 11 11 | 
             
            pub fn to_csv(ruby: &Ruby, xls_path: String, target_path: String, exclusions: RArray) -> magnus::error::Result<()> {
         | 
| 12 12 | 
             
                if !xls_path.has_extension(&["xls"]) {
         | 
| @@ -44,6 +44,9 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_ma | |
| 44 44 | 
             
                    if skip_excluded_rows(&request_id, r, &exclusions) { continue; }
         | 
| 45 45 | 
             
                    if skip_empty_rows(r) { continue; }
         | 
| 46 46 | 
             
                    if skip_rows_with_no_request_id(&request_id, r) { continue; }
         | 
| 47 | 
            +
                    if date_value_is_not_present(&date, r) {
         | 
| 48 | 
            +
                        return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Date value is not present in row: {}", ri)));
         | 
| 49 | 
            +
                    }
         | 
| 47 50 |  | 
| 48 51 | 
             
                    for (i, c) in r.iter().enumerate() {
         | 
| 49 52 | 
             
                        match *c {
         | 
| @@ -53,7 +56,7 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_ma | |
| 53 56 | 
             
                            }
         | 
| 54 57 | 
             
                            Data::Float(ref f) => write!(dest, "{}", f),
         | 
| 55 58 | 
             
                            Data::DateTime(ref d) => {
         | 
| 56 | 
            -
                                let mut current = d.as_datetime(). | 
| 59 | 
            +
                                let mut current = d.as_datetime().ok_or(to_datetime_error(ruby, &d.to_string(), ri, "Date"))?;
         | 
| 57 60 | 
             
                                if i == *date {
         | 
| 58 61 | 
             
                                    date_value = current;
         | 
| 59 62 | 
             
                                } else if i == *start || i == *end || i == *actual_start || i == *actual_end {
         | 
| @@ -74,6 +77,10 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_ma | |
| 74 77 | 
             
                Ok(())
         | 
| 75 78 | 
             
            }
         | 
| 76 79 |  | 
| 80 | 
            +
            fn date_value_is_not_present(date: &usize, r: &[Data]) -> bool {
         | 
| 81 | 
            +
                r[*date] == Data::Empty
         | 
| 82 | 
            +
            }
         | 
| 83 | 
            +
             | 
| 77 84 | 
             
            fn skip_excluded_rows(request_id: &usize, r: &[Data], exclusions: &Vec<String>) -> bool {
         | 
| 78 85 | 
             
                let value = r[*request_id].to_string();
         | 
| 79 86 | 
             
                exclusions.contains(&value.to_string())
         | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
    
        data/lib/csv_utils/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: patchwork_csv_utils
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.1.7
         | 
| 4 | 
            +
              version: 0.1.9
         | 
| 5 5 | 
             
            platform: x86_64-linux
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - kingsley.hendrickse
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: exe
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2024-08- | 
| 11 | 
            +
            date: 2024-08-08 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies: []
         | 
| 13 13 | 
             
            description: Deduplication of CSV files and XLS to CSV conversion.
         | 
| 14 14 | 
             
            email:
         |