patchwork_csv_utils 0.1.22-arm64-darwin → 0.1.23-arm64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/Gemfile.lock +1 -1
 - data/ext/csv_utils/src/lib.rs +2 -2
 - data/ext/csv_utils/src/utils/csv.rs +19 -13
 - data/ext/csv_utils/src/utils/mod.rs +17 -0
 - data/ext/csv_utils/src/utils/xls.rs +31 -4
 - data/lib/csv_utils/2.7/csv_utils.bundle +0 -0
 - data/lib/csv_utils/3.0/csv_utils.bundle +0 -0
 - data/lib/csv_utils/3.1/csv_utils.bundle +0 -0
 - data/lib/csv_utils/3.2/csv_utils.bundle +0 -0
 - data/lib/csv_utils/version.rb +1 -1
 - metadata +1 -1
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA256:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: ff496709a23c4cfeba6216aef09b3feb5b32609362b45cdd269e8b1e3d610adc
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 21d6e4dd7e4cb58150b46c8efb771a6f864d19c20b6c57b7fa9d79ecd303b69b
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 3b4d883cf490921a2365f70785eff0673d01cf4be369350b1953d83c7a9a39b81290bcac66d6747484d7ae37d4c4a491ac49b6ce9a644e67cd1c66e735cc74f6
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: e4703bc008ffbbe68fa02fd4eea982e29da38e07c4df0c1d776e71e921b2475bcf4dc91f076907827448e8e106493be7e4cb5fc7e8af5d955ff1856381a48afe
         
     | 
    
        data/Gemfile.lock
    CHANGED
    
    
    
        data/ext/csv_utils/src/lib.rs
    CHANGED
    
    | 
         @@ -9,7 +9,7 @@ pub mod utils; 
     | 
|
| 
       9 
9 
     | 
    
         
             
            fn init() -> Result<(), magnus::Error> {
         
     | 
| 
       10 
10 
     | 
    
         
             
                let module = define_module("CsvUtils")?;
         
     | 
| 
       11 
11 
     | 
    
         
             
                module.define_singleton_method("dedup", function!(dedup, 4))?;
         
     | 
| 
       12 
     | 
    
         
            -
                module.define_singleton_method("to_csv", function!(to_csv, 7))?;
     | 
| 
       13 
     | 
    
         
            -
                module.define_singleton_method("transform_csv", function!(transform_csv, 7))?;
     | 
| 
      
 12 
     | 
    
         
            +
                module.define_singleton_method("to_csv", function!(to_csv, 8))?;
         
     | 
| 
      
 13 
     | 
    
         
            +
                module.define_singleton_method("transform_csv", function!(transform_csv, 8))?;
         
     | 
| 
       14 
14 
     | 
    
         
             
                Ok(())
         
     | 
| 
       15 
15 
     | 
    
         
             
            }
         
     | 
| 
         @@ -1,10 +1,10 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Utc};
     | 
| 
      
 1 
     | 
    
         
            +
            use chrono::{NaiveDateTime, NaiveTime, Utc};
         
     | 
| 
       2 
2 
     | 
    
         
             
            use csv::{Reader, StringRecord, Writer};
         
     | 
| 
       3 
3 
     | 
    
         
             
            use magnus::{Error, RArray, Ruby};
         
     | 
| 
       4 
4 
     | 
    
         
             
            use std::collections::HashMap;
         
     | 
| 
       5 
5 
     | 
    
         
             
            use std::fs::File;
         
     | 
| 
       6 
6 
     | 
    
         | 
| 
       7 
     | 
    
         
            -
            use crate::utils::{check_mandatory_headers, create_header_map, headers_as_byte_record, index_of_header_in_mandatory_list, magnus_err, missing_header, missing_value, to_datetime_error, FileExtension};
         
     | 
| 
      
 7 
     | 
    
         
            +
            use crate::utils::{check_mandatory_headers, correct_datetime, create_header_map, headers_as_byte_record, index_of_header_in_mandatory_list, magnus_err, missing_header, missing_value, string_to_datetime, to_datetime_error, FileExtension};
         
     | 
| 
       8 
8 
     | 
    
         | 
| 
       9 
9 
     | 
    
         
             
            pub fn transform_csv(ruby: &Ruby,
         
     | 
| 
       10 
10 
     | 
    
         
             
                                 csv_path: String,
         
     | 
| 
         @@ -13,11 +13,15 @@ pub fn transform_csv(ruby: &Ruby, 
     | 
|
| 
       13 
13 
     | 
    
         
             
                                 mandatory_headers: RArray,
         
     | 
| 
       14 
14 
     | 
    
         
             
                                 status_exclusions: RArray,
         
     | 
| 
       15 
15 
     | 
    
         
             
                                 expected_trust_name: String,
         
     | 
| 
       16 
     | 
    
         
            -
                                 is_streamed_file: bool) -> magnus::error::Result<()> {
     | 
| 
      
 16 
     | 
    
         
            +
                                 is_streamed_file: bool,
         
     | 
| 
      
 17 
     | 
    
         
            +
                                 earliest_start_date: Option<String>) -> magnus::error::Result<()> {
         
     | 
| 
       17 
18 
     | 
    
         
             
                if !csv_path.has_extension(&["csv"]) {
         
     | 
| 
       18 
19 
     | 
    
         
             
                    return Err(Error::new(ruby.exception_standard_error(), "csv_path must be a csv file".to_string()));
         
     | 
| 
       19 
20 
     | 
    
         
             
                }
         
     | 
| 
       20 
21 
     | 
    
         | 
| 
      
 22 
     | 
    
         
            +
                let start_date = earliest_start_date
         
     | 
| 
      
 23 
     | 
    
         
            +
                    .and_then(|date_str| string_to_datetime(&date_str));
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
       21 
25 
     | 
    
         
             
                let exclusions = RArray::to_vec(exclusions)?;
         
     | 
| 
       22 
26 
     | 
    
         
             
                let status_exclusions = RArray::to_vec(status_exclusions)?;
         
     | 
| 
       23 
27 
     | 
    
         
             
                let mandatory_headers: Vec<String> = RArray::to_vec(mandatory_headers)?;
         
     | 
| 
         @@ -50,6 +54,7 @@ pub fn transform_csv(ruby: &Ruby, 
     | 
|
| 
       50 
54 
     | 
    
         | 
| 
       51 
55 
     | 
    
         
             
                for (ri, record) in mandatory_records.iter().enumerate() {
         
     | 
| 
       52 
56 
     | 
    
         | 
| 
      
 57 
     | 
    
         
            +
                    if skip_rows_before_start_date(&start_date, &record, &date) { continue; }
         
     | 
| 
       53 
58 
     | 
    
         
             
                    if skip_excluded_rows(request_id, &status, &record, &exclusions) { continue; }
         
     | 
| 
       54 
59 
     | 
    
         
             
                    if skip_excluded_status_rows(&status, &record, &status_exclusions) { continue; }
         
     | 
| 
       55 
60 
     | 
    
         
             
                    if has_empty_row_skip(&record) { continue; }
         
     | 
| 
         @@ -155,22 +160,23 @@ fn skip_excluded_rows(request_id: &usize, status: &Option<&usize>, r: &StringRec 
     | 
|
| 
       155 
160 
     | 
    
         
             
                exclusions.contains(&value.to_string())
         
     | 
| 
       156 
161 
     | 
    
         
             
            }
         
     | 
| 
       157 
162 
     | 
    
         | 
| 
      
 163 
     | 
    
         
            +
            fn skip_rows_before_start_date(&start_date: &Option<NaiveDateTime>, r: &StringRecord, date_index: &usize) -> bool {
         
     | 
| 
      
 164 
     | 
    
         
            +
                if let Some(start_date) = start_date {
         
     | 
| 
      
 165 
     | 
    
         
            +
                    if let Some(date_str) = r.get(*date_index) {
         
     | 
| 
      
 166 
     | 
    
         
            +
                        if let Some(date) = string_to_datetime(date_str) {
         
     | 
| 
      
 167 
     | 
    
         
            +
                            return date <= start_date;
         
     | 
| 
      
 168 
     | 
    
         
            +
                        }
         
     | 
| 
      
 169 
     | 
    
         
            +
                    }
         
     | 
| 
      
 170 
     | 
    
         
            +
                }
         
     | 
| 
      
 171 
     | 
    
         
            +
                false
         
     | 
| 
      
 172 
     | 
    
         
            +
            }
         
     | 
| 
      
 173 
     | 
    
         
            +
             
     | 
| 
       158 
174 
     | 
    
         
             
            fn skip_excluded_status_rows(status: &Option<&usize>, r: &StringRecord, exclusions: &Vec<String>) -> bool {
         
     | 
| 
       159 
175 
     | 
    
         
             
                status
         
     | 
| 
       160 
176 
     | 
    
         
             
                    .map(|index| exclusions.contains(&r[*index].to_string()))
         
     | 
| 
       161 
177 
     | 
    
         
             
                    .unwrap_or(false)
         
     | 
| 
       162 
178 
     | 
    
         
             
            }
         
     | 
| 
       163 
179 
     | 
    
         | 
| 
       164 
     | 
    
         
            -
            fn string_to_datetime(s: &str) -> Option<NaiveDateTime> {
         
     | 
| 
       165 
     | 
    
         
            -
                let maybe_correct = correct_datetime(s);
         
     | 
| 
       166 
     | 
    
         
            -
                if maybe_correct.is_some() { return maybe_correct; }
         
     | 
| 
       167 
     | 
    
         
            -
             
     | 
| 
       168 
     | 
    
         
            -
                NaiveDate::parse_from_str(s, "%d-%b-%y").ok().map(|d| d.and_hms_opt(0, 0, 0)).flatten()
         
     | 
| 
       169 
     | 
    
         
            -
            }
         
     | 
| 
       170 
     | 
    
         
            -
             
     | 
| 
       171 
     | 
    
         
            -
            fn correct_datetime(s: &str) -> Option<NaiveDateTime> {
         
     | 
| 
       172 
     | 
    
         
            -
                NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S").ok()
         
     | 
| 
       173 
     | 
    
         
            -
            }
         
     | 
| 
       174 
180 
     | 
    
         | 
| 
       175 
181 
     | 
    
         
             
            fn string_to_time(s: &str) -> Option<NaiveTime> {
         
     | 
| 
       176 
182 
     | 
    
         
             
                NaiveTime::parse_from_str(s, "%H:%M").ok()
         
     | 
| 
         @@ -4,6 +4,7 @@ use std::ffi::OsStr; 
     | 
|
| 
       4 
4 
     | 
    
         
             
            use std::path::Path;
         
     | 
| 
       5 
5 
     | 
    
         
             
            use ::csv::{ByteRecord, StringRecord};
         
     | 
| 
       6 
6 
     | 
    
         
             
            use magnus::Ruby;
         
     | 
| 
      
 7 
     | 
    
         
            +
            use chrono::{NaiveDate, NaiveDateTime};
         
     | 
| 
       7 
8 
     | 
    
         | 
| 
       8 
9 
     | 
    
         
             
            pub mod csv;
         
     | 
| 
       9 
10 
     | 
    
         
             
            pub mod dedup;
         
     | 
| 
         @@ -83,3 +84,19 @@ impl<P: AsRef<Path>> FileExtension for P { 
     | 
|
| 
       83 
84 
     | 
    
         
             
                }
         
     | 
| 
       84 
85 
     | 
    
         
             
            }
         
     | 
| 
       85 
86 
     | 
    
         | 
| 
      
 87 
     | 
    
         
            +
            pub fn string_to_datetime(s: &str) -> Option<NaiveDateTime> {
         
     | 
| 
      
 88 
     | 
    
         
            +
                let maybe_correct = correct_datetime(s);
         
     | 
| 
      
 89 
     | 
    
         
            +
                if maybe_correct.is_some() { return maybe_correct; }
         
     | 
| 
      
 90 
     | 
    
         
            +
             
     | 
| 
      
 91 
     | 
    
         
            +
                // Try YYYY-MM-DD format
         
     | 
| 
      
 92 
     | 
    
         
            +
                if let Ok(date) = NaiveDate::parse_from_str(s, "%Y-%m-%d") {
         
     | 
| 
      
 93 
     | 
    
         
            +
                    return date.and_hms_opt(0, 0, 0);
         
     | 
| 
      
 94 
     | 
    
         
            +
                }
         
     | 
| 
      
 95 
     | 
    
         
            +
             
     | 
| 
      
 96 
     | 
    
         
            +
                NaiveDate::parse_from_str(s, "%d-%b-%y").ok().map(|d| d.and_hms_opt(0, 0, 0)).flatten()
         
     | 
| 
      
 97 
     | 
    
         
            +
            }
         
     | 
| 
      
 98 
     | 
    
         
            +
             
     | 
| 
      
 99 
     | 
    
         
            +
            pub fn correct_datetime(s: &str) -> Option<NaiveDateTime> {
         
     | 
| 
      
 100 
     | 
    
         
            +
                NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S").ok()
         
     | 
| 
      
 101 
     | 
    
         
            +
            }
         
     | 
| 
      
 102 
     | 
    
         
            +
             
     | 
| 
         @@ -6,7 +6,7 @@ use calamine::{open_workbook_auto, Data, DataType, Range, Reader}; 
     | 
|
| 
       6 
6 
     | 
    
         
             
            use chrono::{NaiveDateTime, Timelike, Utc};
         
     | 
| 
       7 
7 
     | 
    
         
             
            use magnus::{RArray, Ruby};
         
     | 
| 
       8 
8 
     | 
    
         | 
| 
       9 
     | 
    
         
            -
            use crate::utils::{check_mandatory_headers, index_of_header_in_mandatory_list, magnus_err, missing_header, missing_value, to_datetime_error, FileExtension};
         
     | 
| 
      
 9 
     | 
    
         
            +
            use crate::utils::{check_mandatory_headers, index_of_header_in_mandatory_list, magnus_err, missing_header, missing_value, string_to_datetime, to_datetime_error, FileExtension};
         
     | 
| 
       10 
10 
     | 
    
         | 
| 
       11 
11 
     | 
    
         
             
            pub fn to_csv(ruby: &Ruby, xls_path: String,
         
     | 
| 
       12 
12 
     | 
    
         
             
                          target_path: String,
         
     | 
| 
         @@ -14,7 +14,8 @@ pub fn to_csv(ruby: &Ruby, xls_path: String, 
     | 
|
| 
       14 
14 
     | 
    
         
             
                          mandatory_headers: RArray,
         
     | 
| 
       15 
15 
     | 
    
         
             
                          status_exclusions: RArray,
         
     | 
| 
       16 
16 
     | 
    
         
             
                          expected_trust_name: String,
         
     | 
| 
       17 
     | 
    
         
            -
                          is_streamed_file: bool
         
     | 
| 
      
 17 
     | 
    
         
            +
                          is_streamed_file: bool,
         
     | 
| 
      
 18 
     | 
    
         
            +
                          earliest_start_date: Option<String>
         
     | 
| 
       18 
19 
     | 
    
         
             
            ) -> magnus::error::Result<()> {
         
     | 
| 
       19 
20 
     | 
    
         
             
                if !xls_path.has_extension(&["xls","xlsx"]) {
         
     | 
| 
       20 
21 
     | 
    
         
             
                    return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls or xlsx file".to_string()));
         
     | 
| 
         @@ -23,6 +24,9 @@ pub fn to_csv(ruby: &Ruby, xls_path: String, 
     | 
|
| 
       23 
24 
     | 
    
         
             
                let exclusions = RArray::to_vec(exclusions)?;
         
     | 
| 
       24 
25 
     | 
    
         
             
                let mandatory_headers: Vec<String> = RArray::to_vec(mandatory_headers)?;
         
     | 
| 
       25 
26 
     | 
    
         
             
                let status_exclusions = RArray::to_vec(status_exclusions)?;
         
     | 
| 
      
 27 
     | 
    
         
            +
                
         
     | 
| 
      
 28 
     | 
    
         
            +
                let start_date = earliest_start_date
         
     | 
| 
      
 29 
     | 
    
         
            +
                    .and_then(|date_str| string_to_datetime(&date_str));
         
     | 
| 
       26 
30 
     | 
    
         | 
| 
       27 
31 
     | 
    
         
             
                let mut workbook = open_workbook_auto(&xls_path)
         
     | 
| 
       28 
32 
     | 
    
         
             
                    .map_err(|e| magnus_err(ruby, e, format!("could not open workbook: {}", xls_path).as_str()))?;
         
     | 
| 
         @@ -41,7 +45,7 @@ pub fn to_csv(ruby: &Ruby, xls_path: String, 
     | 
|
| 
       41 
45 
     | 
    
         
             
                let csv_out_file = File::create(target_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not create csv file: {}", target_path).as_str()))?;
         
     | 
| 
       42 
46 
     | 
    
         
             
                let mut dest = BufWriter::new(csv_out_file);
         
     | 
| 
       43 
47 
     | 
    
         | 
| 
       44 
     | 
    
         
            -
                write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers, headers_list, status_exclusions, expected_trust_name, is_streamed_file)
         
     | 
| 
      
 48 
     | 
    
         
            +
                write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers, headers_list, status_exclusions, expected_trust_name, is_streamed_file, start_date)
         
     | 
| 
       45 
49 
     | 
    
         
             
            }
         
     | 
| 
       46 
50 
     | 
    
         | 
| 
       47 
51 
     | 
    
         
             
            fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
         
     | 
| 
         @@ -50,7 +54,8 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, 
     | 
|
| 
       50 
54 
     | 
    
         
             
                                   headers_list: Vec<String>,
         
     | 
| 
       51 
55 
     | 
    
         
             
                                   status_exclusions: Vec<String>,
         
     | 
| 
       52 
56 
     | 
    
         
             
                                   expected_trust_name: String,
         
     | 
| 
       53 
     | 
    
         
            -
                                   is_streamed_file: bool) -> magnus::error::Result<()> {
     | 
| 
      
 57 
     | 
    
         
            +
                                   is_streamed_file: bool,
         
     | 
| 
      
 58 
     | 
    
         
            +
                                   start_date: Option<NaiveDateTime>) -> magnus::error::Result<()> {
         
     | 
| 
       54 
59 
     | 
    
         
             
                let n = mandatory_headers.len() - 1;
         
     | 
| 
       55 
60 
     | 
    
         
             
                let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
         
     | 
| 
       56 
61 
     | 
    
         
             
                let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
         
     | 
| 
         @@ -66,6 +71,7 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, 
     | 
|
| 
       66 
71 
     | 
    
         
             
                for (ri, r) in mandatory_rows.into_iter().enumerate() {
         
     | 
| 
       67 
72 
     | 
    
         
             
                    let mut date_value = Utc::now().naive_utc();
         
     | 
| 
       68 
73 
     | 
    
         | 
| 
      
 74 
     | 
    
         
            +
                    if skip_rows_before_start_date(&start_date, &r, &date) { continue; }
         
     | 
| 
       69 
75 
     | 
    
         
             
                    if skip_excluded_rows(&request_id, &status, &r, &exclusions) { continue; }
         
     | 
| 
       70 
76 
     | 
    
         
             
                    if skip_excluded_status_rows(&status, &r, &status_exclusions) { continue; }
         
     | 
| 
       71 
77 
     | 
    
         
             
                    if skip_empty_rows(&r) { continue; }
         
     | 
| 
         @@ -214,6 +220,27 @@ fn skip_rows_with_no_request_id(request_id: &usize, r: &Vec<&Data>) -> bool { 
     | 
|
| 
       214 
220 
     | 
    
         
             
                r[*request_id] == &Data::Empty
         
     | 
| 
       215 
221 
     | 
    
         
             
            }
         
     | 
| 
       216 
222 
     | 
    
         | 
| 
      
 223 
     | 
    
         
            +
            fn skip_rows_before_start_date(start_date: &Option<NaiveDateTime>, r: &Vec<&Data>, date_index: &usize) -> bool {
         
     | 
| 
      
 224 
     | 
    
         
            +
                if let Some(start_date) = start_date {
         
     | 
| 
      
 225 
     | 
    
         
            +
                    if let Some(date_data) = r.get(*date_index) {
         
     | 
| 
      
 226 
     | 
    
         
            +
                        match date_data {
         
     | 
| 
      
 227 
     | 
    
         
            +
                            Data::DateTime(d) => {
         
     | 
| 
      
 228 
     | 
    
         
            +
                                if let Some(date) = d.as_datetime() {
         
     | 
| 
      
 229 
     | 
    
         
            +
                                    return date <= *start_date;
         
     | 
| 
      
 230 
     | 
    
         
            +
                                }
         
     | 
| 
      
 231 
     | 
    
         
            +
                            }
         
     | 
| 
      
 232 
     | 
    
         
            +
                            Data::DateTimeIso(s) => {
         
     | 
| 
      
 233 
     | 
    
         
            +
                                if let Some(date) = string_to_datetime(s) {
         
     | 
| 
      
 234 
     | 
    
         
            +
                                    return date <= *start_date;
         
     | 
| 
      
 235 
     | 
    
         
            +
                                }
         
     | 
| 
      
 236 
     | 
    
         
            +
                            }
         
     | 
| 
      
 237 
     | 
    
         
            +
                            _ => {}
         
     | 
| 
      
 238 
     | 
    
         
            +
                        }
         
     | 
| 
      
 239 
     | 
    
         
            +
                    }
         
     | 
| 
      
 240 
     | 
    
         
            +
                }
         
     | 
| 
      
 241 
     | 
    
         
            +
                false
         
     | 
| 
      
 242 
     | 
    
         
            +
            }
         
     | 
| 
      
 243 
     | 
    
         
            +
             
     | 
| 
       217 
244 
     | 
    
         
             
            fn transform_time_to_datetime(t1: NaiveDateTime, t2: NaiveDateTime) -> NaiveDateTime {
         
     | 
| 
       218 
245 
     | 
    
         
             
                NaiveDateTime::new(t1.date(), t2.time())
         
     | 
| 
       219 
246 
     | 
    
         
             
            }
         
     | 
| 
         Binary file 
     | 
| 
         Binary file 
     | 
| 
         Binary file 
     | 
| 
         Binary file 
     | 
    
        data/lib/csv_utils/version.rb
    CHANGED