patchwork_csv_utils 0.1.4-arm64-darwin → 0.1.6-arm64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/ext/csv_utils/src/lib.rs +1 -1
- data/ext/csv_utils/src/utils/dedup.rs +22 -0
- data/ext/csv_utils/src/utils/xls.rs +21 -4
- data/lib/csv_utils/2.7/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.0/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.1/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.2/csv_utils.bundle +0 -0
- data/lib/csv_utils/version.rb +1 -1
- metadata +2 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 29f014ebb0d7fe82e824325e564767deb1cfed92e24c1f487171457214316ac2
         | 
| 4 | 
            +
              data.tar.gz: 608a2edb81ccedebc584762b5c30a2eb24e2c002fae1d28190cf7cf307cc23b3
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: de08a499be3bd3106978ba9b56b81afd3b1bfb634262405aefae74cab36543390d9f825e2bcdf2d25601bbdcea2fec3f4853bff470a39a89a484846477b2dd59
         | 
| 7 | 
            +
              data.tar.gz: 6b25c5917e837b3de71570f0d12c14d92cf3ef588d36ca660611fbae4a6ca17cc2c04af641304f7dc755cb69ea57b5a38c324ca09b9322af62dd8d0900659718
         | 
    
        data/Gemfile.lock
    CHANGED
    
    
    
        data/ext/csv_utils/src/lib.rs
    CHANGED
    
    | @@ -8,6 +8,6 @@ pub mod utils; | |
| 8 8 | 
             
            fn init() -> Result<(), magnus::Error> {
         | 
| 9 9 | 
             
                let module = define_module("CsvUtils")?;
         | 
| 10 10 | 
             
                module.define_singleton_method("dedup", function!(dedup, 3))?;
         | 
| 11 | 
            -
                module.define_singleton_method("to_csv", function!(to_csv, 2))?;
         | 
| 11 | 
            +
                module.define_singleton_method("to_csv", function!(to_csv, 3))?;
         | 
| 12 12 | 
             
                Ok(())
         | 
| 13 13 | 
             
            }
         | 
| @@ -33,12 +33,20 @@ pub fn dedup(ruby: &Ruby, previous_csv_path: String, new_csv_path: String, targe | |
| 33 33 | 
             
                let mut previous_records = vec![];
         | 
| 34 34 | 
             
                for previous_record in previous_csv.records() {
         | 
| 35 35 | 
             
                    let previous_record = previous_record.map_err(|e| magnus_err(ruby, e, "previous_record"))?;
         | 
| 36 | 
            +
             | 
| 37 | 
            +
                    if has_empty_row_skip(&previous_record) { continue; }
         | 
| 38 | 
            +
                    if has_empty_first_col_skip_row(&previous_record) { continue; }
         | 
| 39 | 
            +
             | 
| 36 40 | 
             
                    let previous_record = previous_record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
         | 
| 37 41 | 
             
                    previous_records.push(previous_record)
         | 
| 38 42 | 
             
                }
         | 
| 39 43 |  | 
| 40 44 | 
             
                for new_record in new_csv.records() {
         | 
| 41 45 | 
             
                    let new_record = new_record.map_err(|e| magnus_err(ruby, e, "new_record"))?;
         | 
| 46 | 
            +
             | 
| 47 | 
            +
                    if has_empty_row_skip(&new_record) { continue; }
         | 
| 48 | 
            +
                    if has_empty_first_col_skip_row(&new_record) { continue; }
         | 
| 49 | 
            +
             | 
| 42 50 | 
             
                    let new_record = new_record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
         | 
| 43 51 | 
             
                    if !previous_records.contains(&new_record) {
         | 
| 44 52 | 
             
                        wtr.write_byte_record(new_record.as_byte_record()).unwrap();
         | 
| @@ -50,4 +58,18 @@ pub fn dedup(ruby: &Ruby, previous_csv_path: String, new_csv_path: String, targe | |
| 50 58 | 
             
                Ok(())
         | 
| 51 59 | 
             
            }
         | 
| 52 60 |  | 
| 61 | 
            +
            fn has_empty_first_col_skip_row(previous_record: &StringRecord) -> bool {
         | 
| 62 | 
            +
                if previous_record[0].is_empty() {
         | 
| 63 | 
            +
                    return true;
         | 
| 64 | 
            +
                }
         | 
| 65 | 
            +
                false
         | 
| 66 | 
            +
            }
         | 
| 67 | 
            +
             | 
| 68 | 
            +
            fn has_empty_row_skip(record: &StringRecord) -> bool {
         | 
| 69 | 
            +
                if record.iter().all(|r| r.is_empty()) {
         | 
| 70 | 
            +
                    return true;
         | 
| 71 | 
            +
                }
         | 
| 72 | 
            +
                false
         | 
| 73 | 
            +
            }
         | 
| 74 | 
            +
             | 
| 53 75 |  | 
| @@ -4,11 +4,17 @@ use std::io::{BufWriter, Write}; | |
| 4 4 |  | 
| 5 5 | 
             
            use calamine::{Data, open_workbook, Range, Reader, Xls};
         | 
| 6 6 | 
             
            use chrono::{NaiveDateTime, Utc};
         | 
| 7 | 
            -
            use magnus::Ruby;
         | 
| 7 | 
            +
            use magnus::{RArray, Ruby};
         | 
| 8 8 |  | 
| 9 9 | 
             
            use crate::utils::{FileExtension, magnus_err};
         | 
| 10 10 |  | 
| 11 | 
            -
            pub fn to_csv(ruby: &Ruby, xls_path: String, target_path: String) -> magnus::error::Result<()> {
         | 
| 11 | 
            +
            pub fn to_csv(ruby: &Ruby, xls_path: String, target_path: String, exclusions: RArray) -> magnus::error::Result<()> {
         | 
| 12 | 
            +
                let exclusions = RArray::to_vec(exclusions)?;
         | 
| 13 | 
            +
             | 
| 14 | 
            +
                println!("xls_path: {:?}", xls_path);
         | 
| 15 | 
            +
                println!("target_path: {:?}", target_path);
         | 
| 16 | 
            +
                println!("exclusions: {:?}", exclusions);
         | 
| 17 | 
            +
             | 
| 12 18 | 
             
                if !xls_path.has_extension(&["xls"]) {
         | 
| 13 19 | 
             
                    return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls file".to_string()));
         | 
| 14 20 | 
             
                }
         | 
| @@ -23,14 +29,15 @@ pub fn to_csv(ruby: &Ruby, xls_path: String, target_path: String) -> magnus::err | |
| 23 29 | 
             
                let csv_out_file = File::create(target_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not create csv file: {}", target_path).as_str()))?;
         | 
| 24 30 | 
             
                let mut dest = BufWriter::new(csv_out_file);
         | 
| 25 31 |  | 
| 26 | 
            -
                write_csv(ruby, &mut dest, &range, header_map)
         | 
| 32 | 
            +
                write_csv(ruby, &mut dest, &range, header_map, exclusions)
         | 
| 27 33 | 
             
            }
         | 
| 28 34 |  | 
| 29 | 
            -
            fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_map: HashMap<String, usize>) -> magnus::error::Result<()> {
         | 
| 35 | 
            +
            fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_map: HashMap<String, usize>, exclusions: Vec<String>) -> magnus::error::Result<()> {
         | 
| 30 36 | 
             
                let n = range.get_size().1 - 1;
         | 
| 31 37 | 
             
                for (ri, r) in range.rows().enumerate() {
         | 
| 32 38 | 
             
                    let mut date_value = Utc::now().naive_utc();
         | 
| 33 39 |  | 
| 40 | 
            +
                    if skip_excluded_rows(&header_map, r, &exclusions) { continue; }
         | 
| 34 41 | 
             
                    if skip_empty_rows(r) { continue; }
         | 
| 35 42 | 
             
                    if skip_rows_with_no_request_id(&header_map, r) { continue; }
         | 
| 36 43 |  | 
| @@ -69,6 +76,16 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_ma | |
| 69 76 | 
             
                Ok(())
         | 
| 70 77 | 
             
            }
         | 
| 71 78 |  | 
| 79 | 
            +
            fn skip_excluded_rows(header_map: &HashMap<String, usize>, r: &[Data], exclusions: &Vec<String>) -> bool {
         | 
| 80 | 
            +
                if let Some(request_id) = header_map.get("Request Id") {
         | 
| 81 | 
            +
                    let value = r[*request_id].to_string();
         | 
| 82 | 
            +
                    if exclusions.contains(&value) {
         | 
| 83 | 
            +
                        return true;
         | 
| 84 | 
            +
                    }
         | 
| 85 | 
            +
                }
         | 
| 86 | 
            +
                false
         | 
| 87 | 
            +
            }
         | 
| 88 | 
            +
             | 
| 72 89 | 
             
            fn skip_empty_rows(r: &[Data]) -> bool {
         | 
| 73 90 | 
             
                if r.iter().all(|c| c == &Data::Empty) {
         | 
| 74 91 | 
             
                    return true;
         | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
    
        data/lib/csv_utils/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: patchwork_csv_utils
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.1.4
         | 
| 4 | 
            +
              version: 0.1.6
         | 
| 5 5 | 
             
            platform: arm64-darwin
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - kingsley.hendrickse
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: exe
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2024-08- | 
| 11 | 
            +
            date: 2024-08-06 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies: []
         | 
| 13 13 | 
             
            description: Deduplication of CSV files and XLS to CSV conversion.
         | 
| 14 14 | 
             
            email:
         |