RubyGems - patchwork_csv_utils - Versions diffs - 0.1.21-arm64-darwin → 0.1.23-arm64-darwin - Mend

patchwork_csv_utils 0.1.21-arm64-darwin → 0.1.23-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

checksums.yaml +4 -4
data/.ruby-version +1 -0
data/Gemfile.lock +2 -1
data/ext/csv_utils/src/lib.rs +2 -2
data/ext/csv_utils/src/utils/csv.rs +33 -17
data/ext/csv_utils/src/utils/mod.rs +17 -0
data/ext/csv_utils/src/utils/xls.rs +44 -7
data/lib/csv_utils/2.7/csv_utils.bundle +0 -0
data/lib/csv_utils/3.0/csv_utils.bundle +0 -0
data/lib/csv_utils/3.1/csv_utils.bundle +0 -0
data/lib/csv_utils/3.2/csv_utils.bundle +0 -0
data/lib/csv_utils/version.rb +1 -1
metadata +3 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: eb9ba1cfb3930c7586b62182de0c6c9a2f1207d186bf1a7a2c578a8cfc294bf7
-  data.tar.gz: aec2801daf163c9de70c96d77aaabf62feaa70483785afed01b12d8546fd069b
+  metadata.gz: ff496709a23c4cfeba6216aef09b3feb5b32609362b45cdd269e8b1e3d610adc
+  data.tar.gz: 21d6e4dd7e4cb58150b46c8efb771a6f864d19c20b6c57b7fa9d79ecd303b69b
 SHA512:
-  metadata.gz: e79a8ae7df6fb41bfb309d89faf850a9122b60c0754ef58736fabc684964eda9e7b7b2a6a1e6948fd107ede13275ba636d7b4e56212425da35582c1d16a7befc
-  data.tar.gz: e4f7e61799ef6dc6153acb32dd4ecc589671a96e40c95c71e92105ad199a9a6b75f05631a10b6fb13389937c665513ef1d96e6edbf1833bbff3defe174896a68
+  metadata.gz: 3b4d883cf490921a2365f70785eff0673d01cf4be369350b1953d83c7a9a39b81290bcac66d6747484d7ae37d4c4a491ac49b6ce9a644e67cd1c66e735cc74f6
+  data.tar.gz: e4703bc008ffbbe68fa02fd4eea982e29da38e07c4df0c1d776e71e921b2475bcf4dc91f076907827448e8e106493be7e4cb5fc7e8af5d955ff1856381a48afe

data/.ruby-version ADDED Viewed

@@ -0,0 +1 @@
+ruby-3.0.7

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    patchwork_csv_utils (0.1.21)
+    patchwork_csv_utils (0.1.23)
 GEM
   remote: https://rubygems.org/
@@ -56,6 +56,7 @@ GEM
 PLATFORMS
   arm64-darwin-22
   arm64-darwin-23
+  arm64-darwin-24
   x86_64-linux
 DEPENDENCIES

data/ext/csv_utils/src/lib.rs CHANGED Viewed

@@ -9,7 +9,7 @@ pub mod utils;
 fn init() -> Result<(), magnus::Error> {
     let module = define_module("CsvUtils")?;
     module.define_singleton_method("dedup", function!(dedup, 4))?;
-    module.define_singleton_method("to_csv", function!(to_csv, 7))?;
-    module.define_singleton_method("transform_csv", function!(transform_csv, 7))?;
+    module.define_singleton_method("to_csv", function!(to_csv, 8))?;
+    module.define_singleton_method("transform_csv", function!(transform_csv, 8))?;
     Ok(())
 }

data/ext/csv_utils/src/utils/csv.rs CHANGED Viewed

@@ -1,21 +1,27 @@
-use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Utc};
+use chrono::{NaiveDateTime, NaiveTime, Utc};
 use csv::{Reader, StringRecord, Writer};
 use magnus::{Error, RArray, Ruby};
 use std::collections::HashMap;
 use std::fs::File;
-use crate::utils::{check_mandatory_headers, create_header_map, headers_as_byte_record, index_of_header_in_mandatory_list, magnus_err, missing_header, missing_value, to_datetime_error, FileExtension};
+use crate::utils::{check_mandatory_headers, correct_datetime, create_header_map, headers_as_byte_record, index_of_header_in_mandatory_list, magnus_err, missing_header, missing_value, string_to_datetime, to_datetime_error, FileExtension};
-pub fn transform_csv(ruby: &Ruby, csv_path: String,
-                     target_path: String, exclusions: RArray,
+pub fn transform_csv(ruby: &Ruby,
+                     csv_path: String,
+                     target_path: String,
+                     exclusions: RArray,
                      mandatory_headers: RArray,
                      status_exclusions: RArray,
                      expected_trust_name: String,
-                     is_streamed_file: bool) -> magnus::error::Result<()> {
+                     is_streamed_file: bool,
+                     earliest_start_date: Option<String>) -> magnus::error::Result<()> {
     if !csv_path.has_extension(&["csv"]) {
         return Err(Error::new(ruby.exception_standard_error(), "csv_path must be a csv file".to_string()));
     }
+    let start_date = earliest_start_date
+        .and_then(|date_str| string_to_datetime(&date_str));
     let exclusions = RArray::to_vec(exclusions)?;
     let status_exclusions = RArray::to_vec(status_exclusions)?;
     let mandatory_headers: Vec<String> = RArray::to_vec(mandatory_headers)?;
@@ -48,7 +54,8 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String,
     for (ri, record) in mandatory_records.iter().enumerate() {
-        if skip_excluded_rows(request_id, &record, &exclusions) { continue; }
+        if skip_rows_before_start_date(&start_date, &record, &date) { continue; }
+        if skip_excluded_rows(request_id, &status, &record, &exclusions) { continue; }
         if skip_excluded_status_rows(&status, &record, &status_exclusions) { continue; }
         if has_empty_row_skip(&record) { continue; }
         if has_empty_first_col_skip_row(&record) { continue; }
@@ -140,27 +147,36 @@ fn get_column_name(inverse_header_map: &HashMap<usize, String>, i: &usize) -> St
     column_name.to_string()
 }
-fn skip_excluded_rows(request_id: &usize, r: &StringRecord, exclusions: &Vec<String>) -> bool {
+fn skip_excluded_rows(request_id: &usize, status: &Option<&usize>, r: &StringRecord, exclusions: &Vec<String>) -> bool {
+    if let Some(status_index) = status {
+        if let Some(status) = r.get(**status_index) {
+            if status.eq("Recalled") {
+                return false
+            }
+        }
+    }
     let value = r.get(*request_id).unwrap_or_default();
     exclusions.contains(&value.to_string())
 }
+fn skip_rows_before_start_date(&start_date: &Option<NaiveDateTime>, r: &StringRecord, date_index: &usize) -> bool {
+    if let Some(start_date) = start_date {
+        if let Some(date_str) = r.get(*date_index) {
+            if let Some(date) = string_to_datetime(date_str) {
+                return date <= start_date;
+            }
+        }
+    }
+    false
+}
 fn skip_excluded_status_rows(status: &Option<&usize>, r: &StringRecord, exclusions: &Vec<String>) -> bool {
     status
         .map(|index| exclusions.contains(&r[*index].to_string()))
         .unwrap_or(false)
 }
-fn string_to_datetime(s: &str) -> Option<NaiveDateTime> {
-    let maybe_correct = correct_datetime(s);
-    if maybe_correct.is_some() { return maybe_correct; }
-    NaiveDate::parse_from_str(s, "%d-%b-%y").ok().map(|d| d.and_hms_opt(0, 0, 0)).flatten()
-}
-fn correct_datetime(s: &str) -> Option<NaiveDateTime> {
-    NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S").ok()
-}
 fn string_to_time(s: &str) -> Option<NaiveTime> {
     NaiveTime::parse_from_str(s, "%H:%M").ok()

data/ext/csv_utils/src/utils/mod.rs CHANGED Viewed

@@ -4,6 +4,7 @@ use std::ffi::OsStr;
 use std::path::Path;
 use ::csv::{ByteRecord, StringRecord};
 use magnus::Ruby;
+use chrono::{NaiveDate, NaiveDateTime};
 pub mod csv;
 pub mod dedup;
@@ -83,3 +84,19 @@ impl<P: AsRef<Path>> FileExtension for P {
     }
 }
+pub fn string_to_datetime(s: &str) -> Option<NaiveDateTime> {
+    let maybe_correct = correct_datetime(s);
+    if maybe_correct.is_some() { return maybe_correct; }
+    // Try YYYY-MM-DD format
+    if let Ok(date) = NaiveDate::parse_from_str(s, "%Y-%m-%d") {
+        return date.and_hms_opt(0, 0, 0);
+    }
+    NaiveDate::parse_from_str(s, "%d-%b-%y").ok().map(|d| d.and_hms_opt(0, 0, 0)).flatten()
+}
+pub fn correct_datetime(s: &str) -> Option<NaiveDateTime> {
+    NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S").ok()
+}

data/ext/csv_utils/src/utils/xls.rs CHANGED Viewed

@@ -2,11 +2,11 @@ use std::collections::HashMap;
 use std::fs::File;
 use std::io::{BufWriter, Write};
-use calamine::{open_workbook_auto, Data, Range, Reader};
+use calamine::{open_workbook_auto, Data, DataType, Range, Reader};
 use chrono::{NaiveDateTime, Timelike, Utc};
 use magnus::{RArray, Ruby};
-use crate::utils::{check_mandatory_headers, index_of_header_in_mandatory_list, magnus_err, missing_header, missing_value, to_datetime_error, FileExtension};
+use crate::utils::{check_mandatory_headers, index_of_header_in_mandatory_list, magnus_err, missing_header, missing_value, string_to_datetime, to_datetime_error, FileExtension};
 pub fn to_csv(ruby: &Ruby, xls_path: String,
               target_path: String,
@@ -14,7 +14,8 @@ pub fn to_csv(ruby: &Ruby, xls_path: String,
               mandatory_headers: RArray,
               status_exclusions: RArray,
               expected_trust_name: String,
-              is_streamed_file: bool
+              is_streamed_file: bool,
+              earliest_start_date: Option<String>
 ) -> magnus::error::Result<()> {
     if !xls_path.has_extension(&["xls","xlsx"]) {
         return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls or xlsx file".to_string()));
@@ -23,6 +24,9 @@ pub fn to_csv(ruby: &Ruby, xls_path: String,
     let exclusions = RArray::to_vec(exclusions)?;
     let mandatory_headers: Vec<String> = RArray::to_vec(mandatory_headers)?;
     let status_exclusions = RArray::to_vec(status_exclusions)?;
+    let start_date = earliest_start_date
+        .and_then(|date_str| string_to_datetime(&date_str));
     let mut workbook = open_workbook_auto(&xls_path)
         .map_err(|e| magnus_err(ruby, e, format!("could not open workbook: {}", xls_path).as_str()))?;
@@ -41,7 +45,7 @@ pub fn to_csv(ruby: &Ruby, xls_path: String,
     let csv_out_file = File::create(target_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not create csv file: {}", target_path).as_str()))?;
     let mut dest = BufWriter::new(csv_out_file);
-    write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers, headers_list, status_exclusions, expected_trust_name, is_streamed_file)
+    write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers, headers_list, status_exclusions, expected_trust_name, is_streamed_file, start_date)
 }
 fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
@@ -50,7 +54,8 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
                        headers_list: Vec<String>,
                        status_exclusions: Vec<String>,
                        expected_trust_name: String,
-                       is_streamed_file: bool) -> magnus::error::Result<()> {
+                       is_streamed_file: bool,
+                       start_date: Option<NaiveDateTime>) -> magnus::error::Result<()> {
     let n = mandatory_headers.len() - 1;
     let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
     let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
@@ -66,7 +71,8 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
     for (ri, r) in mandatory_rows.into_iter().enumerate() {
         let mut date_value = Utc::now().naive_utc();
-        if skip_excluded_rows(&request_id, &r, &exclusions) { continue; }
+        if skip_rows_before_start_date(&start_date, &r, &date) { continue; }
+        if skip_excluded_rows(&request_id, &status, &r, &exclusions) { continue; }
         if skip_excluded_status_rows(&status, &r, &status_exclusions) { continue; }
         if skip_empty_rows(&r) { continue; }
         if skip_rows_with_no_request_id(&request_id, &r) { continue; }
@@ -185,7 +191,17 @@ fn date_value_is_not_present(date: &usize, r: &Vec<&Data>) -> bool {
     r[*date] == &Data::Empty
 }
-fn skip_excluded_rows(request_id: &usize, r: &Vec<&Data>, exclusions: &Vec<String>) -> bool {
+fn skip_excluded_rows(request_id: &usize, status: &Option<&usize>, r: &Vec<&Data>, exclusions: &Vec<String>) -> bool {
+    if let Some(status_index) = status {
+        if let Some(status) = r.get(**status_index) {
+            if let Some(status_str) = status.as_string() {
+                if status_str.eq("Recalled") {
+                    return false
+                }
+            }
+        }
+    }
     let value = r[*request_id].to_string();
     exclusions.contains(&value.to_string())
 }
@@ -204,6 +220,27 @@ fn skip_rows_with_no_request_id(request_id: &usize, r: &Vec<&Data>) -> bool {
     r[*request_id] == &Data::Empty
 }
+fn skip_rows_before_start_date(start_date: &Option<NaiveDateTime>, r: &Vec<&Data>, date_index: &usize) -> bool {
+    if let Some(start_date) = start_date {
+        if let Some(date_data) = r.get(*date_index) {
+            match date_data {
+                Data::DateTime(d) => {
+                    if let Some(date) = d.as_datetime() {
+                        return date <= *start_date;
+                    }
+                }
+                Data::DateTimeIso(s) => {
+                    if let Some(date) = string_to_datetime(s) {
+                        return date <= *start_date;
+                    }
+                }
+                _ => {}
+            }
+        }
+    }
+    false
+}
 fn transform_time_to_datetime(t1: NaiveDateTime, t2: NaiveDateTime) -> NaiveDateTime {
     NaiveDateTime::new(t1.date(), t2.time())
 }

data/lib/csv_utils/2.7/csv_utils.bundle CHANGED Viewed

Binary file

data/lib/csv_utils/3.0/csv_utils.bundle CHANGED Viewed

Binary file

data/lib/csv_utils/3.1/csv_utils.bundle CHANGED Viewed

Binary file

data/lib/csv_utils/3.2/csv_utils.bundle CHANGED Viewed

Binary file

data/lib/csv_utils/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module CsvUtils
-  VERSION = '0.1.21'
+  VERSION = '0.1.23'
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: patchwork_csv_utils
 version: !ruby/object:Gem::Version
-  version: 0.1.21
+  version: 0.1.23
 platform: arm64-darwin
 authors:
 - kingsley.hendrickse
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2025-04-14 00:00:00.000000000 Z
+date: 2025-07-08 00:00:00.000000000 Z
 dependencies: []
 description: Deduplication of CSV files and XLS to CSV conversion.
 email:
@@ -19,6 +19,7 @@ extra_rdoc_files: []
 files:
 - ".rspec"
 - ".rubocop.yml"
+- ".ruby-version"
 - Cargo.lock
 - Cargo.toml
 - Gemfile