RubyGems - patchwork_csv_utils - Versions diffs - 0.1.6-x86_64-darwin → 0.1.7-x86_64-darwin - Mend

patchwork_csv_utils 0.1.6-x86_64-darwin → 0.1.7-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

checksums.yaml +4 -4
data/Gemfile.lock +2 -2
data/README.md +2 -1
data/ext/csv_utils/src/lib.rs +2 -0
data/ext/csv_utils/src/utils/csv.rs +92 -0
data/ext/csv_utils/src/utils/dedup.rs +5 -11
data/ext/csv_utils/src/utils/mod.rs +5 -0
data/ext/csv_utils/src/utils/xls.rs +19 -38
data/lib/csv_utils/2.7/csv_utils.bundle +0 -0
data/lib/csv_utils/3.0/csv_utils.bundle +0 -0
data/lib/csv_utils/3.1/csv_utils.bundle +0 -0
data/lib/csv_utils/3.2/csv_utils.bundle +0 -0
data/lib/csv_utils/version.rb +1 -1
metadata +3 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: dc9127c25a2fb4b4a0f99bd3730a4a82975fb249637f4c69ff31e0f5a68288de
-  data.tar.gz: 0b493f77942386540b3fc244fdfcdc853ce8dac8ae41df3fa37c06bff1777d50
+  metadata.gz: 6716d509dcd08fa0114772079eb4ee4c910d9fcba2e239292661f7394ae20579
+  data.tar.gz: 97b20c9b75784359b32bf352156827c62f1d63d29cb00833ce826ae9db1ba08a
 SHA512:
-  metadata.gz: 23a5632d5cfbaf9eca4397bff6199a34cd52c902a07640e490633260c94c122a8b5fc567c09ad3264e03eb31c5e5db2d676b1efe66ec322d7a6a3ddb4e7017f5
-  data.tar.gz: 656ca8052002ab45dadad145b141de4255ec72a36c9c4d975adfd1e9071e7ac3675647837492b0261e6385fc5159af3e02cf9af92aa1551fdd25efc05bd02589
+  metadata.gz: 4cb36630ed56de19331bc54206aaed38cfe1901b68ca4b4822c16d5141d8f0a7e374fa1c508b951dbf9182bcef3ffa5f2e0c8fa116b4ed63b148f6ebbddbf076
+  data.tar.gz: 5cc6c6c1a0edf9ef86b42fab23aaa2937b1a80fcad5518a1f15fd3fe22d0ace8d3a534bac85f46a83a25a1dadc6328d210806f616f561dc9fb5bbe712e2f5a8b

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    patchwork_csv_utils (0.1.6)
+    patchwork_csv_utils (0.1.7)
 GEM
   remote: https://rubygems.org/
@@ -67,4 +67,4 @@ DEPENDENCIES
   rubocop (~> 1.21)
 BUNDLED WITH
-   2.4.10
+   2.4.4

data/README.md CHANGED Viewed

@@ -14,7 +14,8 @@ gem install patchwork_csv_utils
 ```irb
 require 'csv_utils'
 CsvUtils.dedup('file1.csv', 'file2.csv', 'output.csv')
-CsvUtils.to_csv('file1.xls', 'output_file1.csv', 'sheet_name')
+CsvUtils.to_csv('file1.xls', 'output_file1.csv', ['request_ids_to_skip']])
+CsvUtils.transform_csv('file1.xls', 'output_file1.csv', ['request_ids_to_skip']])
 ```
 ## Release

data/ext/csv_utils/src/lib.rs CHANGED Viewed

@@ -1,4 +1,5 @@
 use magnus::{define_module, function, prelude::*};
+use crate::utils::csv::transform_csv;
 use crate::utils::dedup::dedup;
 use crate::utils::xls::to_csv;
@@ -9,5 +10,6 @@ fn init() -> Result<(), magnus::Error> {
     let module = define_module("CsvUtils")?;
     module.define_singleton_method("dedup", function!(dedup, 3))?;
     module.define_singleton_method("to_csv", function!(to_csv, 3))?;
+    module.define_singleton_method("transform_csv", function!(transform_csv, 3))?;
     Ok(())
 }

data/ext/csv_utils/src/utils/csv.rs ADDED Viewed

@@ -0,0 +1,92 @@
+use std::collections::HashMap;
+use std::fs::File;
+use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Utc};
+use csv::{StringRecord, Writer};
+use magnus::{RArray, Ruby};
+use crate::utils::{FileExtension, magnus_err, missing_header};
+pub fn transform_csv(ruby: &Ruby, csv_path: String, target_path: String, exclusions: RArray) -> magnus::error::Result<()> {
+    if !csv_path.has_extension(&["csv"]) {
+        return Err(magnus::Error::new(ruby.exception_standard_error(), "csv_path must be a csv file".to_string()));
+    }
+    let exclusions = RArray::to_vec(exclusions)?;
+    let csv_file = File::open(csv_path).map_err(|e| magnus_err(ruby, e, "csv_path"))?;
+    let mut csv: csv::Reader<File> = csv::Reader::from_reader(csv_file);
+    let mut wtr = Writer::from_path(target_path).map_err(|e| magnus_err(ruby, e, "target_path"))?;
+    let headers = csv.headers().map_err(|e| magnus_err(ruby, e, "csv_path headers"))?;
+    let header_map: HashMap<String, usize> = headers.iter().enumerate().map(|(i, h)| (h.to_string(), i)).collect();
+    wtr.write_byte_record(headers.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
+    let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
+    let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
+    let start = header_map.get("Start").ok_or(missing_header(ruby, "Start"))?;
+    let end = header_map.get("End").ok_or(missing_header(ruby, "End"))?;
+    let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
+    let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
+    for (ri, record) in csv.records().enumerate() {
+        let record = record.map_err(|e| magnus_err(ruby, e, "record"))?;
+        if skip_excluded_rows(request_id, &record, &exclusions) { continue; }
+        if has_empty_row_skip(&record) { continue; }
+        if has_empty_first_col_skip_row(&record) { continue; }
+        let mut date_value = Utc::now().naive_utc();
+        let record = record.iter().enumerate().map(|(i, c)| {
+            let c = c.trim_end();
+            if i == *date {
+                let current = string_to_datetime(c).ok_or(to_datetime_error(ruby, c, ri, i))?;
+                date_value = current;
+                Ok(current.to_string())
+            } else if i == *start || i == *end || i == *actual_start || i == *actual_end {
+                let current_time = string_to_time(c).ok_or(to_datetime_error(ruby, c, ri, i))?;
+                let datetime = transform_time_to_datetime(date_value, current_time);
+                Ok(datetime.to_string())
+            } else {
+                Ok(c.to_string())
+            }
+        }).collect::<Result<StringRecord, magnus::Error>>()?;
+        let record = record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
+        wtr.write_byte_record(record.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
+    }
+    wtr.flush().map_err(|e| magnus_err(ruby, e, "flush"))?;
+    Ok(())
+}
+fn skip_excluded_rows(request_id: &usize, r: &StringRecord, exclusions: &Vec<String>) -> bool {
+    let value = r.get(*request_id).unwrap_or_default();
+    exclusions.contains(&value.to_string())
+}
+fn string_to_datetime(s: &str) -> Option<NaiveDateTime> {
+    NaiveDate::parse_from_str(s, "%d-%b-%y").ok().map(|d| d.and_hms_opt(0, 0, 0)).flatten()
+}
+fn string_to_time(s: &str) -> Option<NaiveTime> {
+    NaiveTime::parse_from_str(s, "%H:%M").ok()
+}
+fn transform_time_to_datetime(t1: NaiveDateTime, t2: NaiveTime) -> NaiveDateTime {
+    NaiveDateTime::new(t1.date(), t2)
+}
+fn to_datetime_error(ruby: &Ruby, value: &str, row: usize, col: usize) -> magnus::Error {
+    magnus::Error::new(ruby.exception_standard_error(), format!("Could not parse datetime '{}', row: {}, col: {}", value, row, col))
+}
+fn has_empty_first_col_skip_row(record: &StringRecord) -> bool {
+    record[0].is_empty()
+}
+fn has_empty_row_skip(record: &StringRecord) -> bool {
+    record.iter().all(|r| r.is_empty())
+}

data/ext/csv_utils/src/utils/dedup.rs CHANGED Viewed

@@ -28,7 +28,7 @@ pub fn dedup(ruby: &Ruby, previous_csv_path: String, new_csv_path: String, targe
         return Err(magnus::Error::new(ruby.exception_standard_error(), "headers of both csv files must be the same".to_string()));
     }
-    wtr.write_byte_record(previous_headers.as_byte_record()).unwrap();
+    wtr.write_byte_record(previous_headers.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
     let mut previous_records = vec![];
     for previous_record in previous_csv.records() {
@@ -49,27 +49,21 @@ pub fn dedup(ruby: &Ruby, previous_csv_path: String, new_csv_path: String, targe
         let new_record = new_record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
         if !previous_records.contains(&new_record) {
-            wtr.write_byte_record(new_record.as_byte_record()).unwrap();
+            wtr.write_byte_record(new_record.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
         }
     }
-    wtr.flush().unwrap();
+    wtr.flush().map_err(|e| magnus_err(ruby, e, "flush"))?;
     Ok(())
 }
 fn has_empty_first_col_skip_row(previous_record: &StringRecord) -> bool {
-    if previous_record[0].is_empty() {
-        return true;
-    }
-    false
+    previous_record[0].is_empty()
 }
 fn has_empty_row_skip(record: &StringRecord) -> bool {
-    if record.iter().all(|r| r.is_empty()) {
-        return true;
-    }
-    false
+    record.iter().all(|r| r.is_empty())
 }

data/ext/csv_utils/src/utils/mod.rs CHANGED Viewed

@@ -3,9 +3,14 @@ use std::ffi::OsStr;
 use std::path::Path;
 use magnus::Ruby;
+pub mod csv;
 pub mod dedup;
 pub mod xls;
+fn missing_header(ruby: &Ruby, header: &str) -> magnus::Error {
+    magnus::Error::new(ruby.exception_standard_error(), format!("Missing '{}' header", header))
+}
 fn magnus_err<E: Error>(ruby: &Ruby, e: E, msg: &str) -> magnus::Error {
     magnus::Error::new(ruby.exception_standard_error(), format!("{}: {}", msg, e.to_string()))
 }

data/ext/csv_utils/src/utils/xls.rs CHANGED Viewed

@@ -6,19 +6,15 @@ use calamine::{Data, open_workbook, Range, Reader, Xls};
 use chrono::{NaiveDateTime, Utc};
 use magnus::{RArray, Ruby};
-use crate::utils::{FileExtension, magnus_err};
+use crate::utils::{FileExtension, magnus_err, missing_header};
 pub fn to_csv(ruby: &Ruby, xls_path: String, target_path: String, exclusions: RArray) -> magnus::error::Result<()> {
-    let exclusions = RArray::to_vec(exclusions)?;
-    println!("xls_path: {:?}", xls_path);
-    println!("target_path: {:?}", target_path);
-    println!("exclusions: {:?}", exclusions);
     if !xls_path.has_extension(&["xls"]) {
         return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls file".to_string()));
     }
+    let exclusions = RArray::to_vec(exclusions)?;
     let mut workbook: Xls<_> = open_workbook(xls_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not open xls: {}", xls_path).as_str()))?;
     let range = workbook.worksheet_range_at(0)
         .ok_or(magnus::Error::new(ruby.exception_standard_error(), "no worksheet found in xls".to_string()))
@@ -34,18 +30,20 @@ pub fn to_csv(ruby: &Ruby, xls_path: String, target_path: String, exclusions: RA
 fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_map: HashMap<String, usize>, exclusions: Vec<String>) -> magnus::error::Result<()> {
     let n = range.get_size().1 - 1;
+    let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
+    let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
+    let start = header_map.get("Start").ok_or(missing_header(ruby, "Start"))?;
+    let end = header_map.get("End").ok_or(missing_header(ruby, "End"))?;
+    let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
+    let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
     for (ri, r) in range.rows().enumerate() {
         let mut date_value = Utc::now().naive_utc();
-        if skip_excluded_rows(&header_map, r, &exclusions) { continue; }
+        if skip_excluded_rows(&request_id, r, &exclusions) { continue; }
         if skip_empty_rows(r) { continue; }
-        if skip_rows_with_no_request_id(&header_map, r) { continue; }
-        let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
-        let start = header_map.get("Start").ok_or(missing_header(ruby, "Start"))?;
-        let end = header_map.get("End").ok_or(missing_header(ruby, "End"))?;
-        let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
-        let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
+        if skip_rows_with_no_request_id(&request_id, r) { continue; }
         for (i, c) in r.iter().enumerate() {
             match *c {
@@ -76,34 +74,17 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_ma
     Ok(())
 }
-fn skip_excluded_rows(header_map: &HashMap<String, usize>, r: &[Data], exclusions: &Vec<String>) -> bool {
-    if let Some(request_id) = header_map.get("Request Id") {
-        let value = r[*request_id].to_string();
-        if exclusions.contains(&value) {
-            return true;
-        }
-    }
-    false
+fn skip_excluded_rows(request_id: &usize, r: &[Data], exclusions: &Vec<String>) -> bool {
+    let value = r[*request_id].to_string();
+    exclusions.contains(&value.to_string())
 }
 fn skip_empty_rows(r: &[Data]) -> bool {
-    if r.iter().all(|c| c == &Data::Empty) {
-        return true;
-    }
-    false
-}
-fn skip_rows_with_no_request_id(header_map: &HashMap<String, usize>, r: &[Data]) -> bool {
-    if let Some(request_id) = header_map.get("Request Id") {
-        if r[*request_id] == Data::Empty {
-            return true;
-        }
-    }
-    false
+    r.iter().all(|c| c == &Data::Empty)
 }
-fn missing_header(ruby: &Ruby, header: &str) -> magnus::Error {
-    magnus::Error::new(ruby.exception_standard_error(), format!("Missing '{}' header in xls", header))
+fn skip_rows_with_no_request_id(request_id: &usize, r: &[Data]) -> bool {
+    r[*request_id] == Data::Empty
 }
 fn transform_time_to_datetime(t1: NaiveDateTime, t2: NaiveDateTime) -> NaiveDateTime {

data/lib/csv_utils/2.7/csv_utils.bundle CHANGED Viewed

Binary file

data/lib/csv_utils/3.0/csv_utils.bundle CHANGED Viewed

Binary file

data/lib/csv_utils/3.1/csv_utils.bundle CHANGED Viewed

Binary file

data/lib/csv_utils/3.2/csv_utils.bundle CHANGED Viewed

Binary file

data/lib/csv_utils/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module CsvUtils
-  VERSION = '0.1.6'
+  VERSION = '0.1.7'
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: patchwork_csv_utils
 version: !ruby/object:Gem::Version
-  version: 0.1.6
+  version: 0.1.7
 platform: x86_64-darwin
 authors:
 - kingsley.hendrickse
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-08-06 00:00:00.000000000 Z
+date: 2024-08-07 00:00:00.000000000 Z
 dependencies: []
 description: Deduplication of CSV files and XLS to CSV conversion.
 email:
@@ -28,6 +28,7 @@ files:
 - ext/csv_utils/Cargo.toml
 - ext/csv_utils/extconf.rb
 - ext/csv_utils/src/lib.rs
+- ext/csv_utils/src/utils/csv.rs
 - ext/csv_utils/src/utils/dedup.rs
 - ext/csv_utils/src/utils/mod.rs
 - ext/csv_utils/src/utils/xls.rs