patchwork_csv_utils 0.1.5-arm64-darwin → 0.1.7-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ccdea0ba6abfc936797f007183000983de9b15b9c195dfcdcfcd6ed3e63b5119
4
- data.tar.gz: af20ff750dbbf82f86a3aaad20242770e1327db98218dd5e93381f3627f744bb
3
+ metadata.gz: f0a84b0c5c4d9eee5d49fdb63c5f1381967eda72a68a16b35c602fa0143cd0e2
4
+ data.tar.gz: d1e0f1077c2abbda112ddcee7dae55e198666566097e95bd96aa6665c363eec7
5
5
  SHA512:
6
- metadata.gz: d134cfdfafab51d74fef17480c0aad78c1033d0fc3ae443cd57ad5ce838a76fdf44a05970c295177eb6b9b26f556c2d37a8a2b04f05d67fec194f22991da8628
7
- data.tar.gz: dc188247d6c270c0640f3f3b88173b4e3ff97d23c4b077749219fcf33c0394616b1b95b3d22e9fd5126fb2741f72bea316c1d0f09a7798484e775663661c0daa
6
+ metadata.gz: 011f0adde455db75eeb27492bbbb8617f2a406ef56278c54024fb3c874a7038935e11cd70176e2706c1e4700f9a3d6dc1cfde1abc076bbe2561c3c886f1140ec
7
+ data.tar.gz: a5d81faa6efd2b56941003638bd2c5c0829670179ee8a76a475724b6db688c6430bea189397ebd7c53a7b790f8f8ae5a38b4049c2aef7bb7a4c62dcd152021c3
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- patchwork_csv_utils (0.1.5)
4
+ patchwork_csv_utils (0.1.7)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -67,4 +67,4 @@ DEPENDENCIES
67
67
  rubocop (~> 1.21)
68
68
 
69
69
  BUNDLED WITH
70
- 2.4.10
70
+ 2.4.4
data/README.md CHANGED
@@ -14,7 +14,8 @@ gem install patchwork_csv_utils
14
14
  ```irb
15
15
  require 'csv_utils'
16
16
  CsvUtils.dedup('file1.csv', 'file2.csv', 'output.csv')
17
- CsvUtils.to_csv('file1.xls', 'output_file1.csv', 'sheet_name')
17
+ CsvUtils.to_csv('file1.xls', 'output_file1.csv', ['request_ids_to_skip'])
18
+ CsvUtils.transform_csv('file1.csv', 'output_file1.csv', ['request_ids_to_skip'])
18
19
  ```
19
20
 
20
21
  ## Release
@@ -1,4 +1,5 @@
1
1
  use magnus::{define_module, function, prelude::*};
2
+ use crate::utils::csv::transform_csv;
2
3
  use crate::utils::dedup::dedup;
3
4
  use crate::utils::xls::to_csv;
4
5
 
@@ -8,6 +9,7 @@ pub mod utils;
8
9
  fn init() -> Result<(), magnus::Error> {
9
10
  let module = define_module("CsvUtils")?;
10
11
  module.define_singleton_method("dedup", function!(dedup, 3))?;
11
- module.define_singleton_method("to_csv", function!(to_csv, 2))?;
12
+ module.define_singleton_method("to_csv", function!(to_csv, 3))?;
13
+ module.define_singleton_method("transform_csv", function!(transform_csv, 3))?;
12
14
  Ok(())
13
15
  }
@@ -0,0 +1,92 @@
1
+ use std::collections::HashMap;
2
+ use std::fs::File;
3
+
4
+ use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Utc};
5
+ use csv::{StringRecord, Writer};
6
+ use magnus::{RArray, Ruby};
7
+
8
+ use crate::utils::{FileExtension, magnus_err, missing_header};
9
+
10
+ pub fn transform_csv(ruby: &Ruby, csv_path: String, target_path: String, exclusions: RArray) -> magnus::error::Result<()> {
11
+ if !csv_path.has_extension(&["csv"]) {
12
+ return Err(magnus::Error::new(ruby.exception_standard_error(), "csv_path must be a csv file".to_string()));
13
+ }
14
+
15
+ let exclusions = RArray::to_vec(exclusions)?;
16
+
17
+ let csv_file = File::open(csv_path).map_err(|e| magnus_err(ruby, e, "csv_path"))?;
18
+ let mut csv: csv::Reader<File> = csv::Reader::from_reader(csv_file);
19
+ let mut wtr = Writer::from_path(target_path).map_err(|e| magnus_err(ruby, e, "target_path"))?;
20
+ let headers = csv.headers().map_err(|e| magnus_err(ruby, e, "csv_path headers"))?;
21
+ let header_map: HashMap<String, usize> = headers.iter().enumerate().map(|(i, h)| (h.to_string(), i)).collect();
22
+
23
+ wtr.write_byte_record(headers.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
24
+
25
+ let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
26
+ let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
27
+ let start = header_map.get("Start").ok_or(missing_header(ruby, "Start"))?;
28
+ let end = header_map.get("End").ok_or(missing_header(ruby, "End"))?;
29
+ let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
30
+ let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
31
+
32
+ for (ri, record) in csv.records().enumerate() {
33
+ let record = record.map_err(|e| magnus_err(ruby, e, "record"))?;
34
+
35
+ if skip_excluded_rows(request_id, &record, &exclusions) { continue; }
36
+ if has_empty_row_skip(&record) { continue; }
37
+ if has_empty_first_col_skip_row(&record) { continue; }
38
+
39
+ let mut date_value = Utc::now().naive_utc();
40
+
41
+ let record = record.iter().enumerate().map(|(i, c)| {
42
+ let c = c.trim_end();
43
+ if i == *date {
44
+ let current = string_to_datetime(c).ok_or(to_datetime_error(ruby, c, ri, i))?;
45
+ date_value = current;
46
+ Ok(current.to_string())
47
+ } else if i == *start || i == *end || i == *actual_start || i == *actual_end {
48
+ let current_time = string_to_time(c).ok_or(to_datetime_error(ruby, c, ri, i))?;
49
+ let datetime = transform_time_to_datetime(date_value, current_time);
50
+ Ok(datetime.to_string())
51
+ } else {
52
+ Ok(c.to_string())
53
+ }
54
+ }).collect::<Result<StringRecord, magnus::Error>>()?;
55
+
56
+ let record = record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
57
+ wtr.write_byte_record(record.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
58
+ }
59
+
60
+ wtr.flush().map_err(|e| magnus_err(ruby, e, "flush"))?;
61
+
62
+ Ok(())
63
+ }
64
+
65
+ fn skip_excluded_rows(request_id: &usize, r: &StringRecord, exclusions: &Vec<String>) -> bool {
66
+ let value = r.get(*request_id).unwrap_or_default();
67
+ exclusions.contains(&value.to_string())
68
+ }
69
+
70
+ fn string_to_datetime(s: &str) -> Option<NaiveDateTime> {
71
+ NaiveDate::parse_from_str(s, "%d-%b-%y").ok().map(|d| d.and_hms_opt(0, 0, 0)).flatten()
72
+ }
73
+
74
+ fn string_to_time(s: &str) -> Option<NaiveTime> {
75
+ NaiveTime::parse_from_str(s, "%H:%M").ok()
76
+ }
77
+
78
+ fn transform_time_to_datetime(t1: NaiveDateTime, t2: NaiveTime) -> NaiveDateTime {
79
+ NaiveDateTime::new(t1.date(), t2)
80
+ }
81
+
82
+ fn to_datetime_error(ruby: &Ruby, value: &str, row: usize, col: usize) -> magnus::Error {
83
+ magnus::Error::new(ruby.exception_standard_error(), format!("Could not parse datetime '{}', row: {}, col: {}", value, row, col))
84
+ }
85
+
86
+ fn has_empty_first_col_skip_row(record: &StringRecord) -> bool {
87
+ record[0].is_empty()
88
+ }
89
+
90
+ fn has_empty_row_skip(record: &StringRecord) -> bool {
91
+ record.iter().all(|r| r.is_empty())
92
+ }
@@ -28,7 +28,7 @@ pub fn dedup(ruby: &Ruby, previous_csv_path: String, new_csv_path: String, targe
28
28
  return Err(magnus::Error::new(ruby.exception_standard_error(), "headers of both csv files must be the same".to_string()));
29
29
  }
30
30
 
31
- wtr.write_byte_record(previous_headers.as_byte_record()).unwrap();
31
+ wtr.write_byte_record(previous_headers.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
32
32
 
33
33
  let mut previous_records = vec![];
34
34
  for previous_record in previous_csv.records() {
@@ -49,27 +49,21 @@ pub fn dedup(ruby: &Ruby, previous_csv_path: String, new_csv_path: String, targe
49
49
 
50
50
  let new_record = new_record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
51
51
  if !previous_records.contains(&new_record) {
52
- wtr.write_byte_record(new_record.as_byte_record()).unwrap();
52
+ wtr.write_byte_record(new_record.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
53
53
  }
54
54
  }
55
55
 
56
- wtr.flush().unwrap();
56
+ wtr.flush().map_err(|e| magnus_err(ruby, e, "flush"))?;
57
57
 
58
58
  Ok(())
59
59
  }
60
60
 
61
61
  fn has_empty_first_col_skip_row(previous_record: &StringRecord) -> bool {
62
- if previous_record[0].is_empty() {
63
- return true;
64
- }
65
- false
62
+ previous_record[0].is_empty()
66
63
  }
67
64
 
68
65
  fn has_empty_row_skip(record: &StringRecord) -> bool {
69
- if record.iter().all(|r| r.is_empty()) {
70
- return true;
71
- }
72
- false
66
+ record.iter().all(|r| r.is_empty())
73
67
  }
74
68
 
75
69
 
@@ -3,9 +3,14 @@ use std::ffi::OsStr;
3
3
  use std::path::Path;
4
4
  use magnus::Ruby;
5
5
 
6
+ pub mod csv;
6
7
  pub mod dedup;
7
8
  pub mod xls;
8
9
 
10
+ fn missing_header(ruby: &Ruby, header: &str) -> magnus::Error {
11
+ magnus::Error::new(ruby.exception_standard_error(), format!("Missing '{}' header", header))
12
+ }
13
+
9
14
  fn magnus_err<E: Error>(ruby: &Ruby, e: E, msg: &str) -> magnus::Error {
10
15
  magnus::Error::new(ruby.exception_standard_error(), format!("{}: {}", msg, e.to_string()))
11
16
  }
@@ -4,15 +4,17 @@ use std::io::{BufWriter, Write};
4
4
 
5
5
  use calamine::{Data, open_workbook, Range, Reader, Xls};
6
6
  use chrono::{NaiveDateTime, Utc};
7
- use magnus::Ruby;
7
+ use magnus::{RArray, Ruby};
8
8
 
9
- use crate::utils::{FileExtension, magnus_err};
9
+ use crate::utils::{FileExtension, magnus_err, missing_header};
10
10
 
11
- pub fn to_csv(ruby: &Ruby, xls_path: String, target_path: String) -> magnus::error::Result<()> {
11
+ pub fn to_csv(ruby: &Ruby, xls_path: String, target_path: String, exclusions: RArray) -> magnus::error::Result<()> {
12
12
  if !xls_path.has_extension(&["xls"]) {
13
13
  return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls file".to_string()));
14
14
  }
15
15
 
16
+ let exclusions = RArray::to_vec(exclusions)?;
17
+
16
18
  let mut workbook: Xls<_> = open_workbook(xls_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not open xls: {}", xls_path).as_str()))?;
17
19
  let range = workbook.worksheet_range_at(0)
18
20
  .ok_or(magnus::Error::new(ruby.exception_standard_error(), "no worksheet found in xls".to_string()))
@@ -23,22 +25,25 @@ pub fn to_csv(ruby: &Ruby, xls_path: String, target_path: String) -> magnus::err
23
25
  let csv_out_file = File::create(target_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not create csv file: {}", target_path).as_str()))?;
24
26
  let mut dest = BufWriter::new(csv_out_file);
25
27
 
26
- write_csv(ruby, &mut dest, &range, header_map)
28
+ write_csv(ruby, &mut dest, &range, header_map, exclusions)
27
29
  }
28
30
 
29
- fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_map: HashMap<String, usize>) -> magnus::error::Result<()> {
31
+ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_map: HashMap<String, usize>, exclusions: Vec<String>) -> magnus::error::Result<()> {
30
32
  let n = range.get_size().1 - 1;
33
+
34
+ let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
35
+ let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
36
+ let start = header_map.get("Start").ok_or(missing_header(ruby, "Start"))?;
37
+ let end = header_map.get("End").ok_or(missing_header(ruby, "End"))?;
38
+ let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
39
+ let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
40
+
31
41
  for (ri, r) in range.rows().enumerate() {
32
42
  let mut date_value = Utc::now().naive_utc();
33
43
 
44
+ if skip_excluded_rows(&request_id, r, &exclusions) { continue; }
34
45
  if skip_empty_rows(r) { continue; }
35
- if skip_rows_with_no_request_id(&header_map, r) { continue; }
36
-
37
- let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
38
- let start = header_map.get("Start").ok_or(missing_header(ruby, "Start"))?;
39
- let end = header_map.get("End").ok_or(missing_header(ruby, "End"))?;
40
- let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
41
- let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
46
+ if skip_rows_with_no_request_id(&request_id, r) { continue; }
42
47
 
43
48
  for (i, c) in r.iter().enumerate() {
44
49
  match *c {
@@ -69,24 +74,17 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_ma
69
74
  Ok(())
70
75
  }
71
76
 
72
- fn skip_empty_rows(r: &[Data]) -> bool {
73
- if r.iter().all(|c| c == &Data::Empty) {
74
- return true;
75
- }
76
- false
77
+ fn skip_excluded_rows(request_id: &usize, r: &[Data], exclusions: &Vec<String>) -> bool {
78
+ let value = r[*request_id].to_string();
79
+ exclusions.contains(&value.to_string())
77
80
  }
78
81
 
79
- fn skip_rows_with_no_request_id(header_map: &HashMap<String, usize>, r: &[Data]) -> bool {
80
- if let Some(request_id) = header_map.get("Request Id") {
81
- if r[*request_id] == Data::Empty {
82
- return true;
83
- }
84
- }
85
- false
82
+ fn skip_empty_rows(r: &[Data]) -> bool {
83
+ r.iter().all(|c| c == &Data::Empty)
86
84
  }
87
85
 
88
- fn missing_header(ruby: &Ruby, header: &str) -> magnus::Error {
89
- magnus::Error::new(ruby.exception_standard_error(), format!("Missing '{}' header in xls", header))
86
+ fn skip_rows_with_no_request_id(request_id: &usize, r: &[Data]) -> bool {
87
+ r[*request_id] == Data::Empty
90
88
  }
91
89
 
92
90
  fn transform_time_to_datetime(t1: NaiveDateTime, t2: NaiveDateTime) -> NaiveDateTime {
Binary file
Binary file
Binary file
Binary file
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module CsvUtils
4
- VERSION = '0.1.5'
4
+ VERSION = '0.1.7'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: patchwork_csv_utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.7
5
5
  platform: arm64-darwin
6
6
  authors:
7
7
  - kingsley.hendrickse
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-08-01 00:00:00.000000000 Z
11
+ date: 2024-08-07 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Deduplication of CSV files and XLS to CSV conversion.
14
14
  email:
@@ -28,6 +28,7 @@ files:
28
28
  - ext/csv_utils/Cargo.toml
29
29
  - ext/csv_utils/extconf.rb
30
30
  - ext/csv_utils/src/lib.rs
31
+ - ext/csv_utils/src/utils/csv.rs
31
32
  - ext/csv_utils/src/utils/dedup.rs
32
33
  - ext/csv_utils/src/utils/mod.rs
33
34
  - ext/csv_utils/src/utils/xls.rs