patchwork_csv_utils 0.1.6-arm64-darwin → 0.1.8-arm64-darwin

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 29f014ebb0d7fe82e824325e564767deb1cfed92e24c1f487171457214316ac2
4
- data.tar.gz: 608a2edb81ccedebc584762b5c30a2eb24e2c002fae1d28190cf7cf307cc23b3
3
+ metadata.gz: cee57c41f3e282435e1d7de609e9674dfd8f12b3b58a800ec5d5208c4563956f
4
+ data.tar.gz: 1c14864e23f63a738cb88f077d272a15d6efdba60ee0ed086cd7f3e99cd6ccc1
5
5
  SHA512:
6
- metadata.gz: de08a499be3bd3106978ba9b56b81afd3b1bfb634262405aefae74cab36543390d9f825e2bcdf2d25601bbdcea2fec3f4853bff470a39a89a484846477b2dd59
7
- data.tar.gz: 6b25c5917e837b3de71570f0d12c14d92cf3ef588d36ca660611fbae4a6ca17cc2c04af641304f7dc755cb69ea57b5a38c324ca09b9322af62dd8d0900659718
6
+ metadata.gz: d54494b5517a1624c2904ca9ff74a1d66ae64cefb3cd3eabd2452c0daa034e69af6292b307629ad7075974e5cb59aff44a2843d5d7f2c18e0c08e604481d1d32
7
+ data.tar.gz: 25837083730e79c5f161ef4a5cef157659662f639d5c3c6cd0601cbd53fdabf8a8c303458927071163a2ff7e8b15ecdf07d04d01aefebf797f83e96d43a0d2b3
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- patchwork_csv_utils (0.1.6)
4
+ patchwork_csv_utils (0.1.8)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/README.md CHANGED
@@ -14,7 +14,8 @@ gem install patchwork_csv_utils
14
14
  ```irb
15
15
  require 'csv_utils'
16
16
  CsvUtils.dedup('file1.csv', 'file2.csv', 'output.csv')
17
- CsvUtils.to_csv('file1.xls', 'output_file1.csv', 'sheet_name')
17
+ CsvUtils.to_csv('file1.xls', 'output_file1.csv', ['request_ids_to_skip'])
18
+ CsvUtils.transform_csv('file1.csv', 'output_file1.csv', ['request_ids_to_skip'])
18
19
  ```
19
20
 
20
21
  ## Release
@@ -1,4 +1,5 @@
1
1
  use magnus::{define_module, function, prelude::*};
2
+ use crate::utils::csv::transform_csv;
2
3
  use crate::utils::dedup::dedup;
3
4
  use crate::utils::xls::to_csv;
4
5
 
@@ -9,5 +10,6 @@ fn init() -> Result<(), magnus::Error> {
9
10
  let module = define_module("CsvUtils")?;
10
11
  module.define_singleton_method("dedup", function!(dedup, 3))?;
11
12
  module.define_singleton_method("to_csv", function!(to_csv, 3))?;
13
+ module.define_singleton_method("transform_csv", function!(transform_csv, 3))?;
12
14
  Ok(())
13
15
  }
@@ -0,0 +1,96 @@
1
+ use std::collections::HashMap;
2
+ use std::fs::File;
3
+
4
+ use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Utc};
5
+ use csv::{StringRecord, Writer};
6
+ use magnus::{RArray, Ruby};
7
+
8
+ use crate::utils::{FileExtension, magnus_err, missing_header};
9
+
10
+ pub fn transform_csv(ruby: &Ruby, csv_path: String, target_path: String, exclusions: RArray) -> magnus::error::Result<()> {
11
+ if !csv_path.has_extension(&["csv"]) {
12
+ return Err(magnus::Error::new(ruby.exception_standard_error(), "csv_path must be a csv file".to_string()));
13
+ }
14
+
15
+ let exclusions = RArray::to_vec(exclusions)?;
16
+
17
+ let csv_file = File::open(csv_path).map_err(|e| magnus_err(ruby, e, "csv_path"))?;
18
+ let mut csv: csv::Reader<File> = csv::Reader::from_reader(csv_file);
19
+ let mut wtr = Writer::from_path(target_path).map_err(|e| magnus_err(ruby, e, "target_path"))?;
20
+ let headers = csv.headers().map_err(|e| magnus_err(ruby, e, "csv_path headers"))?;
21
+ let header_map: HashMap<String, usize> = headers.iter().enumerate().map(|(i, h)| (h.to_string(), i)).collect();
22
+ let inverse_header_map: HashMap<usize, String> = headers.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
23
+
24
+ wtr.write_byte_record(headers.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
25
+
26
+ let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
27
+ let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
28
+ let start = header_map.get("Start").ok_or(missing_header(ruby, "Start"))?;
29
+ let end = header_map.get("End").ok_or(missing_header(ruby, "End"))?;
30
+ let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
31
+ let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
32
+
33
+ for (ri, record) in csv.records().enumerate() {
34
+ let record = record.map_err(|e| magnus_err(ruby, e, "record"))?;
35
+
36
+ if skip_excluded_rows(request_id, &record, &exclusions) { continue; }
37
+ if has_empty_row_skip(&record) { continue; }
38
+ if has_empty_first_col_skip_row(&record) { continue; }
39
+
40
+ let mut date_value = Utc::now().naive_utc();
41
+
42
+ let record = record.iter().enumerate().map(|(i, c)| {
43
+ let c = c.trim_end();
44
+ if i == *date {
45
+ let current = string_to_datetime(c).ok_or(to_datetime_error(ruby, c, ri, "Date"))?;
46
+ date_value = current;
47
+ Ok(current.to_string())
48
+ } else if i == *start || i == *end || i == *actual_start || i == *actual_end {
49
+ if c.is_empty() { return Ok(c.to_string()); }
50
+ let unknown = "Unknown".to_string();
51
+ let column_name = inverse_header_map.get(&i).unwrap_or(&unknown);
52
+ let current_time = string_to_time(c).ok_or(to_datetime_error(ruby, c, ri, column_name))?;
53
+ let datetime = transform_time_to_datetime(date_value, current_time);
54
+ Ok(datetime.to_string())
55
+ } else {
56
+ Ok(c.to_string())
57
+ }
58
+ }).collect::<Result<StringRecord, magnus::Error>>()?;
59
+
60
+ let record = record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
61
+ wtr.write_byte_record(record.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
62
+ }
63
+
64
+ wtr.flush().map_err(|e| magnus_err(ruby, e, "flush"))?;
65
+
66
+ Ok(())
67
+ }
68
+
69
+ fn skip_excluded_rows(request_id: &usize, r: &StringRecord, exclusions: &Vec<String>) -> bool {
70
+ let value = r.get(*request_id).unwrap_or_default();
71
+ exclusions.contains(&value.to_string())
72
+ }
73
+
74
+ fn string_to_datetime(s: &str) -> Option<NaiveDateTime> {
75
+ NaiveDate::parse_from_str(s, "%d-%b-%y").ok().map(|d| d.and_hms_opt(0, 0, 0)).flatten()
76
+ }
77
+
78
+ fn string_to_time(s: &str) -> Option<NaiveTime> {
79
+ NaiveTime::parse_from_str(s, "%H:%M").ok()
80
+ }
81
+
82
+ fn transform_time_to_datetime(t1: NaiveDateTime, t2: NaiveTime) -> NaiveDateTime {
83
+ NaiveDateTime::new(t1.date(), t2)
84
+ }
85
+
86
+ fn to_datetime_error(ruby: &Ruby, value: &str, row: usize, col: &str) -> magnus::Error {
87
+ magnus::Error::new(ruby.exception_standard_error(), format!("Could not parse datetime '{}', row: {}, col: {}", value, row, col))
88
+ }
89
+
90
+ fn has_empty_first_col_skip_row(record: &StringRecord) -> bool {
91
+ record[0].is_empty()
92
+ }
93
+
94
+ fn has_empty_row_skip(record: &StringRecord) -> bool {
95
+ record.iter().all(|r| r.is_empty())
96
+ }
@@ -28,7 +28,7 @@ pub fn dedup(ruby: &Ruby, previous_csv_path: String, new_csv_path: String, targe
28
28
  return Err(magnus::Error::new(ruby.exception_standard_error(), "headers of both csv files must be the same".to_string()));
29
29
  }
30
30
 
31
- wtr.write_byte_record(previous_headers.as_byte_record()).unwrap();
31
+ wtr.write_byte_record(previous_headers.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
32
32
 
33
33
  let mut previous_records = vec![];
34
34
  for previous_record in previous_csv.records() {
@@ -49,27 +49,21 @@ pub fn dedup(ruby: &Ruby, previous_csv_path: String, new_csv_path: String, targe
49
49
 
50
50
  let new_record = new_record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
51
51
  if !previous_records.contains(&new_record) {
52
- wtr.write_byte_record(new_record.as_byte_record()).unwrap();
52
+ wtr.write_byte_record(new_record.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
53
53
  }
54
54
  }
55
55
 
56
- wtr.flush().unwrap();
56
+ wtr.flush().map_err(|e| magnus_err(ruby, e, "flush"))?;
57
57
 
58
58
  Ok(())
59
59
  }
60
60
 
61
61
  fn has_empty_first_col_skip_row(previous_record: &StringRecord) -> bool {
62
- if previous_record[0].is_empty() {
63
- return true;
64
- }
65
- false
62
+ previous_record[0].is_empty()
66
63
  }
67
64
 
68
65
  fn has_empty_row_skip(record: &StringRecord) -> bool {
69
- if record.iter().all(|r| r.is_empty()) {
70
- return true;
71
- }
72
- false
66
+ record.iter().all(|r| r.is_empty())
73
67
  }
74
68
 
75
69
 
@@ -3,9 +3,14 @@ use std::ffi::OsStr;
3
3
  use std::path::Path;
4
4
  use magnus::Ruby;
5
5
 
6
+ pub mod csv;
6
7
  pub mod dedup;
7
8
  pub mod xls;
8
9
 
10
+ fn missing_header(ruby: &Ruby, header: &str) -> magnus::Error {
11
+ magnus::Error::new(ruby.exception_standard_error(), format!("Missing '{}' header", header))
12
+ }
13
+
9
14
  fn magnus_err<E: Error>(ruby: &Ruby, e: E, msg: &str) -> magnus::Error {
10
15
  magnus::Error::new(ruby.exception_standard_error(), format!("{}: {}", msg, e.to_string()))
11
16
  }
@@ -6,19 +6,15 @@ use calamine::{Data, open_workbook, Range, Reader, Xls};
6
6
  use chrono::{NaiveDateTime, Utc};
7
7
  use magnus::{RArray, Ruby};
8
8
 
9
- use crate::utils::{FileExtension, magnus_err};
9
+ use crate::utils::{FileExtension, magnus_err, missing_header};
10
10
 
11
11
  pub fn to_csv(ruby: &Ruby, xls_path: String, target_path: String, exclusions: RArray) -> magnus::error::Result<()> {
12
- let exclusions = RArray::to_vec(exclusions)?;
13
-
14
- println!("xls_path: {:?}", xls_path);
15
- println!("target_path: {:?}", target_path);
16
- println!("exclusions: {:?}", exclusions);
17
-
18
12
  if !xls_path.has_extension(&["xls"]) {
19
13
  return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls file".to_string()));
20
14
  }
21
15
 
16
+ let exclusions = RArray::to_vec(exclusions)?;
17
+
22
18
  let mut workbook: Xls<_> = open_workbook(xls_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not open xls: {}", xls_path).as_str()))?;
23
19
  let range = workbook.worksheet_range_at(0)
24
20
  .ok_or(magnus::Error::new(ruby.exception_standard_error(), "no worksheet found in xls".to_string()))
@@ -34,18 +30,23 @@ pub fn to_csv(ruby: &Ruby, xls_path: String, target_path: String, exclusions: RA
34
30
 
35
31
  fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_map: HashMap<String, usize>, exclusions: Vec<String>) -> magnus::error::Result<()> {
36
32
  let n = range.get_size().1 - 1;
33
+
34
+ let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
35
+ let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
36
+ let start = header_map.get("Start").ok_or(missing_header(ruby, "Start"))?;
37
+ let end = header_map.get("End").ok_or(missing_header(ruby, "End"))?;
38
+ let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
39
+ let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
40
+
37
41
  for (ri, r) in range.rows().enumerate() {
38
42
  let mut date_value = Utc::now().naive_utc();
39
43
 
40
- if skip_excluded_rows(&header_map, r, &exclusions) { continue; }
44
+ if skip_excluded_rows(&request_id, r, &exclusions) { continue; }
41
45
  if skip_empty_rows(r) { continue; }
42
- if skip_rows_with_no_request_id(&header_map, r) { continue; }
43
-
44
- let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
45
- let start = header_map.get("Start").ok_or(missing_header(ruby, "Start"))?;
46
- let end = header_map.get("End").ok_or(missing_header(ruby, "End"))?;
47
- let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
48
- let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
46
+ if skip_rows_with_no_request_id(&request_id, r) { continue; }
47
+ if date_value_is_not_present(&date, r) {
48
+ return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Date value is not present in row: {}", ri)));
49
+ }
49
50
 
50
51
  for (i, c) in r.iter().enumerate() {
51
52
  match *c {
@@ -55,7 +56,7 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_ma
55
56
  }
56
57
  Data::Float(ref f) => write!(dest, "{}", f),
57
58
  Data::DateTime(ref d) => {
58
- let mut current = d.as_datetime().unwrap_or_default();
59
+ let mut current = d.as_datetime().ok_or(to_datetime_error(ruby, &d.to_string(), ri, "Date"))?;
59
60
  if i == *date {
60
61
  date_value = current;
61
62
  } else if i == *start || i == *end || i == *actual_start || i == *actual_end {
@@ -76,40 +77,31 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_ma
76
77
  Ok(())
77
78
  }
78
79
 
79
- fn skip_excluded_rows(header_map: &HashMap<String, usize>, r: &[Data], exclusions: &Vec<String>) -> bool {
80
- if let Some(request_id) = header_map.get("Request Id") {
81
- let value = r[*request_id].to_string();
82
- if exclusions.contains(&value) {
83
- return true;
84
- }
85
- }
86
- false
80
+ fn date_value_is_not_present(date: &usize, r: &[Data]) -> bool {
81
+ r[*date] == Data::Empty
87
82
  }
88
83
 
89
- fn skip_empty_rows(r: &[Data]) -> bool {
90
- if r.iter().all(|c| c == &Data::Empty) {
91
- return true;
92
- }
93
- false
84
+ fn skip_excluded_rows(request_id: &usize, r: &[Data], exclusions: &Vec<String>) -> bool {
85
+ let value = r[*request_id].to_string();
86
+ exclusions.contains(&value.to_string())
94
87
  }
95
88
 
96
- fn skip_rows_with_no_request_id(header_map: &HashMap<String, usize>, r: &[Data]) -> bool {
97
- if let Some(request_id) = header_map.get("Request Id") {
98
- if r[*request_id] == Data::Empty {
99
- return true;
100
- }
101
- }
102
- false
89
+ fn skip_empty_rows(r: &[Data]) -> bool {
90
+ r.iter().all(|c| c == &Data::Empty)
103
91
  }
104
92
 
105
- fn missing_header(ruby: &Ruby, header: &str) -> magnus::Error {
106
- magnus::Error::new(ruby.exception_standard_error(), format!("Missing '{}' header in xls", header))
93
+ fn skip_rows_with_no_request_id(request_id: &usize, r: &[Data]) -> bool {
94
+ r[*request_id] == Data::Empty
107
95
  }
108
96
 
109
97
  fn transform_time_to_datetime(t1: NaiveDateTime, t2: NaiveDateTime) -> NaiveDateTime {
110
98
  NaiveDateTime::new(t1.date(), t2.time())
111
99
  }
112
100
 
101
+ fn to_datetime_error(ruby: &Ruby, value: &str, row: usize, col: &str) -> magnus::Error {
102
+ magnus::Error::new(ruby.exception_standard_error(), format!("Could not parse datetime '{}', row: {}, col: {}", value, row, col))
103
+ }
104
+
113
105
  fn handle_commas<W: Write>(dest: &mut W, s: &str) -> std::io::Result<()> {
114
106
  if s.contains(",") {
115
107
  write!(dest, "{:?}", clean_strings(s).trim_end())
Binary file
Binary file
Binary file
Binary file
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module CsvUtils
4
- VERSION = '0.1.6'
4
+ VERSION = '0.1.8'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: patchwork_csv_utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.8
5
5
  platform: arm64-darwin
6
6
  authors:
7
7
  - kingsley.hendrickse
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-08-06 00:00:00.000000000 Z
11
+ date: 2024-08-07 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Deduplication of CSV files and XLS to CSV conversion.
14
14
  email:
@@ -28,6 +28,7 @@ files:
28
28
  - ext/csv_utils/Cargo.toml
29
29
  - ext/csv_utils/extconf.rb
30
30
  - ext/csv_utils/src/lib.rs
31
+ - ext/csv_utils/src/utils/csv.rs
31
32
  - ext/csv_utils/src/utils/dedup.rs
32
33
  - ext/csv_utils/src/utils/mod.rs
33
34
  - ext/csv_utils/src/utils/xls.rs