patchwork_csv_utils 0.1.16-x86_64-linux → 0.1.18-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ccd0a5a1fac5fc48723b28f4b0922ee10468b2bcf26bb6da77a7abdcb9c3882f
4
- data.tar.gz: b9be825c7c520ef319c940814863f71e71c41d0ed2be062179e665110dfcb0d6
3
+ metadata.gz: c53dcc394dd22b4c32d7201914e7c9fd1d94f668365c45b5a0466bc9820e9bf2
4
+ data.tar.gz: 85f24589e760b680d325a8900c2123ce52c8602bef9ed4b3db82561238924b8b
5
5
  SHA512:
6
- metadata.gz: f2419b94893570ef960dd9be99ed2d530b3b8d201b05a8373dd8a5a2b679e6c9f078e26ebc6f8a43f649158e2984ed9c10dad5d8c7ebac80ffe6b927ca2250fc
7
- data.tar.gz: bb87188b17f9e95e3cdf7db797cf9ae067bb322151069f70b6caaa7c8749ec9c2ecdb068136c55c0a010a0f67b5de9990192b4ca94fd6ec5b8e0d73da71a2ed9
6
+ metadata.gz: 36306ea0ff3606f5325f3d97d1ef8a84e93f3879b4652574e8226781ae01aeaef79448be87edb3a305a44290581d0c8ebbb8d8c677395758750b1a0af680a5c4
7
+ data.tar.gz: b236321552dcc55a1a2d05772f0272fc21c65a707f94b4f67ebd068b948a9dd66d001097cbf965d302cc71202e8dbad67e632323a7ee68c46c4cc865965e20a4
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- patchwork_csv_utils (0.1.16)
4
+ patchwork_csv_utils (0.1.18)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -9,7 +9,7 @@ pub mod utils;
9
9
  fn init() -> Result<(), magnus::Error> {
10
10
  let module = define_module("CsvUtils")?;
11
11
  module.define_singleton_method("dedup", function!(dedup, 4))?;
12
- module.define_singleton_method("to_csv", function!(to_csv, 6))?;
13
- module.define_singleton_method("transform_csv", function!(transform_csv, 6))?;
12
+ module.define_singleton_method("to_csv", function!(to_csv, 5))?;
13
+ module.define_singleton_method("transform_csv", function!(transform_csv, 5))?;
14
14
  Ok(())
15
15
  }
@@ -4,13 +4,12 @@ use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Utc};
4
4
  use csv::{Reader, StringRecord, Writer};
5
5
  use magnus::{Error, RArray, Ruby};
6
6
 
7
- use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, create_header_map, missing_value, headers_as_byte_record, index_of_header_in_mandatory_list, validate_trust_name};
7
+ use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, create_header_map, missing_value, headers_as_byte_record, index_of_header_in_mandatory_list};
8
8
 
9
9
  pub fn transform_csv(ruby: &Ruby, csv_path: String,
10
10
  target_path: String, exclusions: RArray,
11
11
  mandatory_headers: RArray,
12
- status_exclusions: RArray,
13
- expected_trust_name: String,) -> magnus::error::Result<()> {
12
+ status_exclusions: RArray) -> magnus::error::Result<()> {
14
13
  if !csv_path.has_extension(&["csv"]) {
15
14
  return Err(Error::new(ruby.exception_standard_error(), "csv_path must be a csv file".to_string()));
16
15
  }
@@ -41,7 +40,6 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String,
41
40
  let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
42
41
  let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
43
42
  let status = header_map.get("Status");
44
- let trust_name = header_map.get("Trust").ok_or(missing_header(ruby, "Trust"))?;
45
43
 
46
44
  let mandatory_records = get_mandatory_records(&ruby, &mut csv, &headers_list, &mandatory_headers)?;
47
45
 
@@ -60,8 +58,6 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String,
60
58
  let column_value = record.get(*column_index).ok_or(missing_value(ruby, column))?;
61
59
  let column_value = column_value.trim_end();
62
60
 
63
- validate_trust_name(ruby, &expected_trust_name, trust_name, ri, i, &column_value.to_string())?;
64
-
65
61
  if i == *date {
66
62
  let current = string_to_datetime(column_value).ok_or(to_datetime_error(ruby, column_value, ri, "Date"))?;
67
63
  date_value = current;
@@ -87,6 +83,16 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String,
87
83
  Ok(())
88
84
  }
89
85
 
86
+
87
+ fn validate_trust_name(ruby: &Ruby, expected_trust_name: &String, trust_name: &usize, ri: usize, i: usize, s: &String) -> magnus::error::Result<()> {
88
+ if i == *trust_name {
89
+ if s != &expected_trust_name.clone() {
90
+ return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Trust actual name: '{}' is not as expected: '{}'", s, expected_trust_name)));
91
+ }
92
+ }
93
+ Ok(())
94
+ }
95
+
90
96
  fn get_mandatory_records(ruby: &Ruby, csv: &mut Reader<File>, csv_header_list: &Vec<String>, mandatory_headers_list: &Vec<String>) -> magnus::error::Result<Vec<StringRecord>> {
91
97
  let inverse_header_map: HashMap<usize, String> = csv_header_list.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
92
98
 
@@ -9,15 +9,6 @@ pub mod csv;
9
9
  pub mod dedup;
10
10
  pub mod xls;
11
11
 
12
- fn validate_trust_name(ruby: &Ruby, expected_trust_name: &String, trust_name: &usize, ri: usize, i: usize, s: &String) -> magnus::error::Result<()> {
13
- if ri > 0 && i == *trust_name {
14
- if s != &expected_trust_name.clone() {
15
- return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Trust actual name: '{}' is not as expected: '{}'", s, expected_trust_name)));
16
- }
17
- }
18
- Ok(())
19
- }
20
-
21
12
  fn missing_header(ruby: &Ruby, header: &str) -> magnus::Error {
22
13
  magnus::Error::new(ruby.exception_standard_error(), format!("Missing '{}' header", header))
23
14
  }
@@ -74,6 +65,7 @@ fn create_header_map(headers: &Vec<String>) -> HashMap<String, usize> {
74
65
 
75
66
  pub trait FileExtension {
76
67
  fn has_extension<S: AsRef<str>>(&self, extensions: &[S]) -> bool;
68
+ fn extension(&self) -> Option<&str>;
77
69
  }
78
70
 
79
71
  impl<P: AsRef<Path>> FileExtension for P {
@@ -86,5 +78,8 @@ impl<P: AsRef<Path>> FileExtension for P {
86
78
 
87
79
  false
88
80
  }
81
+ fn extension(&self) -> Option<&str> {
82
+ self.as_ref().extension().and_then(OsStr::to_str)
83
+ }
89
84
  }
90
85
 
@@ -2,28 +2,31 @@ use std::collections::HashMap;
2
2
  use std::fs::File;
3
3
  use std::io::{BufWriter, Write};
4
4
 
5
- use calamine::{Data, open_workbook, Range, Reader, Xls};
6
- use chrono::{NaiveDateTime, Utc};
5
+ use calamine::{Data, open_workbook, Range, Reader, Xls, open_workbook_auto};
6
+ use chrono::{NaiveDateTime, Timelike, Utc};
7
7
  use magnus::{RArray, Ruby};
8
8
 
9
- use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, missing_value, index_of_header_in_mandatory_list, validate_trust_name};
9
+ use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, missing_value, index_of_header_in_mandatory_list};
10
10
 
11
11
  pub fn to_csv(ruby: &Ruby, xls_path: String,
12
12
  target_path: String,
13
13
  exclusions: RArray,
14
14
  mandatory_headers: RArray,
15
15
  status_exclusions: RArray,
16
- expected_trust_name: String,
17
16
  ) -> magnus::error::Result<()> {
18
- if !xls_path.has_extension(&["xls"]) {
19
- return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls file".to_string()));
17
+ if !xls_path.has_extension(&["xls","xlsx"]) {
18
+ return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls or xlsx file".to_string()));
20
19
  }
21
20
 
22
21
  let exclusions = RArray::to_vec(exclusions)?;
23
22
  let mandatory_headers: Vec<String> = RArray::to_vec(mandatory_headers)?;
24
23
  let status_exclusions = RArray::to_vec(status_exclusions)?;
25
24
 
26
- let mut workbook: Xls<_> = open_workbook(xls_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not open xls: {}", xls_path).as_str()))?;
25
+
26
+ let mut workbook = open_workbook_auto(&xls_path)
27
+ .map_err(|e| magnus_err(ruby, e, format!("could not open workbook: {}", xls_path).as_str()))?;
28
+
29
+
27
30
  let range = workbook.worksheet_range_at(0)
28
31
  .ok_or(magnus::Error::new(ruby.exception_standard_error(), "no worksheet found in xls".to_string()))
29
32
  .and_then(|r| r.map_err(|e| magnus_err(ruby, e, "could not read worksheet range")))?;
@@ -38,15 +41,14 @@ pub fn to_csv(ruby: &Ruby, xls_path: String,
38
41
  let csv_out_file = File::create(target_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not create csv file: {}", target_path).as_str()))?;
39
42
  let mut dest = BufWriter::new(csv_out_file);
40
43
 
41
- write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers, headers_list, status_exclusions, expected_trust_name)
44
+ write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers, headers_list, status_exclusions)
42
45
  }
43
46
 
44
47
  fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
45
48
  header_map: HashMap<String, usize>, exclusions: Vec<String>,
46
49
  mandatory_headers: Vec<String>,
47
50
  headers_list: Vec<String>,
48
- status_exclusions: Vec<String>,
49
- expected_trust_name: String) -> magnus::error::Result<()> {
51
+ status_exclusions: Vec<String>) -> magnus::error::Result<()> {
50
52
  let n = mandatory_headers.len() - 1;
51
53
  let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
52
54
  let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
@@ -55,7 +57,6 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
55
57
  let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
56
58
  let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
57
59
  let status = header_map.get("Status");
58
- let trust_name = header_map.get("Trust").ok_or(missing_header(ruby, "Trust"))?;
59
60
 
60
61
  let mandatory_rows = get_mandatory_records(ruby, range, &headers_list, &mandatory_headers)?;
61
62
 
@@ -69,6 +70,7 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
69
70
  if date_value_is_not_present(&date, &r) {
70
71
  return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Date value is not present in row: {}", ri)));
71
72
  }
73
+ // validate_trust_name(ruby, &expected_trust_name, trust_name, ri, &r)?;
72
74
 
73
75
  for (i, c) in mandatory_headers.iter().enumerate() {
74
76
  let column_index = header_map.get(c).ok_or(missing_header(ruby, c))?;
@@ -76,11 +78,49 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
76
78
 
77
79
  match *c {
78
80
  Data::Empty => Ok(()),
79
- Data::String(ref s) | Data::DateTimeIso(ref s) | Data::DurationIso(ref s) => {
80
- validate_trust_name(ruby, &expected_trust_name, trust_name, ri, i, s)?;
81
+ Data::String(ref s) | Data::DurationIso(ref s) => {
81
82
  handle_commas(dest, s)
82
83
  }
83
84
  Data::Float(ref f) => write!(dest, "{}", f),
85
+ Data::DateTimeIso(ref s) => {
86
+ // Normalize the string to ensure manageable precision
87
+ let normalized_s = if s.contains('.') {
88
+ let parts: Vec<&str> = s.split('.').collect();
89
+ format!("{}.{}", parts[0], &parts[1][..std::cmp::min(parts[1].len(), 6)]) // Keep up to 6 fractional seconds
90
+ } else {
91
+ s.to_string()
92
+ };
93
+
94
+ // Attempt to parse the normalized string as a full datetime
95
+ let mut current = NaiveDateTime::parse_from_str(&normalized_s, "%Y-%m-%dT%H:%M:%S%.f")
96
+ .or_else(|_| {
97
+ // If parsing as datetime fails, try parsing as date-only
98
+ NaiveDateTime::parse_from_str(&format!("{}T00:00:00", normalized_s), "%Y-%m-%dT%H:%M:%S%.f")
99
+ })
100
+ .or_else(|_| {
101
+ // If parsing as time-only fails, try parsing as time-only
102
+ NaiveDateTime::parse_from_str(&format!("1970-01-01T{}", normalized_s), "%Y-%m-%dT%H:%M:%S%.f")
103
+ })
104
+ .map_err(|_| to_datetime_error(ruby, s, ri, "Date or Time"))?;
105
+
106
+ // Apply the same logic as for Data::DateTime
107
+ if i == *date {
108
+ date_value = current;
109
+ } else if i == *start || i == *end || i == *actual_start || i == *actual_end {
110
+ current = transform_time_to_datetime(date_value, current);
111
+ }
112
+
113
+ // Round up to the next second if we have any fractional seconds
114
+ let adjusted_time = if current.nanosecond() > 0 {
115
+ current + chrono::Duration::seconds(1) - chrono::Duration::nanoseconds(current.nanosecond() as i64)
116
+ } else {
117
+ current
118
+ };
119
+
120
+ // Format the output to ensure consistent precision
121
+ let formatted_output = adjusted_time.format("%Y-%m-%d %H:%M:%S").to_string();
122
+ write!(dest, "{}", formatted_output)
123
+ }
84
124
  Data::DateTime(ref d) => {
85
125
  let mut current = d.as_datetime().ok_or(to_datetime_error(ruby, &d.to_string(), ri, "Date"))?;
86
126
  if i == *date {
@@ -103,6 +143,16 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
103
143
  Ok(())
104
144
  }
105
145
 
146
+ fn validate_trust_name(ruby: &Ruby, expected_trust_name: &String, trust_name: &usize, ri: usize, r: &Vec<&Data>) -> magnus::error::Result<()> {
147
+ if ri > 0 {
148
+ let s = r[*trust_name].to_string();
149
+ if s != expected_trust_name.clone() {
150
+ return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Trust actual name: '{}' is not as expected: '{}'", s, expected_trust_name)));
151
+ }
152
+ }
153
+ Ok(())
154
+ }
155
+
106
156
  fn get_mandatory_records<'a>(ruby: &Ruby, range: &'a Range<Data>, csv_header_list: &Vec<String>, mandatory_headers_list: &Vec<String>) -> magnus::error::Result<Vec<Vec<&'a Data>>> {
107
157
  let inverse_header_map: HashMap<usize, String> = csv_header_list.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
108
158
 
@@ -161,7 +211,7 @@ fn handle_commas<W: Write>(dest: &mut W, s: &str) -> std::io::Result<()> {
161
211
 
162
212
  fn clean_strings(s: &str) -> String {
163
213
  s.replace("\n", " ")
164
- .replace("\r", " ")
214
+ .replace("\r", "")
165
215
  .replace("\"", "")
166
216
  }
167
217
 
Binary file
Binary file
Binary file
Binary file
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module CsvUtils
4
- VERSION = '0.1.16'
4
+ VERSION = '0.1.18'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: patchwork_csv_utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.16
4
+ version: 0.1.18
5
5
  platform: x86_64-linux
6
6
  authors:
7
7
  - kingsley.hendrickse
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-11-25 00:00:00.000000000 Z
11
+ date: 2024-11-28 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Deduplication of CSV files and XLS to CSV conversion.
14
14
  email: