patchwork_csv_utils 0.1.16-x86_64-linux → 0.1.18-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ccd0a5a1fac5fc48723b28f4b0922ee10468b2bcf26bb6da77a7abdcb9c3882f
4
- data.tar.gz: b9be825c7c520ef319c940814863f71e71c41d0ed2be062179e665110dfcb0d6
3
+ metadata.gz: c53dcc394dd22b4c32d7201914e7c9fd1d94f668365c45b5a0466bc9820e9bf2
4
+ data.tar.gz: 85f24589e760b680d325a8900c2123ce52c8602bef9ed4b3db82561238924b8b
5
5
  SHA512:
6
- metadata.gz: f2419b94893570ef960dd9be99ed2d530b3b8d201b05a8373dd8a5a2b679e6c9f078e26ebc6f8a43f649158e2984ed9c10dad5d8c7ebac80ffe6b927ca2250fc
7
- data.tar.gz: bb87188b17f9e95e3cdf7db797cf9ae067bb322151069f70b6caaa7c8749ec9c2ecdb068136c55c0a010a0f67b5de9990192b4ca94fd6ec5b8e0d73da71a2ed9
6
+ metadata.gz: 36306ea0ff3606f5325f3d97d1ef8a84e93f3879b4652574e8226781ae01aeaef79448be87edb3a305a44290581d0c8ebbb8d8c677395758750b1a0af680a5c4
7
+ data.tar.gz: b236321552dcc55a1a2d05772f0272fc21c65a707f94b4f67ebd068b948a9dd66d001097cbf965d302cc71202e8dbad67e632323a7ee68c46c4cc865965e20a4
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- patchwork_csv_utils (0.1.16)
4
+ patchwork_csv_utils (0.1.18)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -9,7 +9,7 @@ pub mod utils;
9
9
  fn init() -> Result<(), magnus::Error> {
10
10
  let module = define_module("CsvUtils")?;
11
11
  module.define_singleton_method("dedup", function!(dedup, 4))?;
12
- module.define_singleton_method("to_csv", function!(to_csv, 6))?;
13
- module.define_singleton_method("transform_csv", function!(transform_csv, 6))?;
12
+ module.define_singleton_method("to_csv", function!(to_csv, 5))?;
13
+ module.define_singleton_method("transform_csv", function!(transform_csv, 5))?;
14
14
  Ok(())
15
15
  }
@@ -4,13 +4,12 @@ use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Utc};
4
4
  use csv::{Reader, StringRecord, Writer};
5
5
  use magnus::{Error, RArray, Ruby};
6
6
 
7
- use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, create_header_map, missing_value, headers_as_byte_record, index_of_header_in_mandatory_list, validate_trust_name};
7
+ use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, create_header_map, missing_value, headers_as_byte_record, index_of_header_in_mandatory_list};
8
8
 
9
9
  pub fn transform_csv(ruby: &Ruby, csv_path: String,
10
10
  target_path: String, exclusions: RArray,
11
11
  mandatory_headers: RArray,
12
- status_exclusions: RArray,
13
- expected_trust_name: String,) -> magnus::error::Result<()> {
12
+ status_exclusions: RArray) -> magnus::error::Result<()> {
14
13
  if !csv_path.has_extension(&["csv"]) {
15
14
  return Err(Error::new(ruby.exception_standard_error(), "csv_path must be a csv file".to_string()));
16
15
  }
@@ -41,7 +40,6 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String,
41
40
  let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
42
41
  let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
43
42
  let status = header_map.get("Status");
44
- let trust_name = header_map.get("Trust").ok_or(missing_header(ruby, "Trust"))?;
45
43
 
46
44
  let mandatory_records = get_mandatory_records(&ruby, &mut csv, &headers_list, &mandatory_headers)?;
47
45
 
@@ -60,8 +58,6 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String,
60
58
  let column_value = record.get(*column_index).ok_or(missing_value(ruby, column))?;
61
59
  let column_value = column_value.trim_end();
62
60
 
63
- validate_trust_name(ruby, &expected_trust_name, trust_name, ri, i, &column_value.to_string())?;
64
-
65
61
  if i == *date {
66
62
  let current = string_to_datetime(column_value).ok_or(to_datetime_error(ruby, column_value, ri, "Date"))?;
67
63
  date_value = current;
@@ -87,6 +83,16 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String,
87
83
  Ok(())
88
84
  }
89
85
 
86
+
87
+ fn validate_trust_name(ruby: &Ruby, expected_trust_name: &String, trust_name: &usize, ri: usize, i: usize, s: &String) -> magnus::error::Result<()> {
88
+ if i == *trust_name {
89
+ if s != &expected_trust_name.clone() {
90
+ return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Trust actual name: '{}' is not as expected: '{}'", s, expected_trust_name)));
91
+ }
92
+ }
93
+ Ok(())
94
+ }
95
+
90
96
  fn get_mandatory_records(ruby: &Ruby, csv: &mut Reader<File>, csv_header_list: &Vec<String>, mandatory_headers_list: &Vec<String>) -> magnus::error::Result<Vec<StringRecord>> {
91
97
  let inverse_header_map: HashMap<usize, String> = csv_header_list.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
92
98
 
@@ -9,15 +9,6 @@ pub mod csv;
9
9
  pub mod dedup;
10
10
  pub mod xls;
11
11
 
12
- fn validate_trust_name(ruby: &Ruby, expected_trust_name: &String, trust_name: &usize, ri: usize, i: usize, s: &String) -> magnus::error::Result<()> {
13
- if ri > 0 && i == *trust_name {
14
- if s != &expected_trust_name.clone() {
15
- return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Trust actual name: '{}' is not as expected: '{}'", s, expected_trust_name)));
16
- }
17
- }
18
- Ok(())
19
- }
20
-
21
12
  fn missing_header(ruby: &Ruby, header: &str) -> magnus::Error {
22
13
  magnus::Error::new(ruby.exception_standard_error(), format!("Missing '{}' header", header))
23
14
  }
@@ -74,6 +65,7 @@ fn create_header_map(headers: &Vec<String>) -> HashMap<String, usize> {
74
65
 
75
66
  pub trait FileExtension {
76
67
  fn has_extension<S: AsRef<str>>(&self, extensions: &[S]) -> bool;
68
+ fn extension(&self) -> Option<&str>;
77
69
  }
78
70
 
79
71
  impl<P: AsRef<Path>> FileExtension for P {
@@ -86,5 +78,8 @@ impl<P: AsRef<Path>> FileExtension for P {
86
78
 
87
79
  false
88
80
  }
81
+ fn extension(&self) -> Option<&str> {
82
+ self.as_ref().extension().and_then(OsStr::to_str)
83
+ }
89
84
  }
90
85
 
@@ -2,28 +2,31 @@ use std::collections::HashMap;
2
2
  use std::fs::File;
3
3
  use std::io::{BufWriter, Write};
4
4
 
5
- use calamine::{Data, open_workbook, Range, Reader, Xls};
6
- use chrono::{NaiveDateTime, Utc};
5
+ use calamine::{Data, open_workbook, Range, Reader, Xls, open_workbook_auto};
6
+ use chrono::{NaiveDateTime, Timelike, Utc};
7
7
  use magnus::{RArray, Ruby};
8
8
 
9
- use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, missing_value, index_of_header_in_mandatory_list, validate_trust_name};
9
+ use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, missing_value, index_of_header_in_mandatory_list};
10
10
 
11
11
  pub fn to_csv(ruby: &Ruby, xls_path: String,
12
12
  target_path: String,
13
13
  exclusions: RArray,
14
14
  mandatory_headers: RArray,
15
15
  status_exclusions: RArray,
16
- expected_trust_name: String,
17
16
  ) -> magnus::error::Result<()> {
18
- if !xls_path.has_extension(&["xls"]) {
19
- return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls file".to_string()));
17
+ if !xls_path.has_extension(&["xls","xlsx"]) {
18
+ return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls or xlsx file".to_string()));
20
19
  }
21
20
 
22
21
  let exclusions = RArray::to_vec(exclusions)?;
23
22
  let mandatory_headers: Vec<String> = RArray::to_vec(mandatory_headers)?;
24
23
  let status_exclusions = RArray::to_vec(status_exclusions)?;
25
24
 
26
- let mut workbook: Xls<_> = open_workbook(xls_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not open xls: {}", xls_path).as_str()))?;
25
+
26
+ let mut workbook = open_workbook_auto(&xls_path)
27
+ .map_err(|e| magnus_err(ruby, e, format!("could not open workbook: {}", xls_path).as_str()))?;
28
+
29
+
27
30
  let range = workbook.worksheet_range_at(0)
28
31
  .ok_or(magnus::Error::new(ruby.exception_standard_error(), "no worksheet found in xls".to_string()))
29
32
  .and_then(|r| r.map_err(|e| magnus_err(ruby, e, "could not read worksheet range")))?;
@@ -38,15 +41,14 @@ pub fn to_csv(ruby: &Ruby, xls_path: String,
38
41
  let csv_out_file = File::create(target_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not create csv file: {}", target_path).as_str()))?;
39
42
  let mut dest = BufWriter::new(csv_out_file);
40
43
 
41
- write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers, headers_list, status_exclusions, expected_trust_name)
44
+ write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers, headers_list, status_exclusions)
42
45
  }
43
46
 
44
47
  fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
45
48
  header_map: HashMap<String, usize>, exclusions: Vec<String>,
46
49
  mandatory_headers: Vec<String>,
47
50
  headers_list: Vec<String>,
48
- status_exclusions: Vec<String>,
49
- expected_trust_name: String) -> magnus::error::Result<()> {
51
+ status_exclusions: Vec<String>) -> magnus::error::Result<()> {
50
52
  let n = mandatory_headers.len() - 1;
51
53
  let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
52
54
  let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
@@ -55,7 +57,6 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
55
57
  let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
56
58
  let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
57
59
  let status = header_map.get("Status");
58
- let trust_name = header_map.get("Trust").ok_or(missing_header(ruby, "Trust"))?;
59
60
 
60
61
  let mandatory_rows = get_mandatory_records(ruby, range, &headers_list, &mandatory_headers)?;
61
62
 
@@ -69,6 +70,7 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
69
70
  if date_value_is_not_present(&date, &r) {
70
71
  return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Date value is not present in row: {}", ri)));
71
72
  }
73
+ // validate_trust_name(ruby, &expected_trust_name, trust_name, ri, &r)?;
72
74
 
73
75
  for (i, c) in mandatory_headers.iter().enumerate() {
74
76
  let column_index = header_map.get(c).ok_or(missing_header(ruby, c))?;
@@ -76,11 +78,49 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
76
78
 
77
79
  match *c {
78
80
  Data::Empty => Ok(()),
79
- Data::String(ref s) | Data::DateTimeIso(ref s) | Data::DurationIso(ref s) => {
80
- validate_trust_name(ruby, &expected_trust_name, trust_name, ri, i, s)?;
81
+ Data::String(ref s) | Data::DurationIso(ref s) => {
81
82
  handle_commas(dest, s)
82
83
  }
83
84
  Data::Float(ref f) => write!(dest, "{}", f),
85
+ Data::DateTimeIso(ref s) => {
86
+ // Normalize the string to ensure manageable precision
87
+ let normalized_s = if s.contains('.') {
88
+ let parts: Vec<&str> = s.split('.').collect();
89
+ format!("{}.{}", parts[0], &parts[1][..std::cmp::min(parts[1].len(), 6)]) // Keep up to 6 fractional seconds
90
+ } else {
91
+ s.to_string()
92
+ };
93
+
94
+ // Attempt to parse the normalized string as a full datetime
95
+ let mut current = NaiveDateTime::parse_from_str(&normalized_s, "%Y-%m-%dT%H:%M:%S%.f")
96
+ .or_else(|_| {
97
+ // If parsing as datetime fails, try parsing as date-only
98
+ NaiveDateTime::parse_from_str(&format!("{}T00:00:00", normalized_s), "%Y-%m-%dT%H:%M:%S%.f")
99
+ })
100
+ .or_else(|_| {
101
+ // If parsing as time-only fails, try parsing as time-only
102
+ NaiveDateTime::parse_from_str(&format!("1970-01-01T{}", normalized_s), "%Y-%m-%dT%H:%M:%S%.f")
103
+ })
104
+ .map_err(|_| to_datetime_error(ruby, s, ri, "Date or Time"))?;
105
+
106
+ // Apply the same logic as for Data::DateTime
107
+ if i == *date {
108
+ date_value = current;
109
+ } else if i == *start || i == *end || i == *actual_start || i == *actual_end {
110
+ current = transform_time_to_datetime(date_value, current);
111
+ }
112
+
113
+ // Round up to the next second if we have any fractional seconds
114
+ let adjusted_time = if current.nanosecond() > 0 {
115
+ current + chrono::Duration::seconds(1) - chrono::Duration::nanoseconds(current.nanosecond() as i64)
116
+ } else {
117
+ current
118
+ };
119
+
120
+ // Format the output to ensure consistent precision
121
+ let formatted_output = adjusted_time.format("%Y-%m-%d %H:%M:%S").to_string();
122
+ write!(dest, "{}", formatted_output)
123
+ }
84
124
  Data::DateTime(ref d) => {
85
125
  let mut current = d.as_datetime().ok_or(to_datetime_error(ruby, &d.to_string(), ri, "Date"))?;
86
126
  if i == *date {
@@ -103,6 +143,16 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
103
143
  Ok(())
104
144
  }
105
145
 
146
+ fn validate_trust_name(ruby: &Ruby, expected_trust_name: &String, trust_name: &usize, ri: usize, r: &Vec<&Data>) -> magnus::error::Result<()> {
147
+ if ri > 0 {
148
+ let s = r[*trust_name].to_string();
149
+ if s != expected_trust_name.clone() {
150
+ return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Trust actual name: '{}' is not as expected: '{}'", s, expected_trust_name)));
151
+ }
152
+ }
153
+ Ok(())
154
+ }
155
+
106
156
  fn get_mandatory_records<'a>(ruby: &Ruby, range: &'a Range<Data>, csv_header_list: &Vec<String>, mandatory_headers_list: &Vec<String>) -> magnus::error::Result<Vec<Vec<&'a Data>>> {
107
157
  let inverse_header_map: HashMap<usize, String> = csv_header_list.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
108
158
 
@@ -161,7 +211,7 @@ fn handle_commas<W: Write>(dest: &mut W, s: &str) -> std::io::Result<()> {
161
211
 
162
212
  fn clean_strings(s: &str) -> String {
163
213
  s.replace("\n", " ")
164
- .replace("\r", " ")
214
+ .replace("\r", "")
165
215
  .replace("\"", "")
166
216
  }
167
217
 
Binary file
Binary file
Binary file
Binary file
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module CsvUtils
4
- VERSION = '0.1.16'
4
+ VERSION = '0.1.18'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: patchwork_csv_utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.16
4
+ version: 0.1.18
5
5
  platform: x86_64-linux
6
6
  authors:
7
7
  - kingsley.hendrickse
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-11-25 00:00:00.000000000 Z
11
+ date: 2024-11-28 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Deduplication of CSV files and XLS to CSV conversion.
14
14
  email: