patchwork_csv_utils 0.1.23-x86_64-linux → 0.1.24-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,21 +1,28 @@
1
+ use ::csv::{ByteRecord, StringRecord};
2
+ use chrono::{NaiveDate, NaiveDateTime};
3
+ use magnus::Ruby;
1
4
  use std::collections::{HashMap, HashSet};
2
5
  use std::error::Error;
3
6
  use std::ffi::OsStr;
4
7
  use std::path::Path;
5
- use ::csv::{ByteRecord, StringRecord};
6
- use magnus::Ruby;
7
- use chrono::{NaiveDate, NaiveDateTime};
8
8
 
9
9
  pub mod csv;
10
10
  pub mod dedup;
11
+ pub mod shared;
11
12
  pub mod xls;
12
13
 
13
14
  fn missing_header(ruby: &Ruby, header: &str) -> magnus::Error {
14
- magnus::Error::new(ruby.exception_standard_error(), format!("Missing '{}' header", header))
15
+ magnus::Error::new(
16
+ ruby.exception_standard_error(),
17
+ format!("Missing '{}' header", header),
18
+ )
15
19
  }
16
20
 
17
21
  fn missing_value(ruby: &Ruby, header: &str) -> magnus::Error {
18
- magnus::Error::new(ruby.exception_standard_error(), format!("Missing value for '{}' header", header))
22
+ magnus::Error::new(
23
+ ruby.exception_standard_error(),
24
+ format!("Missing value for '{}' header", header),
25
+ )
19
26
  }
20
27
 
21
28
  fn headers_as_byte_record(headers: Vec<String>) -> ByteRecord {
@@ -24,18 +31,32 @@ fn headers_as_byte_record(headers: Vec<String>) -> ByteRecord {
24
31
  }
25
32
 
26
33
  fn magnus_err<E: Error>(ruby: &Ruby, e: E, msg: &str) -> magnus::Error {
27
- magnus::Error::new(ruby.exception_standard_error(), format!("{}: {}", msg, e.to_string()))
34
+ magnus::Error::new(ruby.exception_standard_error(), format!("{}: {}", msg, e))
28
35
  }
29
36
 
30
37
  fn to_datetime_error(ruby: &Ruby, value: &str, row: usize, col: &str) -> magnus::Error {
31
- magnus::Error::new(ruby.exception_standard_error(), format!("Could not parse datetime '{}', row: {}, col: {}", value, row, col))
38
+ magnus::Error::new(
39
+ ruby.exception_standard_error(),
40
+ format!(
41
+ "Could not parse datetime '{}', row: {}, col: {}",
42
+ value, row, col
43
+ ),
44
+ )
32
45
  }
33
46
 
34
- fn check_mandatory_headers(ruby: &Ruby, headers: &Vec<String>, mandatory_headers: &Vec<String>, message: &str) -> Option<magnus::error::Result<()>> {
47
+ fn check_mandatory_headers(
48
+ ruby: &Ruby,
49
+ headers: &[String],
50
+ mandatory_headers: &[String],
51
+ message: &str,
52
+ ) -> Option<magnus::error::Result<()>> {
35
53
  let csv_mandatory_headers = filter_headers(headers, mandatory_headers);
36
54
 
37
55
  if csv_mandatory_headers.is_empty() {
38
- return Some(Err(magnus::Error::new(ruby.exception_standard_error(), format!("{} has no mandatory headers", message))));
56
+ return Some(Err(magnus::Error::new(
57
+ ruby.exception_standard_error(),
58
+ format!("{} has no mandatory headers", message),
59
+ )));
39
60
  }
40
61
 
41
62
  let csv_mandatory_headers = csv_mandatory_headers.to_owned().clone();
@@ -46,22 +67,45 @@ fn check_mandatory_headers(ruby: &Ruby, headers: &Vec<String>, mandatory_headers
46
67
  let difference = set2.difference(&set1).collect::<Vec<_>>();
47
68
 
48
69
  if !difference.is_empty() {
49
- let missing_headers = difference.iter().map(|h| h.to_string()).collect::<Vec<String>>();
50
- return Some(Err(magnus::Error::new(ruby.exception_standard_error(), format!("{} is missing mandatory headers: {}", message, missing_headers.join(", ")))));
70
+ let missing_headers = difference
71
+ .iter()
72
+ .map(|h| h.to_string())
73
+ .collect::<Vec<String>>();
74
+ return Some(Err(magnus::Error::new(
75
+ ruby.exception_standard_error(),
76
+ format!(
77
+ "{} is missing mandatory headers: {}",
78
+ message,
79
+ missing_headers.join(", ")
80
+ ),
81
+ )));
51
82
  }
52
83
  None
53
84
  }
54
85
 
55
/// Returns the position of the first entry in `mandatory_headers_list` that is
/// equal to `column_name`, or `None` when no entry matches.
fn index_of_header_in_mandatory_list(
    mandatory_headers_list: Vec<String>,
    column_name: String,
) -> Option<usize> {
    mandatory_headers_list
        .iter()
        .enumerate()
        .find(|(_, header)| **header == column_name)
        .map(|(position, _)| position)
}
58
94
 
59
/// Returns owned copies of the entries of `csv_headers` that also occur in
/// `expected_headers`.
///
/// The result follows the order of `csv_headers`, and an entry repeated there
/// is repeated in the result.
fn filter_headers(csv_headers: &[String], expected_headers: &[String]) -> Vec<String> {
    let mut kept = Vec::new();
    for header in csv_headers {
        if expected_headers.contains(header) {
            kept.push(header.clone());
        }
    }
    kept
}
62
102
 
63
/// Maps every header name to its zero-based position in `headers`.
///
/// When a name occurs more than once, the position of its last occurrence is
/// the one kept in the map.
fn create_header_map(headers: &[String]) -> HashMap<String, usize> {
    let mut positions = HashMap::with_capacity(headers.len());
    for (position, name) in headers.iter().enumerate() {
        positions.insert(name.clone(), position);
    }
    positions
}
66
110
 
67
111
  pub trait FileExtension {
@@ -71,7 +115,7 @@ pub trait FileExtension {
71
115
 
72
116
  impl<P: AsRef<Path>> FileExtension for P {
73
117
  fn has_extension<S: AsRef<str>>(&self, extensions: &[S]) -> bool {
74
- if let Some(ref extension) = self.as_ref().extension().and_then(OsStr::to_str) {
118
+ if let Some(extension) = self.as_ref().extension().and_then(OsStr::to_str) {
75
119
  return extensions
76
120
  .iter()
77
121
  .any(|x| x.as_ref().eq_ignore_ascii_case(extension));
@@ -86,17 +130,20 @@ impl<P: AsRef<Path>> FileExtension for P {
86
130
 
87
131
  pub fn string_to_datetime(s: &str) -> Option<NaiveDateTime> {
88
132
  let maybe_correct = correct_datetime(s);
89
- if maybe_correct.is_some() { return maybe_correct; }
133
+ if maybe_correct.is_some() {
134
+ return maybe_correct;
135
+ }
90
136
 
91
137
  // Try YYYY-MM-DD format
92
138
  if let Ok(date) = NaiveDate::parse_from_str(s, "%Y-%m-%d") {
93
139
  return date.and_hms_opt(0, 0, 0);
94
140
  }
95
141
 
96
- NaiveDate::parse_from_str(s, "%d-%b-%y").ok().map(|d| d.and_hms_opt(0, 0, 0)).flatten()
142
+ NaiveDate::parse_from_str(s, "%d-%b-%y")
143
+ .ok()
144
+ .and_then(|d| d.and_hms_opt(0, 0, 0))
97
145
  }
98
146
 
99
147
  pub fn correct_datetime(s: &str) -> Option<NaiveDateTime> {
100
148
  NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S").ok()
101
149
  }
102
-
@@ -0,0 +1,79 @@
1
+ use crate::utils::{correct_datetime, to_datetime_error};
2
+ use chrono::{NaiveDateTime, NaiveTime};
3
+ use magnus::Ruby;
4
+
5
+ pub struct DateTimeProcessor {
6
+ date_value: NaiveDateTime,
7
+ }
8
+
9
+ impl DateTimeProcessor {
10
+ pub fn new(date_value: NaiveDateTime) -> Self {
11
+ DateTimeProcessor { date_value }
12
+ }
13
+
14
+ pub fn process_time_column(
15
+ &self,
16
+ ruby: &Ruby,
17
+ value: &str,
18
+ row_index: usize,
19
+ column_name: &str,
20
+ ) -> magnus::error::Result<String> {
21
+ if let Some(correct) = correct_datetime(value) {
22
+ return Ok(correct.to_string());
23
+ }
24
+
25
+ let time = string_to_time(value)
26
+ .ok_or_else(|| to_datetime_error(ruby, value, row_index, column_name))?;
27
+
28
+ Ok(self.combine_date_time(time).to_string())
29
+ }
30
+
31
+ pub fn combine_date_time(&self, time: NaiveTime) -> NaiveDateTime {
32
+ NaiveDateTime::new(self.date_value.date(), time)
33
+ }
34
+
35
+ pub fn combine_datetime_parts(&self, time_source: NaiveDateTime) -> NaiveDateTime {
36
+ NaiveDateTime::new(self.date_value.date(), time_source.time())
37
+ }
38
+ }
39
+
40
+ fn string_to_time(s: &str) -> Option<NaiveTime> {
41
+ NaiveTime::parse_from_str(s, "%H:%M").ok()
42
+ }
43
+
44
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::NaiveDate;

    /// Builds a datetime fixture with zero seconds.
    fn datetime(year: i32, month: u32, day: u32, hour: u32, minute: u32) -> NaiveDateTime {
        NaiveDate::from_ymd_opt(year, month, day)
            .and_then(|date| date.and_hms_opt(hour, minute, 0))
            .expect("fixture must be a valid datetime")
    }

    #[test]
    fn test_combine_date_time() {
        let processor = DateTimeProcessor::new(datetime(2024, 1, 15, 0, 0));
        let half_past_two = NaiveTime::from_hms_opt(14, 30, 0).unwrap();

        let combined = processor.combine_date_time(half_past_two);

        assert_eq!(combined.to_string(), "2024-01-15 14:30:00");
    }

    #[test]
    fn test_combine_datetime_parts() {
        let processor = DateTimeProcessor::new(datetime(2024, 1, 15, 0, 0));
        // Only the time of day of this value should survive the combination.
        let time_source = datetime(2020, 1, 1, 14, 30);

        let combined = processor.combine_datetime_parts(time_source);

        assert_eq!(combined.to_string(), "2024-01-15 14:30:00");
    }
}
@@ -0,0 +1,130 @@
1
+ use crate::utils::string_to_datetime;
2
+ use calamine::{Data, DataType};
3
+ use chrono::NaiveDateTime;
4
+
5
+ pub trait FilterableRecord {
6
+ fn is_empty(&self) -> bool;
7
+ fn has_empty_first_column(&self) -> bool;
8
+ fn get_request_id(&self, index: usize) -> Option<String>;
9
+ fn get_status(&self, index: Option<usize>) -> Option<String>;
10
+ fn get_date(&self, index: usize) -> Option<NaiveDateTime>;
11
+ }
12
+
13
+ pub struct RowFilters {
14
+ exclusions: Vec<String>,
15
+ status_exclusions: Vec<String>,
16
+ earliest_start_date: Option<NaiveDateTime>,
17
+ }
18
+
19
+ impl RowFilters {
20
+ pub fn new(
21
+ exclusions: Vec<String>,
22
+ status_exclusions: Vec<String>,
23
+ earliest_start_date: Option<NaiveDateTime>,
24
+ ) -> Self {
25
+ RowFilters {
26
+ exclusions,
27
+ status_exclusions,
28
+ earliest_start_date,
29
+ }
30
+ }
31
+
32
+ pub fn should_skip<R: FilterableRecord>(
33
+ &self,
34
+ record: &R,
35
+ request_id_index: usize,
36
+ status_index: Option<usize>,
37
+ date_index: usize,
38
+ ) -> bool {
39
+ record.is_empty()
40
+ || record.has_empty_first_column()
41
+ || self.should_skip_by_exclusion(record, request_id_index, status_index)
42
+ || self.should_skip_by_status(record, status_index)
43
+ || self.should_skip_by_date(record, date_index)
44
+ }
45
+
46
+ fn should_skip_by_exclusion<R: FilterableRecord>(
47
+ &self,
48
+ record: &R,
49
+ request_id_index: usize,
50
+ status_index: Option<usize>,
51
+ ) -> bool {
52
+ if let Some(status) = record.get_status(status_index) {
53
+ if status == "Recalled" {
54
+ return false;
55
+ }
56
+ }
57
+
58
+ record
59
+ .get_request_id(request_id_index)
60
+ .map(|id| self.exclusions.contains(&id))
61
+ .unwrap_or(false)
62
+ }
63
+
64
+ fn should_skip_by_status<R: FilterableRecord>(
65
+ &self,
66
+ record: &R,
67
+ status_index: Option<usize>,
68
+ ) -> bool {
69
+ record
70
+ .get_status(status_index)
71
+ .map(|status| self.status_exclusions.contains(&status))
72
+ .unwrap_or(false)
73
+ }
74
+
75
+ fn should_skip_by_date<R: FilterableRecord>(&self, record: &R, date_index: usize) -> bool {
76
+ self.earliest_start_date
77
+ .and_then(|earliest| record.get_date(date_index).map(|date| date <= earliest))
78
+ .unwrap_or(false)
79
+ }
80
+ }
81
+
82
+ use csv::StringRecord;
83
+
84
+ impl FilterableRecord for StringRecord {
85
+ fn is_empty(&self) -> bool {
86
+ self.iter().all(|r| r.is_empty())
87
+ }
88
+
89
+ fn has_empty_first_column(&self) -> bool {
90
+ self.get(0).map(|s| s.is_empty()).unwrap_or(true)
91
+ }
92
+
93
+ fn get_request_id(&self, index: usize) -> Option<String> {
94
+ self.get(index).map(|s| s.to_string())
95
+ }
96
+
97
+ fn get_status(&self, index: Option<usize>) -> Option<String> {
98
+ index.and_then(|idx| self.get(idx).map(|s| s.to_string()))
99
+ }
100
+
101
+ fn get_date(&self, index: usize) -> Option<NaiveDateTime> {
102
+ self.get(index).and_then(string_to_datetime)
103
+ }
104
+ }
105
+
106
+ impl FilterableRecord for Vec<&Data> {
107
+ fn is_empty(&self) -> bool {
108
+ self.iter().all(|c| *c == &Data::Empty)
109
+ }
110
+
111
+ fn has_empty_first_column(&self) -> bool {
112
+ self.first().map(|d| *d == &Data::Empty).unwrap_or(true)
113
+ }
114
+
115
+ fn get_request_id(&self, index: usize) -> Option<String> {
116
+ self.get(index).map(|d| d.to_string())
117
+ }
118
+
119
+ fn get_status(&self, index: Option<usize>) -> Option<String> {
120
+ index.and_then(|idx| self.get(idx).and_then(|d| d.as_string()))
121
+ }
122
+
123
+ fn get_date(&self, index: usize) -> Option<NaiveDateTime> {
124
+ self.get(index).and_then(|data| match data {
125
+ Data::DateTime(d) => d.as_datetime(),
126
+ Data::DateTimeIso(s) => string_to_datetime(s),
127
+ _ => None,
128
+ })
129
+ }
130
+ }
@@ -0,0 +1,4 @@
1
+ pub mod datetime;
2
+ pub mod filters;
3
+ pub mod types;
4
+ pub mod validation;
@@ -0,0 +1,97 @@
1
+ use crate::utils::{missing_header, string_to_datetime};
2
+ use chrono::NaiveDateTime;
3
+ use magnus::{RArray, Ruby};
4
+ use std::collections::HashMap;
5
+
6
/// A value paired with the index of the column it belongs to.
///
/// The derived ordering compares `value` first and `index` second.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct MandatoryColumn<T> {
    /// The column's value.
    pub value: T,
    /// The column's index.
    pub index: usize,
}

impl<T> MandatoryColumn<T> {
    /// Pairs `value` with the column `index`.
    pub fn new(value: T, index: usize) -> Self {
        Self { value, index }
    }
}
17
+
18
+ pub struct ProcessingConfig {
19
+ pub exclusions: Vec<String>,
20
+ pub status_exclusions: Vec<String>,
21
+ pub mandatory_headers: Vec<String>,
22
+ pub expected_trust_name: String,
23
+ pub is_streamed_file: bool,
24
+ pub earliest_start_date: Option<NaiveDateTime>,
25
+ }
26
+
27
+ impl ProcessingConfig {
28
+ pub fn from_ruby(
29
+ exclusions: RArray,
30
+ mandatory_headers: RArray,
31
+ status_exclusions: RArray,
32
+ expected_trust_name: String,
33
+ is_streamed_file: bool,
34
+ earliest_start_date: Option<String>,
35
+ ) -> magnus::error::Result<Self> {
36
+ Ok(ProcessingConfig {
37
+ exclusions: RArray::to_vec(exclusions)?,
38
+ status_exclusions: RArray::to_vec(status_exclusions)?,
39
+ mandatory_headers: RArray::to_vec(mandatory_headers)?,
40
+ expected_trust_name,
41
+ is_streamed_file,
42
+ earliest_start_date: earliest_start_date.and_then(|s| string_to_datetime(&s)),
43
+ })
44
+ }
45
+ }
46
+
47
+ pub struct HeaderConfig {
48
+ pub request_id: usize,
49
+ pub date: usize,
50
+ pub start: usize,
51
+ pub end: usize,
52
+ pub actual_start: usize,
53
+ pub actual_end: usize,
54
+ pub status: Option<usize>,
55
+ pub trust_name: usize,
56
+ }
57
+
58
+ impl HeaderConfig {
59
+ pub fn from_header_map(
60
+ map: &HashMap<String, usize>,
61
+ ruby: &Ruby,
62
+ ) -> magnus::error::Result<Self> {
63
+ Ok(HeaderConfig {
64
+ request_id: *map
65
+ .get("Request Id")
66
+ .ok_or_else(|| missing_header(ruby, "Request Id"))?,
67
+ date: *map
68
+ .get("Date")
69
+ .ok_or_else(|| missing_header(ruby, "Date"))?,
70
+ start: *map
71
+ .get("Start")
72
+ .ok_or_else(|| missing_header(ruby, "Start"))?,
73
+ end: *map.get("End").ok_or_else(|| missing_header(ruby, "End"))?,
74
+ actual_start: *map
75
+ .get("Actual Start")
76
+ .ok_or_else(|| missing_header(ruby, "Actual Start"))?,
77
+ actual_end: *map
78
+ .get("Actual End")
79
+ .ok_or_else(|| missing_header(ruby, "Actual End"))?,
80
+ status: map.get("Status").copied(),
81
+ trust_name: *map
82
+ .get("Trust")
83
+ .ok_or_else(|| missing_header(ruby, "Trust"))?,
84
+ })
85
+ }
86
+
87
+ pub fn is_time_column(&self, index: usize) -> bool {
88
+ index == self.start
89
+ || index == self.end
90
+ || index == self.actual_start
91
+ || index == self.actual_end
92
+ }
93
+
94
+ pub fn is_date_column(&self, index: usize) -> bool {
95
+ index == self.date
96
+ }
97
+ }
@@ -0,0 +1,34 @@
1
+ use magnus::{Error, Ruby};
2
+
3
+ pub struct TrustValidator {
4
+ expected_name: String,
5
+ is_streamed: bool,
6
+ }
7
+
8
+ impl TrustValidator {
9
+ pub fn new(expected_name: String, is_streamed: bool) -> Self {
10
+ TrustValidator {
11
+ expected_name,
12
+ is_streamed,
13
+ }
14
+ }
15
+
16
+ pub fn validate(&self, ruby: &Ruby, actual_name: &str) -> magnus::error::Result<()> {
17
+ if self.is_streamed {
18
+ return Ok(());
19
+ }
20
+
21
+ let trimmed = actual_name.trim();
22
+ if trimmed != self.expected_name {
23
+ return Err(Error::new(
24
+ ruby.exception_standard_error(),
25
+ format!(
26
+ "Trust actual name: '{}' is not as expected: '{}'",
27
+ trimmed, self.expected_name
28
+ ),
29
+ ));
30
+ }
31
+
32
+ Ok(())
33
+ }
34
+ }