patchwork_csv_utils 0.1.22-arm64-darwin → 0.1.24-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -8,7 +8,7 @@ gemspec
8
8
  group :development do
9
9
  gem 'rake', '~> 13.0'
10
10
  gem 'rake-compiler'
11
- gem 'rb_sys', '~> 0.9.98'
11
+ gem 'rb_sys', '~> 0.9.117'
12
12
  gem 'rspec', '~> 3.0'
13
13
  gem 'rubocop', '~> 1.21'
14
14
  end
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- patchwork_csv_utils (0.1.22)
4
+ patchwork_csv_utils (0.1.24)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -19,7 +19,9 @@ GEM
19
19
  rake (13.2.1)
20
20
  rake-compiler (1.2.7)
21
21
  rake
22
- rb_sys (0.9.98)
22
+ rake-compiler-dock (1.9.1)
23
+ rb_sys (0.9.117)
24
+ rake-compiler-dock (= 1.9.1)
23
25
  regexp_parser (2.9.2)
24
26
  rexml (3.3.2)
25
27
  strscan
@@ -63,7 +65,7 @@ DEPENDENCIES
63
65
  patchwork_csv_utils!
64
66
  rake (~> 13.0)
65
67
  rake-compiler
66
- rb_sys (~> 0.9.98)
68
+ rb_sys (~> 0.9.117)
67
69
  rspec (~> 3.0)
68
70
  rubocop (~> 1.21)
69
71
 
@@ -2,6 +2,7 @@
2
2
  name = "csv_utils"
3
3
  version = "0.1.0"
4
4
  edition = "2021"
5
+ rust-version = "1.83"
5
6
  authors = ["kingsley.hendrickse <kingsley.hendrickse@patchwork.health>"]
6
7
  publish = false
7
8
 
@@ -9,7 +10,7 @@ publish = false
9
10
  crate-type = ["cdylib"]
10
11
 
11
12
  [dependencies]
12
- magnus = { version = "0.7.1" }
13
- csv = "1.3.0"
14
- calamine = { version = "0.25.0", features = ["dates"] }
15
- chrono = "0.4.38"
13
+ magnus = { version = "0.8" }
14
+ csv = "1.4"
15
+ calamine = { version = "0.31", features = ["dates"] }
16
+ chrono = "0.4"
@@ -1,15 +1,15 @@
1
- use magnus::{define_module, function, prelude::*};
2
1
  use crate::utils::csv::transform_csv;
3
2
  use crate::utils::dedup::dedup;
4
3
  use crate::utils::xls::to_csv;
4
+ use magnus::{function, prelude::*, Ruby};
5
5
 
6
6
  pub mod utils;
7
7
 
8
8
  #[magnus::init]
9
- fn init() -> Result<(), magnus::Error> {
10
- let module = define_module("CsvUtils")?;
9
+ fn init(ruby: &Ruby) -> Result<(), magnus::Error> {
10
+ let module = ruby.define_module("CsvUtils")?;
11
11
  module.define_singleton_method("dedup", function!(dedup, 4))?;
12
- module.define_singleton_method("to_csv", function!(to_csv, 7))?;
13
- module.define_singleton_method("transform_csv", function!(transform_csv, 7))?;
12
+ module.define_singleton_method("to_csv", function!(to_csv, 8))?;
13
+ module.define_singleton_method("transform_csv", function!(transform_csv, 8))?;
14
14
  Ok(())
15
15
  }
@@ -1,90 +1,150 @@
1
- use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Utc};
1
+ use chrono::Utc;
2
2
  use csv::{Reader, StringRecord, Writer};
3
3
  use magnus::{Error, RArray, Ruby};
4
4
  use std::collections::HashMap;
5
5
  use std::fs::File;
6
6
 
7
- use crate::utils::{check_mandatory_headers, create_header_map, headers_as_byte_record, index_of_header_in_mandatory_list, magnus_err, missing_header, missing_value, to_datetime_error, FileExtension};
8
-
9
- pub fn transform_csv(ruby: &Ruby,
10
- csv_path: String,
11
- target_path: String,
12
- exclusions: RArray,
13
- mandatory_headers: RArray,
14
- status_exclusions: RArray,
15
- expected_trust_name: String,
16
- is_streamed_file: bool) -> magnus::error::Result<()> {
7
+ use crate::utils::shared::datetime::DateTimeProcessor;
8
+ use crate::utils::shared::filters::RowFilters;
9
+ use crate::utils::shared::types::{HeaderConfig, MandatoryColumn, ProcessingConfig};
10
+ use crate::utils::shared::validation::TrustValidator;
11
+ use crate::utils::{
12
+ check_mandatory_headers, create_header_map, headers_as_byte_record,
13
+ index_of_header_in_mandatory_list, magnus_err, missing_header, missing_value,
14
+ string_to_datetime, FileExtension,
15
+ };
16
+
17
+ #[allow(clippy::too_many_arguments)]
18
+ pub fn transform_csv(
19
+ ruby: &Ruby,
20
+ csv_path: String,
21
+ target_path: String,
22
+ exclusions: RArray,
23
+ mandatory_headers: RArray,
24
+ status_exclusions: RArray,
25
+ expected_trust_name: String,
26
+ is_streamed_file: bool,
27
+ earliest_start_date: Option<String>,
28
+ ) -> magnus::error::Result<()> {
17
29
  if !csv_path.has_extension(&["csv"]) {
18
- return Err(Error::new(ruby.exception_standard_error(), "csv_path must be a csv file".to_string()));
30
+ return Err(Error::new(
31
+ ruby.exception_standard_error(),
32
+ "csv_path must be a csv file".to_string(),
33
+ ));
19
34
  }
20
35
 
21
- let exclusions = RArray::to_vec(exclusions)?;
22
- let status_exclusions = RArray::to_vec(status_exclusions)?;
23
- let mandatory_headers: Vec<String> = RArray::to_vec(mandatory_headers)?;
36
+ let config = ProcessingConfig::from_ruby(
37
+ exclusions,
38
+ mandatory_headers,
39
+ status_exclusions,
40
+ expected_trust_name,
41
+ is_streamed_file,
42
+ earliest_start_date,
43
+ )?;
24
44
 
25
45
  let csv_file = File::open(csv_path).map_err(|e| magnus_err(ruby, e, "csv_path"))?;
26
46
  let mut csv: Reader<File> = Reader::from_reader(csv_file);
27
47
  let mut wtr = Writer::from_path(target_path).map_err(|e| magnus_err(ruby, e, "target_path"))?;
28
- let headers = csv.headers().map_err(|e| magnus_err(ruby, e, "csv_path headers"))?.clone();
48
+
49
+ let headers = csv
50
+ .headers()
51
+ .map_err(|e| magnus_err(ruby, e, "csv_path headers"))?
52
+ .clone();
29
53
  let headers_list: Vec<String> = headers.iter().map(|h| h.to_string()).collect();
30
54
 
31
55
  if let Some(value) =
32
- check_mandatory_headers(ruby, &headers_list, &mandatory_headers, "csv") { return value; }
56
+ check_mandatory_headers(ruby, &headers_list, &config.mandatory_headers, "csv")
57
+ {
58
+ return value;
59
+ }
33
60
 
34
- let header_map: HashMap<String, usize> = create_header_map(&mandatory_headers);
35
- let inverse_header_map: HashMap<usize, String> = mandatory_headers.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
61
+ let header_map = create_header_map(&config.mandatory_headers);
62
+ let header_config = HeaderConfig::from_header_map(&header_map, ruby)?;
63
+ let inverse_header_map: HashMap<usize, String> = config
64
+ .mandatory_headers
65
+ .iter()
66
+ .enumerate()
67
+ .map(|(i, h)| (i, h.to_string()))
68
+ .collect();
36
69
 
37
- let csv_headers = headers_as_byte_record(mandatory_headers.clone());
38
- wtr.write_byte_record(&csv_headers).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
70
+ wtr.write_byte_record(&headers_as_byte_record(config.mandatory_headers.clone()))
71
+ .map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
39
72
 
40
- let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
41
- let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
42
- let start = header_map.get("Start").ok_or(missing_header(ruby, "Start"))?;
43
- let end = header_map.get("End").ok_or(missing_header(ruby, "End"))?;
44
- let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
45
- let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
46
- let status = header_map.get("Status");
47
- let trust_name = header_map.get("Trust").ok_or(missing_header(ruby, "Trust"))?;
73
+ let filters = RowFilters::new(
74
+ config.exclusions,
75
+ config.status_exclusions,
76
+ config.earliest_start_date,
77
+ );
78
+ let trust_validator = TrustValidator::new(config.expected_trust_name, config.is_streamed_file);
48
79
 
49
- let mandatory_records = get_mandatory_records(&ruby, &mut csv, &headers_list, &mandatory_headers)?;
80
+ let mandatory_records =
81
+ get_mandatory_records(ruby, &mut csv, &headers_list, &config.mandatory_headers)?;
50
82
 
51
83
  for (ri, record) in mandatory_records.iter().enumerate() {
52
-
53
- if skip_excluded_rows(request_id, &status, &record, &exclusions) { continue; }
54
- if skip_excluded_status_rows(&status, &record, &status_exclusions) { continue; }
55
- if has_empty_row_skip(&record) { continue; }
56
- if has_empty_first_col_skip_row(&record) { continue; }
84
+ if filters.should_skip(
85
+ record,
86
+ header_config.request_id,
87
+ header_config.status,
88
+ header_config.date,
89
+ ) {
90
+ continue;
91
+ }
57
92
 
58
93
  let mut date_value = Utc::now().naive_utc();
59
94
 
60
- let mut columns = vec![];
61
- for (i, column) in mandatory_headers.iter().enumerate() {
62
- let column_index = header_map.get(column).ok_or(missing_header(ruby, column))?;
63
- let column_value = record.get(*column_index).ok_or(missing_value(ruby, column))?;
64
- let column_value = column_value.trim_end();
65
-
66
- if !is_streamed_file {
67
- validate_trust_name(ruby, &expected_trust_name, trust_name, i, &column_value.to_string())?;
68
- }
69
-
70
- if i == *date {
71
- let current = string_to_datetime(column_value).ok_or(to_datetime_error(ruby, column_value, ri, "Date"))?;
72
- date_value = current;
73
- columns.push(current.to_string());
74
- } else if i == *start || i == *end || i == *actual_start || i == *actual_end {
75
- if column_value.is_empty() {
76
- columns.push(column_value.to_string());
95
+ let columns: Result<Vec<String>, Error> = config
96
+ .mandatory_headers
97
+ .iter()
98
+ .enumerate()
99
+ .map(|(i, column)| {
100
+ let column_index = *header_map
101
+ .get(column)
102
+ .ok_or_else(|| missing_header(ruby, column))?;
103
+ let column_value = record
104
+ .get(column_index)
105
+ .ok_or_else(|| missing_value(ruby, column))?
106
+ .trim_end();
107
+
108
+ if i == header_config.trust_name {
109
+ trust_validator.validate(ruby, column_value)?;
110
+ }
111
+
112
+ if header_config.is_date_column(i) {
113
+ let current = string_to_datetime(column_value).ok_or_else(|| {
114
+ Error::new(
115
+ ruby.exception_standard_error(),
116
+ format!(
117
+ "Could not parse datetime '{}', row: {}, col: Date",
118
+ column_value, ri
119
+ ),
120
+ )
121
+ })?;
122
+ date_value = current;
123
+ Ok(current.to_string())
124
+ } else if header_config.is_time_column(i) {
125
+ if column_value.is_empty() {
126
+ Ok(column_value.to_string())
127
+ } else {
128
+ let column_name = inverse_header_map
129
+ .get(&i)
130
+ .map(|s| s.as_str())
131
+ .unwrap_or("Unknown");
132
+ DateTimeProcessor::new(date_value).process_time_column(
133
+ ruby,
134
+ column_value,
135
+ ri,
136
+ column_name,
137
+ )
138
+ }
77
139
  } else {
78
- let column_name = get_column_name(&inverse_header_map, &i);
79
- let current = process_datetime(ruby, ri, date_value, column_value, &column_name)?;
80
- columns.push(current);
140
+ Ok(column_value.to_string())
81
141
  }
82
- } else {
83
- columns.push(column_value.to_string());
84
- }
85
- }
86
- let record = columns.into_iter().collect::<StringRecord>();
87
- wtr.write_byte_record(record.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
142
+ })
143
+ .collect();
144
+
145
+ let record = columns?.into_iter().collect::<StringRecord>();
146
+ wtr.write_byte_record(record.as_byte_record())
147
+ .map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
88
148
  }
89
149
 
90
150
  wtr.flush().map_err(|e| magnus_err(ruby, e, "flush"))?;
@@ -92,110 +152,43 @@ pub fn transform_csv(ruby: &Ruby,
92
152
  Ok(())
93
153
  }
94
154
 
95
-
96
- fn validate_trust_name(ruby: &Ruby, expected_trust_name: &String, trust_name: &usize, i: usize, s: &String) -> magnus::error::Result<()> {
97
- if i == *trust_name {
98
- let s = s.trim();
99
- if s != &expected_trust_name.clone() {
100
- return Err(Error::new(ruby.exception_standard_error(), format!("Trust actual name: '{}' is not as expected: '{}'", s, expected_trust_name)));
101
- }
102
- }
103
- Ok(())
104
- }
105
-
106
- fn get_mandatory_records(ruby: &Ruby, csv: &mut Reader<File>, csv_header_list: &Vec<String>, mandatory_headers_list: &Vec<String>) -> magnus::error::Result<Vec<StringRecord>> {
107
- let inverse_header_map: HashMap<usize, String> = csv_header_list.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
108
-
109
- let mut records = vec![];
110
- for row in csv.records() {
111
- let row = row.map_err(|e| magnus_err(ruby, e, "record error"))?;
112
- let mut columns = vec![];
113
- for (i, column_value) in row.iter().enumerate() {
114
- let column_name = inverse_header_map.get(&i).ok_or(missing_header(ruby, &i.to_string()))?;
115
- if mandatory_headers_list.contains(column_name) {
116
- let index = index_of_header_in_mandatory_list(mandatory_headers_list.clone(), column_name.to_string()).unwrap();
117
- columns.push(CsvMandatoryColumn::new(column_value.to_string(), index));
118
- }
119
- }
120
- columns.sort_by(|a, b| a.index.cmp(&b.index));
121
- let columns = columns.iter().map(|c| c.value.to_string()).collect::<StringRecord>();
122
- records.push(columns);
123
- }
124
-
125
- Ok(records)
126
- }
127
-
128
- fn process_datetime(ruby: &Ruby, ri: usize, date_value: NaiveDateTime, c: &str, column_name: &String) -> magnus::error::Result<String> {
129
- let maybe_correct = correct_datetime(c);
130
- if let Some(correct) = maybe_correct {
131
- return Ok(correct.to_string());
132
- }
133
-
134
- let current_time = string_to_time(c).ok_or(to_datetime_error(ruby, c, ri, column_name))?;
135
- let datetime = transform_time_to_datetime(date_value, current_time);
136
- Ok(datetime.to_string())
137
- }
138
-
139
- fn get_column_name(inverse_header_map: &HashMap<usize, String>, i: &usize) -> String {
140
- let unknown = "Unknown".to_string();
141
- let column_name = inverse_header_map.get(&i).unwrap_or(&unknown);
142
- column_name.to_string()
155
+ fn get_mandatory_records(
156
+ ruby: &Ruby,
157
+ csv: &mut Reader<File>,
158
+ csv_header_list: &[String],
159
+ mandatory_headers_list: &[String],
160
+ ) -> magnus::error::Result<Vec<StringRecord>> {
161
+ let inverse_header_map: HashMap<usize, String> = csv_header_list
162
+ .iter()
163
+ .enumerate()
164
+ .map(|(i, h)| (i, h.to_string()))
165
+ .collect();
166
+
167
+ csv.records()
168
+ .map(|row_result| {
169
+ let row = row_result.map_err(|e| magnus_err(ruby, e, "record error"))?;
170
+
171
+ let mut columns: Vec<MandatoryColumn<String>> = row
172
+ .iter()
173
+ .enumerate()
174
+ .filter_map(|(i, column_value)| {
175
+ inverse_header_map.get(&i).and_then(|column_name| {
176
+ if mandatory_headers_list.contains(column_name) {
177
+ index_of_header_in_mandatory_list(
178
+ mandatory_headers_list.to_vec(),
179
+ column_name.to_string(),
180
+ )
181
+ .map(|index| MandatoryColumn::new(column_value.to_string(), index))
182
+ } else {
183
+ None
184
+ }
185
+ })
186
+ })
187
+ .collect();
188
+
189
+ columns.sort_by_key(|c| c.index);
190
+
191
+ Ok(columns.into_iter().map(|c| c.value).collect())
192
+ })
193
+ .collect()
143
194
  }
144
-
145
- fn skip_excluded_rows(request_id: &usize, status: &Option<&usize>, r: &StringRecord, exclusions: &Vec<String>) -> bool {
146
- if let Some(status_index) = status {
147
- if let Some(status) = r.get(**status_index) {
148
- if status.eq("Recalled") {
149
- return false
150
- }
151
- }
152
- }
153
-
154
- let value = r.get(*request_id).unwrap_or_default();
155
- exclusions.contains(&value.to_string())
156
- }
157
-
158
- fn skip_excluded_status_rows(status: &Option<&usize>, r: &StringRecord, exclusions: &Vec<String>) -> bool {
159
- status
160
- .map(|index| exclusions.contains(&r[*index].to_string()))
161
- .unwrap_or(false)
162
- }
163
-
164
- fn string_to_datetime(s: &str) -> Option<NaiveDateTime> {
165
- let maybe_correct = correct_datetime(s);
166
- if maybe_correct.is_some() { return maybe_correct; }
167
-
168
- NaiveDate::parse_from_str(s, "%d-%b-%y").ok().map(|d| d.and_hms_opt(0, 0, 0)).flatten()
169
- }
170
-
171
- fn correct_datetime(s: &str) -> Option<NaiveDateTime> {
172
- NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S").ok()
173
- }
174
-
175
- fn string_to_time(s: &str) -> Option<NaiveTime> {
176
- NaiveTime::parse_from_str(s, "%H:%M").ok()
177
- }
178
-
179
- fn transform_time_to_datetime(t1: NaiveDateTime, t2: NaiveTime) -> NaiveDateTime {
180
- NaiveDateTime::new(t1.date(), t2)
181
- }
182
-
183
- fn has_empty_first_col_skip_row(record: &StringRecord) -> bool {
184
- record[0].is_empty()
185
- }
186
-
187
- fn has_empty_row_skip(record: &StringRecord) -> bool {
188
- record.iter().all(|r| r.is_empty())
189
- }
190
-
191
- #[derive(Debug, PartialOrd, PartialEq, Eq, Ord)]
192
- struct CsvMandatoryColumn {
193
- value: String,
194
- index: usize,
195
- }
196
-
197
- impl CsvMandatoryColumn {
198
- fn new(value: String, index: usize) -> Self {
199
- CsvMandatoryColumn { value, index }
200
- }
201
- }
@@ -3,92 +3,127 @@ use std::fs::File;
3
3
  use csv::{Reader, StringRecord, Writer};
4
4
  use magnus::{RArray, Ruby};
5
5
 
6
- use crate::utils::{FileExtension, magnus_err, check_mandatory_headers, create_header_map, missing_header, missing_value, headers_as_byte_record};
7
-
8
- pub fn dedup(ruby: &Ruby, previous_csv_path: String,
9
- new_csv_path: String,
10
- target_path: String,
11
- mandatory_headers: RArray,
6
+ use crate::utils::shared::filters::FilterableRecord;
7
+ use crate::utils::{
8
+ check_mandatory_headers, create_header_map, headers_as_byte_record, magnus_err, missing_header,
9
+ missing_value, FileExtension,
10
+ };
11
+
12
+ pub fn dedup(
13
+ ruby: &Ruby,
14
+ previous_csv_path: String,
15
+ new_csv_path: String,
16
+ target_path: String,
17
+ mandatory_headers: RArray,
12
18
  ) -> magnus::error::Result<()> {
13
- if let Some(value) =
14
- check_file_extension(ruby, &previous_csv_path, "previous_csv_path") { return value; }
15
-
16
- if let Some(value) =
17
- check_file_extension(ruby, &new_csv_path, "new_csv_path") { return value; }
19
+ validate_csv_extension(ruby, &previous_csv_path, "previous_csv_path")?;
20
+ validate_csv_extension(ruby, &new_csv_path, "new_csv_path")?;
18
21
 
19
- let csv1 = File::open(previous_csv_path).map_err(|e| magnus_err(ruby, e, "previous_csv_path"))?;
22
+ let csv1 =
23
+ File::open(previous_csv_path).map_err(|e| magnus_err(ruby, e, "previous_csv_path"))?;
20
24
  let csv2 = File::open(new_csv_path).map_err(|e| magnus_err(ruby, e, "new_csv_path"))?;
21
25
 
22
26
  let mandatory_headers: Vec<String> = RArray::to_vec(mandatory_headers)?;
23
27
 
24
- let mut previous_csv: csv::Reader<File> = csv::Reader::from_reader(csv1);
25
- let mut new_csv: csv::Reader<File> = csv::Reader::from_reader(csv2);
28
+ let mut previous_csv: Reader<File> = Reader::from_reader(csv1);
29
+ let mut new_csv: Reader<File> = Reader::from_reader(csv2);
26
30
 
27
31
  let mut wtr = Writer::from_path(target_path).map_err(|e| magnus_err(ruby, e, "target_path"))?;
28
32
 
29
- let previous_headers = previous_csv.headers().map_err(|e| magnus_err(ruby, e, "previous_csv"))?.clone();
30
- let previous_headers_list : Vec<String> = previous_headers.iter().map(|h| h.to_string()).collect();
31
- let new_headers = new_csv.headers().map_err(|e| magnus_err(ruby, e, "new_csv"))?.clone();
32
- let new_headers_list : Vec<String> = new_headers.iter().map(|h| h.to_string()).collect();
33
-
34
-
35
- if let Some(value) =
36
- check_mandatory_headers(ruby, &previous_headers_list, &mandatory_headers, "previous_csv") { return value; }
37
-
38
- if let Some(value) =
39
- check_mandatory_headers(ruby, &new_headers_list, &mandatory_headers, "new_csv") { return value; }
40
-
41
- let csv_headers = headers_as_byte_record(mandatory_headers.clone());
42
- wtr.write_byte_record(&csv_headers).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
43
-
44
- let previous_mandatory_records = get_records(ruby, &mut previous_csv, previous_headers_list, &mandatory_headers)?;
45
- let new_mandatory_records = get_records(ruby, &mut new_csv, new_headers_list, &mandatory_headers)?;
33
+ let previous_headers = previous_csv
34
+ .headers()
35
+ .map_err(|e| magnus_err(ruby, e, "previous_csv"))?
36
+ .clone();
37
+ let previous_headers_list: Vec<String> =
38
+ previous_headers.iter().map(|h| h.to_string()).collect();
39
+ let new_headers = new_csv
40
+ .headers()
41
+ .map_err(|e| magnus_err(ruby, e, "new_csv"))?
42
+ .clone();
43
+ let new_headers_list: Vec<String> = new_headers.iter().map(|h| h.to_string()).collect();
44
+
45
+ if let Some(err) = check_mandatory_headers(
46
+ ruby,
47
+ &previous_headers_list,
48
+ &mandatory_headers,
49
+ "previous_csv",
50
+ ) {
51
+ return err;
52
+ }
46
53
 
47
- for new_record in new_mandatory_records {
48
- if !previous_mandatory_records.contains(&new_record) {
49
- wtr.write_byte_record(new_record.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
50
- }
54
+ if let Some(err) =
55
+ check_mandatory_headers(ruby, &new_headers_list, &mandatory_headers, "new_csv")
56
+ {
57
+ return err;
51
58
  }
52
59
 
60
+ wtr.write_byte_record(&headers_as_byte_record(mandatory_headers.clone()))
61
+ .map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
62
+
63
+ let previous_records = get_records(
64
+ ruby,
65
+ &mut previous_csv,
66
+ &previous_headers_list,
67
+ &mandatory_headers,
68
+ )?;
69
+ let new_records = get_records(ruby, &mut new_csv, &new_headers_list, &mandatory_headers)?;
70
+
71
+ new_records
72
+ .into_iter()
73
+ .filter(|record| !previous_records.contains(record))
74
+ .try_for_each(|record| {
75
+ wtr.write_byte_record(record.as_byte_record())
76
+ .map_err(|e| magnus_err(ruby, e, "write_byte_record"))
77
+ })?;
78
+
53
79
  wtr.flush().map_err(|e| magnus_err(ruby, e, "flush"))?;
54
80
 
55
81
  Ok(())
56
82
  }
57
83
 
58
- fn get_records(ruby: &Ruby, csv: &mut Reader<File>, csv_headers: Vec<String>, headers: &Vec<String>) -> magnus::error::Result<Vec<StringRecord>> {
59
- let header_map = create_header_map(&csv_headers);
60
- let mut records = vec![];
61
- for record in csv.records() {
62
- let record = record.map_err(|e| magnus_err(ruby, e, "record error"))?;
63
-
64
- if has_empty_row_skip(&record) { continue; }
65
- if has_empty_first_col_skip_row(&record) { continue; }
66
-
67
- let mut columns = vec![];
68
- for column in headers.iter() {
69
- let column_index = header_map.get(column).ok_or(missing_header(ruby, column))?;
70
- let column_value = record.get(*column_index).ok_or(missing_value(ruby, column))?;
71
- columns.push(column_value.trim_end());
72
- }
73
- let columns = columns.into_iter().collect::<StringRecord>();
74
- records.push(columns);
75
- }
76
- Ok(records)
84
+ fn get_records(
85
+ ruby: &Ruby,
86
+ csv: &mut Reader<File>,
87
+ csv_headers: &[String],
88
+ headers: &[String],
89
+ ) -> magnus::error::Result<Vec<StringRecord>> {
90
+ let header_map = create_header_map(csv_headers);
91
+
92
+ csv.records()
93
+ .filter_map(|record_result| match record_result {
94
+ Ok(record) if !record.is_empty() && !record.has_empty_first_column() => {
95
+ Some(Ok(record))
96
+ }
97
+ Ok(_) => None,
98
+ Err(e) => Some(Err(magnus_err(ruby, e, "record error"))),
99
+ })
100
+ .map(|record_result| {
101
+ let record = record_result?;
102
+
103
+ let columns: Result<Vec<&str>, magnus::Error> = headers
104
+ .iter()
105
+ .map(|column| {
106
+ let column_index = *header_map
107
+ .get(column)
108
+ .ok_or_else(|| missing_header(ruby, column))?;
109
+ record
110
+ .get(column_index)
111
+ .map(|s| s.trim_end())
112
+ .ok_or_else(|| missing_value(ruby, column))
113
+ })
114
+ .collect();
115
+
116
+ Ok(columns?.into_iter().collect::<StringRecord>())
117
+ })
118
+ .collect()
77
119
  }
78
120
 
79
- fn check_file_extension(ruby: &Ruby, csv_path: &String, message: &str) -> Option<magnus::error::Result<()>> {
121
+ fn validate_csv_extension(ruby: &Ruby, csv_path: &str, message: &str) -> magnus::error::Result<()> {
80
122
  if !csv_path.has_extension(&["csv"]) {
81
- return Some(Err(magnus::Error::new(ruby.exception_standard_error(), format!("{} must be a csv file", message))));
123
+ return Err(magnus::Error::new(
124
+ ruby.exception_standard_error(),
125
+ format!("{} must be a csv file", message),
126
+ ));
82
127
  }
83
- None
84
- }
85
-
86
- fn has_empty_first_col_skip_row(previous_record: &StringRecord) -> bool {
87
- previous_record[0].is_empty()
88
- }
89
-
90
- fn has_empty_row_skip(record: &StringRecord) -> bool {
91
- record.iter().all(|r| r.is_empty())
128
+ Ok(())
92
129
  }
93
-
94
-