patchwork_csv_utils 0.1.10-x86_64-linux → 0.1.12-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 55b3b6982ebaadca79bd6b458a51be253d07c6e58bc7270f9fde70f8b535d349
4
- data.tar.gz: ba693a5d934e92637279159d61f47b16cb9ec4594b5afa22fa2cd279ed2983a7
3
+ metadata.gz: 7f1ff8917cb0cdd1a3f57ac35dcada6b4eaca668d5c543686123c52c82eeb122
4
+ data.tar.gz: 6068c35072c1c2ff03359f62f7af722552e81fe01b0f6b9087cc13610c1d21cd
5
5
  SHA512:
6
- metadata.gz: 4a6bb7dbe3b9fcdb7d15335eca40489b5237f71f1145479c6f3aa588f70891a5f8f15e1e73cf3c38fdde59c2df2a5dce84a04404cfeacbb6508474f74b2bebf4
7
- data.tar.gz: dd040e99cfebdf8c2672fda983c5d444e70c581fba101b0b35ebcfa46833b46dec12f9765f16833a42bfd248739be717292a485f5bfd719c7a69fd91791d4a9a
6
+ metadata.gz: 6a4f942409488f71d987cf79bf22a772974965c5ad027d02ef32229909fad20132e50b08f7f5e93f1fe9dd64e1bfa9933a4d492ed14f6ddc4ed3feb137a3fc3e
7
+ data.tar.gz: eaef6d732137590f8d010442655edfbd17135b98ad961da4a0831644121e67564b674fe78a9877a52dee1a15ea8ffaa6e8a7fdb6343e0e106e60316c2813dec6
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- patchwork_csv_utils (0.1.10)
4
+ patchwork_csv_utils (0.1.12)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/README.md CHANGED
@@ -13,9 +13,9 @@ gem install patchwork_csv_utils
13
13
 
14
14
  ```irb
15
15
  require 'csv_utils'
16
- CsvUtils.dedup('file1.csv', 'file2.csv', 'output.csv')
17
- CsvUtils.to_csv('file1.xls', 'output_file1.csv', ['request_ids_to_skip']])
18
- CsvUtils.transform_csv('file1.xls', 'output_file1.csv', ['request_ids_to_skip']])
16
+ CsvUtils.dedup('file1.csv', 'file2.csv', 'output.csv', ['mandatory_headers'])
17
+ CsvUtils.to_csv('file1.xls', 'output_file1.csv', ['request_ids_to_skip'], ['mandatory_headers'])
18
+ CsvUtils.transform_csv('file1.xls', 'output_file1.csv', ['request_ids_to_skip'], ['mandatory_headers'])
19
19
  ```
20
20
 
21
21
  ## Release
@@ -8,8 +8,8 @@ pub mod utils;
8
8
  #[magnus::init]
9
9
  fn init() -> Result<(), magnus::Error> {
10
10
  let module = define_module("CsvUtils")?;
11
- module.define_singleton_method("dedup", function!(dedup, 3))?;
12
- module.define_singleton_method("to_csv", function!(to_csv, 3))?;
13
- module.define_singleton_method("transform_csv", function!(transform_csv, 3))?;
11
+ module.define_singleton_method("dedup", function!(dedup, 4))?;
12
+ module.define_singleton_method("to_csv", function!(to_csv, 4))?;
13
+ module.define_singleton_method("transform_csv", function!(transform_csv, 4))?;
14
14
  Ok(())
15
15
  }
@@ -2,26 +2,35 @@ use std::collections::HashMap;
2
2
  use std::fs::File;
3
3
 
4
4
  use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Utc};
5
- use csv::{StringRecord, Writer};
5
+ use csv::{Reader, StringRecord, Writer};
6
6
  use magnus::{Error, RArray, Ruby};
7
7
 
8
- use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error};
8
+ use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, create_header_map, missing_value, headers_as_byte_record, index_of_header_in_mandatory_list};
9
9
 
10
- pub fn transform_csv(ruby: &Ruby, csv_path: String, target_path: String, exclusions: RArray) -> magnus::error::Result<()> {
10
+ pub fn transform_csv(ruby: &Ruby, csv_path: String,
11
+ target_path: String, exclusions: RArray,
12
+ mandatory_headers: RArray, ) -> magnus::error::Result<()> {
11
13
  if !csv_path.has_extension(&["csv"]) {
12
- return Err(magnus::Error::new(ruby.exception_standard_error(), "csv_path must be a csv file".to_string()));
14
+ return Err(Error::new(ruby.exception_standard_error(), "csv_path must be a csv file".to_string()));
13
15
  }
14
16
 
15
17
  let exclusions = RArray::to_vec(exclusions)?;
18
+ let mandatory_headers: Vec<String> = RArray::to_vec(mandatory_headers)?;
16
19
 
17
20
  let csv_file = File::open(csv_path).map_err(|e| magnus_err(ruby, e, "csv_path"))?;
18
- let mut csv: csv::Reader<File> = csv::Reader::from_reader(csv_file);
21
+ let mut csv: Reader<File> = Reader::from_reader(csv_file);
19
22
  let mut wtr = Writer::from_path(target_path).map_err(|e| magnus_err(ruby, e, "target_path"))?;
20
- let headers = csv.headers().map_err(|e| magnus_err(ruby, e, "csv_path headers"))?;
21
- let header_map: HashMap<String, usize> = headers.iter().enumerate().map(|(i, h)| (h.to_string(), i)).collect();
22
- let inverse_header_map: HashMap<usize, String> = headers.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
23
+ let headers = csv.headers().map_err(|e| magnus_err(ruby, e, "csv_path headers"))?.clone();
24
+ let headers_list: Vec<String> = headers.iter().map(|h| h.to_string()).collect();
23
25
 
24
- wtr.write_byte_record(headers.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
26
+ if let Some(value) =
27
+ check_mandatory_headers(ruby, &headers_list, &mandatory_headers, "csv") { return value; }
28
+
29
+ let header_map: HashMap<String, usize> = create_header_map(&mandatory_headers);
30
+ let inverse_header_map: HashMap<usize, String> = mandatory_headers.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
31
+
32
+ let csv_headers = headers_as_byte_record(mandatory_headers.clone());
33
+ wtr.write_byte_record(&csv_headers).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
25
34
 
26
35
  let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
27
36
  let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
@@ -30,8 +39,9 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String, target_path: String, exclusi
30
39
  let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
31
40
  let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
32
41
 
33
- for (ri, record) in csv.records().enumerate() {
34
- let record = record.map_err(|e| magnus_err(ruby, e, "record"))?;
42
+ let mandatory_records = get_mandatory_records(&ruby, &mut csv, &headers_list, &mandatory_headers)?;
43
+
44
+ for (ri, record) in mandatory_records.iter().enumerate() {
35
45
 
36
46
  if skip_excluded_rows(request_id, &record, &exclusions) { continue; }
37
47
  if has_empty_row_skip(&record) { continue; }
@@ -39,22 +49,29 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String, target_path: String, exclusi
39
49
 
40
50
  let mut date_value = Utc::now().naive_utc();
41
51
 
42
- let record = record.iter().enumerate().map(|(i, c)| {
43
- let c = c.trim_end();
52
+ let mut columns = vec![];
53
+ for (i, column) in mandatory_headers.iter().enumerate() {
54
+ let column_index = header_map.get(column).ok_or(missing_header(ruby, column))?;
55
+ let column_value = record.get(*column_index).ok_or(missing_value(ruby, column))?;
56
+ let column_value = column_value.trim_end();
57
+
44
58
  if i == *date {
45
- let current = string_to_datetime(c).ok_or(to_datetime_error(ruby, c, ri, "Date"))?;
59
+ let current = string_to_datetime(column_value).ok_or(to_datetime_error(ruby, column_value, ri, "Date"))?;
46
60
  date_value = current;
47
- Ok(current.to_string())
61
+ columns.push(current.to_string());
48
62
  } else if i == *start || i == *end || i == *actual_start || i == *actual_end {
49
- if c.is_empty() { return Ok(c.to_string()); }
50
- let column_name = get_column_name(&inverse_header_map, &i);
51
- process_datetime(ruby, ri, date_value, c, &column_name)
63
+ if column_value.is_empty() {
64
+ columns.push(column_value.to_string());
65
+ } else {
66
+ let column_name = get_column_name(&inverse_header_map, &i);
67
+ let current = process_datetime(ruby, ri, date_value, column_value, &column_name)?;
68
+ columns.push(current);
69
+ }
52
70
  } else {
53
- Ok(c.to_string())
71
+ columns.push(column_value.to_string());
54
72
  }
55
- }).collect::<Result<StringRecord, magnus::Error>>()?;
56
-
57
- let record = record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
73
+ }
74
+ let record = columns.into_iter().collect::<StringRecord>();
58
75
  wtr.write_byte_record(record.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
59
76
  }
60
77
 
@@ -63,6 +80,28 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String, target_path: String, exclusi
63
80
  Ok(())
64
81
  }
65
82
 
83
+ fn get_mandatory_records(ruby: &Ruby, csv: &mut Reader<File>, csv_header_list: &Vec<String>, mandatory_headers_list: &Vec<String>) -> magnus::error::Result<Vec<StringRecord>> {
84
+ let inverse_header_map: HashMap<usize, String> = csv_header_list.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
85
+
86
+ let mut records = vec![];
87
+ for row in csv.records() {
88
+ let row = row.map_err(|e| magnus_err(ruby, e, "record error"))?;
89
+ let mut columns = vec![];
90
+ for (i, column_value) in row.iter().enumerate() {
91
+ let column_name = inverse_header_map.get(&i).ok_or(missing_header(ruby, &i.to_string()))?;
92
+ if mandatory_headers_list.contains(column_name) {
93
+ let index = index_of_header_in_mandatory_list(mandatory_headers_list.clone(), column_name.to_string()).unwrap();
94
+ columns.push(CsvMandatoryColumn::new(column_value.to_string(), index));
95
+ }
96
+ }
97
+ columns.sort_by(|a, b| a.index.cmp(&b.index));
98
+ let columns = columns.iter().map(|c| c.value.to_string()).collect::<StringRecord>();
99
+ records.push(columns);
100
+ }
101
+
102
+ Ok(records)
103
+ }
104
+
66
105
  fn process_datetime(ruby: &Ruby, ri: usize, date_value: NaiveDateTime, c: &str, column_name: &String) -> magnus::error::Result<String> {
67
106
  let maybe_correct = correct_datetime(c);
68
107
  if let Some(correct) = maybe_correct {
@@ -110,4 +149,16 @@ fn has_empty_first_col_skip_row(record: &StringRecord) -> bool {
110
149
 
111
150
  fn has_empty_row_skip(record: &StringRecord) -> bool {
112
151
  record.iter().all(|r| r.is_empty())
152
+ }
153
+
154
+ #[derive(Debug, PartialOrd, PartialEq, Eq, Ord)]
155
+ struct CsvMandatoryColumn {
156
+ value: String,
157
+ index: usize,
158
+ }
159
+
160
+ impl CsvMandatoryColumn {
161
+ fn new(value: String, index: usize) -> Self {
162
+ CsvMandatoryColumn { value, index }
163
+ }
113
164
  }
@@ -1,54 +1,51 @@
1
1
  use std::fs::File;
2
2
 
3
- use csv::{StringRecord, Writer};
4
- use magnus::Ruby;
3
+ use csv::{Reader, StringRecord, Writer};
4
+ use magnus::{RArray, Ruby};
5
5
 
6
- use crate::utils::{FileExtension, magnus_err};
6
+ use crate::utils::{FileExtension, magnus_err, check_mandatory_headers, create_header_map, missing_header, missing_value, headers_as_byte_record};
7
7
 
8
- pub fn dedup(ruby: &Ruby, previous_csv_path: String, new_csv_path: String, target_path: String) -> magnus::error::Result<()> {
9
- if !previous_csv_path.has_extension(&["csv"]) {
10
- return Err(magnus::Error::new(ruby.exception_standard_error(), "previous_csv_path must be a csv file".to_string()));
11
- }
12
- if !new_csv_path.has_extension(&["csv"]) {
13
- return Err(magnus::Error::new(ruby.exception_standard_error(), "new_csv_path must be a csv file".to_string()));
14
- }
8
+ pub fn dedup(ruby: &Ruby, previous_csv_path: String,
9
+ new_csv_path: String,
10
+ target_path: String,
11
+ mandatory_headers: RArray,
12
+ ) -> magnus::error::Result<()> {
13
+ if let Some(value) =
14
+ check_file_extension(ruby, &previous_csv_path, "previous_csv_path") { return value; }
15
+
16
+ if let Some(value) =
17
+ check_file_extension(ruby, &new_csv_path, "new_csv_path") { return value; }
15
18
 
16
19
  let csv1 = File::open(previous_csv_path).map_err(|e| magnus_err(ruby, e, "previous_csv_path"))?;
17
20
  let csv2 = File::open(new_csv_path).map_err(|e| magnus_err(ruby, e, "new_csv_path"))?;
18
21
 
22
+ let mandatory_headers: Vec<String> = RArray::to_vec(mandatory_headers)?;
23
+
19
24
  let mut previous_csv: csv::Reader<File> = csv::Reader::from_reader(csv1);
20
25
  let mut new_csv: csv::Reader<File> = csv::Reader::from_reader(csv2);
21
26
 
22
27
  let mut wtr = Writer::from_path(target_path).map_err(|e| magnus_err(ruby, e, "target_path"))?;
23
28
 
24
- let previous_headers = previous_csv.headers().map_err(|e| magnus_err(ruby, e, "previous_csv_path headers"))?;
25
- let new_headers = new_csv.headers().map_err(|e| magnus_err(ruby, e, "new_csv_path headers"))?;
29
+ let previous_headers = previous_csv.headers().map_err(|e| magnus_err(ruby, e, "previous_csv"))?.clone();
30
+ let previous_headers_list : Vec<String> = previous_headers.iter().map(|h| h.to_string()).collect();
31
+ let new_headers = new_csv.headers().map_err(|e| magnus_err(ruby, e, "new_csv"))?.clone();
32
+ let new_headers_list : Vec<String> = new_headers.iter().map(|h| h.to_string()).collect();
26
33
 
27
- if previous_headers != new_headers {
28
- return Err(magnus::Error::new(ruby.exception_standard_error(), "headers of both csv files must be the same".to_string()));
29
- }
30
34
 
31
- wtr.write_byte_record(previous_headers.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
35
+ if let Some(value) =
36
+ check_mandatory_headers(ruby, &previous_headers_list, &mandatory_headers, "previous_csv") { return value; }
32
37
 
33
- let mut previous_records = vec![];
34
- for previous_record in previous_csv.records() {
35
- let previous_record = previous_record.map_err(|e| magnus_err(ruby, e, "previous_record"))?;
38
+ if let Some(value) =
39
+ check_mandatory_headers(ruby, &new_headers_list, &mandatory_headers, "new_csv") { return value; }
36
40
 
37
- if has_empty_row_skip(&previous_record) { continue; }
38
- if has_empty_first_col_skip_row(&previous_record) { continue; }
41
+ let csv_headers = headers_as_byte_record(mandatory_headers.clone());
42
+ wtr.write_byte_record(&csv_headers).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
39
43
 
40
- let previous_record = previous_record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
41
- previous_records.push(previous_record)
42
- }
43
-
44
- for new_record in new_csv.records() {
45
- let new_record = new_record.map_err(|e| magnus_err(ruby, e, "new_record"))?;
44
+ let previous_mandatory_records = get_records(ruby, &mut previous_csv, previous_headers_list, &mandatory_headers)?;
45
+ let new_mandatory_records = get_records(ruby, &mut new_csv, new_headers_list, &mandatory_headers)?;
46
46
 
47
- if has_empty_row_skip(&new_record) { continue; }
48
- if has_empty_first_col_skip_row(&new_record) { continue; }
49
-
50
- let new_record = new_record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
51
- if !previous_records.contains(&new_record) {
47
+ for new_record in new_mandatory_records {
48
+ if !previous_mandatory_records.contains(&new_record) {
52
49
  wtr.write_byte_record(new_record.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
53
50
  }
54
51
  }
@@ -58,6 +55,34 @@ pub fn dedup(ruby: &Ruby, previous_csv_path: String, new_csv_path: String, targe
58
55
  Ok(())
59
56
  }
60
57
 
58
+ fn get_records(ruby: &Ruby, csv: &mut Reader<File>, csv_headers: Vec<String>, headers: &Vec<String>) -> magnus::error::Result<Vec<StringRecord>> {
59
+ let header_map = create_header_map(&csv_headers);
60
+ let mut records = vec![];
61
+ for record in csv.records() {
62
+ let record = record.map_err(|e| magnus_err(ruby, e, "record error"))?;
63
+
64
+ if has_empty_row_skip(&record) { continue; }
65
+ if has_empty_first_col_skip_row(&record) { continue; }
66
+
67
+ let mut columns = vec![];
68
+ for column in headers.iter() {
69
+ let column_index = header_map.get(column).ok_or(missing_header(ruby, column))?;
70
+ let column_value = record.get(*column_index).ok_or(missing_value(ruby, column))?;
71
+ columns.push(column_value.trim_end());
72
+ }
73
+ let columns = columns.into_iter().collect::<StringRecord>();
74
+ records.push(columns);
75
+ }
76
+ Ok(records)
77
+ }
78
+
79
+ fn check_file_extension(ruby: &Ruby, csv_path: &String, message: &str) -> Option<magnus::error::Result<()>> {
80
+ if !csv_path.has_extension(&["csv"]) {
81
+ return Some(Err(magnus::Error::new(ruby.exception_standard_error(), format!("{} must be a csv file", message))));
82
+ }
83
+ None
84
+ }
85
+
61
86
  fn has_empty_first_col_skip_row(previous_record: &StringRecord) -> bool {
62
87
  previous_record[0].is_empty()
63
88
  }
@@ -1,6 +1,8 @@
1
+ use std::collections::{HashMap, HashSet};
1
2
  use std::error::Error;
2
3
  use std::ffi::OsStr;
3
4
  use std::path::Path;
5
+ use ::csv::{ByteRecord, StringRecord};
4
6
  use magnus::Ruby;
5
7
 
6
8
  pub mod csv;
@@ -11,6 +13,15 @@ fn missing_header(ruby: &Ruby, header: &str) -> magnus::Error {
11
13
  magnus::Error::new(ruby.exception_standard_error(), format!("Missing '{}' header", header))
12
14
  }
13
15
 
16
+ fn missing_value(ruby: &Ruby, header: &str) -> magnus::Error {
17
+ magnus::Error::new(ruby.exception_standard_error(), format!("Missing value for '{}' header", header))
18
+ }
19
+
20
+ fn headers_as_byte_record(headers: Vec<String>) -> ByteRecord {
21
+ let string_record = headers.into_iter().collect::<StringRecord>();
22
+ string_record.as_byte_record().clone()
23
+ }
24
+
14
25
  fn magnus_err<E: Error>(ruby: &Ruby, e: E, msg: &str) -> magnus::Error {
15
26
  magnus::Error::new(ruby.exception_standard_error(), format!("{}: {}", msg, e.to_string()))
16
27
  }
@@ -19,6 +30,39 @@ fn to_datetime_error(ruby: &Ruby, value: &str, row: usize, col: &str) -> magnus:
19
30
  magnus::Error::new(ruby.exception_standard_error(), format!("Could not parse datetime '{}', row: {}, col: {}", value, row, col))
20
31
  }
21
32
 
33
+ fn check_mandatory_headers(ruby: &Ruby, headers: &Vec<String>, mandatory_headers: &Vec<String>, message: &str) -> Option<magnus::error::Result<()>> {
34
+ let csv_mandatory_headers = filter_headers(headers, mandatory_headers);
35
+
36
+ if csv_mandatory_headers.is_empty() {
37
+ return Some(Err(magnus::Error::new(ruby.exception_standard_error(), format!("{} has no mandatory headers", message))));
38
+ }
39
+
40
+ let csv_mandatory_headers = csv_mandatory_headers.to_owned().clone();
41
+ let mandatory_headers = mandatory_headers.to_owned().clone();
42
+
43
+ let set1 = csv_mandatory_headers.iter().collect::<HashSet<_>>();
44
+ let set2 = mandatory_headers.iter().collect::<HashSet<_>>();
45
+ let difference = set2.difference(&set1).collect::<Vec<_>>();
46
+
47
+ if !difference.is_empty() {
48
+ let missing_headers = difference.iter().map(|h| h.to_string()).collect::<Vec<String>>();
49
+ return Some(Err(magnus::Error::new(ruby.exception_standard_error(), format!("{} is missing mandatory headers: {}", message, missing_headers.join(", ")))));
50
+ }
51
+ None
52
+ }
53
+
54
+ fn index_of_header_in_mandatory_list(mandatory_headers_list: Vec<String>, column_name: String) -> Option<usize> {
55
+ mandatory_headers_list.iter().position(|h| h.to_string() == column_name)
56
+ }
57
+
58
+ fn filter_headers(csv_headers: &Vec<String>, expected_headers: &Vec<String>) -> Vec<String> {
59
+ csv_headers.iter().map(|v| v.to_string()).filter(|h| expected_headers.contains(h)).collect::<Vec<String>>()
60
+ }
61
+
62
+ fn create_header_map(headers: &Vec<String>) -> HashMap<String, usize> {
63
+ headers.iter().enumerate().map(|(i, h)| (h.to_string(), i)).collect()
64
+ }
65
+
22
66
  pub trait FileExtension {
23
67
  fn has_extension<S: AsRef<str>>(&self, extensions: &[S]) -> bool;
24
68
  }
@@ -33,4 +77,5 @@ impl<P: AsRef<Path>> FileExtension for P {
33
77
 
34
78
  false
35
79
  }
36
- }
80
+ }
81
+
@@ -6,14 +6,19 @@ use calamine::{Data, open_workbook, Range, Reader, Xls};
6
6
  use chrono::{NaiveDateTime, Utc};
7
7
  use magnus::{RArray, Ruby};
8
8
 
9
- use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error};
9
+ use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, missing_value, index_of_header_in_mandatory_list};
10
10
 
11
- pub fn to_csv(ruby: &Ruby, xls_path: String, target_path: String, exclusions: RArray) -> magnus::error::Result<()> {
11
+ pub fn to_csv(ruby: &Ruby, xls_path: String,
12
+ target_path: String,
13
+ exclusions: RArray,
14
+ mandatory_headers: RArray
15
+ ) -> magnus::error::Result<()> {
12
16
  if !xls_path.has_extension(&["xls"]) {
13
17
  return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls file".to_string()));
14
18
  }
15
19
 
16
20
  let exclusions = RArray::to_vec(exclusions)?;
21
+ let mandatory_headers: Vec<String> = RArray::to_vec(mandatory_headers)?;
17
22
 
18
23
  let mut workbook: Xls<_> = open_workbook(xls_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not open xls: {}", xls_path).as_str()))?;
19
24
  let range = workbook.worksheet_range_at(0)
@@ -21,14 +26,22 @@ pub fn to_csv(ruby: &Ruby, xls_path: String, target_path: String, exclusions: RA
21
26
  .and_then(|r| r.map_err(|e| magnus_err(ruby, e, "could not read worksheet range")))?;
22
27
 
23
28
  let headers = range.headers().ok_or(magnus::Error::new(ruby.exception_standard_error(), "no headers found in xls".to_string()))?;
24
- let header_map: HashMap<String, usize> = headers.iter().enumerate().map(|(i, h)| (h.to_string(), i)).collect();
29
+ let headers_list : Vec<String> = headers.iter().map(|h| h.to_string()).collect();
30
+
31
+ if let Some(value) =
32
+ check_mandatory_headers(ruby, &headers_list, &mandatory_headers, "csv") { return value; }
33
+
34
+ let header_map: HashMap<String, usize> = mandatory_headers.iter().enumerate().map(|(i, h)| (h.to_string(), i)).collect();
25
35
  let csv_out_file = File::create(target_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not create csv file: {}", target_path).as_str()))?;
26
36
  let mut dest = BufWriter::new(csv_out_file);
27
37
 
28
- write_csv(ruby, &mut dest, &range, header_map, exclusions)
38
+ write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers, headers_list)
29
39
  }
30
40
 
31
- fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_map: HashMap<String, usize>, exclusions: Vec<String>) -> magnus::error::Result<()> {
41
+ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
42
+ header_map: HashMap<String, usize>, exclusions: Vec<String>,
43
+ mandatory_headers: Vec<String>,
44
+ headers_list: Vec<String>) -> magnus::error::Result<()> {
32
45
  let n = range.get_size().1 - 1;
33
46
 
34
47
  let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
@@ -38,17 +51,23 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_ma
38
51
  let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
39
52
  let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
40
53
 
41
- for (ri, r) in range.rows().enumerate() {
54
+ let mandatory_rows = get_mandatory_records(ruby, range, &headers_list, &mandatory_headers)?;
55
+
56
+ for (ri, r) in mandatory_rows.into_iter().enumerate() {
42
57
  let mut date_value = Utc::now().naive_utc();
43
58
 
44
- if skip_excluded_rows(&request_id, r, &exclusions) { continue; }
45
- if skip_empty_rows(r) { continue; }
46
- if skip_rows_with_no_request_id(&request_id, r) { continue; }
47
- if date_value_is_not_present(&date, r) {
59
+ if skip_excluded_rows(&request_id, &r, &exclusions) { continue; }
60
+ if skip_empty_rows(&r) { continue; }
61
+ if skip_rows_with_no_request_id(&request_id, &r) { continue; }
62
+ if date_value_is_not_present(&date, &r) {
48
63
  return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Date value is not present in row: {}", ri)));
49
64
  }
50
65
 
51
- for (i, c) in r.iter().enumerate() {
66
+ for (i, c) in mandatory_headers.iter().enumerate() {
67
+
68
+ let column_index = header_map.get(c).ok_or(missing_header(ruby, c))?;
69
+ let c = r.get(*column_index).ok_or(missing_value(ruby, c))?;
70
+
52
71
  match *c {
53
72
  Data::Empty => Ok(()),
54
73
  Data::String(ref s) | Data::DateTimeIso(ref s) | Data::DurationIso(ref s) => {
@@ -77,21 +96,42 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_ma
77
96
  Ok(())
78
97
  }
79
98
 
80
- fn date_value_is_not_present(date: &usize, r: &[Data]) -> bool {
81
- r[*date] == Data::Empty
99
+ fn get_mandatory_records<'a>(ruby: &Ruby, range: &'a Range<Data>, csv_header_list: &Vec<String>, mandatory_headers_list: &Vec<String>) -> magnus::error::Result<Vec<Vec<&'a Data>>> {
100
+ let inverse_header_map: HashMap<usize, String> = csv_header_list.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
101
+
102
+ let mut records = vec![];
103
+ for row in range.rows() {
104
+ let mut columns = vec![];
105
+ for (i, column_value) in row.iter().enumerate() {
106
+ let column_name = inverse_header_map.get(&i).ok_or(missing_header(ruby, &i.to_string()))?;
107
+ if mandatory_headers_list.contains(column_name) {
108
+ let index = index_of_header_in_mandatory_list(mandatory_headers_list.clone(), column_name.to_string()).unwrap();
109
+ columns.push(XlsMandatoryColumn::new(column_value, index));
110
+ }
111
+ }
112
+ columns.sort_by(|a, b| a.index.cmp(&b.index));
113
+ let columns = columns.iter().map(|c| c.value).collect::<Vec<&Data>>();
114
+ records.push(columns);
115
+ }
116
+
117
+ Ok(records)
82
118
  }
83
119
 
84
- fn skip_excluded_rows(request_id: &usize, r: &[Data], exclusions: &Vec<String>) -> bool {
120
+ fn date_value_is_not_present(date: &usize, r: &Vec<&Data>) -> bool {
121
+ r[*date] == &Data::Empty
122
+ }
123
+
124
+ fn skip_excluded_rows(request_id: &usize, r: &Vec<&Data>, exclusions: &Vec<String>) -> bool {
85
125
  let value = r[*request_id].to_string();
86
126
  exclusions.contains(&value.to_string())
87
127
  }
88
128
 
89
- fn skip_empty_rows(r: &[Data]) -> bool {
90
- r.iter().all(|c| c == &Data::Empty)
129
+ fn skip_empty_rows(r: &Vec<&Data>) -> bool {
130
+ r.into_iter().all(|c| c == &&Data::Empty)
91
131
  }
92
132
 
93
- fn skip_rows_with_no_request_id(request_id: &usize, r: &[Data]) -> bool {
94
- r[*request_id] == Data::Empty
133
+ fn skip_rows_with_no_request_id(request_id: &usize, r: &Vec<&Data>) -> bool {
134
+ r[*request_id] == &Data::Empty
95
135
  }
96
136
 
97
137
  fn transform_time_to_datetime(t1: NaiveDateTime, t2: NaiveDateTime) -> NaiveDateTime {
@@ -111,4 +151,15 @@ fn clean_strings(s: &str) -> String {
111
151
  .replace("\r", " ")
112
152
  .replace("\"", "")
113
153
  .replace("'", "")
114
- }
154
+ }
155
+
156
+ struct XlsMandatoryColumn<'a> {
157
+ value: &'a Data,
158
+ index: usize,
159
+ }
160
+
161
+ impl<'a> XlsMandatoryColumn<'a> {
162
+ fn new(value: &'a Data, index: usize) -> Self {
163
+ XlsMandatoryColumn { value, index }
164
+ }
165
+ }
Binary file
Binary file
Binary file
Binary file
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module CsvUtils
4
- VERSION = '0.1.10'
4
+ VERSION = '0.1.12'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: patchwork_csv_utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.10
4
+ version: 0.1.12
5
5
  platform: x86_64-linux
6
6
  authors:
7
7
  - kingsley.hendrickse
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-08-09 00:00:00.000000000 Z
11
+ date: 2024-09-03 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Deduplication of CSV files and XLS to CSV conversion.
14
14
  email: