patchwork_csv_utils 0.1.11-x86_64-linux → 0.1.13-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a2886e4c0dad30530b1462bdbed9932d66c3c1e3e480b79cbc9d707299b96d8d
4
- data.tar.gz: 7875cce3aea97aac78e05d15224d7f0fd0c70677ffce3bbd8d37a1fb9c034bc9
3
+ metadata.gz: a786c19115a7385e97f6eb02a4b59d60c39eb28e92dcee390ed268ba28b44c4e
4
+ data.tar.gz: 871457cba4388fdb138aa450e201f14aa387279e8181889d9e1bdf37b6c4140a
5
5
  SHA512:
6
- metadata.gz: 98016cc44bb7105647f1d30394da6b977e2bf6dd3ca4bcec4697ec54fa6728810606304f492376d837ccff2c57ce0ed9bd96c9db8412bd81af9cd84d077bdc37
7
- data.tar.gz: 9184db69f8cc88e6b2f57933c73f957bd45fd68ba100325a20f0d38f63830be83b4c3882673008bb41b983a431855a93764090abda6116450eb4f971f23f25eb
6
+ metadata.gz: 535c10c53645b0220289e1c0f98e170fa108c4d841c33023486a5acde941f0237e8c002f25e0211dceb1cb08edfdf73e13cb7dd6fda27721a2b5d85c24bd7ea1
7
+ data.tar.gz: d03168eef93bb5a6bf6e16e75ca2a80f148f04538776f1828aec54750c677c97a32eac682ecfc9238c74d217c34b5e5060d03fd924483277fafa7b5896df4f94
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- patchwork_csv_utils (0.1.11)
4
+ patchwork_csv_utils (0.1.13)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -5,11 +5,11 @@ use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Utc};
5
5
  use csv::{Reader, StringRecord, Writer};
6
6
  use magnus::{Error, RArray, Ruby};
7
7
 
8
- use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, create_header_map, missing_value, headers_as_byte_record};
8
+ use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, create_header_map, missing_value, headers_as_byte_record, index_of_header_in_mandatory_list};
9
9
 
10
10
  pub fn transform_csv(ruby: &Ruby, csv_path: String,
11
11
  target_path: String, exclusions: RArray,
12
- mandatory_headers: RArray,) -> magnus::error::Result<()> {
12
+ mandatory_headers: RArray, ) -> magnus::error::Result<()> {
13
13
  if !csv_path.has_extension(&["csv"]) {
14
14
  return Err(Error::new(ruby.exception_standard_error(), "csv_path must be a csv file".to_string()));
15
15
  }
@@ -21,13 +21,13 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String,
21
21
  let mut csv: Reader<File> = Reader::from_reader(csv_file);
22
22
  let mut wtr = Writer::from_path(target_path).map_err(|e| magnus_err(ruby, e, "target_path"))?;
23
23
  let headers = csv.headers().map_err(|e| magnus_err(ruby, e, "csv_path headers"))?.clone();
24
- let headers_list : Vec<String> = headers.iter().map(|h| h.to_string()).collect();
24
+ let headers_list: Vec<String> = headers.iter().map(|h| h.to_string()).collect();
25
25
 
26
26
  if let Some(value) =
27
27
  check_mandatory_headers(ruby, &headers_list, &mandatory_headers, "csv") { return value; }
28
28
 
29
- let header_map: HashMap<String, usize> = create_header_map(&headers);
30
- let inverse_header_map: HashMap<usize, String> = headers.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
29
+ let header_map: HashMap<String, usize> = create_header_map(&mandatory_headers);
30
+ let inverse_header_map: HashMap<usize, String> = mandatory_headers.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
31
31
 
32
32
  let csv_headers = headers_as_byte_record(mandatory_headers.clone());
33
33
  wtr.write_byte_record(&csv_headers).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
@@ -39,8 +39,9 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String,
39
39
  let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
40
40
  let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
41
41
 
42
- for (ri, record) in csv.records().enumerate() {
43
- let record = record.map_err(|e| magnus_err(ruby, e, "record"))?;
42
+ let mandatory_records = get_mandatory_records(&ruby, &mut csv, &headers_list, &mandatory_headers)?;
43
+
44
+ for (ri, record) in mandatory_records.iter().enumerate() {
44
45
 
45
46
  if skip_excluded_rows(request_id, &record, &exclusions) { continue; }
46
47
  if has_empty_row_skip(&record) { continue; }
@@ -79,6 +80,28 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String,
79
80
  Ok(())
80
81
  }
81
82
 
83
+ fn get_mandatory_records(ruby: &Ruby, csv: &mut Reader<File>, csv_header_list: &Vec<String>, mandatory_headers_list: &Vec<String>) -> magnus::error::Result<Vec<StringRecord>> {
84
+ let inverse_header_map: HashMap<usize, String> = csv_header_list.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
85
+
86
+ let mut records = vec![];
87
+ for row in csv.records() {
88
+ let row = row.map_err(|e| magnus_err(ruby, e, "record error"))?;
89
+ let mut columns = vec![];
90
+ for (i, column_value) in row.iter().enumerate() {
91
+ let column_name = inverse_header_map.get(&i).ok_or(missing_header(ruby, &i.to_string()))?;
92
+ if mandatory_headers_list.contains(column_name) {
93
+ let index = index_of_header_in_mandatory_list(mandatory_headers_list.clone(), column_name.to_string()).unwrap();
94
+ columns.push(CsvMandatoryColumn::new(column_value.to_string(), index));
95
+ }
96
+ }
97
+ columns.sort_by(|a, b| a.index.cmp(&b.index));
98
+ let columns = columns.iter().map(|c| c.value.to_string()).collect::<StringRecord>();
99
+ records.push(columns);
100
+ }
101
+
102
+ Ok(records)
103
+ }
104
+
82
105
  fn process_datetime(ruby: &Ruby, ri: usize, date_value: NaiveDateTime, c: &str, column_name: &String) -> magnus::error::Result<String> {
83
106
  let maybe_correct = correct_datetime(c);
84
107
  if let Some(correct) = maybe_correct {
@@ -126,4 +149,16 @@ fn has_empty_first_col_skip_row(record: &StringRecord) -> bool {
126
149
 
127
150
  fn has_empty_row_skip(record: &StringRecord) -> bool {
128
151
  record.iter().all(|r| r.is_empty())
152
+ }
153
+
154
+ #[derive(Debug, PartialOrd, PartialEq, Eq, Ord)]
155
+ struct CsvMandatoryColumn {
156
+ value: String,
157
+ index: usize,
158
+ }
159
+
160
+ impl CsvMandatoryColumn {
161
+ fn new(value: String, index: usize) -> Self {
162
+ CsvMandatoryColumn { value, index }
163
+ }
129
164
  }
@@ -41,8 +41,8 @@ pub fn dedup(ruby: &Ruby, previous_csv_path: String,
41
41
  let csv_headers = headers_as_byte_record(mandatory_headers.clone());
42
42
  wtr.write_byte_record(&csv_headers).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
43
43
 
44
- let previous_mandatory_records = get_records(ruby, &mut previous_csv, &previous_headers, &mandatory_headers)?;
45
- let new_mandatory_records = get_records(ruby, &mut new_csv, &new_headers, &mandatory_headers)?;
44
+ let previous_mandatory_records = get_records(ruby, &mut previous_csv, previous_headers_list, &mandatory_headers)?;
45
+ let new_mandatory_records = get_records(ruby, &mut new_csv, new_headers_list, &mandatory_headers)?;
46
46
 
47
47
  for new_record in new_mandatory_records {
48
48
  if !previous_mandatory_records.contains(&new_record) {
@@ -55,7 +55,7 @@ pub fn dedup(ruby: &Ruby, previous_csv_path: String,
55
55
  Ok(())
56
56
  }
57
57
 
58
- fn get_records(ruby: &Ruby, csv: &mut Reader<File>, csv_headers: &StringRecord, headers: &Vec<String>) -> magnus::error::Result<Vec<StringRecord>> {
58
+ fn get_records(ruby: &Ruby, csv: &mut Reader<File>, csv_headers: Vec<String>, headers: &Vec<String>) -> magnus::error::Result<Vec<StringRecord>> {
59
59
  let header_map = create_header_map(&csv_headers);
60
60
  let mut records = vec![];
61
61
  for record in csv.records() {
@@ -1,4 +1,4 @@
1
- use std::collections::HashMap;
1
+ use std::collections::{HashMap, HashSet};
2
2
  use std::error::Error;
3
3
  use std::ffi::OsStr;
4
4
  use std::path::Path;
@@ -37,18 +37,29 @@ fn check_mandatory_headers(ruby: &Ruby, headers: &Vec<String>, mandatory_headers
37
37
  return Some(Err(magnus::Error::new(ruby.exception_standard_error(), format!("{} has no mandatory headers", message))));
38
38
  }
39
39
 
40
- if &csv_mandatory_headers != mandatory_headers {
41
- let missing_headers = mandatory_headers.into_iter().filter(|h| !csv_mandatory_headers.contains(h)).map(|v| v.to_string()).collect::<Vec<String>>();
40
+ let csv_mandatory_headers = csv_mandatory_headers.to_owned().clone();
41
+ let mandatory_headers = mandatory_headers.to_owned().clone();
42
+
43
+ let set1 = csv_mandatory_headers.iter().collect::<HashSet<_>>();
44
+ let set2 = mandatory_headers.iter().collect::<HashSet<_>>();
45
+ let difference = set2.difference(&set1).collect::<Vec<_>>();
46
+
47
+ if !difference.is_empty() {
48
+ let missing_headers = difference.iter().map(|h| h.to_string()).collect::<Vec<String>>();
42
49
  return Some(Err(magnus::Error::new(ruby.exception_standard_error(), format!("{} is missing mandatory headers: {}", message, missing_headers.join(", ")))));
43
50
  }
44
51
  None
45
52
  }
46
53
 
54
+ fn index_of_header_in_mandatory_list(mandatory_headers_list: Vec<String>, column_name: String) -> Option<usize> {
55
+ mandatory_headers_list.iter().position(|h| h.to_string() == column_name)
56
+ }
57
+
47
58
  fn filter_headers(csv_headers: &Vec<String>, expected_headers: &Vec<String>) -> Vec<String> {
48
59
  csv_headers.iter().map(|v| v.to_string()).filter(|h| expected_headers.contains(h)).collect::<Vec<String>>()
49
60
  }
50
61
 
51
- fn create_header_map(headers: &StringRecord) -> HashMap<String, usize> {
62
+ fn create_header_map(headers: &Vec<String>) -> HashMap<String, usize> {
52
63
  headers.iter().enumerate().map(|(i, h)| (h.to_string(), i)).collect()
53
64
  }
54
65
 
@@ -66,4 +77,5 @@ impl<P: AsRef<Path>> FileExtension for P {
66
77
 
67
78
  false
68
79
  }
69
- }
80
+ }
81
+
@@ -6,7 +6,7 @@ use calamine::{Data, open_workbook, Range, Reader, Xls};
6
6
  use chrono::{NaiveDateTime, Utc};
7
7
  use magnus::{RArray, Ruby};
8
8
 
9
- use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, missing_value};
9
+ use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, missing_value, index_of_header_in_mandatory_list};
10
10
 
11
11
  pub fn to_csv(ruby: &Ruby, xls_path: String,
12
12
  target_path: String,
@@ -31,17 +31,18 @@ pub fn to_csv(ruby: &Ruby, xls_path: String,
31
31
  if let Some(value) =
32
32
  check_mandatory_headers(ruby, &headers_list, &mandatory_headers, "csv") { return value; }
33
33
 
34
- let header_map: HashMap<String, usize> = headers.iter().enumerate().map(|(i, h)| (h.to_string(), i)).collect();
34
+ let header_map: HashMap<String, usize> = mandatory_headers.iter().enumerate().map(|(i, h)| (h.to_string(), i)).collect();
35
35
  let csv_out_file = File::create(target_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not create csv file: {}", target_path).as_str()))?;
36
36
  let mut dest = BufWriter::new(csv_out_file);
37
37
 
38
- write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers)
38
+ write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers, headers_list)
39
39
  }
40
40
 
41
41
  fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
42
- header_map: HashMap<String, usize>, exclusions: Vec<String>, mandatory_headers: Vec<String>) -> magnus::error::Result<()> {
43
- let n = range.get_size().1 - 1;
44
-
42
+ header_map: HashMap<String, usize>, exclusions: Vec<String>,
43
+ mandatory_headers: Vec<String>,
44
+ headers_list: Vec<String>) -> magnus::error::Result<()> {
45
+ let n = mandatory_headers.len() - 1;
45
46
  let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
46
47
  let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
47
48
  let start = header_map.get("Start").ok_or(missing_header(ruby, "Start"))?;
@@ -49,13 +50,15 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
49
50
  let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
50
51
  let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
51
52
 
52
- for (ri, r) in range.rows().enumerate() {
53
+ let mandatory_rows = get_mandatory_records(ruby, range, &headers_list, &mandatory_headers)?;
54
+
55
+ for (ri, r) in mandatory_rows.into_iter().enumerate() {
53
56
  let mut date_value = Utc::now().naive_utc();
54
57
 
55
- if skip_excluded_rows(&request_id, r, &exclusions) { continue; }
56
- if skip_empty_rows(r) { continue; }
57
- if skip_rows_with_no_request_id(&request_id, r) { continue; }
58
- if date_value_is_not_present(&date, r) {
58
+ if skip_excluded_rows(&request_id, &r, &exclusions) { continue; }
59
+ if skip_empty_rows(&r) { continue; }
60
+ if skip_rows_with_no_request_id(&request_id, &r) { continue; }
61
+ if date_value_is_not_present(&date, &r) {
59
62
  return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Date value is not present in row: {}", ri)));
60
63
  }
61
64
 
@@ -92,21 +95,42 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
92
95
  Ok(())
93
96
  }
94
97
 
95
- fn date_value_is_not_present(date: &usize, r: &[Data]) -> bool {
96
- r[*date] == Data::Empty
98
+ fn get_mandatory_records<'a>(ruby: &Ruby, range: &'a Range<Data>, csv_header_list: &Vec<String>, mandatory_headers_list: &Vec<String>) -> magnus::error::Result<Vec<Vec<&'a Data>>> {
99
+ let inverse_header_map: HashMap<usize, String> = csv_header_list.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
100
+
101
+ let mut records = vec![];
102
+ for row in range.rows() {
103
+ let mut columns = vec![];
104
+ for (i, column_value) in row.iter().enumerate() {
105
+ let column_name = inverse_header_map.get(&i).ok_or(missing_header(ruby, &i.to_string()))?;
106
+ if mandatory_headers_list.contains(column_name) {
107
+ let index = index_of_header_in_mandatory_list(mandatory_headers_list.clone(), column_name.to_string()).unwrap();
108
+ columns.push(XlsMandatoryColumn::new(column_value, index));
109
+ }
110
+ }
111
+ columns.sort_by(|a, b| a.index.cmp(&b.index));
112
+ let columns = columns.iter().map(|c| c.value).collect::<Vec<&Data>>();
113
+ records.push(columns);
114
+ }
115
+
116
+ Ok(records)
117
+ }
118
+
119
+ fn date_value_is_not_present(date: &usize, r: &Vec<&Data>) -> bool {
120
+ r[*date] == &Data::Empty
97
121
  }
98
122
 
99
- fn skip_excluded_rows(request_id: &usize, r: &[Data], exclusions: &Vec<String>) -> bool {
123
+ fn skip_excluded_rows(request_id: &usize, r: &Vec<&Data>, exclusions: &Vec<String>) -> bool {
100
124
  let value = r[*request_id].to_string();
101
125
  exclusions.contains(&value.to_string())
102
126
  }
103
127
 
104
- fn skip_empty_rows(r: &[Data]) -> bool {
105
- r.iter().all(|c| c == &Data::Empty)
128
+ fn skip_empty_rows(r: &Vec<&Data>) -> bool {
129
+ r.into_iter().all(|c| c == &&Data::Empty)
106
130
  }
107
131
 
108
- fn skip_rows_with_no_request_id(request_id: &usize, r: &[Data]) -> bool {
109
- r[*request_id] == Data::Empty
132
+ fn skip_rows_with_no_request_id(request_id: &usize, r: &Vec<&Data>) -> bool {
133
+ r[*request_id] == &Data::Empty
110
134
  }
111
135
 
112
136
  fn transform_time_to_datetime(t1: NaiveDateTime, t2: NaiveDateTime) -> NaiveDateTime {
@@ -126,4 +150,15 @@ fn clean_strings(s: &str) -> String {
126
150
  .replace("\r", " ")
127
151
  .replace("\"", "")
128
152
  .replace("'", "")
129
- }
153
+ }
154
+
155
+ struct XlsMandatoryColumn<'a> {
156
+ value: &'a Data,
157
+ index: usize,
158
+ }
159
+
160
+ impl<'a> XlsMandatoryColumn<'a> {
161
+ fn new(value: &'a Data, index: usize) -> Self {
162
+ XlsMandatoryColumn { value, index }
163
+ }
164
+ }
Binary file
Binary file
Binary file
Binary file
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module CsvUtils
4
- VERSION = '0.1.11'
4
+ VERSION = '0.1.13'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: patchwork_csv_utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.11
4
+ version: 0.1.13
5
5
  platform: x86_64-linux
6
6
  authors:
7
7
  - kingsley.hendrickse
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-09-03 00:00:00.000000000 Z
11
+ date: 2024-09-04 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Deduplication of CSV files and XLS to CSV conversion.
14
14
  email: