patchwork_csv_utils 0.1.10-x86_64-darwin → 0.1.12-x86_64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +3 -3
- data/ext/csv_utils/src/lib.rs +3 -3
- data/ext/csv_utils/src/utils/csv.rs +73 -22
- data/ext/csv_utils/src/utils/dedup.rs +57 -32
- data/ext/csv_utils/src/utils/mod.rs +46 -1
- data/ext/csv_utils/src/utils/xls.rs +70 -19
- data/lib/csv_utils/2.7/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.0/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.1/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.2/csv_utils.bundle +0 -0
- data/lib/csv_utils/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6641485d47a1ac2560273f63ee4666a84648b03c2fd493fdbf822f71b95b5058
|
4
|
+
data.tar.gz: 1c0d98407df7fbd7b1d50a16457cc672a6542bf854655ac402b546ff58fed05c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d0e184033fa2e6ed8418ccafd4f7747aa8111e490167031295efbc147f5d1a12f991ff3934af269e65872c3f1d2ef2140c11160ce8fb6143b52e4f790c4ad5cf
|
7
|
+
data.tar.gz: d767afc020114a7ae899ac63e775f20e3bdc4547aecaae86199d6ca87d08c982a32397447d2340a6fd4aa3872f77624f46219803eaa186ad2cd37a286992b194
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -13,9 +13,9 @@ gem install patchwork_csv_utils
|
|
13
13
|
|
14
14
|
```irb
|
15
15
|
require 'csv_utils'
|
16
|
-
CsvUtils.dedup('file1.csv', 'file2.csv', 'output.csv')
|
17
|
-
CsvUtils.to_csv('file1.xls', 'output_file1.csv', ['request_ids_to_skip']])
|
18
|
-
CsvUtils.transform_csv('file1.xls', 'output_file1.csv', ['request_ids_to_skip']])
|
16
|
+
CsvUtils.dedup('file1.csv', 'file2.csv', 'output.csv', ['mandatory_headers'])
|
17
|
+
CsvUtils.to_csv('file1.xls', 'output_file1.csv', ['request_ids_to_skip'], ['mandatory_headers'])
|
18
|
+
CsvUtils.transform_csv('file1.xls', 'output_file1.csv', ['request_ids_to_skip'], ['mandatory_headers'])
|
19
19
|
```
|
20
20
|
|
21
21
|
## Release
|
data/ext/csv_utils/src/lib.rs
CHANGED
@@ -8,8 +8,8 @@ pub mod utils;
|
|
8
8
|
#[magnus::init]
|
9
9
|
fn init() -> Result<(), magnus::Error> {
|
10
10
|
let module = define_module("CsvUtils")?;
|
11
|
-
module.define_singleton_method("dedup", function!(dedup,
|
12
|
-
module.define_singleton_method("to_csv", function!(to_csv,
|
13
|
-
module.define_singleton_method("transform_csv", function!(transform_csv,
|
11
|
+
module.define_singleton_method("dedup", function!(dedup, 4))?;
|
12
|
+
module.define_singleton_method("to_csv", function!(to_csv, 4))?;
|
13
|
+
module.define_singleton_method("transform_csv", function!(transform_csv, 4))?;
|
14
14
|
Ok(())
|
15
15
|
}
|
@@ -2,26 +2,35 @@ use std::collections::HashMap;
|
|
2
2
|
use std::fs::File;
|
3
3
|
|
4
4
|
use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Utc};
|
5
|
-
use csv::{StringRecord, Writer};
|
5
|
+
use csv::{Reader, StringRecord, Writer};
|
6
6
|
use magnus::{Error, RArray, Ruby};
|
7
7
|
|
8
|
-
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error};
|
8
|
+
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, create_header_map, missing_value, headers_as_byte_record, index_of_header_in_mandatory_list};
|
9
9
|
|
10
|
-
pub fn transform_csv(ruby: &Ruby, csv_path: String,
|
10
|
+
pub fn transform_csv(ruby: &Ruby, csv_path: String,
|
11
|
+
target_path: String, exclusions: RArray,
|
12
|
+
mandatory_headers: RArray, ) -> magnus::error::Result<()> {
|
11
13
|
if !csv_path.has_extension(&["csv"]) {
|
12
|
-
return Err(
|
14
|
+
return Err(Error::new(ruby.exception_standard_error(), "csv_path must be a csv file".to_string()));
|
13
15
|
}
|
14
16
|
|
15
17
|
let exclusions = RArray::to_vec(exclusions)?;
|
18
|
+
let mandatory_headers: Vec<String> = RArray::to_vec(mandatory_headers)?;
|
16
19
|
|
17
20
|
let csv_file = File::open(csv_path).map_err(|e| magnus_err(ruby, e, "csv_path"))?;
|
18
|
-
let mut csv:
|
21
|
+
let mut csv: Reader<File> = Reader::from_reader(csv_file);
|
19
22
|
let mut wtr = Writer::from_path(target_path).map_err(|e| magnus_err(ruby, e, "target_path"))?;
|
20
|
-
let headers = csv.headers().map_err(|e| magnus_err(ruby, e, "csv_path headers"))
|
21
|
-
let
|
22
|
-
let inverse_header_map: HashMap<usize, String> = headers.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
|
23
|
+
let headers = csv.headers().map_err(|e| magnus_err(ruby, e, "csv_path headers"))?.clone();
|
24
|
+
let headers_list: Vec<String> = headers.iter().map(|h| h.to_string()).collect();
|
23
25
|
|
24
|
-
|
26
|
+
if let Some(value) =
|
27
|
+
check_mandatory_headers(ruby, &headers_list, &mandatory_headers, "csv") { return value; }
|
28
|
+
|
29
|
+
let header_map: HashMap<String, usize> = create_header_map(&mandatory_headers);
|
30
|
+
let inverse_header_map: HashMap<usize, String> = mandatory_headers.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
|
31
|
+
|
32
|
+
let csv_headers = headers_as_byte_record(mandatory_headers.clone());
|
33
|
+
wtr.write_byte_record(&csv_headers).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
|
25
34
|
|
26
35
|
let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
|
27
36
|
let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
|
@@ -30,8 +39,9 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String, target_path: String, exclusi
|
|
30
39
|
let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
|
31
40
|
let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
|
32
41
|
|
33
|
-
|
34
|
-
|
42
|
+
let mandatory_records = get_mandatory_records(&ruby, &mut csv, &headers_list, &mandatory_headers)?;
|
43
|
+
|
44
|
+
for (ri, record) in mandatory_records.iter().enumerate() {
|
35
45
|
|
36
46
|
if skip_excluded_rows(request_id, &record, &exclusions) { continue; }
|
37
47
|
if has_empty_row_skip(&record) { continue; }
|
@@ -39,22 +49,29 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String, target_path: String, exclusi
|
|
39
49
|
|
40
50
|
let mut date_value = Utc::now().naive_utc();
|
41
51
|
|
42
|
-
let
|
43
|
-
|
52
|
+
let mut columns = vec![];
|
53
|
+
for (i, column) in mandatory_headers.iter().enumerate() {
|
54
|
+
let column_index = header_map.get(column).ok_or(missing_header(ruby, column))?;
|
55
|
+
let column_value = record.get(*column_index).ok_or(missing_value(ruby, column))?;
|
56
|
+
let column_value = column_value.trim_end();
|
57
|
+
|
44
58
|
if i == *date {
|
45
|
-
let current = string_to_datetime(
|
59
|
+
let current = string_to_datetime(column_value).ok_or(to_datetime_error(ruby, column_value, ri, "Date"))?;
|
46
60
|
date_value = current;
|
47
|
-
|
61
|
+
columns.push(current.to_string());
|
48
62
|
} else if i == *start || i == *end || i == *actual_start || i == *actual_end {
|
49
|
-
if
|
50
|
-
|
51
|
-
|
63
|
+
if column_value.is_empty() {
|
64
|
+
columns.push(column_value.to_string());
|
65
|
+
} else {
|
66
|
+
let column_name = get_column_name(&inverse_header_map, &i);
|
67
|
+
let current = process_datetime(ruby, ri, date_value, column_value, &column_name)?;
|
68
|
+
columns.push(current);
|
69
|
+
}
|
52
70
|
} else {
|
53
|
-
|
71
|
+
columns.push(column_value.to_string());
|
54
72
|
}
|
55
|
-
}
|
56
|
-
|
57
|
-
let record = record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
|
73
|
+
}
|
74
|
+
let record = columns.into_iter().collect::<StringRecord>();
|
58
75
|
wtr.write_byte_record(record.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
|
59
76
|
}
|
60
77
|
|
@@ -63,6 +80,28 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String, target_path: String, exclusi
|
|
63
80
|
Ok(())
|
64
81
|
}
|
65
82
|
|
83
|
+
fn get_mandatory_records(ruby: &Ruby, csv: &mut Reader<File>, csv_header_list: &Vec<String>, mandatory_headers_list: &Vec<String>) -> magnus::error::Result<Vec<StringRecord>> {
|
84
|
+
let inverse_header_map: HashMap<usize, String> = csv_header_list.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
|
85
|
+
|
86
|
+
let mut records = vec![];
|
87
|
+
for row in csv.records() {
|
88
|
+
let row = row.map_err(|e| magnus_err(ruby, e, "record error"))?;
|
89
|
+
let mut columns = vec![];
|
90
|
+
for (i, column_value) in row.iter().enumerate() {
|
91
|
+
let column_name = inverse_header_map.get(&i).ok_or(missing_header(ruby, &i.to_string()))?;
|
92
|
+
if mandatory_headers_list.contains(column_name) {
|
93
|
+
let index = index_of_header_in_mandatory_list(mandatory_headers_list.clone(), column_name.to_string()).unwrap();
|
94
|
+
columns.push(CsvMandatoryColumn::new(column_value.to_string(), index));
|
95
|
+
}
|
96
|
+
}
|
97
|
+
columns.sort_by(|a, b| a.index.cmp(&b.index));
|
98
|
+
let columns = columns.iter().map(|c| c.value.to_string()).collect::<StringRecord>();
|
99
|
+
records.push(columns);
|
100
|
+
}
|
101
|
+
|
102
|
+
Ok(records)
|
103
|
+
}
|
104
|
+
|
66
105
|
fn process_datetime(ruby: &Ruby, ri: usize, date_value: NaiveDateTime, c: &str, column_name: &String) -> magnus::error::Result<String> {
|
67
106
|
let maybe_correct = correct_datetime(c);
|
68
107
|
if let Some(correct) = maybe_correct {
|
@@ -110,4 +149,16 @@ fn has_empty_first_col_skip_row(record: &StringRecord) -> bool {
|
|
110
149
|
|
111
150
|
fn has_empty_row_skip(record: &StringRecord) -> bool {
|
112
151
|
record.iter().all(|r| r.is_empty())
|
152
|
+
}
|
153
|
+
|
154
|
+
/// A CSV cell value together with the output position it should be sorted into.
#[derive(Debug, PartialOrd, PartialEq, Eq, Ord)]
struct CsvMandatoryColumn {
    // The cell's text.
    value: String,
    // Position used to order the cell within the projected record.
    index: usize,
}

impl CsvMandatoryColumn {
    /// Pairs `value` with its target `index`.
    fn new(value: String, index: usize) -> Self {
        Self { value, index }
    }
}
|
@@ -1,54 +1,51 @@
|
|
1
1
|
use std::fs::File;
|
2
2
|
|
3
|
-
use csv::{StringRecord, Writer};
|
4
|
-
use magnus::Ruby;
|
3
|
+
use csv::{Reader, StringRecord, Writer};
|
4
|
+
use magnus::{RArray, Ruby};
|
5
5
|
|
6
|
-
use crate::utils::{FileExtension, magnus_err};
|
6
|
+
use crate::utils::{FileExtension, magnus_err, check_mandatory_headers, create_header_map, missing_header, missing_value, headers_as_byte_record};
|
7
7
|
|
8
|
-
pub fn dedup(ruby: &Ruby, previous_csv_path: String,
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
8
|
+
pub fn dedup(ruby: &Ruby, previous_csv_path: String,
|
9
|
+
new_csv_path: String,
|
10
|
+
target_path: String,
|
11
|
+
mandatory_headers: RArray,
|
12
|
+
) -> magnus::error::Result<()> {
|
13
|
+
if let Some(value) =
|
14
|
+
check_file_extension(ruby, &previous_csv_path, "previous_csv_path") { return value; }
|
15
|
+
|
16
|
+
if let Some(value) =
|
17
|
+
check_file_extension(ruby, &new_csv_path, "new_csv_path") { return value; }
|
15
18
|
|
16
19
|
let csv1 = File::open(previous_csv_path).map_err(|e| magnus_err(ruby, e, "previous_csv_path"))?;
|
17
20
|
let csv2 = File::open(new_csv_path).map_err(|e| magnus_err(ruby, e, "new_csv_path"))?;
|
18
21
|
|
22
|
+
let mandatory_headers: Vec<String> = RArray::to_vec(mandatory_headers)?;
|
23
|
+
|
19
24
|
let mut previous_csv: csv::Reader<File> = csv::Reader::from_reader(csv1);
|
20
25
|
let mut new_csv: csv::Reader<File> = csv::Reader::from_reader(csv2);
|
21
26
|
|
22
27
|
let mut wtr = Writer::from_path(target_path).map_err(|e| magnus_err(ruby, e, "target_path"))?;
|
23
28
|
|
24
|
-
let previous_headers = previous_csv.headers().map_err(|e| magnus_err(ruby, e, "
|
25
|
-
let
|
29
|
+
let previous_headers = previous_csv.headers().map_err(|e| magnus_err(ruby, e, "previous_csv"))?.clone();
|
30
|
+
let previous_headers_list : Vec<String> = previous_headers.iter().map(|h| h.to_string()).collect();
|
31
|
+
let new_headers = new_csv.headers().map_err(|e| magnus_err(ruby, e, "new_csv"))?.clone();
|
32
|
+
let new_headers_list : Vec<String> = new_headers.iter().map(|h| h.to_string()).collect();
|
26
33
|
|
27
|
-
if previous_headers != new_headers {
|
28
|
-
return Err(magnus::Error::new(ruby.exception_standard_error(), "headers of both csv files must be the same".to_string()));
|
29
|
-
}
|
30
34
|
|
31
|
-
|
35
|
+
if let Some(value) =
|
36
|
+
check_mandatory_headers(ruby, &previous_headers_list, &mandatory_headers, "previous_csv") { return value; }
|
32
37
|
|
33
|
-
let
|
34
|
-
|
35
|
-
let previous_record = previous_record.map_err(|e| magnus_err(ruby, e, "previous_record"))?;
|
38
|
+
if let Some(value) =
|
39
|
+
check_mandatory_headers(ruby, &new_headers_list, &mandatory_headers, "new_csv") { return value; }
|
36
40
|
|
37
|
-
|
38
|
-
|
41
|
+
let csv_headers = headers_as_byte_record(mandatory_headers.clone());
|
42
|
+
wtr.write_byte_record(&csv_headers).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
|
39
43
|
|
40
|
-
|
41
|
-
|
42
|
-
}
|
43
|
-
|
44
|
-
for new_record in new_csv.records() {
|
45
|
-
let new_record = new_record.map_err(|e| magnus_err(ruby, e, "new_record"))?;
|
44
|
+
let previous_mandatory_records = get_records(ruby, &mut previous_csv, previous_headers_list, &mandatory_headers)?;
|
45
|
+
let new_mandatory_records = get_records(ruby, &mut new_csv, new_headers_list, &mandatory_headers)?;
|
46
46
|
|
47
|
-
|
48
|
-
if
|
49
|
-
|
50
|
-
let new_record = new_record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
|
51
|
-
if !previous_records.contains(&new_record) {
|
47
|
+
for new_record in new_mandatory_records {
|
48
|
+
if !previous_mandatory_records.contains(&new_record) {
|
52
49
|
wtr.write_byte_record(new_record.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
|
53
50
|
}
|
54
51
|
}
|
@@ -58,6 +55,34 @@ pub fn dedup(ruby: &Ruby, previous_csv_path: String, new_csv_path: String, targe
|
|
58
55
|
Ok(())
|
59
56
|
}
|
60
57
|
|
58
|
+
fn get_records(ruby: &Ruby, csv: &mut Reader<File>, csv_headers: Vec<String>, headers: &Vec<String>) -> magnus::error::Result<Vec<StringRecord>> {
|
59
|
+
let header_map = create_header_map(&csv_headers);
|
60
|
+
let mut records = vec![];
|
61
|
+
for record in csv.records() {
|
62
|
+
let record = record.map_err(|e| magnus_err(ruby, e, "record error"))?;
|
63
|
+
|
64
|
+
if has_empty_row_skip(&record) { continue; }
|
65
|
+
if has_empty_first_col_skip_row(&record) { continue; }
|
66
|
+
|
67
|
+
let mut columns = vec![];
|
68
|
+
for column in headers.iter() {
|
69
|
+
let column_index = header_map.get(column).ok_or(missing_header(ruby, column))?;
|
70
|
+
let column_value = record.get(*column_index).ok_or(missing_value(ruby, column))?;
|
71
|
+
columns.push(column_value.trim_end());
|
72
|
+
}
|
73
|
+
let columns = columns.into_iter().collect::<StringRecord>();
|
74
|
+
records.push(columns);
|
75
|
+
}
|
76
|
+
Ok(records)
|
77
|
+
}
|
78
|
+
|
79
|
+
fn check_file_extension(ruby: &Ruby, csv_path: &String, message: &str) -> Option<magnus::error::Result<()>> {
|
80
|
+
if !csv_path.has_extension(&["csv"]) {
|
81
|
+
return Some(Err(magnus::Error::new(ruby.exception_standard_error(), format!("{} must be a csv file", message))));
|
82
|
+
}
|
83
|
+
None
|
84
|
+
}
|
85
|
+
|
61
86
|
fn has_empty_first_col_skip_row(previous_record: &StringRecord) -> bool {
|
62
87
|
previous_record[0].is_empty()
|
63
88
|
}
|
@@ -1,6 +1,8 @@
|
|
1
|
+
use std::collections::{HashMap, HashSet};
|
1
2
|
use std::error::Error;
|
2
3
|
use std::ffi::OsStr;
|
3
4
|
use std::path::Path;
|
5
|
+
use ::csv::{ByteRecord, StringRecord};
|
4
6
|
use magnus::Ruby;
|
5
7
|
|
6
8
|
pub mod csv;
|
@@ -11,6 +13,15 @@ fn missing_header(ruby: &Ruby, header: &str) -> magnus::Error {
|
|
11
13
|
magnus::Error::new(ruby.exception_standard_error(), format!("Missing '{}' header", header))
|
12
14
|
}
|
13
15
|
|
16
|
+
fn missing_value(ruby: &Ruby, header: &str) -> magnus::Error {
|
17
|
+
magnus::Error::new(ruby.exception_standard_error(), format!("Missing value for '{}' header", header))
|
18
|
+
}
|
19
|
+
|
20
|
+
fn headers_as_byte_record(headers: Vec<String>) -> ByteRecord {
|
21
|
+
let string_record = headers.into_iter().collect::<StringRecord>();
|
22
|
+
string_record.as_byte_record().clone()
|
23
|
+
}
|
24
|
+
|
14
25
|
fn magnus_err<E: Error>(ruby: &Ruby, e: E, msg: &str) -> magnus::Error {
|
15
26
|
magnus::Error::new(ruby.exception_standard_error(), format!("{}: {}", msg, e.to_string()))
|
16
27
|
}
|
@@ -19,6 +30,39 @@ fn to_datetime_error(ruby: &Ruby, value: &str, row: usize, col: &str) -> magnus:
|
|
19
30
|
magnus::Error::new(ruby.exception_standard_error(), format!("Could not parse datetime '{}', row: {}, col: {}", value, row, col))
|
20
31
|
}
|
21
32
|
|
33
|
+
fn check_mandatory_headers(ruby: &Ruby, headers: &Vec<String>, mandatory_headers: &Vec<String>, message: &str) -> Option<magnus::error::Result<()>> {
|
34
|
+
let csv_mandatory_headers = filter_headers(headers, mandatory_headers);
|
35
|
+
|
36
|
+
if csv_mandatory_headers.is_empty() {
|
37
|
+
return Some(Err(magnus::Error::new(ruby.exception_standard_error(), format!("{} has no mandatory headers", message))));
|
38
|
+
}
|
39
|
+
|
40
|
+
let csv_mandatory_headers = csv_mandatory_headers.to_owned().clone();
|
41
|
+
let mandatory_headers = mandatory_headers.to_owned().clone();
|
42
|
+
|
43
|
+
let set1 = csv_mandatory_headers.iter().collect::<HashSet<_>>();
|
44
|
+
let set2 = mandatory_headers.iter().collect::<HashSet<_>>();
|
45
|
+
let difference = set2.difference(&set1).collect::<Vec<_>>();
|
46
|
+
|
47
|
+
if !difference.is_empty() {
|
48
|
+
let missing_headers = difference.iter().map(|h| h.to_string()).collect::<Vec<String>>();
|
49
|
+
return Some(Err(magnus::Error::new(ruby.exception_standard_error(), format!("{} is missing mandatory headers: {}", message, missing_headers.join(", ")))));
|
50
|
+
}
|
51
|
+
None
|
52
|
+
}
|
53
|
+
|
54
|
+
/// Returns the position of the first entry of `mandatory_headers_list` equal
/// to `column_name`, or `None` when there is no such entry.
fn index_of_header_in_mandatory_list(mandatory_headers_list: Vec<String>, column_name: String) -> Option<usize> {
    // Compare in place; converting each entry to a new `String` first would
    // allocate once per element.
    mandatory_headers_list.iter().position(|header| *header == column_name)
}
|
57
|
+
|
58
|
+
/// Returns the entries of `csv_headers` that also appear in
/// `expected_headers`, preserving the order (and any duplicates) of
/// `csv_headers`.
fn filter_headers(csv_headers: &Vec<String>, expected_headers: &Vec<String>) -> Vec<String> {
    // Filter on borrowed names and clone only the ones that are kept.
    csv_headers
        .iter()
        .filter(|header| expected_headers.contains(*header))
        .cloned()
        .collect()
}
|
61
|
+
|
62
|
+
/// Maps each header name to its position in `headers`.
///
/// When a name occurs more than once, the position of its last occurrence is
/// the one stored.
fn create_header_map(headers: &Vec<String>) -> HashMap<String, usize> {
    let mut positions = HashMap::with_capacity(headers.len());
    for (position, name) in headers.iter().enumerate() {
        positions.insert(name.clone(), position);
    }
    positions
}
|
65
|
+
|
22
66
|
pub trait FileExtension {
|
23
67
|
fn has_extension<S: AsRef<str>>(&self, extensions: &[S]) -> bool;
|
24
68
|
}
|
@@ -33,4 +77,5 @@ impl<P: AsRef<Path>> FileExtension for P {
|
|
33
77
|
|
34
78
|
false
|
35
79
|
}
|
36
|
-
}
|
80
|
+
}
|
81
|
+
|
@@ -6,14 +6,19 @@ use calamine::{Data, open_workbook, Range, Reader, Xls};
|
|
6
6
|
use chrono::{NaiveDateTime, Utc};
|
7
7
|
use magnus::{RArray, Ruby};
|
8
8
|
|
9
|
-
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error};
|
9
|
+
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, missing_value, index_of_header_in_mandatory_list};
|
10
10
|
|
11
|
-
pub fn to_csv(ruby: &Ruby, xls_path: String,
|
11
|
+
pub fn to_csv(ruby: &Ruby, xls_path: String,
|
12
|
+
target_path: String,
|
13
|
+
exclusions: RArray,
|
14
|
+
mandatory_headers: RArray
|
15
|
+
) -> magnus::error::Result<()> {
|
12
16
|
if !xls_path.has_extension(&["xls"]) {
|
13
17
|
return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls file".to_string()));
|
14
18
|
}
|
15
19
|
|
16
20
|
let exclusions = RArray::to_vec(exclusions)?;
|
21
|
+
let mandatory_headers: Vec<String> = RArray::to_vec(mandatory_headers)?;
|
17
22
|
|
18
23
|
let mut workbook: Xls<_> = open_workbook(xls_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not open xls: {}", xls_path).as_str()))?;
|
19
24
|
let range = workbook.worksheet_range_at(0)
|
@@ -21,14 +26,22 @@ pub fn to_csv(ruby: &Ruby, xls_path: String, target_path: String, exclusions: RA
|
|
21
26
|
.and_then(|r| r.map_err(|e| magnus_err(ruby, e, "could not read worksheet range")))?;
|
22
27
|
|
23
28
|
let headers = range.headers().ok_or(magnus::Error::new(ruby.exception_standard_error(), "no headers found in xls".to_string()))?;
|
24
|
-
let
|
29
|
+
let headers_list : Vec<String> = headers.iter().map(|h| h.to_string()).collect();
|
30
|
+
|
31
|
+
if let Some(value) =
|
32
|
+
check_mandatory_headers(ruby, &headers_list, &mandatory_headers, "csv") { return value; }
|
33
|
+
|
34
|
+
let header_map: HashMap<String, usize> = mandatory_headers.iter().enumerate().map(|(i, h)| (h.to_string(), i)).collect();
|
25
35
|
let csv_out_file = File::create(target_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not create csv file: {}", target_path).as_str()))?;
|
26
36
|
let mut dest = BufWriter::new(csv_out_file);
|
27
37
|
|
28
|
-
write_csv(ruby, &mut dest, &range, header_map, exclusions)
|
38
|
+
write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers, headers_list)
|
29
39
|
}
|
30
40
|
|
31
|
-
fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
41
|
+
fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
42
|
+
header_map: HashMap<String, usize>, exclusions: Vec<String>,
|
43
|
+
mandatory_headers: Vec<String>,
|
44
|
+
headers_list: Vec<String>) -> magnus::error::Result<()> {
|
32
45
|
let n = range.get_size().1 - 1;
|
33
46
|
|
34
47
|
let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
|
@@ -38,17 +51,23 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_ma
|
|
38
51
|
let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
|
39
52
|
let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
|
40
53
|
|
41
|
-
|
54
|
+
let mandatory_rows = get_mandatory_records(ruby, range, &headers_list, &mandatory_headers)?;
|
55
|
+
|
56
|
+
for (ri, r) in mandatory_rows.into_iter().enumerate() {
|
42
57
|
let mut date_value = Utc::now().naive_utc();
|
43
58
|
|
44
|
-
if skip_excluded_rows(&request_id, r, &exclusions) { continue; }
|
45
|
-
if skip_empty_rows(r) { continue; }
|
46
|
-
if skip_rows_with_no_request_id(&request_id, r) { continue; }
|
47
|
-
if date_value_is_not_present(&date, r) {
|
59
|
+
if skip_excluded_rows(&request_id, &r, &exclusions) { continue; }
|
60
|
+
if skip_empty_rows(&r) { continue; }
|
61
|
+
if skip_rows_with_no_request_id(&request_id, &r) { continue; }
|
62
|
+
if date_value_is_not_present(&date, &r) {
|
48
63
|
return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Date value is not present in row: {}", ri)));
|
49
64
|
}
|
50
65
|
|
51
|
-
for (i, c) in
|
66
|
+
for (i, c) in mandatory_headers.iter().enumerate() {
|
67
|
+
|
68
|
+
let column_index = header_map.get(c).ok_or(missing_header(ruby, c))?;
|
69
|
+
let c = r.get(*column_index).ok_or(missing_value(ruby, c))?;
|
70
|
+
|
52
71
|
match *c {
|
53
72
|
Data::Empty => Ok(()),
|
54
73
|
Data::String(ref s) | Data::DateTimeIso(ref s) | Data::DurationIso(ref s) => {
|
@@ -77,21 +96,42 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_ma
|
|
77
96
|
Ok(())
|
78
97
|
}
|
79
98
|
|
80
|
-
fn
|
81
|
-
|
99
|
+
fn get_mandatory_records<'a>(ruby: &Ruby, range: &'a Range<Data>, csv_header_list: &Vec<String>, mandatory_headers_list: &Vec<String>) -> magnus::error::Result<Vec<Vec<&'a Data>>> {
|
100
|
+
let inverse_header_map: HashMap<usize, String> = csv_header_list.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
|
101
|
+
|
102
|
+
let mut records = vec![];
|
103
|
+
for row in range.rows() {
|
104
|
+
let mut columns = vec![];
|
105
|
+
for (i, column_value) in row.iter().enumerate() {
|
106
|
+
let column_name = inverse_header_map.get(&i).ok_or(missing_header(ruby, &i.to_string()))?;
|
107
|
+
if mandatory_headers_list.contains(column_name) {
|
108
|
+
let index = index_of_header_in_mandatory_list(mandatory_headers_list.clone(), column_name.to_string()).unwrap();
|
109
|
+
columns.push(XlsMandatoryColumn::new(column_value, index));
|
110
|
+
}
|
111
|
+
}
|
112
|
+
columns.sort_by(|a, b| a.index.cmp(&b.index));
|
113
|
+
let columns = columns.iter().map(|c| c.value).collect::<Vec<&Data>>();
|
114
|
+
records.push(columns);
|
115
|
+
}
|
116
|
+
|
117
|
+
Ok(records)
|
82
118
|
}
|
83
119
|
|
84
|
-
fn
|
120
|
+
fn date_value_is_not_present(date: &usize, r: &Vec<&Data>) -> bool {
|
121
|
+
r[*date] == &Data::Empty
|
122
|
+
}
|
123
|
+
|
124
|
+
fn skip_excluded_rows(request_id: &usize, r: &Vec<&Data>, exclusions: &Vec<String>) -> bool {
|
85
125
|
let value = r[*request_id].to_string();
|
86
126
|
exclusions.contains(&value.to_string())
|
87
127
|
}
|
88
128
|
|
89
|
-
fn skip_empty_rows(r: &
|
90
|
-
r.
|
129
|
+
fn skip_empty_rows(r: &Vec<&Data>) -> bool {
|
130
|
+
r.into_iter().all(|c| c == &&Data::Empty)
|
91
131
|
}
|
92
132
|
|
93
|
-
fn skip_rows_with_no_request_id(request_id: &usize, r: &
|
94
|
-
r[*request_id] == Data::Empty
|
133
|
+
fn skip_rows_with_no_request_id(request_id: &usize, r: &Vec<&Data>) -> bool {
|
134
|
+
r[*request_id] == &Data::Empty
|
95
135
|
}
|
96
136
|
|
97
137
|
fn transform_time_to_datetime(t1: NaiveDateTime, t2: NaiveDateTime) -> NaiveDateTime {
|
@@ -111,4 +151,15 @@ fn clean_strings(s: &str) -> String {
|
|
111
151
|
.replace("\r", " ")
|
112
152
|
.replace("\"", "")
|
113
153
|
.replace("'", "")
|
114
|
-
}
|
154
|
+
}
|
155
|
+
|
156
|
+
struct XlsMandatoryColumn<'a> {
|
157
|
+
value: &'a Data,
|
158
|
+
index: usize,
|
159
|
+
}
|
160
|
+
|
161
|
+
impl<'a> XlsMandatoryColumn<'a> {
|
162
|
+
fn new(value: &'a Data, index: usize) -> Self {
|
163
|
+
XlsMandatoryColumn { value, index }
|
164
|
+
}
|
165
|
+
}
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/csv_utils/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: patchwork_csv_utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.12
|
5
5
|
platform: x86_64-darwin
|
6
6
|
authors:
|
7
7
|
- kingsley.hendrickse
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-09-03 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Deduplication of CSV files and XLS to CSV conversion.
|
14
14
|
email:
|