patchwork_csv_utils 0.1.11-arm64-darwin → 0.1.13-arm64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/ext/csv_utils/src/utils/csv.rs +42 -7
- data/ext/csv_utils/src/utils/dedup.rs +3 -3
- data/ext/csv_utils/src/utils/mod.rs +17 -5
- data/ext/csv_utils/src/utils/xls.rs +54 -19
- data/lib/csv_utils/2.7/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.0/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.1/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.2/csv_utils.bundle +0 -0
- data/lib/csv_utils/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 54fc4d5750dd83ab95eebe09a6270e8b38e409cc0cf9ed2beb5fb0eeddd096ec
|
4
|
+
data.tar.gz: 7dc07de1618c260cb80c2feee25ace49fe9fba817c63b18f3dd2aa0fd0a4d2a2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 28050396ee306f0512d7f5c689b0a098f2df0fcacc0d6eda085eb070a6cfe2f8097cadb4508b2bc36a1a601f3f73755c6f528bee9b424f89c59af709b497b785
|
7
|
+
data.tar.gz: 7907c50d50888bcac36868296616ae741e9a5d8855ec9007733b45be2e227f1f96a4248df710e52897f97d197bfda0001cc5eb44e3e7954d5d31ae3e270c8dfd
|
data/Gemfile.lock
CHANGED
data/ext/csv_utils/src/utils/csv.rs
CHANGED
@@ -5,11 +5,11 @@ use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Utc};
|
|
5
5
|
use csv::{Reader, StringRecord, Writer};
|
6
6
|
use magnus::{Error, RArray, Ruby};
|
7
7
|
|
8
|
-
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, create_header_map, missing_value, headers_as_byte_record};
|
8
|
+
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, create_header_map, missing_value, headers_as_byte_record, index_of_header_in_mandatory_list};
|
9
9
|
|
10
10
|
pub fn transform_csv(ruby: &Ruby, csv_path: String,
|
11
11
|
target_path: String, exclusions: RArray,
|
12
|
-
mandatory_headers: RArray,) -> magnus::error::Result<()> {
|
12
|
+
mandatory_headers: RArray, ) -> magnus::error::Result<()> {
|
13
13
|
if !csv_path.has_extension(&["csv"]) {
|
14
14
|
return Err(Error::new(ruby.exception_standard_error(), "csv_path must be a csv file".to_string()));
|
15
15
|
}
|
@@ -21,13 +21,13 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String,
|
|
21
21
|
let mut csv: Reader<File> = Reader::from_reader(csv_file);
|
22
22
|
let mut wtr = Writer::from_path(target_path).map_err(|e| magnus_err(ruby, e, "target_path"))?;
|
23
23
|
let headers = csv.headers().map_err(|e| magnus_err(ruby, e, "csv_path headers"))?.clone();
|
24
|
-
let headers_list
|
24
|
+
let headers_list: Vec<String> = headers.iter().map(|h| h.to_string()).collect();
|
25
25
|
|
26
26
|
if let Some(value) =
|
27
27
|
check_mandatory_headers(ruby, &headers_list, &mandatory_headers, "csv") { return value; }
|
28
28
|
|
29
|
-
let header_map: HashMap<String, usize> = create_header_map(&
|
30
|
-
let inverse_header_map: HashMap<usize, String> =
|
29
|
+
let header_map: HashMap<String, usize> = create_header_map(&mandatory_headers);
|
30
|
+
let inverse_header_map: HashMap<usize, String> = mandatory_headers.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
|
31
31
|
|
32
32
|
let csv_headers = headers_as_byte_record(mandatory_headers.clone());
|
33
33
|
wtr.write_byte_record(&csv_headers).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
|
@@ -39,8 +39,9 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String,
|
|
39
39
|
let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
|
40
40
|
let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
|
41
41
|
|
42
|
-
|
43
|
-
|
42
|
+
let mandatory_records = get_mandatory_records(&ruby, &mut csv, &headers_list, &mandatory_headers)?;
|
43
|
+
|
44
|
+
for (ri, record) in mandatory_records.iter().enumerate() {
|
44
45
|
|
45
46
|
if skip_excluded_rows(request_id, &record, &exclusions) { continue; }
|
46
47
|
if has_empty_row_skip(&record) { continue; }
|
@@ -79,6 +80,28 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String,
|
|
79
80
|
Ok(())
|
80
81
|
}
|
81
82
|
|
83
|
+
fn get_mandatory_records(ruby: &Ruby, csv: &mut Reader<File>, csv_header_list: &Vec<String>, mandatory_headers_list: &Vec<String>) -> magnus::error::Result<Vec<StringRecord>> {
|
84
|
+
let inverse_header_map: HashMap<usize, String> = csv_header_list.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
|
85
|
+
|
86
|
+
let mut records = vec![];
|
87
|
+
for row in csv.records() {
|
88
|
+
let row = row.map_err(|e| magnus_err(ruby, e, "record error"))?;
|
89
|
+
let mut columns = vec![];
|
90
|
+
for (i, column_value) in row.iter().enumerate() {
|
91
|
+
let column_name = inverse_header_map.get(&i).ok_or(missing_header(ruby, &i.to_string()))?;
|
92
|
+
if mandatory_headers_list.contains(column_name) {
|
93
|
+
let index = index_of_header_in_mandatory_list(mandatory_headers_list.clone(), column_name.to_string()).unwrap();
|
94
|
+
columns.push(CsvMandatoryColumn::new(column_value.to_string(), index));
|
95
|
+
}
|
96
|
+
}
|
97
|
+
columns.sort_by(|a, b| a.index.cmp(&b.index));
|
98
|
+
let columns = columns.iter().map(|c| c.value.to_string()).collect::<StringRecord>();
|
99
|
+
records.push(columns);
|
100
|
+
}
|
101
|
+
|
102
|
+
Ok(records)
|
103
|
+
}
|
104
|
+
|
82
105
|
fn process_datetime(ruby: &Ruby, ri: usize, date_value: NaiveDateTime, c: &str, column_name: &String) -> magnus::error::Result<String> {
|
83
106
|
let maybe_correct = correct_datetime(c);
|
84
107
|
if let Some(correct) = maybe_correct {
|
@@ -126,4 +149,16 @@ fn has_empty_first_col_skip_row(record: &StringRecord) -> bool {
|
|
126
149
|
|
127
150
|
fn has_empty_row_skip(record: &StringRecord) -> bool {
|
128
151
|
record.iter().all(|r| r.is_empty())
|
152
|
+
}
|
153
|
+
|
154
|
+
#[derive(Debug, PartialOrd, PartialEq, Eq, Ord)]
|
155
|
+
struct CsvMandatoryColumn {
|
156
|
+
value: String,
|
157
|
+
index: usize,
|
158
|
+
}
|
159
|
+
|
160
|
+
impl CsvMandatoryColumn {
|
161
|
+
fn new(value: String, index: usize) -> Self {
|
162
|
+
CsvMandatoryColumn { value, index }
|
163
|
+
}
|
129
164
|
}
|
data/ext/csv_utils/src/utils/dedup.rs
CHANGED
@@ -41,8 +41,8 @@ pub fn dedup(ruby: &Ruby, previous_csv_path: String,
|
|
41
41
|
let csv_headers = headers_as_byte_record(mandatory_headers.clone());
|
42
42
|
wtr.write_byte_record(&csv_headers).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
|
43
43
|
|
44
|
-
let previous_mandatory_records = get_records(ruby, &mut previous_csv,
|
45
|
-
let new_mandatory_records = get_records(ruby, &mut new_csv,
|
44
|
+
let previous_mandatory_records = get_records(ruby, &mut previous_csv, previous_headers_list, &mandatory_headers)?;
|
45
|
+
let new_mandatory_records = get_records(ruby, &mut new_csv, new_headers_list, &mandatory_headers)?;
|
46
46
|
|
47
47
|
for new_record in new_mandatory_records {
|
48
48
|
if !previous_mandatory_records.contains(&new_record) {
|
@@ -55,7 +55,7 @@ pub fn dedup(ruby: &Ruby, previous_csv_path: String,
|
|
55
55
|
Ok(())
|
56
56
|
}
|
57
57
|
|
58
|
-
fn get_records(ruby: &Ruby, csv: &mut Reader<File>, csv_headers:
|
58
|
+
fn get_records(ruby: &Ruby, csv: &mut Reader<File>, csv_headers: Vec<String>, headers: &Vec<String>) -> magnus::error::Result<Vec<StringRecord>> {
|
59
59
|
let header_map = create_header_map(&csv_headers);
|
60
60
|
let mut records = vec![];
|
61
61
|
for record in csv.records() {
|
data/ext/csv_utils/src/utils/mod.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use std::collections::HashMap;
|
1
|
+
use std::collections::{HashMap, HashSet};
|
2
2
|
use std::error::Error;
|
3
3
|
use std::ffi::OsStr;
|
4
4
|
use std::path::Path;
|
@@ -37,18 +37,29 @@ fn check_mandatory_headers(ruby: &Ruby, headers: &Vec<String>, mandatory_headers
|
|
37
37
|
return Some(Err(magnus::Error::new(ruby.exception_standard_error(), format!("{} has no mandatory headers", message))));
|
38
38
|
}
|
39
39
|
|
40
|
-
|
41
|
-
|
40
|
+
let csv_mandatory_headers = csv_mandatory_headers.to_owned().clone();
|
41
|
+
let mandatory_headers = mandatory_headers.to_owned().clone();
|
42
|
+
|
43
|
+
let set1 = csv_mandatory_headers.iter().collect::<HashSet<_>>();
|
44
|
+
let set2 = mandatory_headers.iter().collect::<HashSet<_>>();
|
45
|
+
let difference = set2.difference(&set1).collect::<Vec<_>>();
|
46
|
+
|
47
|
+
if !difference.is_empty() {
|
48
|
+
let missing_headers = difference.iter().map(|h| h.to_string()).collect::<Vec<String>>();
|
42
49
|
return Some(Err(magnus::Error::new(ruby.exception_standard_error(), format!("{} is missing mandatory headers: {}", message, missing_headers.join(", ")))));
|
43
50
|
}
|
44
51
|
None
|
45
52
|
}
|
46
53
|
|
54
|
+
fn index_of_header_in_mandatory_list(mandatory_headers_list: Vec<String>, column_name: String) -> Option<usize> {
|
55
|
+
mandatory_headers_list.iter().position(|h| h.to_string() == column_name)
|
56
|
+
}
|
57
|
+
|
47
58
|
fn filter_headers(csv_headers: &Vec<String>, expected_headers: &Vec<String>) -> Vec<String> {
|
48
59
|
csv_headers.iter().map(|v| v.to_string()).filter(|h| expected_headers.contains(h)).collect::<Vec<String>>()
|
49
60
|
}
|
50
61
|
|
51
|
-
fn create_header_map(headers: &
|
62
|
+
fn create_header_map(headers: &Vec<String>) -> HashMap<String, usize> {
|
52
63
|
headers.iter().enumerate().map(|(i, h)| (h.to_string(), i)).collect()
|
53
64
|
}
|
54
65
|
|
@@ -66,4 +77,5 @@ impl<P: AsRef<Path>> FileExtension for P {
|
|
66
77
|
|
67
78
|
false
|
68
79
|
}
|
69
|
-
}
|
80
|
+
}
|
81
|
+
|
data/ext/csv_utils/src/utils/xls.rs
CHANGED
@@ -6,7 +6,7 @@ use calamine::{Data, open_workbook, Range, Reader, Xls};
|
|
6
6
|
use chrono::{NaiveDateTime, Utc};
|
7
7
|
use magnus::{RArray, Ruby};
|
8
8
|
|
9
|
-
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, missing_value};
|
9
|
+
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, missing_value, index_of_header_in_mandatory_list};
|
10
10
|
|
11
11
|
pub fn to_csv(ruby: &Ruby, xls_path: String,
|
12
12
|
target_path: String,
|
@@ -31,17 +31,18 @@ pub fn to_csv(ruby: &Ruby, xls_path: String,
|
|
31
31
|
if let Some(value) =
|
32
32
|
check_mandatory_headers(ruby, &headers_list, &mandatory_headers, "csv") { return value; }
|
33
33
|
|
34
|
-
let header_map: HashMap<String, usize> =
|
34
|
+
let header_map: HashMap<String, usize> = mandatory_headers.iter().enumerate().map(|(i, h)| (h.to_string(), i)).collect();
|
35
35
|
let csv_out_file = File::create(target_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not create csv file: {}", target_path).as_str()))?;
|
36
36
|
let mut dest = BufWriter::new(csv_out_file);
|
37
37
|
|
38
|
-
write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers)
|
38
|
+
write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers, headers_list)
|
39
39
|
}
|
40
40
|
|
41
41
|
fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
42
|
-
header_map: HashMap<String, usize>, exclusions: Vec<String>,
|
43
|
-
|
44
|
-
|
42
|
+
header_map: HashMap<String, usize>, exclusions: Vec<String>,
|
43
|
+
mandatory_headers: Vec<String>,
|
44
|
+
headers_list: Vec<String>) -> magnus::error::Result<()> {
|
45
|
+
let n = mandatory_headers.len() - 1;
|
45
46
|
let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
|
46
47
|
let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
|
47
48
|
let start = header_map.get("Start").ok_or(missing_header(ruby, "Start"))?;
|
@@ -49,13 +50,15 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
|
49
50
|
let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
|
50
51
|
let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
|
51
52
|
|
52
|
-
|
53
|
+
let mandatory_rows = get_mandatory_records(ruby, range, &headers_list, &mandatory_headers)?;
|
54
|
+
|
55
|
+
for (ri, r) in mandatory_rows.into_iter().enumerate() {
|
53
56
|
let mut date_value = Utc::now().naive_utc();
|
54
57
|
|
55
|
-
if skip_excluded_rows(&request_id, r, &exclusions) { continue; }
|
56
|
-
if skip_empty_rows(r) { continue; }
|
57
|
-
if skip_rows_with_no_request_id(&request_id, r) { continue; }
|
58
|
-
if date_value_is_not_present(&date, r) {
|
58
|
+
if skip_excluded_rows(&request_id, &r, &exclusions) { continue; }
|
59
|
+
if skip_empty_rows(&r) { continue; }
|
60
|
+
if skip_rows_with_no_request_id(&request_id, &r) { continue; }
|
61
|
+
if date_value_is_not_present(&date, &r) {
|
59
62
|
return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Date value is not present in row: {}", ri)));
|
60
63
|
}
|
61
64
|
|
@@ -92,21 +95,42 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
|
92
95
|
Ok(())
|
93
96
|
}
|
94
97
|
|
95
|
-
fn
|
96
|
-
|
98
|
+
fn get_mandatory_records<'a>(ruby: &Ruby, range: &'a Range<Data>, csv_header_list: &Vec<String>, mandatory_headers_list: &Vec<String>) -> magnus::error::Result<Vec<Vec<&'a Data>>> {
|
99
|
+
let inverse_header_map: HashMap<usize, String> = csv_header_list.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
|
100
|
+
|
101
|
+
let mut records = vec![];
|
102
|
+
for row in range.rows() {
|
103
|
+
let mut columns = vec![];
|
104
|
+
for (i, column_value) in row.iter().enumerate() {
|
105
|
+
let column_name = inverse_header_map.get(&i).ok_or(missing_header(ruby, &i.to_string()))?;
|
106
|
+
if mandatory_headers_list.contains(column_name) {
|
107
|
+
let index = index_of_header_in_mandatory_list(mandatory_headers_list.clone(), column_name.to_string()).unwrap();
|
108
|
+
columns.push(XlsMandatoryColumn::new(column_value, index));
|
109
|
+
}
|
110
|
+
}
|
111
|
+
columns.sort_by(|a, b| a.index.cmp(&b.index));
|
112
|
+
let columns = columns.iter().map(|c| c.value).collect::<Vec<&Data>>();
|
113
|
+
records.push(columns);
|
114
|
+
}
|
115
|
+
|
116
|
+
Ok(records)
|
117
|
+
}
|
118
|
+
|
119
|
+
fn date_value_is_not_present(date: &usize, r: &Vec<&Data>) -> bool {
|
120
|
+
r[*date] == &Data::Empty
|
97
121
|
}
|
98
122
|
|
99
|
-
fn skip_excluded_rows(request_id: &usize, r: &
|
123
|
+
fn skip_excluded_rows(request_id: &usize, r: &Vec<&Data>, exclusions: &Vec<String>) -> bool {
|
100
124
|
let value = r[*request_id].to_string();
|
101
125
|
exclusions.contains(&value.to_string())
|
102
126
|
}
|
103
127
|
|
104
|
-
fn skip_empty_rows(r: &
|
105
|
-
r.
|
128
|
+
fn skip_empty_rows(r: &Vec<&Data>) -> bool {
|
129
|
+
r.into_iter().all(|c| c == &&Data::Empty)
|
106
130
|
}
|
107
131
|
|
108
|
-
fn skip_rows_with_no_request_id(request_id: &usize, r: &
|
109
|
-
r[*request_id] == Data::Empty
|
132
|
+
fn skip_rows_with_no_request_id(request_id: &usize, r: &Vec<&Data>) -> bool {
|
133
|
+
r[*request_id] == &Data::Empty
|
110
134
|
}
|
111
135
|
|
112
136
|
fn transform_time_to_datetime(t1: NaiveDateTime, t2: NaiveDateTime) -> NaiveDateTime {
|
@@ -126,4 +150,15 @@ fn clean_strings(s: &str) -> String {
|
|
126
150
|
.replace("\r", " ")
|
127
151
|
.replace("\"", "")
|
128
152
|
.replace("'", "")
|
129
|
-
}
|
153
|
+
}
|
154
|
+
|
155
|
+
struct XlsMandatoryColumn<'a> {
|
156
|
+
value: &'a Data,
|
157
|
+
index: usize,
|
158
|
+
}
|
159
|
+
|
160
|
+
impl<'a> XlsMandatoryColumn<'a> {
|
161
|
+
fn new(value: &'a Data, index: usize) -> Self {
|
162
|
+
XlsMandatoryColumn { value, index }
|
163
|
+
}
|
164
|
+
}
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/csv_utils/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: patchwork_csv_utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.11
|
4
|
+
version: 0.1.13
|
5
5
|
platform: arm64-darwin
|
6
6
|
authors:
|
7
7
|
- kingsley.hendrickse
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-09-
|
11
|
+
date: 2024-09-04 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Deduplication of CSV files and XLS to CSV conversion.
|
14
14
|
email:
|