patchwork_csv_utils 0.1.14-x86_64-darwin → 0.1.16-x86_64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +2 -2
- data/ext/csv_utils/src/lib.rs +2 -2
- data/ext/csv_utils/src/utils/csv.rs +16 -3
- data/ext/csv_utils/src/utils/mod.rs +9 -0
- data/ext/csv_utils/src/utils/xls.rs +20 -6
- data/lib/csv_utils/2.7/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.0/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.1/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.2/csv_utils.bundle +0 -0
- data/lib/csv_utils/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c5d1440820a1d43cf883d17038c250ce063de4aefebaa334eac95575adb6dac1
|
4
|
+
data.tar.gz: 8779a49dba19537387b4df168e861b890dfb2a33db1e7b85b9648742c17210c2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 426b46497c78547b927dce9360f35352212eb3d9a12a2e46198015f314ba2b8d51a73e20f4340a1566bc2f7dc9e47ea47cd570694f8e4c97e7fb9af3bce72a4c
|
7
|
+
data.tar.gz: c15ad59179ee3a5a4a0a81da52579eb5545d6e1f4834416cec0bf94d3d89fdd6a60e5e5412275c1203c9e50b30b32c0b43fc5d567cb5c0eb630ad5ddcbea6a56
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -14,8 +14,8 @@ gem install patchwork_csv_utils
|
|
14
14
|
```irb
|
15
15
|
require 'csv_utils'
|
16
16
|
CsvUtils.dedup('file1.csv', 'file2.csv', 'output.csv', ['mandatory_headers'])
|
17
|
-
CsvUtils.to_csv('file1.xls', 'output_file1.csv', ['request_ids_to_skip'], ['mandatory_headers'])
|
18
|
-
CsvUtils.transform_csv('file1.xls', 'output_file1.csv', ['request_ids_to_skip'], ['mandatory_headers'])
|
17
|
+
CsvUtils.to_csv('file1.xls', 'output_file1.csv', ['request_ids_to_skip'], ['mandatory_headers'], ['status_exclusions'], 'expected_trust_name')
|
18
|
+
CsvUtils.transform_csv('file1.csv', 'output_file1.csv', ['request_ids_to_skip'], ['mandatory_headers'], ['status_exclusions'], 'expected_trust_name')
|
19
19
|
```
|
20
20
|
|
21
21
|
## Release
|
data/ext/csv_utils/src/lib.rs
CHANGED
@@ -9,7 +9,7 @@ pub mod utils;
|
|
9
9
|
fn init() -> Result<(), magnus::Error> {
|
10
10
|
let module = define_module("CsvUtils")?;
|
11
11
|
module.define_singleton_method("dedup", function!(dedup, 4))?;
|
12
|
-
module.define_singleton_method("to_csv", function!(to_csv, 4))?;
|
13
|
-
module.define_singleton_method("transform_csv", function!(transform_csv, 4))?;
|
12
|
+
module.define_singleton_method("to_csv", function!(to_csv, 6))?;
|
13
|
+
module.define_singleton_method("transform_csv", function!(transform_csv, 6))?;
|
14
14
|
Ok(())
|
15
15
|
}
|
@@ -1,20 +1,22 @@
|
|
1
1
|
use std::collections::HashMap;
|
2
2
|
use std::fs::File;
|
3
|
-
|
4
3
|
use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Utc};
|
5
4
|
use csv::{Reader, StringRecord, Writer};
|
6
5
|
use magnus::{Error, RArray, Ruby};
|
7
6
|
|
8
|
-
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, create_header_map, missing_value, headers_as_byte_record, index_of_header_in_mandatory_list};
|
7
|
+
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, create_header_map, missing_value, headers_as_byte_record, index_of_header_in_mandatory_list, validate_trust_name};
|
9
8
|
|
10
9
|
pub fn transform_csv(ruby: &Ruby, csv_path: String,
|
11
10
|
target_path: String, exclusions: RArray,
|
12
|
-
mandatory_headers: RArray,) -> magnus::error::Result<()> {
|
11
|
+
mandatory_headers: RArray,
|
12
|
+
status_exclusions: RArray,
|
13
|
+
expected_trust_name: String,) -> magnus::error::Result<()> {
|
13
14
|
if !csv_path.has_extension(&["csv"]) {
|
14
15
|
return Err(Error::new(ruby.exception_standard_error(), "csv_path must be a csv file".to_string()));
|
15
16
|
}
|
16
17
|
|
17
18
|
let exclusions = RArray::to_vec(exclusions)?;
|
19
|
+
let status_exclusions = RArray::to_vec(status_exclusions)?;
|
18
20
|
let mandatory_headers: Vec<String> = RArray::to_vec(mandatory_headers)?;
|
19
21
|
|
20
22
|
let csv_file = File::open(csv_path).map_err(|e| magnus_err(ruby, e, "csv_path"))?;
|
@@ -38,12 +40,15 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String,
|
|
38
40
|
let end = header_map.get("End").ok_or(missing_header(ruby, "End"))?;
|
39
41
|
let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
|
40
42
|
let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
|
43
|
+
let status = header_map.get("Status");
|
44
|
+
let trust_name = header_map.get("Trust").ok_or(missing_header(ruby, "Trust"))?;
|
41
45
|
|
42
46
|
let mandatory_records = get_mandatory_records(&ruby, &mut csv, &headers_list, &mandatory_headers)?;
|
43
47
|
|
44
48
|
for (ri, record) in mandatory_records.iter().enumerate() {
|
45
49
|
|
46
50
|
if skip_excluded_rows(request_id, &record, &exclusions) { continue; }
|
51
|
+
if skip_excluded_status_rows(&status, &record, &status_exclusions) { continue; }
|
47
52
|
if has_empty_row_skip(&record) { continue; }
|
48
53
|
if has_empty_first_col_skip_row(&record) { continue; }
|
49
54
|
|
@@ -55,6 +60,8 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String,
|
|
55
60
|
let column_value = record.get(*column_index).ok_or(missing_value(ruby, column))?;
|
56
61
|
let column_value = column_value.trim_end();
|
57
62
|
|
63
|
+
validate_trust_name(ruby, &expected_trust_name, trust_name, ri, i, &column_value.to_string())?;
|
64
|
+
|
58
65
|
if i == *date {
|
59
66
|
let current = string_to_datetime(column_value).ok_or(to_datetime_error(ruby, column_value, ri, "Date"))?;
|
60
67
|
date_value = current;
|
@@ -124,6 +131,12 @@ fn skip_excluded_rows(request_id: &usize, r: &StringRecord, exclusions: &Vec<Str
|
|
124
131
|
exclusions.contains(&value.to_string())
|
125
132
|
}
|
126
133
|
|
134
|
+
fn skip_excluded_status_rows(status: &Option<&usize>, r: &StringRecord, exclusions: &Vec<String>) -> bool {
|
135
|
+
status
|
136
|
+
.map(|index| exclusions.contains(&r[*index].to_string()))
|
137
|
+
.unwrap_or(false)
|
138
|
+
}
|
139
|
+
|
127
140
|
fn string_to_datetime(s: &str) -> Option<NaiveDateTime> {
|
128
141
|
let maybe_correct = correct_datetime(s);
|
129
142
|
if maybe_correct.is_some() { return maybe_correct; }
|
@@ -9,6 +9,15 @@ pub mod csv;
|
|
9
9
|
pub mod dedup;
|
10
10
|
pub mod xls;
|
11
11
|
|
12
|
+
fn validate_trust_name(ruby: &Ruby, expected_trust_name: &String, trust_name: &usize, ri: usize, i: usize, s: &String) -> magnus::error::Result<()> {
|
13
|
+
if ri > 0 && i == *trust_name {
|
14
|
+
if s != &expected_trust_name.clone() {
|
15
|
+
return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Trust actual name: '{}' is not as expected: '{}'", s, expected_trust_name)));
|
16
|
+
}
|
17
|
+
}
|
18
|
+
Ok(())
|
19
|
+
}
|
20
|
+
|
12
21
|
fn missing_header(ruby: &Ruby, header: &str) -> magnus::Error {
|
13
22
|
magnus::Error::new(ruby.exception_standard_error(), format!("Missing '{}' header", header))
|
14
23
|
}
|
@@ -6,12 +6,14 @@ use calamine::{Data, open_workbook, Range, Reader, Xls};
|
|
6
6
|
use chrono::{NaiveDateTime, Utc};
|
7
7
|
use magnus::{RArray, Ruby};
|
8
8
|
|
9
|
-
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, missing_value, index_of_header_in_mandatory_list};
|
9
|
+
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, missing_value, index_of_header_in_mandatory_list, validate_trust_name};
|
10
10
|
|
11
11
|
pub fn to_csv(ruby: &Ruby, xls_path: String,
|
12
12
|
target_path: String,
|
13
13
|
exclusions: RArray,
|
14
|
-
mandatory_headers: RArray
|
14
|
+
mandatory_headers: RArray,
|
15
|
+
status_exclusions: RArray,
|
16
|
+
expected_trust_name: String,
|
15
17
|
) -> magnus::error::Result<()> {
|
16
18
|
if !xls_path.has_extension(&["xls"]) {
|
17
19
|
return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls file".to_string()));
|
@@ -19,6 +21,7 @@ pub fn to_csv(ruby: &Ruby, xls_path: String,
|
|
19
21
|
|
20
22
|
let exclusions = RArray::to_vec(exclusions)?;
|
21
23
|
let mandatory_headers: Vec<String> = RArray::to_vec(mandatory_headers)?;
|
24
|
+
let status_exclusions = RArray::to_vec(status_exclusions)?;
|
22
25
|
|
23
26
|
let mut workbook: Xls<_> = open_workbook(xls_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not open xls: {}", xls_path).as_str()))?;
|
24
27
|
let range = workbook.worksheet_range_at(0)
|
@@ -26,7 +29,7 @@ pub fn to_csv(ruby: &Ruby, xls_path: String,
|
|
26
29
|
.and_then(|r| r.map_err(|e| magnus_err(ruby, e, "could not read worksheet range")))?;
|
27
30
|
|
28
31
|
let headers = range.headers().ok_or(magnus::Error::new(ruby.exception_standard_error(), "no headers found in xls".to_string()))?;
|
29
|
-
let headers_list
|
32
|
+
let headers_list: Vec<String> = headers.iter().map(|h| h.to_string()).collect();
|
30
33
|
|
31
34
|
if let Some(value) =
|
32
35
|
check_mandatory_headers(ruby, &headers_list, &mandatory_headers, "csv") { return value; }
|
@@ -35,13 +38,15 @@ pub fn to_csv(ruby: &Ruby, xls_path: String,
|
|
35
38
|
let csv_out_file = File::create(target_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not create csv file: {}", target_path).as_str()))?;
|
36
39
|
let mut dest = BufWriter::new(csv_out_file);
|
37
40
|
|
38
|
-
write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers, headers_list)
|
41
|
+
write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers, headers_list, status_exclusions, expected_trust_name)
|
39
42
|
}
|
40
43
|
|
41
44
|
fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
42
45
|
header_map: HashMap<String, usize>, exclusions: Vec<String>,
|
43
46
|
mandatory_headers: Vec<String>,
|
44
|
-
headers_list: Vec<String>) -> magnus::error::Result<()> {
|
47
|
+
headers_list: Vec<String>,
|
48
|
+
status_exclusions: Vec<String>,
|
49
|
+
expected_trust_name: String) -> magnus::error::Result<()> {
|
45
50
|
let n = mandatory_headers.len() - 1;
|
46
51
|
let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
|
47
52
|
let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
|
@@ -49,6 +54,8 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
|
49
54
|
let end = header_map.get("End").ok_or(missing_header(ruby, "End"))?;
|
50
55
|
let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
|
51
56
|
let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
|
57
|
+
let status = header_map.get("Status");
|
58
|
+
let trust_name = header_map.get("Trust").ok_or(missing_header(ruby, "Trust"))?;
|
52
59
|
|
53
60
|
let mandatory_rows = get_mandatory_records(ruby, range, &headers_list, &mandatory_headers)?;
|
54
61
|
|
@@ -56,6 +63,7 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
|
56
63
|
let mut date_value = Utc::now().naive_utc();
|
57
64
|
|
58
65
|
if skip_excluded_rows(&request_id, &r, &exclusions) { continue; }
|
66
|
+
if skip_excluded_status_rows(&status, &r, &status_exclusions) { continue; }
|
59
67
|
if skip_empty_rows(&r) { continue; }
|
60
68
|
if skip_rows_with_no_request_id(&request_id, &r) { continue; }
|
61
69
|
if date_value_is_not_present(&date, &r) {
|
@@ -63,13 +71,13 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
|
63
71
|
}
|
64
72
|
|
65
73
|
for (i, c) in mandatory_headers.iter().enumerate() {
|
66
|
-
|
67
74
|
let column_index = header_map.get(c).ok_or(missing_header(ruby, c))?;
|
68
75
|
let c = r.get(*column_index).ok_or(missing_value(ruby, c))?;
|
69
76
|
|
70
77
|
match *c {
|
71
78
|
Data::Empty => Ok(()),
|
72
79
|
Data::String(ref s) | Data::DateTimeIso(ref s) | Data::DurationIso(ref s) => {
|
80
|
+
validate_trust_name(ruby, &expected_trust_name, trust_name, ri, i, s)?;
|
73
81
|
handle_commas(dest, s)
|
74
82
|
}
|
75
83
|
Data::Float(ref f) => write!(dest, "{}", f),
|
@@ -125,6 +133,12 @@ fn skip_excluded_rows(request_id: &usize, r: &Vec<&Data>, exclusions: &Vec<Strin
|
|
125
133
|
exclusions.contains(&value.to_string())
|
126
134
|
}
|
127
135
|
|
136
|
+
fn skip_excluded_status_rows(status: &Option<&usize>, r: &Vec<&Data>, exclusions: &Vec<String>) -> bool {
|
137
|
+
status
|
138
|
+
.map(|index| exclusions.contains(&r[*index].to_string()))
|
139
|
+
.unwrap_or(false)
|
140
|
+
}
|
141
|
+
|
128
142
|
fn skip_empty_rows(r: &Vec<&Data>) -> bool {
|
129
143
|
r.into_iter().all(|c| c == &&Data::Empty)
|
130
144
|
}
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/csv_utils/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: patchwork_csv_utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.14
|
4
|
+
version: 0.1.16
|
5
5
|
platform: x86_64-darwin
|
6
6
|
authors:
|
7
7
|
- kingsley.hendrickse
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-11-25 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Deduplication of CSV files and XLS to CSV conversion.
|
14
14
|
email:
|