patchwork_csv_utils 0.1.14-x86_64-darwin → 0.1.16-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +2 -2
- data/ext/csv_utils/src/lib.rs +2 -2
- data/ext/csv_utils/src/utils/csv.rs +16 -3
- data/ext/csv_utils/src/utils/mod.rs +9 -0
- data/ext/csv_utils/src/utils/xls.rs +20 -6
- data/lib/csv_utils/2.7/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.0/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.1/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.2/csv_utils.bundle +0 -0
- data/lib/csv_utils/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c5d1440820a1d43cf883d17038c250ce063de4aefebaa334eac95575adb6dac1
|
4
|
+
data.tar.gz: 8779a49dba19537387b4df168e861b890dfb2a33db1e7b85b9648742c17210c2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 426b46497c78547b927dce9360f35352212eb3d9a12a2e46198015f314ba2b8d51a73e20f4340a1566bc2f7dc9e47ea47cd570694f8e4c97e7fb9af3bce72a4c
|
7
|
+
data.tar.gz: c15ad59179ee3a5a4a0a81da52579eb5545d6e1f4834416cec0bf94d3d89fdd6a60e5e5412275c1203c9e50b30b32c0b43fc5d567cb5c0eb630ad5ddcbea6a56
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -14,8 +14,8 @@ gem install patchwork_csv_utils
|
|
14
14
|
```irb
|
15
15
|
require 'csv_utils'
|
16
16
|
CsvUtils.dedup('file1.csv', 'file2.csv', 'output.csv', ['mandatory_headers'])
|
17
|
-
CsvUtils.to_csv('file1.xls', 'output_file1.csv', ['request_ids_to_skip'], ['mandatory_headers'])
|
18
|
-
CsvUtils.transform_csv('file1.xls', 'output_file1.csv', ['request_ids_to_skip'], ['mandatory_headers'])
|
17
|
+
CsvUtils.to_csv('file1.xls', 'output_file1.csv', ['request_ids_to_skip'], ['mandatory_headers'], ['status_exclusions'], 'expected_trust_name')
|
18
|
+
CsvUtils.transform_csv('file1.xls', 'output_file1.csv', ['request_ids_to_skip'], ['mandatory_headers'], ['status_exclusions'], 'expected_trust_name')
|
19
19
|
```
|
20
20
|
|
21
21
|
## Release
|
data/ext/csv_utils/src/lib.rs
CHANGED
@@ -9,7 +9,7 @@ pub mod utils;
|
|
9
9
|
fn init() -> Result<(), magnus::Error> {
|
10
10
|
let module = define_module("CsvUtils")?;
|
11
11
|
module.define_singleton_method("dedup", function!(dedup, 4))?;
|
12
|
-
module.define_singleton_method("to_csv", function!(to_csv, 4))?;
|
13
|
-
module.define_singleton_method("transform_csv", function!(transform_csv, 4))?;
|
12
|
+
module.define_singleton_method("to_csv", function!(to_csv, 6))?;
|
13
|
+
module.define_singleton_method("transform_csv", function!(transform_csv, 6))?;
|
14
14
|
Ok(())
|
15
15
|
}
|
@@ -1,20 +1,22 @@
|
|
1
1
|
use std::collections::HashMap;
|
2
2
|
use std::fs::File;
|
3
|
-
|
4
3
|
use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Utc};
|
5
4
|
use csv::{Reader, StringRecord, Writer};
|
6
5
|
use magnus::{Error, RArray, Ruby};
|
7
6
|
|
8
|
-
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, create_header_map, missing_value, headers_as_byte_record, index_of_header_in_mandatory_list};
|
7
|
+
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, create_header_map, missing_value, headers_as_byte_record, index_of_header_in_mandatory_list, validate_trust_name};
|
9
8
|
|
10
9
|
pub fn transform_csv(ruby: &Ruby, csv_path: String,
|
11
10
|
target_path: String, exclusions: RArray,
|
12
|
-
mandatory_headers: RArray,
|
11
|
+
mandatory_headers: RArray,
|
12
|
+
status_exclusions: RArray,
|
13
|
+
expected_trust_name: String,) -> magnus::error::Result<()> {
|
13
14
|
if !csv_path.has_extension(&["csv"]) {
|
14
15
|
return Err(Error::new(ruby.exception_standard_error(), "csv_path must be a csv file".to_string()));
|
15
16
|
}
|
16
17
|
|
17
18
|
let exclusions = RArray::to_vec(exclusions)?;
|
19
|
+
let status_exclusions = RArray::to_vec(status_exclusions)?;
|
18
20
|
let mandatory_headers: Vec<String> = RArray::to_vec(mandatory_headers)?;
|
19
21
|
|
20
22
|
let csv_file = File::open(csv_path).map_err(|e| magnus_err(ruby, e, "csv_path"))?;
|
@@ -38,12 +40,15 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String,
|
|
38
40
|
let end = header_map.get("End").ok_or(missing_header(ruby, "End"))?;
|
39
41
|
let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
|
40
42
|
let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
|
43
|
+
let status = header_map.get("Status");
|
44
|
+
let trust_name = header_map.get("Trust").ok_or(missing_header(ruby, "Trust"))?;
|
41
45
|
|
42
46
|
let mandatory_records = get_mandatory_records(&ruby, &mut csv, &headers_list, &mandatory_headers)?;
|
43
47
|
|
44
48
|
for (ri, record) in mandatory_records.iter().enumerate() {
|
45
49
|
|
46
50
|
if skip_excluded_rows(request_id, &record, &exclusions) { continue; }
|
51
|
+
if skip_excluded_status_rows(&status, &record, &status_exclusions) { continue; }
|
47
52
|
if has_empty_row_skip(&record) { continue; }
|
48
53
|
if has_empty_first_col_skip_row(&record) { continue; }
|
49
54
|
|
@@ -55,6 +60,8 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String,
|
|
55
60
|
let column_value = record.get(*column_index).ok_or(missing_value(ruby, column))?;
|
56
61
|
let column_value = column_value.trim_end();
|
57
62
|
|
63
|
+
validate_trust_name(ruby, &expected_trust_name, trust_name, ri, i, &column_value.to_string())?;
|
64
|
+
|
58
65
|
if i == *date {
|
59
66
|
let current = string_to_datetime(column_value).ok_or(to_datetime_error(ruby, column_value, ri, "Date"))?;
|
60
67
|
date_value = current;
|
@@ -124,6 +131,12 @@ fn skip_excluded_rows(request_id: &usize, r: &StringRecord, exclusions: &Vec<Str
|
|
124
131
|
exclusions.contains(&value.to_string())
|
125
132
|
}
|
126
133
|
|
134
|
+
fn skip_excluded_status_rows(status: &Option<&usize>, r: &StringRecord, exclusions: &Vec<String>) -> bool {
|
135
|
+
status
|
136
|
+
.map(|index| exclusions.contains(&r[*index].to_string()))
|
137
|
+
.unwrap_or(false)
|
138
|
+
}
|
139
|
+
|
127
140
|
fn string_to_datetime(s: &str) -> Option<NaiveDateTime> {
|
128
141
|
let maybe_correct = correct_datetime(s);
|
129
142
|
if maybe_correct.is_some() { return maybe_correct; }
|
@@ -9,6 +9,15 @@ pub mod csv;
|
|
9
9
|
pub mod dedup;
|
10
10
|
pub mod xls;
|
11
11
|
|
12
|
+
fn validate_trust_name(ruby: &Ruby, expected_trust_name: &String, trust_name: &usize, ri: usize, i: usize, s: &String) -> magnus::error::Result<()> {
|
13
|
+
if ri > 0 && i == *trust_name {
|
14
|
+
if s != &expected_trust_name.clone() {
|
15
|
+
return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Trust actual name: '{}' is not as expected: '{}'", s, expected_trust_name)));
|
16
|
+
}
|
17
|
+
}
|
18
|
+
Ok(())
|
19
|
+
}
|
20
|
+
|
12
21
|
fn missing_header(ruby: &Ruby, header: &str) -> magnus::Error {
|
13
22
|
magnus::Error::new(ruby.exception_standard_error(), format!("Missing '{}' header", header))
|
14
23
|
}
|
@@ -6,12 +6,14 @@ use calamine::{Data, open_workbook, Range, Reader, Xls};
|
|
6
6
|
use chrono::{NaiveDateTime, Utc};
|
7
7
|
use magnus::{RArray, Ruby};
|
8
8
|
|
9
|
-
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, missing_value, index_of_header_in_mandatory_list};
|
9
|
+
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, missing_value, index_of_header_in_mandatory_list, validate_trust_name};
|
10
10
|
|
11
11
|
pub fn to_csv(ruby: &Ruby, xls_path: String,
|
12
12
|
target_path: String,
|
13
13
|
exclusions: RArray,
|
14
|
-
mandatory_headers: RArray
|
14
|
+
mandatory_headers: RArray,
|
15
|
+
status_exclusions: RArray,
|
16
|
+
expected_trust_name: String,
|
15
17
|
) -> magnus::error::Result<()> {
|
16
18
|
if !xls_path.has_extension(&["xls"]) {
|
17
19
|
return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls file".to_string()));
|
@@ -19,6 +21,7 @@ pub fn to_csv(ruby: &Ruby, xls_path: String,
|
|
19
21
|
|
20
22
|
let exclusions = RArray::to_vec(exclusions)?;
|
21
23
|
let mandatory_headers: Vec<String> = RArray::to_vec(mandatory_headers)?;
|
24
|
+
let status_exclusions = RArray::to_vec(status_exclusions)?;
|
22
25
|
|
23
26
|
let mut workbook: Xls<_> = open_workbook(xls_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not open xls: {}", xls_path).as_str()))?;
|
24
27
|
let range = workbook.worksheet_range_at(0)
|
@@ -26,7 +29,7 @@ pub fn to_csv(ruby: &Ruby, xls_path: String,
|
|
26
29
|
.and_then(|r| r.map_err(|e| magnus_err(ruby, e, "could not read worksheet range")))?;
|
27
30
|
|
28
31
|
let headers = range.headers().ok_or(magnus::Error::new(ruby.exception_standard_error(), "no headers found in xls".to_string()))?;
|
29
|
-
let headers_list
|
32
|
+
let headers_list: Vec<String> = headers.iter().map(|h| h.to_string()).collect();
|
30
33
|
|
31
34
|
if let Some(value) =
|
32
35
|
check_mandatory_headers(ruby, &headers_list, &mandatory_headers, "csv") { return value; }
|
@@ -35,13 +38,15 @@ pub fn to_csv(ruby: &Ruby, xls_path: String,
|
|
35
38
|
let csv_out_file = File::create(target_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not create csv file: {}", target_path).as_str()))?;
|
36
39
|
let mut dest = BufWriter::new(csv_out_file);
|
37
40
|
|
38
|
-
write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers, headers_list)
|
41
|
+
write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers, headers_list, status_exclusions, expected_trust_name)
|
39
42
|
}
|
40
43
|
|
41
44
|
fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
42
45
|
header_map: HashMap<String, usize>, exclusions: Vec<String>,
|
43
46
|
mandatory_headers: Vec<String>,
|
44
|
-
headers_list: Vec<String>) -> magnus::error::Result<()> {
|
47
|
+
headers_list: Vec<String>,
|
48
|
+
status_exclusions: Vec<String>,
|
49
|
+
expected_trust_name: String) -> magnus::error::Result<()> {
|
45
50
|
let n = mandatory_headers.len() - 1;
|
46
51
|
let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
|
47
52
|
let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
|
@@ -49,6 +54,8 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
|
49
54
|
let end = header_map.get("End").ok_or(missing_header(ruby, "End"))?;
|
50
55
|
let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
|
51
56
|
let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
|
57
|
+
let status = header_map.get("Status");
|
58
|
+
let trust_name = header_map.get("Trust").ok_or(missing_header(ruby, "Trust"))?;
|
52
59
|
|
53
60
|
let mandatory_rows = get_mandatory_records(ruby, range, &headers_list, &mandatory_headers)?;
|
54
61
|
|
@@ -56,6 +63,7 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
|
56
63
|
let mut date_value = Utc::now().naive_utc();
|
57
64
|
|
58
65
|
if skip_excluded_rows(&request_id, &r, &exclusions) { continue; }
|
66
|
+
if skip_excluded_status_rows(&status, &r, &status_exclusions) { continue; }
|
59
67
|
if skip_empty_rows(&r) { continue; }
|
60
68
|
if skip_rows_with_no_request_id(&request_id, &r) { continue; }
|
61
69
|
if date_value_is_not_present(&date, &r) {
|
@@ -63,13 +71,13 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
|
63
71
|
}
|
64
72
|
|
65
73
|
for (i, c) in mandatory_headers.iter().enumerate() {
|
66
|
-
|
67
74
|
let column_index = header_map.get(c).ok_or(missing_header(ruby, c))?;
|
68
75
|
let c = r.get(*column_index).ok_or(missing_value(ruby, c))?;
|
69
76
|
|
70
77
|
match *c {
|
71
78
|
Data::Empty => Ok(()),
|
72
79
|
Data::String(ref s) | Data::DateTimeIso(ref s) | Data::DurationIso(ref s) => {
|
80
|
+
validate_trust_name(ruby, &expected_trust_name, trust_name, ri, i, s)?;
|
73
81
|
handle_commas(dest, s)
|
74
82
|
}
|
75
83
|
Data::Float(ref f) => write!(dest, "{}", f),
|
@@ -125,6 +133,12 @@ fn skip_excluded_rows(request_id: &usize, r: &Vec<&Data>, exclusions: &Vec<Strin
|
|
125
133
|
exclusions.contains(&value.to_string())
|
126
134
|
}
|
127
135
|
|
136
|
+
fn skip_excluded_status_rows(status: &Option<&usize>, r: &Vec<&Data>, exclusions: &Vec<String>) -> bool {
|
137
|
+
status
|
138
|
+
.map(|index| exclusions.contains(&r[*index].to_string()))
|
139
|
+
.unwrap_or(false)
|
140
|
+
}
|
141
|
+
|
128
142
|
fn skip_empty_rows(r: &Vec<&Data>) -> bool {
|
129
143
|
r.into_iter().all(|c| c == &&Data::Empty)
|
130
144
|
}
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/csv_utils/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: patchwork_csv_utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.14
|
4
|
+
version: 0.1.16
|
5
5
|
platform: x86_64-darwin
|
6
6
|
authors:
|
7
7
|
- kingsley.hendrickse
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-11-25 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Deduplication of CSV files and XLS to CSV conversion.
|
14
14
|
email:
|