patchwork_csv_utils 0.1.20-x86_64-darwin → 0.1.22-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -0
- data/Gemfile.lock +2 -1
- data/ext/csv_utils/src/lib.rs +2 -2
- data/ext/csv_utils/src/utils/csv.rs +19 -6
- data/ext/csv_utils/src/utils/xls.rs +21 -6
- data/lib/csv_utils/2.7/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.0/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.1/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.2/csv_utils.bundle +0 -0
- data/lib/csv_utils/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2768d0d23733c2d299db6130f9d7d3577ef009aa554d0a3f19d1716403e32020
|
4
|
+
data.tar.gz: 05c46784c97f711bd996620fa66c28ef39e1faf58143087cdad88ee1f9163b14
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d4b55756b6e559e388801d34aeb02a9ad83989d7d247a4e39f3f4af2fae041c8dad97703cb83c8ead91301336f42e9b481e5296a9250223a7edc6b2ce8b0e6df
|
7
|
+
data.tar.gz: b6d8a81d3643bb8937a8dad13360217e9084a8cdf0d9ddf840f70715fea0a94aec0ad72bc27687d046aca83b9f69b61c9866129d6786f40ba723a3e73f4ca3e2
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
ruby-3.0.7
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
patchwork_csv_utils (0.1.20)
|
4
|
+
patchwork_csv_utils (0.1.22)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
@@ -56,6 +56,7 @@ GEM
|
|
56
56
|
PLATFORMS
|
57
57
|
arm64-darwin-22
|
58
58
|
arm64-darwin-23
|
59
|
+
arm64-darwin-24
|
59
60
|
x86_64-linux
|
60
61
|
|
61
62
|
DEPENDENCIES
|
data/ext/csv_utils/src/lib.rs
CHANGED
@@ -9,7 +9,7 @@ pub mod utils;
|
|
9
9
|
fn init() -> Result<(), magnus::Error> {
|
10
10
|
let module = define_module("CsvUtils")?;
|
11
11
|
module.define_singleton_method("dedup", function!(dedup, 4))?;
|
12
|
-
module.define_singleton_method("to_csv", function!(to_csv, 6))?;
|
13
|
-
module.define_singleton_method("transform_csv", function!(transform_csv, 6))?;
|
12
|
+
module.define_singleton_method("to_csv", function!(to_csv, 7))?;
|
13
|
+
module.define_singleton_method("transform_csv", function!(transform_csv, 7))?;
|
14
14
|
Ok(())
|
15
15
|
}
|
data/ext/csv_utils/src/utils/csv.rs
CHANGED
@@ -6,11 +6,14 @@ use std::fs::File;
|
|
6
6
|
|
7
7
|
use crate::utils::{check_mandatory_headers, create_header_map, headers_as_byte_record, index_of_header_in_mandatory_list, magnus_err, missing_header, missing_value, to_datetime_error, FileExtension};
|
8
8
|
|
9
|
-
pub fn transform_csv(ruby: &Ruby, csv_path: String,
|
10
|
-
|
9
|
+
pub fn transform_csv(ruby: &Ruby,
|
10
|
+
csv_path: String,
|
11
|
+
target_path: String,
|
12
|
+
exclusions: RArray,
|
11
13
|
mandatory_headers: RArray,
|
12
14
|
status_exclusions: RArray,
|
13
|
-
expected_trust_name: String,
|
15
|
+
expected_trust_name: String,
|
16
|
+
is_streamed_file: bool) -> magnus::error::Result<()> {
|
14
17
|
if !csv_path.has_extension(&["csv"]) {
|
15
18
|
return Err(Error::new(ruby.exception_standard_error(), "csv_path must be a csv file".to_string()));
|
16
19
|
}
|
@@ -47,7 +50,7 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String,
|
|
47
50
|
|
48
51
|
for (ri, record) in mandatory_records.iter().enumerate() {
|
49
52
|
|
50
|
-
if skip_excluded_rows(request_id, &record, &exclusions) { continue; }
|
53
|
+
if skip_excluded_rows(request_id, &status, &record, &exclusions) { continue; }
|
51
54
|
if skip_excluded_status_rows(&status, &record, &status_exclusions) { continue; }
|
52
55
|
if has_empty_row_skip(&record) { continue; }
|
53
56
|
if has_empty_first_col_skip_row(&record) { continue; }
|
@@ -60,7 +63,9 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String,
|
|
60
63
|
let column_value = record.get(*column_index).ok_or(missing_value(ruby, column))?;
|
61
64
|
let column_value = column_value.trim_end();
|
62
65
|
|
63
|
-
|
66
|
+
if !is_streamed_file {
|
67
|
+
validate_trust_name(ruby, &expected_trust_name, trust_name, i, &column_value.to_string())?;
|
68
|
+
}
|
64
69
|
|
65
70
|
if i == *date {
|
66
71
|
let current = string_to_datetime(column_value).ok_or(to_datetime_error(ruby, column_value, ri, "Date"))?;
|
@@ -137,7 +142,15 @@ fn get_column_name(inverse_header_map: &HashMap<usize, String>, i: &usize) -> St
|
|
137
142
|
column_name.to_string()
|
138
143
|
}
|
139
144
|
|
140
|
-
fn skip_excluded_rows(request_id: &usize, r: &StringRecord, exclusions: &Vec<String>) -> bool {
|
145
|
+
fn skip_excluded_rows(request_id: &usize, status: &Option<&usize>, r: &StringRecord, exclusions: &Vec<String>) -> bool {
|
146
|
+
if let Some(status_index) = status {
|
147
|
+
if let Some(status) = r.get(**status_index) {
|
148
|
+
if status.eq("Recalled") {
|
149
|
+
return false
|
150
|
+
}
|
151
|
+
}
|
152
|
+
}
|
153
|
+
|
141
154
|
let value = r.get(*request_id).unwrap_or_default();
|
142
155
|
exclusions.contains(&value.to_string())
|
143
156
|
}
|
data/ext/csv_utils/src/utils/xls.rs
CHANGED
@@ -2,7 +2,7 @@ use std::collections::HashMap;
|
|
2
2
|
use std::fs::File;
|
3
3
|
use std::io::{BufWriter, Write};
|
4
4
|
|
5
|
-
use calamine::{open_workbook_auto, Data, Range, Reader};
|
5
|
+
use calamine::{open_workbook_auto, Data, DataType, Range, Reader};
|
6
6
|
use chrono::{NaiveDateTime, Timelike, Utc};
|
7
7
|
use magnus::{RArray, Ruby};
|
8
8
|
|
@@ -14,6 +14,7 @@ pub fn to_csv(ruby: &Ruby, xls_path: String,
|
|
14
14
|
mandatory_headers: RArray,
|
15
15
|
status_exclusions: RArray,
|
16
16
|
expected_trust_name: String,
|
17
|
+
is_streamed_file: bool
|
17
18
|
) -> magnus::error::Result<()> {
|
18
19
|
if !xls_path.has_extension(&["xls","xlsx"]) {
|
19
20
|
return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls or xlsx file".to_string()));
|
@@ -40,7 +41,7 @@ pub fn to_csv(ruby: &Ruby, xls_path: String,
|
|
40
41
|
let csv_out_file = File::create(target_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not create csv file: {}", target_path).as_str()))?;
|
41
42
|
let mut dest = BufWriter::new(csv_out_file);
|
42
43
|
|
43
|
-
write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers, headers_list, status_exclusions, expected_trust_name)
|
44
|
+
write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers, headers_list, status_exclusions, expected_trust_name, is_streamed_file)
|
44
45
|
}
|
45
46
|
|
46
47
|
fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
@@ -48,7 +49,8 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
|
48
49
|
mandatory_headers: Vec<String>,
|
49
50
|
headers_list: Vec<String>,
|
50
51
|
status_exclusions: Vec<String>,
|
51
|
-
expected_trust_name: String
|
52
|
+
expected_trust_name: String,
|
53
|
+
is_streamed_file: bool) -> magnus::error::Result<()> {
|
52
54
|
let n = mandatory_headers.len() - 1;
|
53
55
|
let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
|
54
56
|
let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
|
@@ -64,14 +66,17 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
|
64
66
|
for (ri, r) in mandatory_rows.into_iter().enumerate() {
|
65
67
|
let mut date_value = Utc::now().naive_utc();
|
66
68
|
|
67
|
-
if skip_excluded_rows(&request_id, &r, &exclusions) { continue; }
|
69
|
+
if skip_excluded_rows(&request_id, &status, &r, &exclusions) { continue; }
|
68
70
|
if skip_excluded_status_rows(&status, &r, &status_exclusions) { continue; }
|
69
71
|
if skip_empty_rows(&r) { continue; }
|
70
72
|
if skip_rows_with_no_request_id(&request_id, &r) { continue; }
|
71
73
|
if date_value_is_not_present(&date, &r) {
|
72
74
|
return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Date value is not present in row: {}", ri)));
|
73
75
|
}
|
74
|
-
|
76
|
+
|
77
|
+
if !is_streamed_file {
|
78
|
+
validate_trust_name(ruby, &expected_trust_name, trust_name, ri, &r)?;
|
79
|
+
}
|
75
80
|
|
76
81
|
for (i, c) in mandatory_headers.iter().enumerate() {
|
77
82
|
let column_index = header_map.get(c).ok_or(missing_header(ruby, c))?;
|
@@ -180,7 +185,17 @@ fn date_value_is_not_present(date: &usize, r: &Vec<&Data>) -> bool {
|
|
180
185
|
r[*date] == &Data::Empty
|
181
186
|
}
|
182
187
|
|
183
|
-
fn skip_excluded_rows(request_id: &usize, r: &Vec<&Data>, exclusions: &Vec<String>) -> bool {
|
188
|
+
fn skip_excluded_rows(request_id: &usize, status: &Option<&usize>, r: &Vec<&Data>, exclusions: &Vec<String>) -> bool {
|
189
|
+
if let Some(status_index) = status {
|
190
|
+
if let Some(status) = r.get(**status_index) {
|
191
|
+
if let Some(status_str) = status.as_string() {
|
192
|
+
if status_str.eq("Recalled") {
|
193
|
+
return false
|
194
|
+
}
|
195
|
+
}
|
196
|
+
}
|
197
|
+
}
|
198
|
+
|
184
199
|
let value = r[*request_id].to_string();
|
185
200
|
exclusions.contains(&value.to_string())
|
186
201
|
}
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/csv_utils/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: patchwork_csv_utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.20
|
4
|
+
version: 0.1.22
|
5
5
|
platform: x86_64-darwin
|
6
6
|
authors:
|
7
7
|
- kingsley.hendrickse
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-07-08 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Deduplication of CSV files and XLS to CSV conversion.
|
14
14
|
email:
|
@@ -19,6 +19,7 @@ extra_rdoc_files: []
|
|
19
19
|
files:
|
20
20
|
- ".rspec"
|
21
21
|
- ".rubocop.yml"
|
22
|
+
- ".ruby-version"
|
22
23
|
- Cargo.lock
|
23
24
|
- Cargo.toml
|
24
25
|
- Gemfile
|