patchwork_csv_utils 0.1.21-arm64-darwin → 0.1.23-arm64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -0
- data/Gemfile.lock +2 -1
- data/ext/csv_utils/src/lib.rs +2 -2
- data/ext/csv_utils/src/utils/csv.rs +33 -17
- data/ext/csv_utils/src/utils/mod.rs +17 -0
- data/ext/csv_utils/src/utils/xls.rs +44 -7
- data/lib/csv_utils/2.7/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.0/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.1/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.2/csv_utils.bundle +0 -0
- data/lib/csv_utils/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ff496709a23c4cfeba6216aef09b3feb5b32609362b45cdd269e8b1e3d610adc
|
4
|
+
data.tar.gz: 21d6e4dd7e4cb58150b46c8efb771a6f864d19c20b6c57b7fa9d79ecd303b69b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3b4d883cf490921a2365f70785eff0673d01cf4be369350b1953d83c7a9a39b81290bcac66d6747484d7ae37d4c4a491ac49b6ce9a644e67cd1c66e735cc74f6
|
7
|
+
data.tar.gz: e4703bc008ffbbe68fa02fd4eea982e29da38e07c4df0c1d776e71e921b2475bcf4dc91f076907827448e8e106493be7e4cb5fc7e8af5d955ff1856381a48afe
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
ruby-3.0.7
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
patchwork_csv_utils (0.1.21)
|
4
|
+
patchwork_csv_utils (0.1.23)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
@@ -56,6 +56,7 @@ GEM
|
|
56
56
|
PLATFORMS
|
57
57
|
arm64-darwin-22
|
58
58
|
arm64-darwin-23
|
59
|
+
arm64-darwin-24
|
59
60
|
x86_64-linux
|
60
61
|
|
61
62
|
DEPENDENCIES
|
data/ext/csv_utils/src/lib.rs
CHANGED
@@ -9,7 +9,7 @@ pub mod utils;
|
|
9
9
|
fn init() -> Result<(), magnus::Error> {
|
10
10
|
let module = define_module("CsvUtils")?;
|
11
11
|
module.define_singleton_method("dedup", function!(dedup, 4))?;
|
12
|
-
module.define_singleton_method("to_csv", function!(to_csv, 7))?;
|
13
|
-
module.define_singleton_method("transform_csv", function!(transform_csv, 7))?;
|
12
|
+
module.define_singleton_method("to_csv", function!(to_csv, 8))?;
|
13
|
+
module.define_singleton_method("transform_csv", function!(transform_csv, 8))?;
|
14
14
|
Ok(())
|
15
15
|
}
|
@@ -1,21 +1,27 @@
|
|
1
|
-
use chrono::{
|
1
|
+
use chrono::{NaiveDateTime, NaiveTime, Utc};
|
2
2
|
use csv::{Reader, StringRecord, Writer};
|
3
3
|
use magnus::{Error, RArray, Ruby};
|
4
4
|
use std::collections::HashMap;
|
5
5
|
use std::fs::File;
|
6
6
|
|
7
|
-
use crate::utils::{check_mandatory_headers, create_header_map, headers_as_byte_record, index_of_header_in_mandatory_list, magnus_err, missing_header, missing_value, to_datetime_error, FileExtension};
|
7
|
+
use crate::utils::{check_mandatory_headers, correct_datetime, create_header_map, headers_as_byte_record, index_of_header_in_mandatory_list, magnus_err, missing_header, missing_value, string_to_datetime, to_datetime_error, FileExtension};
|
8
8
|
|
9
|
-
pub fn transform_csv(ruby: &Ruby,
|
10
|
-
|
9
|
+
pub fn transform_csv(ruby: &Ruby,
|
10
|
+
csv_path: String,
|
11
|
+
target_path: String,
|
12
|
+
exclusions: RArray,
|
11
13
|
mandatory_headers: RArray,
|
12
14
|
status_exclusions: RArray,
|
13
15
|
expected_trust_name: String,
|
14
|
-
is_streamed_file: bool
|
16
|
+
is_streamed_file: bool,
|
17
|
+
earliest_start_date: Option<String>) -> magnus::error::Result<()> {
|
15
18
|
if !csv_path.has_extension(&["csv"]) {
|
16
19
|
return Err(Error::new(ruby.exception_standard_error(), "csv_path must be a csv file".to_string()));
|
17
20
|
}
|
18
21
|
|
22
|
+
let start_date = earliest_start_date
|
23
|
+
.and_then(|date_str| string_to_datetime(&date_str));
|
24
|
+
|
19
25
|
let exclusions = RArray::to_vec(exclusions)?;
|
20
26
|
let status_exclusions = RArray::to_vec(status_exclusions)?;
|
21
27
|
let mandatory_headers: Vec<String> = RArray::to_vec(mandatory_headers)?;
|
@@ -48,7 +54,8 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String,
|
|
48
54
|
|
49
55
|
for (ri, record) in mandatory_records.iter().enumerate() {
|
50
56
|
|
51
|
-
if skip_excluded_rows(request_id, &record, &exclusions) { continue; }
|
57
|
+
if skip_rows_before_start_date(&start_date, &record, &date) { continue; }
|
58
|
+
if skip_excluded_rows(request_id, &status, &record, &exclusions) { continue; }
|
52
59
|
if skip_excluded_status_rows(&status, &record, &status_exclusions) { continue; }
|
53
60
|
if has_empty_row_skip(&record) { continue; }
|
54
61
|
if has_empty_first_col_skip_row(&record) { continue; }
|
@@ -140,27 +147,36 @@ fn get_column_name(inverse_header_map: &HashMap<usize, String>, i: &usize) -> St
|
|
140
147
|
column_name.to_string()
|
141
148
|
}
|
142
149
|
|
143
|
-
fn skip_excluded_rows(request_id: &usize, r: &StringRecord, exclusions: &Vec<String>) -> bool {
|
150
|
+
fn skip_excluded_rows(request_id: &usize, status: &Option<&usize>, r: &StringRecord, exclusions: &Vec<String>) -> bool {
|
151
|
+
if let Some(status_index) = status {
|
152
|
+
if let Some(status) = r.get(**status_index) {
|
153
|
+
if status.eq("Recalled") {
|
154
|
+
return false
|
155
|
+
}
|
156
|
+
}
|
157
|
+
}
|
158
|
+
|
144
159
|
let value = r.get(*request_id).unwrap_or_default();
|
145
160
|
exclusions.contains(&value.to_string())
|
146
161
|
}
|
147
162
|
|
163
|
+
fn skip_rows_before_start_date(&start_date: &Option<NaiveDateTime>, r: &StringRecord, date_index: &usize) -> bool {
|
164
|
+
if let Some(start_date) = start_date {
|
165
|
+
if let Some(date_str) = r.get(*date_index) {
|
166
|
+
if let Some(date) = string_to_datetime(date_str) {
|
167
|
+
return date <= start_date;
|
168
|
+
}
|
169
|
+
}
|
170
|
+
}
|
171
|
+
false
|
172
|
+
}
|
173
|
+
|
148
174
|
fn skip_excluded_status_rows(status: &Option<&usize>, r: &StringRecord, exclusions: &Vec<String>) -> bool {
|
149
175
|
status
|
150
176
|
.map(|index| exclusions.contains(&r[*index].to_string()))
|
151
177
|
.unwrap_or(false)
|
152
178
|
}
|
153
179
|
|
154
|
-
fn string_to_datetime(s: &str) -> Option<NaiveDateTime> {
|
155
|
-
let maybe_correct = correct_datetime(s);
|
156
|
-
if maybe_correct.is_some() { return maybe_correct; }
|
157
|
-
|
158
|
-
NaiveDate::parse_from_str(s, "%d-%b-%y").ok().map(|d| d.and_hms_opt(0, 0, 0)).flatten()
|
159
|
-
}
|
160
|
-
|
161
|
-
fn correct_datetime(s: &str) -> Option<NaiveDateTime> {
|
162
|
-
NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S").ok()
|
163
|
-
}
|
164
180
|
|
165
181
|
fn string_to_time(s: &str) -> Option<NaiveTime> {
|
166
182
|
NaiveTime::parse_from_str(s, "%H:%M").ok()
|
@@ -4,6 +4,7 @@ use std::ffi::OsStr;
|
|
4
4
|
use std::path::Path;
|
5
5
|
use ::csv::{ByteRecord, StringRecord};
|
6
6
|
use magnus::Ruby;
|
7
|
+
use chrono::{NaiveDate, NaiveDateTime};
|
7
8
|
|
8
9
|
pub mod csv;
|
9
10
|
pub mod dedup;
|
@@ -83,3 +84,19 @@ impl<P: AsRef<Path>> FileExtension for P {
|
|
83
84
|
}
|
84
85
|
}
|
85
86
|
|
87
|
+
pub fn string_to_datetime(s: &str) -> Option<NaiveDateTime> {
|
88
|
+
let maybe_correct = correct_datetime(s);
|
89
|
+
if maybe_correct.is_some() { return maybe_correct; }
|
90
|
+
|
91
|
+
// Try YYYY-MM-DD format
|
92
|
+
if let Ok(date) = NaiveDate::parse_from_str(s, "%Y-%m-%d") {
|
93
|
+
return date.and_hms_opt(0, 0, 0);
|
94
|
+
}
|
95
|
+
|
96
|
+
NaiveDate::parse_from_str(s, "%d-%b-%y").ok().map(|d| d.and_hms_opt(0, 0, 0)).flatten()
|
97
|
+
}
|
98
|
+
|
99
|
+
pub fn correct_datetime(s: &str) -> Option<NaiveDateTime> {
|
100
|
+
NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S").ok()
|
101
|
+
}
|
102
|
+
|
@@ -2,11 +2,11 @@ use std::collections::HashMap;
|
|
2
2
|
use std::fs::File;
|
3
3
|
use std::io::{BufWriter, Write};
|
4
4
|
|
5
|
-
use calamine::{open_workbook_auto, Data, Range, Reader};
|
5
|
+
use calamine::{open_workbook_auto, Data, DataType, Range, Reader};
|
6
6
|
use chrono::{NaiveDateTime, Timelike, Utc};
|
7
7
|
use magnus::{RArray, Ruby};
|
8
8
|
|
9
|
-
use crate::utils::{check_mandatory_headers, index_of_header_in_mandatory_list, magnus_err, missing_header, missing_value, to_datetime_error, FileExtension};
|
9
|
+
use crate::utils::{check_mandatory_headers, index_of_header_in_mandatory_list, magnus_err, missing_header, missing_value, string_to_datetime, to_datetime_error, FileExtension};
|
10
10
|
|
11
11
|
pub fn to_csv(ruby: &Ruby, xls_path: String,
|
12
12
|
target_path: String,
|
@@ -14,7 +14,8 @@ pub fn to_csv(ruby: &Ruby, xls_path: String,
|
|
14
14
|
mandatory_headers: RArray,
|
15
15
|
status_exclusions: RArray,
|
16
16
|
expected_trust_name: String,
|
17
|
-
is_streamed_file: bool
|
17
|
+
is_streamed_file: bool,
|
18
|
+
earliest_start_date: Option<String>
|
18
19
|
) -> magnus::error::Result<()> {
|
19
20
|
if !xls_path.has_extension(&["xls","xlsx"]) {
|
20
21
|
return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls or xlsx file".to_string()));
|
@@ -23,6 +24,9 @@ pub fn to_csv(ruby: &Ruby, xls_path: String,
|
|
23
24
|
let exclusions = RArray::to_vec(exclusions)?;
|
24
25
|
let mandatory_headers: Vec<String> = RArray::to_vec(mandatory_headers)?;
|
25
26
|
let status_exclusions = RArray::to_vec(status_exclusions)?;
|
27
|
+
|
28
|
+
let start_date = earliest_start_date
|
29
|
+
.and_then(|date_str| string_to_datetime(&date_str));
|
26
30
|
|
27
31
|
let mut workbook = open_workbook_auto(&xls_path)
|
28
32
|
.map_err(|e| magnus_err(ruby, e, format!("could not open workbook: {}", xls_path).as_str()))?;
|
@@ -41,7 +45,7 @@ pub fn to_csv(ruby: &Ruby, xls_path: String,
|
|
41
45
|
let csv_out_file = File::create(target_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not create csv file: {}", target_path).as_str()))?;
|
42
46
|
let mut dest = BufWriter::new(csv_out_file);
|
43
47
|
|
44
|
-
write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers, headers_list, status_exclusions, expected_trust_name, is_streamed_file)
|
48
|
+
write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers, headers_list, status_exclusions, expected_trust_name, is_streamed_file, start_date)
|
45
49
|
}
|
46
50
|
|
47
51
|
fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
@@ -50,7 +54,8 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
|
50
54
|
headers_list: Vec<String>,
|
51
55
|
status_exclusions: Vec<String>,
|
52
56
|
expected_trust_name: String,
|
53
|
-
is_streamed_file: bool
|
57
|
+
is_streamed_file: bool,
|
58
|
+
start_date: Option<NaiveDateTime>) -> magnus::error::Result<()> {
|
54
59
|
let n = mandatory_headers.len() - 1;
|
55
60
|
let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
|
56
61
|
let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
|
@@ -66,7 +71,8 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
|
66
71
|
for (ri, r) in mandatory_rows.into_iter().enumerate() {
|
67
72
|
let mut date_value = Utc::now().naive_utc();
|
68
73
|
|
69
|
-
if skip_excluded_rows(&request_id, &r, &exclusions) { continue; }
|
74
|
+
if skip_rows_before_start_date(&start_date, &r, &date) { continue; }
|
75
|
+
if skip_excluded_rows(&request_id, &status, &r, &exclusions) { continue; }
|
70
76
|
if skip_excluded_status_rows(&status, &r, &status_exclusions) { continue; }
|
71
77
|
if skip_empty_rows(&r) { continue; }
|
72
78
|
if skip_rows_with_no_request_id(&request_id, &r) { continue; }
|
@@ -185,7 +191,17 @@ fn date_value_is_not_present(date: &usize, r: &Vec<&Data>) -> bool {
|
|
185
191
|
r[*date] == &Data::Empty
|
186
192
|
}
|
187
193
|
|
188
|
-
fn skip_excluded_rows(request_id: &usize, r: &Vec<&Data>, exclusions: &Vec<String>) -> bool {
|
194
|
+
fn skip_excluded_rows(request_id: &usize, status: &Option<&usize>, r: &Vec<&Data>, exclusions: &Vec<String>) -> bool {
|
195
|
+
if let Some(status_index) = status {
|
196
|
+
if let Some(status) = r.get(**status_index) {
|
197
|
+
if let Some(status_str) = status.as_string() {
|
198
|
+
if status_str.eq("Recalled") {
|
199
|
+
return false
|
200
|
+
}
|
201
|
+
}
|
202
|
+
}
|
203
|
+
}
|
204
|
+
|
189
205
|
let value = r[*request_id].to_string();
|
190
206
|
exclusions.contains(&value.to_string())
|
191
207
|
}
|
@@ -204,6 +220,27 @@ fn skip_rows_with_no_request_id(request_id: &usize, r: &Vec<&Data>) -> bool {
|
|
204
220
|
r[*request_id] == &Data::Empty
|
205
221
|
}
|
206
222
|
|
223
|
+
fn skip_rows_before_start_date(start_date: &Option<NaiveDateTime>, r: &Vec<&Data>, date_index: &usize) -> bool {
|
224
|
+
if let Some(start_date) = start_date {
|
225
|
+
if let Some(date_data) = r.get(*date_index) {
|
226
|
+
match date_data {
|
227
|
+
Data::DateTime(d) => {
|
228
|
+
if let Some(date) = d.as_datetime() {
|
229
|
+
return date <= *start_date;
|
230
|
+
}
|
231
|
+
}
|
232
|
+
Data::DateTimeIso(s) => {
|
233
|
+
if let Some(date) = string_to_datetime(s) {
|
234
|
+
return date <= *start_date;
|
235
|
+
}
|
236
|
+
}
|
237
|
+
_ => {}
|
238
|
+
}
|
239
|
+
}
|
240
|
+
}
|
241
|
+
false
|
242
|
+
}
|
243
|
+
|
207
244
|
fn transform_time_to_datetime(t1: NaiveDateTime, t2: NaiveDateTime) -> NaiveDateTime {
|
208
245
|
NaiveDateTime::new(t1.date(), t2.time())
|
209
246
|
}
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/csv_utils/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: patchwork_csv_utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.21
|
4
|
+
version: 0.1.23
|
5
5
|
platform: arm64-darwin
|
6
6
|
authors:
|
7
7
|
- kingsley.hendrickse
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-
|
11
|
+
date: 2025-07-08 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Deduplication of CSV files and XLS to CSV conversion.
|
14
14
|
email:
|
@@ -19,6 +19,7 @@ extra_rdoc_files: []
|
|
19
19
|
files:
|
20
20
|
- ".rspec"
|
21
21
|
- ".rubocop.yml"
|
22
|
+
- ".ruby-version"
|
22
23
|
- Cargo.lock
|
23
24
|
- Cargo.toml
|
24
25
|
- Gemfile
|