patchwork_csv_utils 0.1.21-aarch64-linux → 0.1.23-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: aca603756d24698f85aee053ff0d3684216d2b209c246ae46d695864f9b69229
4
- data.tar.gz: 3077975203564e253de4ec9968611d58f3d486e9a16100ebe2df913fc1d5fe26
3
+ metadata.gz: 6df6491a9c17a0d4a1b59317b03b538928c08a03e4aa2391378e5d2407a86f28
4
+ data.tar.gz: 60a1ff9624bfb08ac17b92829afb6384bb509506fdf42a3ae693e96aeb2f7275
5
5
  SHA512:
6
- metadata.gz: 177c8082986e0e48ab4852f11fd082475117cd0aef6c13fbf44951ca9f6712202b641224a070f339fb288c0c264aec5d0a9a9df711c1e8e3bdc0905dc751b53b
7
- data.tar.gz: f6132a683eede976ae4788bb9126322e2de9b262d9c1ef4ac6d63b9a402a66e98d72dbe4f2269e3c40650519145903791cf4929c642a3d0e8c38f008644adbed
6
+ metadata.gz: 1e5e29a1d67ea11d7bc2086518b2955af94480a5039976c5674317e61e59bb3664aa1c8701cdbcf0d748042e191e0e4ab6290f5438ec9b9c407b476668f68a1f
7
+ data.tar.gz: 57d32644e07ec2566af7745cd042d10ccadb9efd270deb68392389a3c174fae54af39ee4666288f012b59dd294b4d124487d83531a3e1cb2ec07748fbbbfa489
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ ruby-3.0.7
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- patchwork_csv_utils (0.1.21)
4
+ patchwork_csv_utils (0.1.23)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -56,6 +56,7 @@ GEM
56
56
  PLATFORMS
57
57
  arm64-darwin-22
58
58
  arm64-darwin-23
59
+ arm64-darwin-24
59
60
  x86_64-linux
60
61
 
61
62
  DEPENDENCIES
@@ -9,7 +9,7 @@ pub mod utils;
9
9
  fn init() -> Result<(), magnus::Error> {
10
10
  let module = define_module("CsvUtils")?;
11
11
  module.define_singleton_method("dedup", function!(dedup, 4))?;
12
- module.define_singleton_method("to_csv", function!(to_csv, 7))?;
13
- module.define_singleton_method("transform_csv", function!(transform_csv, 7))?;
12
+ module.define_singleton_method("to_csv", function!(to_csv, 8))?;
13
+ module.define_singleton_method("transform_csv", function!(transform_csv, 8))?;
14
14
  Ok(())
15
15
  }
@@ -1,21 +1,27 @@
1
- use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Utc};
1
+ use chrono::{NaiveDateTime, NaiveTime, Utc};
2
2
  use csv::{Reader, StringRecord, Writer};
3
3
  use magnus::{Error, RArray, Ruby};
4
4
  use std::collections::HashMap;
5
5
  use std::fs::File;
6
6
 
7
- use crate::utils::{check_mandatory_headers, create_header_map, headers_as_byte_record, index_of_header_in_mandatory_list, magnus_err, missing_header, missing_value, to_datetime_error, FileExtension};
7
+ use crate::utils::{check_mandatory_headers, correct_datetime, create_header_map, headers_as_byte_record, index_of_header_in_mandatory_list, magnus_err, missing_header, missing_value, string_to_datetime, to_datetime_error, FileExtension};
8
8
 
9
- pub fn transform_csv(ruby: &Ruby, csv_path: String,
10
- target_path: String, exclusions: RArray,
9
+ pub fn transform_csv(ruby: &Ruby,
10
+ csv_path: String,
11
+ target_path: String,
12
+ exclusions: RArray,
11
13
  mandatory_headers: RArray,
12
14
  status_exclusions: RArray,
13
15
  expected_trust_name: String,
14
- is_streamed_file: bool) -> magnus::error::Result<()> {
16
+ is_streamed_file: bool,
17
+ earliest_start_date: Option<String>) -> magnus::error::Result<()> {
15
18
  if !csv_path.has_extension(&["csv"]) {
16
19
  return Err(Error::new(ruby.exception_standard_error(), "csv_path must be a csv file".to_string()));
17
20
  }
18
21
 
22
+ let start_date = earliest_start_date
23
+ .and_then(|date_str| string_to_datetime(&date_str));
24
+
19
25
  let exclusions = RArray::to_vec(exclusions)?;
20
26
  let status_exclusions = RArray::to_vec(status_exclusions)?;
21
27
  let mandatory_headers: Vec<String> = RArray::to_vec(mandatory_headers)?;
@@ -48,7 +54,8 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String,
48
54
 
49
55
  for (ri, record) in mandatory_records.iter().enumerate() {
50
56
 
51
- if skip_excluded_rows(request_id, &record, &exclusions) { continue; }
57
+ if skip_rows_before_start_date(&start_date, &record, &date) { continue; }
58
+ if skip_excluded_rows(request_id, &status, &record, &exclusions) { continue; }
52
59
  if skip_excluded_status_rows(&status, &record, &status_exclusions) { continue; }
53
60
  if has_empty_row_skip(&record) { continue; }
54
61
  if has_empty_first_col_skip_row(&record) { continue; }
@@ -140,27 +147,36 @@ fn get_column_name(inverse_header_map: &HashMap<usize, String>, i: &usize) -> St
140
147
  column_name.to_string()
141
148
  }
142
149
 
143
- fn skip_excluded_rows(request_id: &usize, r: &StringRecord, exclusions: &Vec<String>) -> bool {
150
+ fn skip_excluded_rows(request_id: &usize, status: &Option<&usize>, r: &StringRecord, exclusions: &Vec<String>) -> bool {
151
+ if let Some(status_index) = status {
152
+ if let Some(status) = r.get(**status_index) {
153
+ if status.eq("Recalled") {
154
+ return false
155
+ }
156
+ }
157
+ }
158
+
144
159
  let value = r.get(*request_id).unwrap_or_default();
145
160
  exclusions.contains(&value.to_string())
146
161
  }
147
162
 
163
+ fn skip_rows_before_start_date(&start_date: &Option<NaiveDateTime>, r: &StringRecord, date_index: &usize) -> bool {
164
+ if let Some(start_date) = start_date {
165
+ if let Some(date_str) = r.get(*date_index) {
166
+ if let Some(date) = string_to_datetime(date_str) {
167
+ return date <= start_date;
168
+ }
169
+ }
170
+ }
171
+ false
172
+ }
173
+
148
174
  fn skip_excluded_status_rows(status: &Option<&usize>, r: &StringRecord, exclusions: &Vec<String>) -> bool {
149
175
  status
150
176
  .map(|index| exclusions.contains(&r[*index].to_string()))
151
177
  .unwrap_or(false)
152
178
  }
153
179
 
154
- fn string_to_datetime(s: &str) -> Option<NaiveDateTime> {
155
- let maybe_correct = correct_datetime(s);
156
- if maybe_correct.is_some() { return maybe_correct; }
157
-
158
- NaiveDate::parse_from_str(s, "%d-%b-%y").ok().map(|d| d.and_hms_opt(0, 0, 0)).flatten()
159
- }
160
-
161
- fn correct_datetime(s: &str) -> Option<NaiveDateTime> {
162
- NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S").ok()
163
- }
164
180
 
165
181
  fn string_to_time(s: &str) -> Option<NaiveTime> {
166
182
  NaiveTime::parse_from_str(s, "%H:%M").ok()
@@ -4,6 +4,7 @@ use std::ffi::OsStr;
4
4
  use std::path::Path;
5
5
  use ::csv::{ByteRecord, StringRecord};
6
6
  use magnus::Ruby;
7
+ use chrono::{NaiveDate, NaiveDateTime};
7
8
 
8
9
  pub mod csv;
9
10
  pub mod dedup;
@@ -83,3 +84,19 @@ impl<P: AsRef<Path>> FileExtension for P {
83
84
  }
84
85
  }
85
86
 
87
+ pub fn string_to_datetime(s: &str) -> Option<NaiveDateTime> {
88
+ let maybe_correct = correct_datetime(s);
89
+ if maybe_correct.is_some() { return maybe_correct; }
90
+
91
+ // Try YYYY-MM-DD format
92
+ if let Ok(date) = NaiveDate::parse_from_str(s, "%Y-%m-%d") {
93
+ return date.and_hms_opt(0, 0, 0);
94
+ }
95
+
96
+ NaiveDate::parse_from_str(s, "%d-%b-%y").ok().map(|d| d.and_hms_opt(0, 0, 0)).flatten()
97
+ }
98
+
99
+ pub fn correct_datetime(s: &str) -> Option<NaiveDateTime> {
100
+ NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S").ok()
101
+ }
102
+
@@ -2,11 +2,11 @@ use std::collections::HashMap;
2
2
  use std::fs::File;
3
3
  use std::io::{BufWriter, Write};
4
4
 
5
- use calamine::{open_workbook_auto, Data, Range, Reader};
5
+ use calamine::{open_workbook_auto, Data, DataType, Range, Reader};
6
6
  use chrono::{NaiveDateTime, Timelike, Utc};
7
7
  use magnus::{RArray, Ruby};
8
8
 
9
- use crate::utils::{check_mandatory_headers, index_of_header_in_mandatory_list, magnus_err, missing_header, missing_value, to_datetime_error, FileExtension};
9
+ use crate::utils::{check_mandatory_headers, index_of_header_in_mandatory_list, magnus_err, missing_header, missing_value, string_to_datetime, to_datetime_error, FileExtension};
10
10
 
11
11
  pub fn to_csv(ruby: &Ruby, xls_path: String,
12
12
  target_path: String,
@@ -14,7 +14,8 @@ pub fn to_csv(ruby: &Ruby, xls_path: String,
14
14
  mandatory_headers: RArray,
15
15
  status_exclusions: RArray,
16
16
  expected_trust_name: String,
17
- is_streamed_file: bool
17
+ is_streamed_file: bool,
18
+ earliest_start_date: Option<String>
18
19
  ) -> magnus::error::Result<()> {
19
20
  if !xls_path.has_extension(&["xls","xlsx"]) {
20
21
  return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls or xlsx file".to_string()));
@@ -23,6 +24,9 @@ pub fn to_csv(ruby: &Ruby, xls_path: String,
23
24
  let exclusions = RArray::to_vec(exclusions)?;
24
25
  let mandatory_headers: Vec<String> = RArray::to_vec(mandatory_headers)?;
25
26
  let status_exclusions = RArray::to_vec(status_exclusions)?;
27
+
28
+ let start_date = earliest_start_date
29
+ .and_then(|date_str| string_to_datetime(&date_str));
26
30
 
27
31
  let mut workbook = open_workbook_auto(&xls_path)
28
32
  .map_err(|e| magnus_err(ruby, e, format!("could not open workbook: {}", xls_path).as_str()))?;
@@ -41,7 +45,7 @@ pub fn to_csv(ruby: &Ruby, xls_path: String,
41
45
  let csv_out_file = File::create(target_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not create csv file: {}", target_path).as_str()))?;
42
46
  let mut dest = BufWriter::new(csv_out_file);
43
47
 
44
- write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers, headers_list, status_exclusions, expected_trust_name, is_streamed_file)
48
+ write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers, headers_list, status_exclusions, expected_trust_name, is_streamed_file, start_date)
45
49
  }
46
50
 
47
51
  fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
@@ -50,7 +54,8 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
50
54
  headers_list: Vec<String>,
51
55
  status_exclusions: Vec<String>,
52
56
  expected_trust_name: String,
53
- is_streamed_file: bool) -> magnus::error::Result<()> {
57
+ is_streamed_file: bool,
58
+ start_date: Option<NaiveDateTime>) -> magnus::error::Result<()> {
54
59
  let n = mandatory_headers.len() - 1;
55
60
  let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
56
61
  let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
@@ -66,7 +71,8 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
66
71
  for (ri, r) in mandatory_rows.into_iter().enumerate() {
67
72
  let mut date_value = Utc::now().naive_utc();
68
73
 
69
- if skip_excluded_rows(&request_id, &r, &exclusions) { continue; }
74
+ if skip_rows_before_start_date(&start_date, &r, &date) { continue; }
75
+ if skip_excluded_rows(&request_id, &status, &r, &exclusions) { continue; }
70
76
  if skip_excluded_status_rows(&status, &r, &status_exclusions) { continue; }
71
77
  if skip_empty_rows(&r) { continue; }
72
78
  if skip_rows_with_no_request_id(&request_id, &r) { continue; }
@@ -185,7 +191,17 @@ fn date_value_is_not_present(date: &usize, r: &Vec<&Data>) -> bool {
185
191
  r[*date] == &Data::Empty
186
192
  }
187
193
 
188
- fn skip_excluded_rows(request_id: &usize, r: &Vec<&Data>, exclusions: &Vec<String>) -> bool {
194
+ fn skip_excluded_rows(request_id: &usize, status: &Option<&usize>, r: &Vec<&Data>, exclusions: &Vec<String>) -> bool {
195
+ if let Some(status_index) = status {
196
+ if let Some(status) = r.get(**status_index) {
197
+ if let Some(status_str) = status.as_string() {
198
+ if status_str.eq("Recalled") {
199
+ return false
200
+ }
201
+ }
202
+ }
203
+ }
204
+
189
205
  let value = r[*request_id].to_string();
190
206
  exclusions.contains(&value.to_string())
191
207
  }
@@ -204,6 +220,27 @@ fn skip_rows_with_no_request_id(request_id: &usize, r: &Vec<&Data>) -> bool {
204
220
  r[*request_id] == &Data::Empty
205
221
  }
206
222
 
223
+ fn skip_rows_before_start_date(start_date: &Option<NaiveDateTime>, r: &Vec<&Data>, date_index: &usize) -> bool {
224
+ if let Some(start_date) = start_date {
225
+ if let Some(date_data) = r.get(*date_index) {
226
+ match date_data {
227
+ Data::DateTime(d) => {
228
+ if let Some(date) = d.as_datetime() {
229
+ return date <= *start_date;
230
+ }
231
+ }
232
+ Data::DateTimeIso(s) => {
233
+ if let Some(date) = string_to_datetime(s) {
234
+ return date <= *start_date;
235
+ }
236
+ }
237
+ _ => {}
238
+ }
239
+ }
240
+ }
241
+ false
242
+ }
243
+
207
244
  fn transform_time_to_datetime(t1: NaiveDateTime, t2: NaiveDateTime) -> NaiveDateTime {
208
245
  NaiveDateTime::new(t1.date(), t2.time())
209
246
  }
Binary file
Binary file
Binary file
Binary file
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module CsvUtils
4
- VERSION = '0.1.21'
4
+ VERSION = '0.1.23'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: patchwork_csv_utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.21
4
+ version: 0.1.23
5
5
  platform: aarch64-linux
6
6
  authors:
7
7
  - kingsley.hendrickse
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-04-14 00:00:00.000000000 Z
11
+ date: 2025-07-08 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Deduplication of CSV files and XLS to CSV conversion.
14
14
  email:
@@ -19,6 +19,7 @@ extra_rdoc_files: []
19
19
  files:
20
20
  - ".rspec"
21
21
  - ".rubocop.yml"
22
+ - ".ruby-version"
22
23
  - Cargo.lock
23
24
  - Cargo.toml
24
25
  - Gemfile