patchwork_csv_utils 0.1.7-x86_64-darwin → 0.1.9-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/ext/csv_utils/src/utils/csv.rs +31 -10
- data/ext/csv_utils/src/utils/mod.rs +4 -0
- data/ext/csv_utils/src/utils/xls.rs +9 -2
- data/lib/csv_utils/2.7/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.0/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.1/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.2/csv_utils.bundle +0 -0
- data/lib/csv_utils/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2e212bd3e73eeacfee9ebfcb560843920d212939254bfff6ffef90ca3a36fe42
|
4
|
+
data.tar.gz: a22f483c32e99467a020e3ff9a64809fbf7d9946974da06f565f27dc27b46c9b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2d731b4b50a025bee35941e48a5f5bdc4f57c0a829c69daa66610f08b354370a52502f12a2c28aab85260dc05b02d74d421a48f8d661a0cc9914546f9ed01d86
|
7
|
+
data.tar.gz: b46e0f9280ee98c6f4439dff004c0e9716ae61cbae2d615e1d0b81adb4bdd3b489b460af42cf4b436c315dfd8347dfb5069bafbb43117c678343002123165a0f
|
data/Gemfile.lock
CHANGED
@@ -3,9 +3,9 @@ use std::fs::File;
|
|
3
3
|
|
4
4
|
use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Utc};
|
5
5
|
use csv::{StringRecord, Writer};
|
6
|
-
use magnus::{RArray, Ruby};
|
6
|
+
use magnus::{Error, RArray, Ruby};
|
7
7
|
|
8
|
-
use crate::utils::{FileExtension, magnus_err, missing_header};
|
8
|
+
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error};
|
9
9
|
|
10
10
|
pub fn transform_csv(ruby: &Ruby, csv_path: String, target_path: String, exclusions: RArray) -> magnus::error::Result<()> {
|
11
11
|
if !csv_path.has_extension(&["csv"]) {
|
@@ -19,6 +19,7 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String, target_path: String, exclusi
|
|
19
19
|
let mut wtr = Writer::from_path(target_path).map_err(|e| magnus_err(ruby, e, "target_path"))?;
|
20
20
|
let headers = csv.headers().map_err(|e| magnus_err(ruby, e, "csv_path headers"))?;
|
21
21
|
let header_map: HashMap<String, usize> = headers.iter().enumerate().map(|(i, h)| (h.to_string(), i)).collect();
|
22
|
+
let inverse_header_map: HashMap<usize, String> = headers.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
|
22
23
|
|
23
24
|
wtr.write_byte_record(headers.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
|
24
25
|
|
@@ -41,13 +42,13 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String, target_path: String, exclusi
|
|
41
42
|
let record = record.iter().enumerate().map(|(i, c)| {
|
42
43
|
let c = c.trim_end();
|
43
44
|
if i == *date {
|
44
|
-
let current = string_to_datetime(c).ok_or(to_datetime_error(ruby, c, ri,
|
45
|
+
let current = string_to_datetime(c).ok_or(to_datetime_error(ruby, c, ri, "Date"))?;
|
45
46
|
date_value = current;
|
46
47
|
Ok(current.to_string())
|
47
48
|
} else if i == *start || i == *end || i == *actual_start || i == *actual_end {
|
48
|
-
|
49
|
-
let
|
50
|
-
|
49
|
+
if c.is_empty() { return Ok(c.to_string()); }
|
50
|
+
let column_name = get_column_name(&inverse_header_map, &i);
|
51
|
+
process_datetime(ruby, ri, date_value, c, &column_name)
|
51
52
|
} else {
|
52
53
|
Ok(c.to_string())
|
53
54
|
}
|
@@ -62,15 +63,39 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String, target_path: String, exclusi
|
|
62
63
|
Ok(())
|
63
64
|
}
|
64
65
|
|
66
|
+
fn process_datetime(ruby: &Ruby, ri: usize, date_value: NaiveDateTime, c: &str, column_name: &String) -> magnus::error::Result<String> {
|
67
|
+
let maybe_correct = correct_datetime(c);
|
68
|
+
if let Some(correct) = maybe_correct {
|
69
|
+
return Ok(correct.to_string());
|
70
|
+
}
|
71
|
+
|
72
|
+
let current_time = string_to_time(c).ok_or(to_datetime_error(ruby, c, ri, column_name))?;
|
73
|
+
let datetime = transform_time_to_datetime(date_value, current_time);
|
74
|
+
Ok(datetime.to_string())
|
75
|
+
}
|
76
|
+
|
77
|
+
/// Returns the header text stored for column index `i`, or `"Unknown"` when
/// the index has no entry in `inverse_header_map`.
fn get_column_name(inverse_header_map: &HashMap<usize, String>, i: &usize) -> String {
    // `i` is already a reference, so it is handed to `get` as-is; borrowing it
    // again would produce `&&usize`, which is not a valid lookup key type.
    // The fallback string is only allocated when the lookup misses.
    inverse_header_map
        .get(i)
        .cloned()
        .unwrap_or_else(|| "Unknown".to_string())
}
|
82
|
+
|
65
83
|
fn skip_excluded_rows(request_id: &usize, r: &StringRecord, exclusions: &Vec<String>) -> bool {
|
66
84
|
let value = r.get(*request_id).unwrap_or_default();
|
67
85
|
exclusions.contains(&value.to_string())
|
68
86
|
}
|
69
87
|
|
70
88
|
fn string_to_datetime(s: &str) -> Option<NaiveDateTime> {
|
89
|
+
let maybe_correct = correct_datetime(s);
|
90
|
+
if maybe_correct.is_some() { return maybe_correct; }
|
91
|
+
|
71
92
|
NaiveDate::parse_from_str(s, "%d-%b-%y").ok().map(|d| d.and_hms_opt(0, 0, 0)).flatten()
|
72
93
|
}
|
73
94
|
|
95
|
+
fn correct_datetime(s: &str) -> Option<NaiveDateTime> {
|
96
|
+
NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S").ok()
|
97
|
+
}
|
98
|
+
|
74
99
|
fn string_to_time(s: &str) -> Option<NaiveTime> {
|
75
100
|
NaiveTime::parse_from_str(s, "%H:%M").ok()
|
76
101
|
}
|
@@ -79,10 +104,6 @@ fn transform_time_to_datetime(t1: NaiveDateTime, t2: NaiveTime) -> NaiveDateTime
|
|
79
104
|
NaiveDateTime::new(t1.date(), t2)
|
80
105
|
}
|
81
106
|
|
82
|
-
fn to_datetime_error(ruby: &Ruby, value: &str, row: usize, col: usize) -> magnus::Error {
|
83
|
-
magnus::Error::new(ruby.exception_standard_error(), format!("Could not parse datetime '{}', row: {}, col: {}", value, row, col))
|
84
|
-
}
|
85
|
-
|
86
107
|
fn has_empty_first_col_skip_row(record: &StringRecord) -> bool {
|
87
108
|
record[0].is_empty()
|
88
109
|
}
|
@@ -15,6 +15,10 @@ fn magnus_err<E: Error>(ruby: &Ruby, e: E, msg: &str) -> magnus::Error {
|
|
15
15
|
magnus::Error::new(ruby.exception_standard_error(), format!("{}: {}", msg, e.to_string()))
|
16
16
|
}
|
17
17
|
|
18
|
+
fn to_datetime_error(ruby: &Ruby, value: &str, row: usize, col: &str) -> magnus::Error {
|
19
|
+
magnus::Error::new(ruby.exception_standard_error(), format!("Could not parse datetime '{}', row: {}, col: {}", value, row, col))
|
20
|
+
}
|
21
|
+
|
18
22
|
/// Extension check for path-like values.
///
/// Call sites in this crate invoke it on path strings, e.g.
/// `csv_path.has_extension(&["csv"])`.
pub trait FileExtension {
    /// Reports whether `self` has one of the given `extensions`.
    ///
    /// NOTE(review): the implementation is not visible here; presumably the
    /// comparison is against the path's file extension. Confirm there.
    fn has_extension<S>(&self, extensions: &[S]) -> bool
    where
        S: AsRef<str>;
}
|
@@ -6,7 +6,7 @@ use calamine::{Data, open_workbook, Range, Reader, Xls};
|
|
6
6
|
use chrono::{NaiveDateTime, Utc};
|
7
7
|
use magnus::{RArray, Ruby};
|
8
8
|
|
9
|
-
use crate::utils::{FileExtension, magnus_err, missing_header};
|
9
|
+
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error};
|
10
10
|
|
11
11
|
pub fn to_csv(ruby: &Ruby, xls_path: String, target_path: String, exclusions: RArray) -> magnus::error::Result<()> {
|
12
12
|
if !xls_path.has_extension(&["xls"]) {
|
@@ -44,6 +44,9 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_ma
|
|
44
44
|
if skip_excluded_rows(&request_id, r, &exclusions) { continue; }
|
45
45
|
if skip_empty_rows(r) { continue; }
|
46
46
|
if skip_rows_with_no_request_id(&request_id, r) { continue; }
|
47
|
+
if date_value_is_not_present(&date, r) {
|
48
|
+
return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Date value is not present in row: {}", ri)));
|
49
|
+
}
|
47
50
|
|
48
51
|
for (i, c) in r.iter().enumerate() {
|
49
52
|
match *c {
|
@@ -53,7 +56,7 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_ma
|
|
53
56
|
}
|
54
57
|
Data::Float(ref f) => write!(dest, "{}", f),
|
55
58
|
Data::DateTime(ref d) => {
|
56
|
-
let mut current = d.as_datetime().
|
59
|
+
let mut current = d.as_datetime().ok_or(to_datetime_error(ruby, &d.to_string(), ri, "Date"))?;
|
57
60
|
if i == *date {
|
58
61
|
date_value = current;
|
59
62
|
} else if i == *start || i == *end || i == *actual_start || i == *actual_end {
|
@@ -74,6 +77,10 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_ma
|
|
74
77
|
Ok(())
|
75
78
|
}
|
76
79
|
|
80
|
+
fn date_value_is_not_present(date: &usize, r: &[Data]) -> bool {
|
81
|
+
r[*date] == Data::Empty
|
82
|
+
}
|
83
|
+
|
77
84
|
fn skip_excluded_rows(request_id: &usize, r: &[Data], exclusions: &Vec<String>) -> bool {
|
78
85
|
let value = r[*request_id].to_string();
|
79
86
|
exclusions.contains(&value.to_string())
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/csv_utils/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: patchwork_csv_utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.7
|
4
|
+
version: 0.1.9
|
5
5
|
platform: x86_64-darwin
|
6
6
|
authors:
|
7
7
|
- kingsley.hendrickse
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-08-
|
11
|
+
date: 2024-08-08 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Deduplication of CSV files and XLS to CSV conversion.
|
14
14
|
email:
|