patchwork_csv_utils 0.1.16-x86_64-linux → 0.1.18-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/ext/csv_utils/src/lib.rs +2 -2
- data/ext/csv_utils/src/utils/csv.rs +12 -6
- data/ext/csv_utils/src/utils/mod.rs +4 -9
- data/ext/csv_utils/src/utils/xls.rs +64 -14
- data/lib/csv_utils/2.7/csv_utils.so +0 -0
- data/lib/csv_utils/3.0/csv_utils.so +0 -0
- data/lib/csv_utils/3.1/csv_utils.so +0 -0
- data/lib/csv_utils/3.2/csv_utils.so +0 -0
- data/lib/csv_utils/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c53dcc394dd22b4c32d7201914e7c9fd1d94f668365c45b5a0466bc9820e9bf2
|
4
|
+
data.tar.gz: 85f24589e760b680d325a8900c2123ce52c8602bef9ed4b3db82561238924b8b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 36306ea0ff3606f5325f3d97d1ef8a84e93f3879b4652574e8226781ae01aeaef79448be87edb3a305a44290581d0c8ebbb8d8c677395758750b1a0af680a5c4
|
7
|
+
data.tar.gz: b236321552dcc55a1a2d05772f0272fc21c65a707f94b4f67ebd068b948a9dd66d001097cbf965d302cc71202e8dbad67e632323a7ee68c46c4cc865965e20a4
|
data/Gemfile.lock
CHANGED
data/ext/csv_utils/src/lib.rs
CHANGED
@@ -9,7 +9,7 @@ pub mod utils;
|
|
9
9
|
fn init() -> Result<(), magnus::Error> {
|
10
10
|
let module = define_module("CsvUtils")?;
|
11
11
|
module.define_singleton_method("dedup", function!(dedup, 4))?;
|
12
|
-
module.define_singleton_method("to_csv", function!(to_csv,
|
13
|
-
module.define_singleton_method("transform_csv", function!(transform_csv,
|
12
|
+
module.define_singleton_method("to_csv", function!(to_csv, 5))?;
|
13
|
+
module.define_singleton_method("transform_csv", function!(transform_csv, 5))?;
|
14
14
|
Ok(())
|
15
15
|
}
|
@@ -4,13 +4,12 @@ use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Utc};
|
|
4
4
|
use csv::{Reader, StringRecord, Writer};
|
5
5
|
use magnus::{Error, RArray, Ruby};
|
6
6
|
|
7
|
-
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, create_header_map, missing_value, headers_as_byte_record, index_of_header_in_mandatory_list
|
7
|
+
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, create_header_map, missing_value, headers_as_byte_record, index_of_header_in_mandatory_list};
|
8
8
|
|
9
9
|
pub fn transform_csv(ruby: &Ruby, csv_path: String,
|
10
10
|
target_path: String, exclusions: RArray,
|
11
11
|
mandatory_headers: RArray,
|
12
|
-
status_exclusions: RArray
|
13
|
-
expected_trust_name: String,) -> magnus::error::Result<()> {
|
12
|
+
status_exclusions: RArray) -> magnus::error::Result<()> {
|
14
13
|
if !csv_path.has_extension(&["csv"]) {
|
15
14
|
return Err(Error::new(ruby.exception_standard_error(), "csv_path must be a csv file".to_string()));
|
16
15
|
}
|
@@ -41,7 +40,6 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String,
|
|
41
40
|
let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
|
42
41
|
let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
|
43
42
|
let status = header_map.get("Status");
|
44
|
-
let trust_name = header_map.get("Trust").ok_or(missing_header(ruby, "Trust"))?;
|
45
43
|
|
46
44
|
let mandatory_records = get_mandatory_records(&ruby, &mut csv, &headers_list, &mandatory_headers)?;
|
47
45
|
|
@@ -60,8 +58,6 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String,
|
|
60
58
|
let column_value = record.get(*column_index).ok_or(missing_value(ruby, column))?;
|
61
59
|
let column_value = column_value.trim_end();
|
62
60
|
|
63
|
-
validate_trust_name(ruby, &expected_trust_name, trust_name, ri, i, &column_value.to_string())?;
|
64
|
-
|
65
61
|
if i == *date {
|
66
62
|
let current = string_to_datetime(column_value).ok_or(to_datetime_error(ruby, column_value, ri, "Date"))?;
|
67
63
|
date_value = current;
|
@@ -87,6 +83,16 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String,
|
|
87
83
|
Ok(())
|
88
84
|
}
|
89
85
|
|
86
|
+
|
87
|
+
fn validate_trust_name(ruby: &Ruby, expected_trust_name: &String, trust_name: &usize, ri: usize, i: usize, s: &String) -> magnus::error::Result<()> {
|
88
|
+
if i == *trust_name {
|
89
|
+
if s != &expected_trust_name.clone() {
|
90
|
+
return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Trust actual name: '{}' is not as expected: '{}'", s, expected_trust_name)));
|
91
|
+
}
|
92
|
+
}
|
93
|
+
Ok(())
|
94
|
+
}
|
95
|
+
|
90
96
|
fn get_mandatory_records(ruby: &Ruby, csv: &mut Reader<File>, csv_header_list: &Vec<String>, mandatory_headers_list: &Vec<String>) -> magnus::error::Result<Vec<StringRecord>> {
|
91
97
|
let inverse_header_map: HashMap<usize, String> = csv_header_list.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
|
92
98
|
|
@@ -9,15 +9,6 @@ pub mod csv;
|
|
9
9
|
pub mod dedup;
|
10
10
|
pub mod xls;
|
11
11
|
|
12
|
-
fn validate_trust_name(ruby: &Ruby, expected_trust_name: &String, trust_name: &usize, ri: usize, i: usize, s: &String) -> magnus::error::Result<()> {
|
13
|
-
if ri > 0 && i == *trust_name {
|
14
|
-
if s != &expected_trust_name.clone() {
|
15
|
-
return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Trust actual name: '{}' is not as expected: '{}'", s, expected_trust_name)));
|
16
|
-
}
|
17
|
-
}
|
18
|
-
Ok(())
|
19
|
-
}
|
20
|
-
|
21
12
|
fn missing_header(ruby: &Ruby, header: &str) -> magnus::Error {
|
22
13
|
magnus::Error::new(ruby.exception_standard_error(), format!("Missing '{}' header", header))
|
23
14
|
}
|
@@ -74,6 +65,7 @@ fn create_header_map(headers: &Vec<String>) -> HashMap<String, usize> {
|
|
74
65
|
|
75
66
|
pub trait FileExtension {
|
76
67
|
fn has_extension<S: AsRef<str>>(&self, extensions: &[S]) -> bool;
|
68
|
+
fn extension(&self) -> Option<&str>;
|
77
69
|
}
|
78
70
|
|
79
71
|
impl<P: AsRef<Path>> FileExtension for P {
|
@@ -86,5 +78,8 @@ impl<P: AsRef<Path>> FileExtension for P {
|
|
86
78
|
|
87
79
|
false
|
88
80
|
}
|
81
|
+
fn extension(&self) -> Option<&str> {
|
82
|
+
self.as_ref().extension().and_then(OsStr::to_str)
|
83
|
+
}
|
89
84
|
}
|
90
85
|
|
@@ -2,28 +2,31 @@ use std::collections::HashMap;
|
|
2
2
|
use std::fs::File;
|
3
3
|
use std::io::{BufWriter, Write};
|
4
4
|
|
5
|
-
use calamine::{Data, open_workbook, Range, Reader, Xls};
|
6
|
-
use chrono::{NaiveDateTime, Utc};
|
5
|
+
use calamine::{Data, open_workbook, Range, Reader, Xls, open_workbook_auto};
|
6
|
+
use chrono::{NaiveDateTime, Timelike, Utc};
|
7
7
|
use magnus::{RArray, Ruby};
|
8
8
|
|
9
|
-
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, missing_value, index_of_header_in_mandatory_list
|
9
|
+
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, missing_value, index_of_header_in_mandatory_list};
|
10
10
|
|
11
11
|
pub fn to_csv(ruby: &Ruby, xls_path: String,
|
12
12
|
target_path: String,
|
13
13
|
exclusions: RArray,
|
14
14
|
mandatory_headers: RArray,
|
15
15
|
status_exclusions: RArray,
|
16
|
-
expected_trust_name: String,
|
17
16
|
) -> magnus::error::Result<()> {
|
18
|
-
if !xls_path.has_extension(&["xls"]) {
|
19
|
-
return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls file".to_string()));
|
17
|
+
if !xls_path.has_extension(&["xls","xlsx"]) {
|
18
|
+
return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls or xlsx file".to_string()));
|
20
19
|
}
|
21
20
|
|
22
21
|
let exclusions = RArray::to_vec(exclusions)?;
|
23
22
|
let mandatory_headers: Vec<String> = RArray::to_vec(mandatory_headers)?;
|
24
23
|
let status_exclusions = RArray::to_vec(status_exclusions)?;
|
25
24
|
|
26
|
-
|
25
|
+
|
26
|
+
let mut workbook = open_workbook_auto(&xls_path)
|
27
|
+
.map_err(|e| magnus_err(ruby, e, format!("could not open workbook: {}", xls_path).as_str()))?;
|
28
|
+
|
29
|
+
|
27
30
|
let range = workbook.worksheet_range_at(0)
|
28
31
|
.ok_or(magnus::Error::new(ruby.exception_standard_error(), "no worksheet found in xls".to_string()))
|
29
32
|
.and_then(|r| r.map_err(|e| magnus_err(ruby, e, "could not read worksheet range")))?;
|
@@ -38,15 +41,14 @@ pub fn to_csv(ruby: &Ruby, xls_path: String,
|
|
38
41
|
let csv_out_file = File::create(target_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not create csv file: {}", target_path).as_str()))?;
|
39
42
|
let mut dest = BufWriter::new(csv_out_file);
|
40
43
|
|
41
|
-
write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers, headers_list, status_exclusions
|
44
|
+
write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers, headers_list, status_exclusions)
|
42
45
|
}
|
43
46
|
|
44
47
|
fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
45
48
|
header_map: HashMap<String, usize>, exclusions: Vec<String>,
|
46
49
|
mandatory_headers: Vec<String>,
|
47
50
|
headers_list: Vec<String>,
|
48
|
-
status_exclusions: Vec<String
|
49
|
-
expected_trust_name: String) -> magnus::error::Result<()> {
|
51
|
+
status_exclusions: Vec<String>) -> magnus::error::Result<()> {
|
50
52
|
let n = mandatory_headers.len() - 1;
|
51
53
|
let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
|
52
54
|
let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
|
@@ -55,7 +57,6 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
|
55
57
|
let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
|
56
58
|
let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
|
57
59
|
let status = header_map.get("Status");
|
58
|
-
let trust_name = header_map.get("Trust").ok_or(missing_header(ruby, "Trust"))?;
|
59
60
|
|
60
61
|
let mandatory_rows = get_mandatory_records(ruby, range, &headers_list, &mandatory_headers)?;
|
61
62
|
|
@@ -69,6 +70,7 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
|
69
70
|
if date_value_is_not_present(&date, &r) {
|
70
71
|
return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Date value is not present in row: {}", ri)));
|
71
72
|
}
|
73
|
+
// validate_trust_name(ruby, &expected_trust_name, trust_name, ri, &r)?;
|
72
74
|
|
73
75
|
for (i, c) in mandatory_headers.iter().enumerate() {
|
74
76
|
let column_index = header_map.get(c).ok_or(missing_header(ruby, c))?;
|
@@ -76,11 +78,49 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
|
76
78
|
|
77
79
|
match *c {
|
78
80
|
Data::Empty => Ok(()),
|
79
|
-
Data::String(ref s) | Data::
|
80
|
-
validate_trust_name(ruby, &expected_trust_name, trust_name, ri, i, s)?;
|
81
|
+
Data::String(ref s) | Data::DurationIso(ref s) => {
|
81
82
|
handle_commas(dest, s)
|
82
83
|
}
|
83
84
|
Data::Float(ref f) => write!(dest, "{}", f),
|
85
|
+
Data::DateTimeIso(ref s) => {
|
86
|
+
// Normalize the string to ensure manageable precision
|
87
|
+
let normalized_s = if s.contains('.') {
|
88
|
+
let parts: Vec<&str> = s.split('.').collect();
|
89
|
+
format!("{}.{}", parts[0], &parts[1][..std::cmp::min(parts[1].len(), 6)]) // Keep up to 6 fractional seconds
|
90
|
+
} else {
|
91
|
+
s.to_string()
|
92
|
+
};
|
93
|
+
|
94
|
+
// Attempt to parse the normalized string as a full datetime
|
95
|
+
let mut current = NaiveDateTime::parse_from_str(&normalized_s, "%Y-%m-%dT%H:%M:%S%.f")
|
96
|
+
.or_else(|_| {
|
97
|
+
// If parsing as datetime fails, try parsing as date-only
|
98
|
+
NaiveDateTime::parse_from_str(&format!("{}T00:00:00", normalized_s), "%Y-%m-%dT%H:%M:%S%.f")
|
99
|
+
})
|
100
|
+
.or_else(|_| {
|
101
|
+
// If parsing as date-only fails, try parsing as time-only
|
102
|
+
NaiveDateTime::parse_from_str(&format!("1970-01-01T{}", normalized_s), "%Y-%m-%dT%H:%M:%S%.f")
|
103
|
+
})
|
104
|
+
.map_err(|_| to_datetime_error(ruby, s, ri, "Date or Time"))?;
|
105
|
+
|
106
|
+
// Apply the same logic as for Data::DateTime
|
107
|
+
if i == *date {
|
108
|
+
date_value = current;
|
109
|
+
} else if i == *start || i == *end || i == *actual_start || i == *actual_end {
|
110
|
+
current = transform_time_to_datetime(date_value, current);
|
111
|
+
}
|
112
|
+
|
113
|
+
// Round up to the next second if we have any fractional seconds
|
114
|
+
let adjusted_time = if current.nanosecond() > 0 {
|
115
|
+
current + chrono::Duration::seconds(1) - chrono::Duration::nanoseconds(current.nanosecond() as i64)
|
116
|
+
} else {
|
117
|
+
current
|
118
|
+
};
|
119
|
+
|
120
|
+
// Format the output to ensure consistent precision
|
121
|
+
let formatted_output = adjusted_time.format("%Y-%m-%d %H:%M:%S").to_string();
|
122
|
+
write!(dest, "{}", formatted_output)
|
123
|
+
}
|
84
124
|
Data::DateTime(ref d) => {
|
85
125
|
let mut current = d.as_datetime().ok_or(to_datetime_error(ruby, &d.to_string(), ri, "Date"))?;
|
86
126
|
if i == *date {
|
@@ -103,6 +143,16 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
|
103
143
|
Ok(())
|
104
144
|
}
|
105
145
|
|
146
|
+
fn validate_trust_name(ruby: &Ruby, expected_trust_name: &String, trust_name: &usize, ri: usize, r: &Vec<&Data>) -> magnus::error::Result<()> {
|
147
|
+
if ri > 0 {
|
148
|
+
let s = r[*trust_name].to_string();
|
149
|
+
if s != expected_trust_name.clone() {
|
150
|
+
return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Trust actual name: '{}' is not as expected: '{}'", s, expected_trust_name)));
|
151
|
+
}
|
152
|
+
}
|
153
|
+
Ok(())
|
154
|
+
}
|
155
|
+
|
106
156
|
fn get_mandatory_records<'a>(ruby: &Ruby, range: &'a Range<Data>, csv_header_list: &Vec<String>, mandatory_headers_list: &Vec<String>) -> magnus::error::Result<Vec<Vec<&'a Data>>> {
|
107
157
|
let inverse_header_map: HashMap<usize, String> = csv_header_list.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
|
108
158
|
|
@@ -161,7 +211,7 @@ fn handle_commas<W: Write>(dest: &mut W, s: &str) -> std::io::Result<()> {
|
|
161
211
|
|
162
212
|
fn clean_strings(s: &str) -> String {
|
163
213
|
s.replace("\n", " ")
|
164
|
-
.replace("\r", "
|
214
|
+
.replace("\r", "")
|
165
215
|
.replace("\"", "")
|
166
216
|
}
|
167
217
|
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/csv_utils/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: patchwork_csv_utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.18
|
5
5
|
platform: x86_64-linux
|
6
6
|
authors:
|
7
7
|
- kingsley.hendrickse
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-11-
|
11
|
+
date: 2024-11-28 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Deduplication of CSV files and XLS to CSV conversion.
|
14
14
|
email:
|