patchwork_csv_utils 0.1.16-x86_64-linux → 0.1.18-x86_64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/ext/csv_utils/src/lib.rs +2 -2
- data/ext/csv_utils/src/utils/csv.rs +12 -6
- data/ext/csv_utils/src/utils/mod.rs +4 -9
- data/ext/csv_utils/src/utils/xls.rs +64 -14
- data/lib/csv_utils/2.7/csv_utils.so +0 -0
- data/lib/csv_utils/3.0/csv_utils.so +0 -0
- data/lib/csv_utils/3.1/csv_utils.so +0 -0
- data/lib/csv_utils/3.2/csv_utils.so +0 -0
- data/lib/csv_utils/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c53dcc394dd22b4c32d7201914e7c9fd1d94f668365c45b5a0466bc9820e9bf2
|
4
|
+
data.tar.gz: 85f24589e760b680d325a8900c2123ce52c8602bef9ed4b3db82561238924b8b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 36306ea0ff3606f5325f3d97d1ef8a84e93f3879b4652574e8226781ae01aeaef79448be87edb3a305a44290581d0c8ebbb8d8c677395758750b1a0af680a5c4
|
7
|
+
data.tar.gz: b236321552dcc55a1a2d05772f0272fc21c65a707f94b4f67ebd068b948a9dd66d001097cbf965d302cc71202e8dbad67e632323a7ee68c46c4cc865965e20a4
|
data/Gemfile.lock
CHANGED
data/ext/csv_utils/src/lib.rs
CHANGED
@@ -9,7 +9,7 @@ pub mod utils;
|
|
9
9
|
fn init() -> Result<(), magnus::Error> {
|
10
10
|
let module = define_module("CsvUtils")?;
|
11
11
|
module.define_singleton_method("dedup", function!(dedup, 4))?;
|
12
|
-
module.define_singleton_method("to_csv", function!(to_csv,
|
13
|
-
module.define_singleton_method("transform_csv", function!(transform_csv,
|
12
|
+
module.define_singleton_method("to_csv", function!(to_csv, 5))?;
|
13
|
+
module.define_singleton_method("transform_csv", function!(transform_csv, 5))?;
|
14
14
|
Ok(())
|
15
15
|
}
|
@@ -4,13 +4,12 @@ use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Utc};
|
|
4
4
|
use csv::{Reader, StringRecord, Writer};
|
5
5
|
use magnus::{Error, RArray, Ruby};
|
6
6
|
|
7
|
-
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, create_header_map, missing_value, headers_as_byte_record, index_of_header_in_mandatory_list
|
7
|
+
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, create_header_map, missing_value, headers_as_byte_record, index_of_header_in_mandatory_list};
|
8
8
|
|
9
9
|
pub fn transform_csv(ruby: &Ruby, csv_path: String,
|
10
10
|
target_path: String, exclusions: RArray,
|
11
11
|
mandatory_headers: RArray,
|
12
|
-
status_exclusions: RArray
|
13
|
-
expected_trust_name: String,) -> magnus::error::Result<()> {
|
12
|
+
status_exclusions: RArray) -> magnus::error::Result<()> {
|
14
13
|
if !csv_path.has_extension(&["csv"]) {
|
15
14
|
return Err(Error::new(ruby.exception_standard_error(), "csv_path must be a csv file".to_string()));
|
16
15
|
}
|
@@ -41,7 +40,6 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String,
|
|
41
40
|
let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
|
42
41
|
let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
|
43
42
|
let status = header_map.get("Status");
|
44
|
-
let trust_name = header_map.get("Trust").ok_or(missing_header(ruby, "Trust"))?;
|
45
43
|
|
46
44
|
let mandatory_records = get_mandatory_records(&ruby, &mut csv, &headers_list, &mandatory_headers)?;
|
47
45
|
|
@@ -60,8 +58,6 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String,
|
|
60
58
|
let column_value = record.get(*column_index).ok_or(missing_value(ruby, column))?;
|
61
59
|
let column_value = column_value.trim_end();
|
62
60
|
|
63
|
-
validate_trust_name(ruby, &expected_trust_name, trust_name, ri, i, &column_value.to_string())?;
|
64
|
-
|
65
61
|
if i == *date {
|
66
62
|
let current = string_to_datetime(column_value).ok_or(to_datetime_error(ruby, column_value, ri, "Date"))?;
|
67
63
|
date_value = current;
|
@@ -87,6 +83,16 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String,
|
|
87
83
|
Ok(())
|
88
84
|
}
|
89
85
|
|
86
|
+
|
87
|
+
fn validate_trust_name(ruby: &Ruby, expected_trust_name: &String, trust_name: &usize, ri: usize, i: usize, s: &String) -> magnus::error::Result<()> {
|
88
|
+
if i == *trust_name {
|
89
|
+
if s != &expected_trust_name.clone() {
|
90
|
+
return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Trust actual name: '{}' is not as expected: '{}'", s, expected_trust_name)));
|
91
|
+
}
|
92
|
+
}
|
93
|
+
Ok(())
|
94
|
+
}
|
95
|
+
|
90
96
|
fn get_mandatory_records(ruby: &Ruby, csv: &mut Reader<File>, csv_header_list: &Vec<String>, mandatory_headers_list: &Vec<String>) -> magnus::error::Result<Vec<StringRecord>> {
|
91
97
|
let inverse_header_map: HashMap<usize, String> = csv_header_list.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
|
92
98
|
|
@@ -9,15 +9,6 @@ pub mod csv;
|
|
9
9
|
pub mod dedup;
|
10
10
|
pub mod xls;
|
11
11
|
|
12
|
-
fn validate_trust_name(ruby: &Ruby, expected_trust_name: &String, trust_name: &usize, ri: usize, i: usize, s: &String) -> magnus::error::Result<()> {
|
13
|
-
if ri > 0 && i == *trust_name {
|
14
|
-
if s != &expected_trust_name.clone() {
|
15
|
-
return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Trust actual name: '{}' is not as expected: '{}'", s, expected_trust_name)));
|
16
|
-
}
|
17
|
-
}
|
18
|
-
Ok(())
|
19
|
-
}
|
20
|
-
|
21
12
|
fn missing_header(ruby: &Ruby, header: &str) -> magnus::Error {
|
22
13
|
magnus::Error::new(ruby.exception_standard_error(), format!("Missing '{}' header", header))
|
23
14
|
}
|
@@ -74,6 +65,7 @@ fn create_header_map(headers: &Vec<String>) -> HashMap<String, usize> {
|
|
74
65
|
|
75
66
|
pub trait FileExtension {
|
76
67
|
fn has_extension<S: AsRef<str>>(&self, extensions: &[S]) -> bool;
|
68
|
+
fn extension(&self) -> Option<&str>;
|
77
69
|
}
|
78
70
|
|
79
71
|
impl<P: AsRef<Path>> FileExtension for P {
|
@@ -86,5 +78,8 @@ impl<P: AsRef<Path>> FileExtension for P {
|
|
86
78
|
|
87
79
|
false
|
88
80
|
}
|
81
|
+
fn extension(&self) -> Option<&str> {
|
82
|
+
self.as_ref().extension().and_then(OsStr::to_str)
|
83
|
+
}
|
89
84
|
}
|
90
85
|
|
@@ -2,28 +2,31 @@ use std::collections::HashMap;
|
|
2
2
|
use std::fs::File;
|
3
3
|
use std::io::{BufWriter, Write};
|
4
4
|
|
5
|
-
use calamine::{Data, open_workbook, Range, Reader, Xls};
|
6
|
-
use chrono::{NaiveDateTime, Utc};
|
5
|
+
use calamine::{Data, open_workbook, Range, Reader, Xls, open_workbook_auto};
|
6
|
+
use chrono::{NaiveDateTime, Timelike, Utc};
|
7
7
|
use magnus::{RArray, Ruby};
|
8
8
|
|
9
|
-
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, missing_value, index_of_header_in_mandatory_list
|
9
|
+
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, missing_value, index_of_header_in_mandatory_list};
|
10
10
|
|
11
11
|
pub fn to_csv(ruby: &Ruby, xls_path: String,
|
12
12
|
target_path: String,
|
13
13
|
exclusions: RArray,
|
14
14
|
mandatory_headers: RArray,
|
15
15
|
status_exclusions: RArray,
|
16
|
-
expected_trust_name: String,
|
17
16
|
) -> magnus::error::Result<()> {
|
18
|
-
if !xls_path.has_extension(&["xls"]) {
|
19
|
-
return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls file".to_string()));
|
17
|
+
if !xls_path.has_extension(&["xls","xlsx"]) {
|
18
|
+
return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls or xlsx file".to_string()));
|
20
19
|
}
|
21
20
|
|
22
21
|
let exclusions = RArray::to_vec(exclusions)?;
|
23
22
|
let mandatory_headers: Vec<String> = RArray::to_vec(mandatory_headers)?;
|
24
23
|
let status_exclusions = RArray::to_vec(status_exclusions)?;
|
25
24
|
|
26
|
-
|
25
|
+
|
26
|
+
let mut workbook = open_workbook_auto(&xls_path)
|
27
|
+
.map_err(|e| magnus_err(ruby, e, format!("could not open workbook: {}", xls_path).as_str()))?;
|
28
|
+
|
29
|
+
|
27
30
|
let range = workbook.worksheet_range_at(0)
|
28
31
|
.ok_or(magnus::Error::new(ruby.exception_standard_error(), "no worksheet found in xls".to_string()))
|
29
32
|
.and_then(|r| r.map_err(|e| magnus_err(ruby, e, "could not read worksheet range")))?;
|
@@ -38,15 +41,14 @@ pub fn to_csv(ruby: &Ruby, xls_path: String,
|
|
38
41
|
let csv_out_file = File::create(target_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not create csv file: {}", target_path).as_str()))?;
|
39
42
|
let mut dest = BufWriter::new(csv_out_file);
|
40
43
|
|
41
|
-
write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers, headers_list, status_exclusions
|
44
|
+
write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers, headers_list, status_exclusions)
|
42
45
|
}
|
43
46
|
|
44
47
|
fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
45
48
|
header_map: HashMap<String, usize>, exclusions: Vec<String>,
|
46
49
|
mandatory_headers: Vec<String>,
|
47
50
|
headers_list: Vec<String>,
|
48
|
-
status_exclusions: Vec<String
|
49
|
-
expected_trust_name: String) -> magnus::error::Result<()> {
|
51
|
+
status_exclusions: Vec<String>) -> magnus::error::Result<()> {
|
50
52
|
let n = mandatory_headers.len() - 1;
|
51
53
|
let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
|
52
54
|
let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
|
@@ -55,7 +57,6 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
|
55
57
|
let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
|
56
58
|
let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
|
57
59
|
let status = header_map.get("Status");
|
58
|
-
let trust_name = header_map.get("Trust").ok_or(missing_header(ruby, "Trust"))?;
|
59
60
|
|
60
61
|
let mandatory_rows = get_mandatory_records(ruby, range, &headers_list, &mandatory_headers)?;
|
61
62
|
|
@@ -69,6 +70,7 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
|
69
70
|
if date_value_is_not_present(&date, &r) {
|
70
71
|
return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Date value is not present in row: {}", ri)));
|
71
72
|
}
|
73
|
+
// validate_trust_name(ruby, &expected_trust_name, trust_name, ri, &r)?;
|
72
74
|
|
73
75
|
for (i, c) in mandatory_headers.iter().enumerate() {
|
74
76
|
let column_index = header_map.get(c).ok_or(missing_header(ruby, c))?;
|
@@ -76,11 +78,49 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
|
76
78
|
|
77
79
|
match *c {
|
78
80
|
Data::Empty => Ok(()),
|
79
|
-
Data::String(ref s) | Data::
|
80
|
-
validate_trust_name(ruby, &expected_trust_name, trust_name, ri, i, s)?;
|
81
|
+
Data::String(ref s) | Data::DurationIso(ref s) => {
|
81
82
|
handle_commas(dest, s)
|
82
83
|
}
|
83
84
|
Data::Float(ref f) => write!(dest, "{}", f),
|
85
|
+
Data::DateTimeIso(ref s) => {
|
86
|
+
// Normalize the string to ensure manageable precision
|
87
|
+
let normalized_s = if s.contains('.') {
|
88
|
+
let parts: Vec<&str> = s.split('.').collect();
|
89
|
+
format!("{}.{}", parts[0], &parts[1][..std::cmp::min(parts[1].len(), 6)]) // Keep up to 6 fractional seconds
|
90
|
+
} else {
|
91
|
+
s.to_string()
|
92
|
+
};
|
93
|
+
|
94
|
+
// Attempt to parse the normalized string as a full datetime
|
95
|
+
let mut current = NaiveDateTime::parse_from_str(&normalized_s, "%Y-%m-%dT%H:%M:%S%.f")
|
96
|
+
.or_else(|_| {
|
97
|
+
// If parsing as datetime fails, try parsing as date-only
|
98
|
+
NaiveDateTime::parse_from_str(&format!("{}T00:00:00", normalized_s), "%Y-%m-%dT%H:%M:%S%.f")
|
99
|
+
})
|
100
|
+
.or_else(|_| {
|
101
|
+
// If parsing as date-only fails, try parsing as time-only
|
102
|
+
NaiveDateTime::parse_from_str(&format!("1970-01-01T{}", normalized_s), "%Y-%m-%dT%H:%M:%S%.f")
|
103
|
+
})
|
104
|
+
.map_err(|_| to_datetime_error(ruby, s, ri, "Date or Time"))?;
|
105
|
+
|
106
|
+
// Apply the same logic as for Data::DateTime
|
107
|
+
if i == *date {
|
108
|
+
date_value = current;
|
109
|
+
} else if i == *start || i == *end || i == *actual_start || i == *actual_end {
|
110
|
+
current = transform_time_to_datetime(date_value, current);
|
111
|
+
}
|
112
|
+
|
113
|
+
// Round up to the next second if we have any fractional seconds
|
114
|
+
let adjusted_time = if current.nanosecond() > 0 {
|
115
|
+
current + chrono::Duration::seconds(1) - chrono::Duration::nanoseconds(current.nanosecond() as i64)
|
116
|
+
} else {
|
117
|
+
current
|
118
|
+
};
|
119
|
+
|
120
|
+
// Format the output to ensure consistent precision
|
121
|
+
let formatted_output = adjusted_time.format("%Y-%m-%d %H:%M:%S").to_string();
|
122
|
+
write!(dest, "{}", formatted_output)
|
123
|
+
}
|
84
124
|
Data::DateTime(ref d) => {
|
85
125
|
let mut current = d.as_datetime().ok_or(to_datetime_error(ruby, &d.to_string(), ri, "Date"))?;
|
86
126
|
if i == *date {
|
@@ -103,6 +143,16 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
|
103
143
|
Ok(())
|
104
144
|
}
|
105
145
|
|
146
|
+
fn validate_trust_name(ruby: &Ruby, expected_trust_name: &String, trust_name: &usize, ri: usize, r: &Vec<&Data>) -> magnus::error::Result<()> {
|
147
|
+
if ri > 0 {
|
148
|
+
let s = r[*trust_name].to_string();
|
149
|
+
if s != expected_trust_name.clone() {
|
150
|
+
return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Trust actual name: '{}' is not as expected: '{}'", s, expected_trust_name)));
|
151
|
+
}
|
152
|
+
}
|
153
|
+
Ok(())
|
154
|
+
}
|
155
|
+
|
106
156
|
fn get_mandatory_records<'a>(ruby: &Ruby, range: &'a Range<Data>, csv_header_list: &Vec<String>, mandatory_headers_list: &Vec<String>) -> magnus::error::Result<Vec<Vec<&'a Data>>> {
|
107
157
|
let inverse_header_map: HashMap<usize, String> = csv_header_list.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
|
108
158
|
|
@@ -161,7 +211,7 @@ fn handle_commas<W: Write>(dest: &mut W, s: &str) -> std::io::Result<()> {
|
|
161
211
|
|
162
212
|
fn clean_strings(s: &str) -> String {
|
163
213
|
s.replace("\n", " ")
|
164
|
-
.replace("\r", "
|
214
|
+
.replace("\r", "")
|
165
215
|
.replace("\"", "")
|
166
216
|
}
|
167
217
|
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/csv_utils/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: patchwork_csv_utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.16
|
4
|
+
version: 0.1.18
|
5
5
|
platform: x86_64-linux
|
6
6
|
authors:
|
7
7
|
- kingsley.hendrickse
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-11-
|
11
|
+
date: 2024-11-28 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Deduplication of CSV files and XLS to CSV conversion.
|
14
14
|
email:
|