patchwork_csv_utils 0.1.17-x86_64-linux → 0.1.19-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/ext/csv_utils/src/utils/mod.rs +4 -0
- data/ext/csv_utils/src/utils/xls.rs +48 -7
- data/lib/csv_utils/2.7/csv_utils.so +0 -0
- data/lib/csv_utils/3.0/csv_utils.so +0 -0
- data/lib/csv_utils/3.1/csv_utils.so +0 -0
- data/lib/csv_utils/3.2/csv_utils.so +0 -0
- data/lib/csv_utils/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8638e11585bf0de8be0e52e53723e21601d54d6d27537554d541fdf03ed84c1d
|
4
|
+
data.tar.gz: 57ab3ccfa5a762e029bf662b8a6abd11e3bf44395e21adc34b4c4efb448812cb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 517c34da0b755540d78ca39ab057599e9947583f1f27d6c3853842d6a59fba8d8d3147df70f77bfb79b161f7cc633e8a22103a1fff8eb592030944157d5f430e
|
7
|
+
data.tar.gz: d8f481d34a8d950185e19c2bc772b23fa3071c9af013bc2e430295d5b6de8f084454b2b10f9b3a97ddf519e1f49c48da652add9f9a3c25d4732ef096fb68f43d
|
data/Gemfile.lock
CHANGED
@@ -65,6 +65,7 @@ fn create_header_map(headers: &Vec<String>) -> HashMap<String, usize> {
|
|
65
65
|
|
66
66
|
pub trait FileExtension {
|
67
67
|
fn has_extension<S: AsRef<str>>(&self, extensions: &[S]) -> bool;
|
68
|
+
fn extension(&self) -> Option<&str>;
|
68
69
|
}
|
69
70
|
|
70
71
|
impl<P: AsRef<Path>> FileExtension for P {
|
@@ -77,5 +78,8 @@ impl<P: AsRef<Path>> FileExtension for P {
|
|
77
78
|
|
78
79
|
false
|
79
80
|
}
|
81
|
+
fn extension(&self) -> Option<&str> {
|
82
|
+
self.as_ref().extension().and_then(OsStr::to_str)
|
83
|
+
}
|
80
84
|
}
|
81
85
|
|
@@ -2,8 +2,8 @@ use std::collections::HashMap;
|
|
2
2
|
use std::fs::File;
|
3
3
|
use std::io::{BufWriter, Write};
|
4
4
|
|
5
|
-
use calamine::{Data, open_workbook, Range, Reader, Xls};
|
6
|
-
use chrono::{NaiveDateTime, Utc};
|
5
|
+
use calamine::{Data, open_workbook, Range, Reader, Xls, open_workbook_auto};
|
6
|
+
use chrono::{NaiveDateTime, Timelike, Utc};
|
7
7
|
use magnus::{RArray, Ruby};
|
8
8
|
|
9
9
|
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error, check_mandatory_headers, missing_value, index_of_header_in_mandatory_list};
|
@@ -15,15 +15,17 @@ pub fn to_csv(ruby: &Ruby, xls_path: String,
|
|
15
15
|
status_exclusions: RArray,
|
16
16
|
expected_trust_name: String,
|
17
17
|
) -> magnus::error::Result<()> {
|
18
|
-
if !xls_path.has_extension(&["xls"]) {
|
19
|
-
return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls file".to_string()));
|
18
|
+
if !xls_path.has_extension(&["xls","xlsx"]) {
|
19
|
+
return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls or xlsx file".to_string()));
|
20
20
|
}
|
21
21
|
|
22
22
|
let exclusions = RArray::to_vec(exclusions)?;
|
23
23
|
let mandatory_headers: Vec<String> = RArray::to_vec(mandatory_headers)?;
|
24
24
|
let status_exclusions = RArray::to_vec(status_exclusions)?;
|
25
25
|
|
26
|
-
let mut workbook
|
26
|
+
let mut workbook = open_workbook_auto(&xls_path)
|
27
|
+
.map_err(|e| magnus_err(ruby, e, format!("could not open workbook: {}", xls_path).as_str()))?;
|
28
|
+
|
27
29
|
let range = workbook.worksheet_range_at(0)
|
28
30
|
.ok_or(magnus::Error::new(ruby.exception_standard_error(), "no worksheet found in xls".to_string()))
|
29
31
|
.and_then(|r| r.map_err(|e| magnus_err(ruby, e, "could not read worksheet range")))?;
|
@@ -77,10 +79,49 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
|
77
79
|
|
78
80
|
match *c {
|
79
81
|
Data::Empty => Ok(()),
|
80
|
-
Data::String(ref s) | Data::
|
82
|
+
Data::String(ref s) | Data::DurationIso(ref s) => {
|
81
83
|
handle_commas(dest, s)
|
82
84
|
}
|
83
85
|
Data::Float(ref f) => write!(dest, "{}", f),
|
86
|
+
Data::DateTimeIso(ref s) => {
|
87
|
+
// Normalize the string to ensure manageable precision
|
88
|
+
let normalized_s = if s.contains('.') {
|
89
|
+
let parts: Vec<&str> = s.split('.').collect();
|
90
|
+
format!("{}.{}", parts[0], &parts[1][..std::cmp::min(parts[1].len(), 6)]) // Keep up to 6 fractional seconds
|
91
|
+
} else {
|
92
|
+
s.to_string()
|
93
|
+
};
|
94
|
+
|
95
|
+
// Attempt to parse the normalized string as a full datetime
|
96
|
+
let mut current = NaiveDateTime::parse_from_str(&normalized_s, "%Y-%m-%dT%H:%M:%S%.f")
|
97
|
+
.or_else(|_| {
|
98
|
+
// If parsing as datetime fails, try parsing as date-only
|
99
|
+
NaiveDateTime::parse_from_str(&format!("{}T00:00:00", normalized_s), "%Y-%m-%dT%H:%M:%S%.f")
|
100
|
+
})
|
101
|
+
.or_else(|_| {
|
102
|
+
// If parsing as date-only fails, try parsing as time-only
|
103
|
+
NaiveDateTime::parse_from_str(&format!("1970-01-01T{}", normalized_s), "%Y-%m-%dT%H:%M:%S%.f")
|
104
|
+
})
|
105
|
+
.map_err(|_| to_datetime_error(ruby, s, ri, "Date or Time"))?;
|
106
|
+
|
107
|
+
// Apply the same logic as for Data::DateTime
|
108
|
+
if i == *date {
|
109
|
+
date_value = current;
|
110
|
+
} else if i == *start || i == *end || i == *actual_start || i == *actual_end {
|
111
|
+
current = transform_time_to_datetime(date_value, current);
|
112
|
+
}
|
113
|
+
|
114
|
+
// Round up to the next second if we have any fractional seconds
|
115
|
+
let adjusted_time = if current.nanosecond() > 0 {
|
116
|
+
current + chrono::Duration::seconds(1) - chrono::Duration::nanoseconds(current.nanosecond() as i64)
|
117
|
+
} else {
|
118
|
+
current
|
119
|
+
};
|
120
|
+
|
121
|
+
// Format the output to ensure consistent precision
|
122
|
+
let formatted_output = adjusted_time.format("%Y-%m-%d %H:%M:%S").to_string();
|
123
|
+
write!(dest, "{}", formatted_output)
|
124
|
+
}
|
84
125
|
Data::DateTime(ref d) => {
|
85
126
|
let mut current = d.as_datetime().ok_or(to_datetime_error(ruby, &d.to_string(), ri, "Date"))?;
|
86
127
|
if i == *date {
|
@@ -171,7 +212,7 @@ fn handle_commas<W: Write>(dest: &mut W, s: &str) -> std::io::Result<()> {
|
|
171
212
|
|
172
213
|
fn clean_strings(s: &str) -> String {
|
173
214
|
s.replace("\n", " ")
|
174
|
-
.replace("\r", "
|
215
|
+
.replace("\r", "")
|
175
216
|
.replace("\"", "")
|
176
217
|
}
|
177
218
|
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/csv_utils/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: patchwork_csv_utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.17
|
4
|
+
version: 0.1.19
|
5
5
|
platform: x86_64-linux
|
6
6
|
authors:
|
7
7
|
- kingsley.hendrickse
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-12-04 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Deduplication of CSV files and XLS to CSV conversion.
|
14
14
|
email:
|