patchwork_csv_utils 0.1.7-x86_64-darwin → 0.1.9-x86_64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/ext/csv_utils/src/utils/csv.rs +31 -10
- data/ext/csv_utils/src/utils/mod.rs +4 -0
- data/ext/csv_utils/src/utils/xls.rs +9 -2
- data/lib/csv_utils/2.7/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.0/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.1/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.2/csv_utils.bundle +0 -0
- data/lib/csv_utils/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2e212bd3e73eeacfee9ebfcb560843920d212939254bfff6ffef90ca3a36fe42
|
4
|
+
data.tar.gz: a22f483c32e99467a020e3ff9a64809fbf7d9946974da06f565f27dc27b46c9b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2d731b4b50a025bee35941e48a5f5bdc4f57c0a829c69daa66610f08b354370a52502f12a2c28aab85260dc05b02d74d421a48f8d661a0cc9914546f9ed01d86
|
7
|
+
data.tar.gz: b46e0f9280ee98c6f4439dff004c0e9716ae61cbae2d615e1d0b81adb4bdd3b489b460af42cf4b436c315dfd8347dfb5069bafbb43117c678343002123165a0f
|
data/Gemfile.lock
CHANGED
data/ext/csv_utils/src/utils/csv.rs
CHANGED
@@ -3,9 +3,9 @@ use std::fs::File;
|
|
3
3
|
|
4
4
|
use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Utc};
|
5
5
|
use csv::{StringRecord, Writer};
|
6
|
-
use magnus::{RArray, Ruby};
|
6
|
+
use magnus::{Error, RArray, Ruby};
|
7
7
|
|
8
|
-
use crate::utils::{FileExtension, magnus_err, missing_header};
|
8
|
+
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error};
|
9
9
|
|
10
10
|
pub fn transform_csv(ruby: &Ruby, csv_path: String, target_path: String, exclusions: RArray) -> magnus::error::Result<()> {
|
11
11
|
if !csv_path.has_extension(&["csv"]) {
|
@@ -19,6 +19,7 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String, target_path: String, exclusi
|
|
19
19
|
let mut wtr = Writer::from_path(target_path).map_err(|e| magnus_err(ruby, e, "target_path"))?;
|
20
20
|
let headers = csv.headers().map_err(|e| magnus_err(ruby, e, "csv_path headers"))?;
|
21
21
|
let header_map: HashMap<String, usize> = headers.iter().enumerate().map(|(i, h)| (h.to_string(), i)).collect();
|
22
|
+
let inverse_header_map: HashMap<usize, String> = headers.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
|
22
23
|
|
23
24
|
wtr.write_byte_record(headers.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
|
24
25
|
|
@@ -41,13 +42,13 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String, target_path: String, exclusi
|
|
41
42
|
let record = record.iter().enumerate().map(|(i, c)| {
|
42
43
|
let c = c.trim_end();
|
43
44
|
if i == *date {
|
44
|
-
let current = string_to_datetime(c).ok_or(to_datetime_error(ruby, c, ri, i))?;
|
45
|
+
let current = string_to_datetime(c).ok_or(to_datetime_error(ruby, c, ri, "Date"))?;
|
45
46
|
date_value = current;
|
46
47
|
Ok(current.to_string())
|
47
48
|
} else if i == *start || i == *end || i == *actual_start || i == *actual_end {
|
48
|
-
|
49
|
-
let
|
50
|
-
|
49
|
+
if c.is_empty() { return Ok(c.to_string()); }
|
50
|
+
let column_name = get_column_name(&inverse_header_map, &i);
|
51
|
+
process_datetime(ruby, ri, date_value, c, &column_name)
|
51
52
|
} else {
|
52
53
|
Ok(c.to_string())
|
53
54
|
}
|
@@ -62,15 +63,39 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String, target_path: String, exclusi
|
|
62
63
|
Ok(())
|
63
64
|
}
|
64
65
|
|
66
|
+
fn process_datetime(ruby: &Ruby, ri: usize, date_value: NaiveDateTime, c: &str, column_name: &String) -> magnus::error::Result<String> {
|
67
|
+
let maybe_correct = correct_datetime(c);
|
68
|
+
if let Some(correct) = maybe_correct {
|
69
|
+
return Ok(correct.to_string());
|
70
|
+
}
|
71
|
+
|
72
|
+
let current_time = string_to_time(c).ok_or(to_datetime_error(ruby, c, ri, column_name))?;
|
73
|
+
let datetime = transform_time_to_datetime(date_value, current_time);
|
74
|
+
Ok(datetime.to_string())
|
75
|
+
}
|
76
|
+
|
77
|
+
fn get_column_name(inverse_header_map: &HashMap<usize, String>, i: &usize) -> String {
|
78
|
+
let unknown = "Unknown".to_string();
|
79
|
+
let column_name = inverse_header_map.get(&i).unwrap_or(&unknown);
|
80
|
+
column_name.to_string()
|
81
|
+
}
|
82
|
+
|
65
83
|
fn skip_excluded_rows(request_id: &usize, r: &StringRecord, exclusions: &Vec<String>) -> bool {
|
66
84
|
let value = r.get(*request_id).unwrap_or_default();
|
67
85
|
exclusions.contains(&value.to_string())
|
68
86
|
}
|
69
87
|
|
70
88
|
fn string_to_datetime(s: &str) -> Option<NaiveDateTime> {
|
89
|
+
let maybe_correct = correct_datetime(s);
|
90
|
+
if maybe_correct.is_some() { return maybe_correct; }
|
91
|
+
|
71
92
|
NaiveDate::parse_from_str(s, "%d-%b-%y").ok().map(|d| d.and_hms_opt(0, 0, 0)).flatten()
|
72
93
|
}
|
73
94
|
|
95
|
+
fn correct_datetime(s: &str) -> Option<NaiveDateTime> {
|
96
|
+
NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S").ok()
|
97
|
+
}
|
98
|
+
|
74
99
|
fn string_to_time(s: &str) -> Option<NaiveTime> {
|
75
100
|
NaiveTime::parse_from_str(s, "%H:%M").ok()
|
76
101
|
}
|
@@ -79,10 +104,6 @@ fn transform_time_to_datetime(t1: NaiveDateTime, t2: NaiveTime) -> NaiveDateTime
|
|
79
104
|
NaiveDateTime::new(t1.date(), t2)
|
80
105
|
}
|
81
106
|
|
82
|
-
fn to_datetime_error(ruby: &Ruby, value: &str, row: usize, col: usize) -> magnus::Error {
|
83
|
-
magnus::Error::new(ruby.exception_standard_error(), format!("Could not parse datetime '{}', row: {}, col: {}", value, row, col))
|
84
|
-
}
|
85
|
-
|
86
107
|
fn has_empty_first_col_skip_row(record: &StringRecord) -> bool {
|
87
108
|
record[0].is_empty()
|
88
109
|
}
|
data/ext/csv_utils/src/utils/mod.rs
CHANGED
@@ -15,6 +15,10 @@ fn magnus_err<E: Error>(ruby: &Ruby, e: E, msg: &str) -> magnus::Error {
|
|
15
15
|
magnus::Error::new(ruby.exception_standard_error(), format!("{}: {}", msg, e.to_string()))
|
16
16
|
}
|
17
17
|
|
18
|
+
fn to_datetime_error(ruby: &Ruby, value: &str, row: usize, col: &str) -> magnus::Error {
|
19
|
+
magnus::Error::new(ruby.exception_standard_error(), format!("Could not parse datetime '{}', row: {}, col: {}", value, row, col))
|
20
|
+
}
|
21
|
+
|
18
22
|
pub trait FileExtension {
|
19
23
|
fn has_extension<S: AsRef<str>>(&self, extensions: &[S]) -> bool;
|
20
24
|
}
|
data/ext/csv_utils/src/utils/xls.rs
CHANGED
@@ -6,7 +6,7 @@ use calamine::{Data, open_workbook, Range, Reader, Xls};
|
|
6
6
|
use chrono::{NaiveDateTime, Utc};
|
7
7
|
use magnus::{RArray, Ruby};
|
8
8
|
|
9
|
-
use crate::utils::{FileExtension, magnus_err, missing_header};
|
9
|
+
use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error};
|
10
10
|
|
11
11
|
pub fn to_csv(ruby: &Ruby, xls_path: String, target_path: String, exclusions: RArray) -> magnus::error::Result<()> {
|
12
12
|
if !xls_path.has_extension(&["xls"]) {
|
@@ -44,6 +44,9 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_ma
|
|
44
44
|
if skip_excluded_rows(&request_id, r, &exclusions) { continue; }
|
45
45
|
if skip_empty_rows(r) { continue; }
|
46
46
|
if skip_rows_with_no_request_id(&request_id, r) { continue; }
|
47
|
+
if date_value_is_not_present(&date, r) {
|
48
|
+
return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Date value is not present in row: {}", ri)));
|
49
|
+
}
|
47
50
|
|
48
51
|
for (i, c) in r.iter().enumerate() {
|
49
52
|
match *c {
|
@@ -53,7 +56,7 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_ma
|
|
53
56
|
}
|
54
57
|
Data::Float(ref f) => write!(dest, "{}", f),
|
55
58
|
Data::DateTime(ref d) => {
|
56
|
-
let mut current = d.as_datetime().
|
59
|
+
let mut current = d.as_datetime().ok_or(to_datetime_error(ruby, &d.to_string(), ri, "Date"))?;
|
57
60
|
if i == *date {
|
58
61
|
date_value = current;
|
59
62
|
} else if i == *start || i == *end || i == *actual_start || i == *actual_end {
|
@@ -74,6 +77,10 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_ma
|
|
74
77
|
Ok(())
|
75
78
|
}
|
76
79
|
|
80
|
+
fn date_value_is_not_present(date: &usize, r: &[Data]) -> bool {
|
81
|
+
r[*date] == Data::Empty
|
82
|
+
}
|
83
|
+
|
77
84
|
fn skip_excluded_rows(request_id: &usize, r: &[Data], exclusions: &Vec<String>) -> bool {
|
78
85
|
let value = r[*request_id].to_string();
|
79
86
|
exclusions.contains(&value.to_string())
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/csv_utils/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: patchwork_csv_utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.7
|
4
|
+
version: 0.1.9
|
5
5
|
platform: x86_64-darwin
|
6
6
|
authors:
|
7
7
|
- kingsley.hendrickse
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-08-
|
11
|
+
date: 2024-08-08 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Deduplication of CSV files and XLS to CSV conversion.
|
14
14
|
email:
|