patchwork_csv_utils 0.1.22-x86_64-darwin → 0.1.24-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +238 -278
- data/Gemfile +1 -1
- data/Gemfile.lock +5 -3
- data/ext/csv_utils/Cargo.toml +5 -4
- data/ext/csv_utils/src/lib.rs +5 -5
- data/ext/csv_utils/src/utils/csv.rs +160 -167
- data/ext/csv_utils/src/utils/dedup.rs +102 -67
- data/ext/csv_utils/src/utils/mod.rs +81 -17
- data/ext/csv_utils/src/utils/shared/datetime.rs +79 -0
- data/ext/csv_utils/src/utils/shared/filters.rs +130 -0
- data/ext/csv_utils/src/utils/shared/mod.rs +4 -0
- data/ext/csv_utils/src/utils/shared/types.rs +97 -0
- data/ext/csv_utils/src/utils/shared/validation.rs +34 -0
- data/ext/csv_utils/src/utils/xls.rs +272 -184
- data/lib/csv_utils/2.7/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.0/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.1/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.2/csv_utils.bundle +0 -0
- data/lib/csv_utils/version.rb +1 -1
- metadata +8 -3
|
@@ -2,243 +2,331 @@ use std::collections::HashMap;
|
|
|
2
2
|
use std::fs::File;
|
|
3
3
|
use std::io::{BufWriter, Write};
|
|
4
4
|
|
|
5
|
-
use calamine::{open_workbook_auto, Data,
|
|
5
|
+
use calamine::{open_workbook_auto, Data, ExcelDateTime, Range, Reader};
|
|
6
6
|
use chrono::{NaiveDateTime, Timelike, Utc};
|
|
7
7
|
use magnus::{RArray, Ruby};
|
|
8
8
|
|
|
9
|
-
use crate::utils::
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
9
|
+
use crate::utils::shared::datetime::DateTimeProcessor;
|
|
10
|
+
use crate::utils::shared::filters::RowFilters;
|
|
11
|
+
use crate::utils::shared::types::{HeaderConfig, MandatoryColumn, ProcessingConfig};
|
|
12
|
+
use crate::utils::shared::validation::TrustValidator;
|
|
13
|
+
use crate::utils::{
|
|
14
|
+
check_mandatory_headers, index_of_header_in_mandatory_list, magnus_err, missing_header,
|
|
15
|
+
missing_value, FileExtension,
|
|
16
|
+
};
|
|
17
|
+
|
|
18
|
+
#[allow(clippy::too_many_arguments)]
|
|
19
|
+
pub fn to_csv(
|
|
20
|
+
ruby: &Ruby,
|
|
21
|
+
xls_path: String,
|
|
22
|
+
target_path: String,
|
|
23
|
+
exclusions: RArray,
|
|
24
|
+
mandatory_headers: RArray,
|
|
25
|
+
status_exclusions: RArray,
|
|
26
|
+
expected_trust_name: String,
|
|
27
|
+
is_streamed_file: bool,
|
|
28
|
+
earliest_start_date: Option<String>,
|
|
18
29
|
) -> magnus::error::Result<()> {
|
|
19
|
-
if !xls_path.has_extension(&["xls","xlsx"]) {
|
|
20
|
-
return Err(magnus::Error::new(
|
|
30
|
+
if !xls_path.has_extension(&["xls", "xlsx"]) {
|
|
31
|
+
return Err(magnus::Error::new(
|
|
32
|
+
ruby.exception_standard_error(),
|
|
33
|
+
"xls_path must be an xls or xlsx file".to_string(),
|
|
34
|
+
));
|
|
21
35
|
}
|
|
22
36
|
|
|
23
|
-
let
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
37
|
+
let config = ProcessingConfig::from_ruby(
|
|
38
|
+
exclusions,
|
|
39
|
+
mandatory_headers,
|
|
40
|
+
status_exclusions,
|
|
41
|
+
expected_trust_name,
|
|
42
|
+
is_streamed_file,
|
|
43
|
+
earliest_start_date,
|
|
44
|
+
)?;
|
|
45
|
+
|
|
46
|
+
let mut workbook = open_workbook_auto(&xls_path).map_err(|e| {
|
|
47
|
+
magnus_err(
|
|
48
|
+
ruby,
|
|
49
|
+
e,
|
|
50
|
+
format!("could not open workbook: {}", xls_path).as_str(),
|
|
51
|
+
)
|
|
52
|
+
})?;
|
|
53
|
+
|
|
54
|
+
let range = workbook
|
|
55
|
+
.worksheet_range_at(0)
|
|
56
|
+
.ok_or_else(|| {
|
|
57
|
+
magnus::Error::new(
|
|
58
|
+
ruby.exception_standard_error(),
|
|
59
|
+
"no worksheet found in xls".to_string(),
|
|
60
|
+
)
|
|
61
|
+
})
|
|
32
62
|
.and_then(|r| r.map_err(|e| magnus_err(ruby, e, "could not read worksheet range")))?;
|
|
33
63
|
|
|
34
|
-
let headers = range.headers().
|
|
64
|
+
let headers = range.headers().ok_or_else(|| {
|
|
65
|
+
magnus::Error::new(
|
|
66
|
+
ruby.exception_standard_error(),
|
|
67
|
+
"no headers found in xls".to_string(),
|
|
68
|
+
)
|
|
69
|
+
})?;
|
|
35
70
|
let headers_list: Vec<String> = headers.iter().map(|h| h.to_string()).collect();
|
|
36
71
|
|
|
37
72
|
if let Some(value) =
|
|
38
|
-
check_mandatory_headers(ruby, &headers_list, &mandatory_headers, "csv")
|
|
73
|
+
check_mandatory_headers(ruby, &headers_list, &config.mandatory_headers, "csv")
|
|
74
|
+
{
|
|
75
|
+
return value;
|
|
76
|
+
}
|
|
39
77
|
|
|
40
|
-
let
|
|
41
|
-
|
|
78
|
+
let csv_out_file = File::create(&target_path).map_err(|e| {
|
|
79
|
+
magnus_err(
|
|
80
|
+
ruby,
|
|
81
|
+
e,
|
|
82
|
+
format!("could not create csv file: {}", target_path).as_str(),
|
|
83
|
+
)
|
|
84
|
+
})?;
|
|
42
85
|
let mut dest = BufWriter::new(csv_out_file);
|
|
43
86
|
|
|
44
|
-
write_csv(ruby, &mut dest, &range,
|
|
87
|
+
write_csv(ruby, &mut dest, &range, config, headers_list)
|
|
45
88
|
}
|
|
46
89
|
|
|
47
|
-
fn write_csv<W: Write>(
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
let n = mandatory_headers.len() - 1;
|
|
55
|
-
let
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
let
|
|
63
|
-
|
|
64
|
-
|
|
90
|
+
fn write_csv<W: Write>(
|
|
91
|
+
ruby: &Ruby,
|
|
92
|
+
dest: &mut W,
|
|
93
|
+
range: &Range<Data>,
|
|
94
|
+
config: ProcessingConfig,
|
|
95
|
+
headers_list: Vec<String>,
|
|
96
|
+
) -> magnus::error::Result<()> {
|
|
97
|
+
let n = config.mandatory_headers.len() - 1;
|
|
98
|
+
let header_map: HashMap<String, usize> = config
|
|
99
|
+
.mandatory_headers
|
|
100
|
+
.iter()
|
|
101
|
+
.enumerate()
|
|
102
|
+
.map(|(i, h)| (h.to_string(), i))
|
|
103
|
+
.collect();
|
|
104
|
+
|
|
105
|
+
let header_config = HeaderConfig::from_header_map(&header_map, ruby)?;
|
|
106
|
+
let filters = RowFilters::new(
|
|
107
|
+
config.exclusions,
|
|
108
|
+
config.status_exclusions,
|
|
109
|
+
config.earliest_start_date,
|
|
110
|
+
);
|
|
111
|
+
let trust_validator = TrustValidator::new(config.expected_trust_name, config.is_streamed_file);
|
|
112
|
+
|
|
113
|
+
let mandatory_rows =
|
|
114
|
+
get_mandatory_records(ruby, range, &headers_list, &config.mandatory_headers)?;
|
|
65
115
|
|
|
66
116
|
for (ri, r) in mandatory_rows.into_iter().enumerate() {
|
|
67
|
-
|
|
117
|
+
if filters.should_skip(
|
|
118
|
+
&r,
|
|
119
|
+
header_config.request_id,
|
|
120
|
+
header_config.status,
|
|
121
|
+
header_config.date,
|
|
122
|
+
) {
|
|
123
|
+
continue;
|
|
124
|
+
}
|
|
68
125
|
|
|
69
|
-
if
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
126
|
+
if r.get(header_config.date)
|
|
127
|
+
.is_none_or(|d| matches!(d, &Data::Empty))
|
|
128
|
+
{
|
|
129
|
+
return Err(magnus::Error::new(
|
|
130
|
+
ruby.exception_standard_error(),
|
|
131
|
+
format!("Date value is not present in row: {}", ri),
|
|
132
|
+
));
|
|
75
133
|
}
|
|
76
134
|
|
|
77
|
-
if
|
|
78
|
-
|
|
135
|
+
if ri > 0 {
|
|
136
|
+
if let Some(trust_data) = r.get(header_config.trust_name) {
|
|
137
|
+
trust_validator.validate(ruby, &trust_data.to_string())?;
|
|
138
|
+
}
|
|
79
139
|
}
|
|
80
140
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
141
|
+
let mut date_value = Utc::now().naive_utc();
|
|
142
|
+
|
|
143
|
+
for (i, c) in config.mandatory_headers.iter().enumerate() {
|
|
144
|
+
let column_index = *header_map.get(c).ok_or_else(|| missing_header(ruby, c))?;
|
|
145
|
+
let c = r.get(column_index).ok_or_else(|| missing_value(ruby, c))?;
|
|
84
146
|
|
|
85
147
|
match *c {
|
|
86
148
|
Data::Empty => Ok(()),
|
|
87
|
-
Data::String(ref s) | Data::DurationIso(ref s) =>
|
|
88
|
-
handle_commas(dest, s)
|
|
89
|
-
}
|
|
149
|
+
Data::String(ref s) | Data::DurationIso(ref s) => handle_commas(dest, s),
|
|
90
150
|
Data::Float(ref f) => write!(dest, "{}", f),
|
|
91
151
|
Data::DateTimeIso(ref s) => {
|
|
92
|
-
|
|
93
|
-
let normalized_s = if s.contains('.') {
|
|
94
|
-
let parts: Vec<&str> = s.split('.').collect();
|
|
95
|
-
format!("{}.{}", parts[0], &parts[1][..std::cmp::min(parts[1].len(), 6)]) // Keep up to 6 fractional seconds
|
|
96
|
-
} else {
|
|
97
|
-
s.to_string()
|
|
98
|
-
};
|
|
99
|
-
|
|
100
|
-
// Attempt to parse the normalized string as a full datetime
|
|
101
|
-
let mut current = NaiveDateTime::parse_from_str(&normalized_s, "%Y-%m-%dT%H:%M:%S%.f")
|
|
102
|
-
.or_else(|_| {
|
|
103
|
-
// If parsing as datetime fails, try parsing as date-only
|
|
104
|
-
NaiveDateTime::parse_from_str(&format!("{}T00:00:00", normalized_s), "%Y-%m-%dT%H:%M:%S%.f")
|
|
105
|
-
})
|
|
106
|
-
.or_else(|_| {
|
|
107
|
-
// If parsing as time-only fails, try parsing as time-only
|
|
108
|
-
NaiveDateTime::parse_from_str(&format!("1970-01-01T{}", normalized_s), "%Y-%m-%dT%H:%M:%S%.f")
|
|
109
|
-
})
|
|
110
|
-
.map_err(|_| to_datetime_error(ruby, s, ri, "Date or Time"))?;
|
|
111
|
-
|
|
112
|
-
// Apply the same logic as for Data::DateTime
|
|
113
|
-
if i == *date {
|
|
114
|
-
date_value = current;
|
|
115
|
-
} else if i == *start || i == *end || i == *actual_start || i == *actual_end {
|
|
116
|
-
current = transform_time_to_datetime(date_value, current);
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
// Round up to the next second if we have any fractional seconds
|
|
120
|
-
let adjusted_time = if current.nanosecond() > 0 {
|
|
121
|
-
current + chrono::Duration::seconds(1) - chrono::Duration::nanoseconds(current.nanosecond() as i64)
|
|
122
|
-
} else {
|
|
123
|
-
current
|
|
124
|
-
};
|
|
125
|
-
|
|
126
|
-
// Format the output to ensure consistent precision
|
|
127
|
-
let formatted_output = adjusted_time.format("%Y-%m-%d %H:%M:%S").to_string();
|
|
128
|
-
write!(dest, "{}", formatted_output)
|
|
152
|
+
handle_datetime_iso(ruby, dest, s, ri, i, &header_config, &mut date_value)
|
|
129
153
|
}
|
|
130
154
|
Data::DateTime(ref d) => {
|
|
131
|
-
|
|
132
|
-
if i == *date {
|
|
133
|
-
date_value = current;
|
|
134
|
-
} else if i == *start || i == *end || i == *actual_start || i == *actual_end {
|
|
135
|
-
current = transform_time_to_datetime(date_value, current);
|
|
136
|
-
}
|
|
137
|
-
write!(dest, "{}", current)
|
|
155
|
+
handle_datetime(dest, *d, ri, i, &header_config, &mut date_value)
|
|
138
156
|
}
|
|
139
157
|
Data::Int(ref i) => write!(dest, "{}", i),
|
|
140
158
|
Data::Error(ref e) => write!(dest, "{:?}", e),
|
|
141
159
|
Data::Bool(ref b) => write!(dest, "{}", b),
|
|
142
|
-
}.map_err(|e| magnus_err(ruby, e, format!("error writing xls row: {}, column: {}", ri, i).as_str()))?;
|
|
143
|
-
if i != n {
|
|
144
|
-
write!(dest, ",").map_err(|e| magnus_err(ruby, e, format!("error writing csv comma for row: {}, column: {}", ri, i).as_str()))?;
|
|
145
160
|
}
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
}
|
|
161
|
+
.map_err(|e| {
|
|
162
|
+
magnus_err(
|
|
163
|
+
ruby,
|
|
164
|
+
e,
|
|
165
|
+
format!("error writing xls row: {}, column: {}", ri, i).as_str(),
|
|
166
|
+
)
|
|
167
|
+
})?;
|
|
151
168
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
169
|
+
if i != n {
|
|
170
|
+
write!(dest, ",").map_err(|e| {
|
|
171
|
+
magnus_err(
|
|
172
|
+
ruby,
|
|
173
|
+
e,
|
|
174
|
+
format!("error writing csv comma for row: {}, column: {}", ri, i).as_str(),
|
|
175
|
+
)
|
|
176
|
+
})?;
|
|
177
|
+
}
|
|
158
178
|
}
|
|
179
|
+
write!(dest, "\r\n").map_err(|e| {
|
|
180
|
+
magnus_err(
|
|
181
|
+
ruby,
|
|
182
|
+
e,
|
|
183
|
+
format!("error writing end of line for row: {}", ri).as_str(),
|
|
184
|
+
)
|
|
185
|
+
})?;
|
|
159
186
|
}
|
|
160
187
|
Ok(())
|
|
161
188
|
}
|
|
162
189
|
|
|
163
|
-
fn
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
190
|
+
fn handle_datetime_iso<W: Write>(
|
|
191
|
+
_ruby: &Ruby,
|
|
192
|
+
dest: &mut W,
|
|
193
|
+
s: &str,
|
|
194
|
+
ri: usize,
|
|
195
|
+
i: usize,
|
|
196
|
+
header_config: &HeaderConfig,
|
|
197
|
+
date_value: &mut NaiveDateTime,
|
|
198
|
+
) -> std::io::Result<()> {
|
|
199
|
+
let normalized_s = if s.contains('.') {
|
|
200
|
+
let parts: Vec<&str> = s.split('.').collect();
|
|
201
|
+
format!(
|
|
202
|
+
"{}.{}",
|
|
203
|
+
parts[0],
|
|
204
|
+
&parts[1][..std::cmp::min(parts[1].len(), 6)]
|
|
205
|
+
)
|
|
206
|
+
} else {
|
|
207
|
+
s.to_string()
|
|
208
|
+
};
|
|
209
|
+
|
|
210
|
+
let mut current = NaiveDateTime::parse_from_str(&normalized_s, "%Y-%m-%dT%H:%M:%S%.f")
|
|
211
|
+
.or_else(|_| {
|
|
212
|
+
NaiveDateTime::parse_from_str(
|
|
213
|
+
&format!("{}T00:00:00", normalized_s),
|
|
214
|
+
"%Y-%m-%dT%H:%M:%S%.f",
|
|
215
|
+
)
|
|
216
|
+
})
|
|
217
|
+
.or_else(|_| {
|
|
218
|
+
NaiveDateTime::parse_from_str(
|
|
219
|
+
&format!("1970-01-01T{}", normalized_s),
|
|
220
|
+
"%Y-%m-%dT%H:%M:%S%.f",
|
|
221
|
+
)
|
|
222
|
+
})
|
|
223
|
+
.map_err(|_| {
|
|
224
|
+
std::io::Error::new(
|
|
225
|
+
std::io::ErrorKind::InvalidData,
|
|
226
|
+
format!(
|
|
227
|
+
"Could not parse datetime '{}', row: {}, col: Date or Time",
|
|
228
|
+
s, ri
|
|
229
|
+
),
|
|
230
|
+
)
|
|
231
|
+
})?;
|
|
232
|
+
|
|
233
|
+
if header_config.is_date_column(i) {
|
|
234
|
+
*date_value = current;
|
|
235
|
+
} else if header_config.is_time_column(i) {
|
|
236
|
+
current = DateTimeProcessor::new(*date_value).combine_datetime_parts(current);
|
|
179
237
|
}
|
|
180
238
|
|
|
181
|
-
|
|
182
|
-
|
|
239
|
+
let adjusted_time = if current.nanosecond() > 0 {
|
|
240
|
+
current + chrono::Duration::seconds(1)
|
|
241
|
+
- chrono::Duration::nanoseconds(current.nanosecond() as i64)
|
|
242
|
+
} else {
|
|
243
|
+
current
|
|
244
|
+
};
|
|
183
245
|
|
|
184
|
-
|
|
185
|
-
r[*date] == &Data::Empty
|
|
246
|
+
write!(dest, "{}", adjusted_time.format("%Y-%m-%d %H:%M:%S"))
|
|
186
247
|
}
|
|
187
248
|
|
|
188
|
-
fn
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
249
|
+
fn handle_datetime<W: Write>(
|
|
250
|
+
dest: &mut W,
|
|
251
|
+
d: ExcelDateTime,
|
|
252
|
+
_ri: usize,
|
|
253
|
+
i: usize,
|
|
254
|
+
header_config: &HeaderConfig,
|
|
255
|
+
date_value: &mut NaiveDateTime,
|
|
256
|
+
) -> std::io::Result<()> {
|
|
257
|
+
let mut current = d.as_datetime().ok_or_else(|| {
|
|
258
|
+
std::io::Error::new(
|
|
259
|
+
std::io::ErrorKind::InvalidData,
|
|
260
|
+
format!(
|
|
261
|
+
"Could not parse datetime '{:?}', row: {}, col: Date",
|
|
262
|
+
d, _ri
|
|
263
|
+
),
|
|
264
|
+
)
|
|
265
|
+
})?;
|
|
266
|
+
|
|
267
|
+
if header_config.is_date_column(i) {
|
|
268
|
+
*date_value = current;
|
|
269
|
+
} else if header_config.is_time_column(i) {
|
|
270
|
+
current = DateTimeProcessor::new(*date_value).combine_datetime_parts(current);
|
|
197
271
|
}
|
|
198
272
|
|
|
199
|
-
|
|
200
|
-
exclusions.contains(&value.to_string())
|
|
201
|
-
}
|
|
202
|
-
|
|
203
|
-
fn skip_excluded_status_rows(status: &Option<&usize>, r: &Vec<&Data>, exclusions: &Vec<String>) -> bool {
|
|
204
|
-
status
|
|
205
|
-
.map(|index| exclusions.contains(&r[*index].to_string()))
|
|
206
|
-
.unwrap_or(false)
|
|
207
|
-
}
|
|
208
|
-
|
|
209
|
-
fn skip_empty_rows(r: &Vec<&Data>) -> bool {
|
|
210
|
-
r.into_iter().all(|c| c == &&Data::Empty)
|
|
273
|
+
write!(dest, "{}", current)
|
|
211
274
|
}
|
|
212
275
|
|
|
213
|
-
fn
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
276
|
+
fn get_mandatory_records<'a>(
|
|
277
|
+
_ruby: &Ruby,
|
|
278
|
+
range: &'a Range<Data>,
|
|
279
|
+
csv_header_list: &[String],
|
|
280
|
+
mandatory_headers_list: &[String],
|
|
281
|
+
) -> magnus::error::Result<Vec<Vec<&'a Data>>> {
|
|
282
|
+
let inverse_header_map: HashMap<usize, String> = csv_header_list
|
|
283
|
+
.iter()
|
|
284
|
+
.enumerate()
|
|
285
|
+
.map(|(i, h)| (i, h.to_string()))
|
|
286
|
+
.collect();
|
|
287
|
+
|
|
288
|
+
range
|
|
289
|
+
.rows()
|
|
290
|
+
.map(|row| {
|
|
291
|
+
let mut columns: Vec<MandatoryColumn<&Data>> = row
|
|
292
|
+
.iter()
|
|
293
|
+
.enumerate()
|
|
294
|
+
.filter_map(|(i, column_value)| {
|
|
295
|
+
inverse_header_map.get(&i).and_then(|column_name| {
|
|
296
|
+
if mandatory_headers_list.contains(column_name) {
|
|
297
|
+
index_of_header_in_mandatory_list(
|
|
298
|
+
mandatory_headers_list.to_vec(),
|
|
299
|
+
column_name.to_string(),
|
|
300
|
+
)
|
|
301
|
+
.map(|index| MandatoryColumn::new(column_value, index))
|
|
302
|
+
} else {
|
|
303
|
+
None
|
|
304
|
+
}
|
|
305
|
+
})
|
|
306
|
+
})
|
|
307
|
+
.collect();
|
|
308
|
+
|
|
309
|
+
columns.sort_by_key(|c| c.index);
|
|
310
|
+
|
|
311
|
+
Ok(columns.into_iter().map(|c| c.value).collect())
|
|
312
|
+
})
|
|
313
|
+
.collect()
|
|
219
314
|
}
|
|
220
315
|
|
|
221
316
|
fn handle_commas<W: Write>(dest: &mut W, s: &str) -> std::io::Result<()> {
|
|
222
|
-
|
|
223
|
-
|
|
317
|
+
let cleaned = s
|
|
318
|
+
.chars()
|
|
319
|
+
.map(|c| match c {
|
|
320
|
+
'\n' => ' ',
|
|
321
|
+
'\r' | '"' => '\0',
|
|
322
|
+
_ => c,
|
|
323
|
+
})
|
|
324
|
+
.filter(|&c| c != '\0')
|
|
325
|
+
.collect::<String>();
|
|
326
|
+
|
|
327
|
+
if s.contains(',') {
|
|
328
|
+
write!(dest, "{:?}", cleaned.trim_end())
|
|
224
329
|
} else {
|
|
225
|
-
write!(dest, "{}",
|
|
226
|
-
}
|
|
227
|
-
}
|
|
228
|
-
|
|
229
|
-
fn clean_strings(s: &str) -> String {
|
|
230
|
-
s.replace("\n", " ")
|
|
231
|
-
.replace("\r", "")
|
|
232
|
-
.replace("\"", "")
|
|
233
|
-
}
|
|
234
|
-
|
|
235
|
-
struct XlsMandatoryColumn<'a> {
|
|
236
|
-
value: &'a Data,
|
|
237
|
-
index: usize,
|
|
238
|
-
}
|
|
239
|
-
|
|
240
|
-
impl<'a> XlsMandatoryColumn<'a> {
|
|
241
|
-
fn new(value: &'a Data, index: usize) -> Self {
|
|
242
|
-
XlsMandatoryColumn { value, index }
|
|
330
|
+
write!(dest, "{}", cleaned.trim_end())
|
|
243
331
|
}
|
|
244
332
|
}
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
data/lib/csv_utils/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: patchwork_csv_utils
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.24
|
|
5
5
|
platform: x86_64-darwin
|
|
6
6
|
authors:
|
|
7
7
|
- kingsley.hendrickse
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2025-
|
|
11
|
+
date: 2025-11-14 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: Deduplication of CSV files and XLS to CSV conversion.
|
|
14
14
|
email:
|
|
@@ -32,6 +32,11 @@ files:
|
|
|
32
32
|
- ext/csv_utils/src/utils/csv.rs
|
|
33
33
|
- ext/csv_utils/src/utils/dedup.rs
|
|
34
34
|
- ext/csv_utils/src/utils/mod.rs
|
|
35
|
+
- ext/csv_utils/src/utils/shared/datetime.rs
|
|
36
|
+
- ext/csv_utils/src/utils/shared/filters.rs
|
|
37
|
+
- ext/csv_utils/src/utils/shared/mod.rs
|
|
38
|
+
- ext/csv_utils/src/utils/shared/types.rs
|
|
39
|
+
- ext/csv_utils/src/utils/shared/validation.rs
|
|
35
40
|
- ext/csv_utils/src/utils/xls.rs
|
|
36
41
|
- lib/csv_utils.rb
|
|
37
42
|
- lib/csv_utils/2.7/csv_utils.bundle
|
|
@@ -63,7 +68,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
63
68
|
- !ruby/object:Gem::Version
|
|
64
69
|
version: '0'
|
|
65
70
|
requirements: []
|
|
66
|
-
rubygems_version: 3.
|
|
71
|
+
rubygems_version: 3.5.23
|
|
67
72
|
signing_key:
|
|
68
73
|
specification_version: 4
|
|
69
74
|
summary: Fast CSV utils
|