patchwork_csv_utils 0.1.23-x86_64-darwin → 0.1.24-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +238 -278
- data/Gemfile +1 -1
- data/Gemfile.lock +5 -3
- data/ext/csv_utils/Cargo.toml +5 -4
- data/ext/csv_utils/src/lib.rs +3 -3
- data/ext/csv_utils/src/utils/csv.rs +160 -173
- data/ext/csv_utils/src/utils/dedup.rs +102 -67
- data/ext/csv_utils/src/utils/mod.rs +68 -21
- data/ext/csv_utils/src/utils/shared/datetime.rs +79 -0
- data/ext/csv_utils/src/utils/shared/filters.rs +130 -0
- data/ext/csv_utils/src/utils/shared/mod.rs +4 -0
- data/ext/csv_utils/src/utils/shared/types.rs +97 -0
- data/ext/csv_utils/src/utils/shared/validation.rs +34 -0
- data/ext/csv_utils/src/utils/xls.rs +272 -211
- data/lib/csv_utils/2.7/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.0/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.1/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.2/csv_utils.bundle +0 -0
- data/lib/csv_utils/version.rb +1 -1
- metadata +8 -3
|
@@ -2,270 +2,331 @@ use std::collections::HashMap;
|
|
|
2
2
|
use std::fs::File;
|
|
3
3
|
use std::io::{BufWriter, Write};
|
|
4
4
|
|
|
5
|
-
use calamine::{open_workbook_auto, Data,
|
|
5
|
+
use calamine::{open_workbook_auto, Data, ExcelDateTime, Range, Reader};
|
|
6
6
|
use chrono::{NaiveDateTime, Timelike, Utc};
|
|
7
7
|
use magnus::{RArray, Ruby};
|
|
8
8
|
|
|
9
|
-
use crate::utils::
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
9
|
+
use crate::utils::shared::datetime::DateTimeProcessor;
|
|
10
|
+
use crate::utils::shared::filters::RowFilters;
|
|
11
|
+
use crate::utils::shared::types::{HeaderConfig, MandatoryColumn, ProcessingConfig};
|
|
12
|
+
use crate::utils::shared::validation::TrustValidator;
|
|
13
|
+
use crate::utils::{
|
|
14
|
+
check_mandatory_headers, index_of_header_in_mandatory_list, magnus_err, missing_header,
|
|
15
|
+
missing_value, FileExtension,
|
|
16
|
+
};
|
|
17
|
+
|
|
18
|
+
#[allow(clippy::too_many_arguments)]
|
|
19
|
+
pub fn to_csv(
|
|
20
|
+
ruby: &Ruby,
|
|
21
|
+
xls_path: String,
|
|
22
|
+
target_path: String,
|
|
23
|
+
exclusions: RArray,
|
|
24
|
+
mandatory_headers: RArray,
|
|
25
|
+
status_exclusions: RArray,
|
|
26
|
+
expected_trust_name: String,
|
|
27
|
+
is_streamed_file: bool,
|
|
28
|
+
earliest_start_date: Option<String>,
|
|
19
29
|
) -> magnus::error::Result<()> {
|
|
20
|
-
if !xls_path.has_extension(&["xls","xlsx"]) {
|
|
21
|
-
return Err(magnus::Error::new(
|
|
30
|
+
if !xls_path.has_extension(&["xls", "xlsx"]) {
|
|
31
|
+
return Err(magnus::Error::new(
|
|
32
|
+
ruby.exception_standard_error(),
|
|
33
|
+
"xls_path must be an xls or xlsx file".to_string(),
|
|
34
|
+
));
|
|
22
35
|
}
|
|
23
36
|
|
|
24
|
-
let
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
37
|
+
let config = ProcessingConfig::from_ruby(
|
|
38
|
+
exclusions,
|
|
39
|
+
mandatory_headers,
|
|
40
|
+
status_exclusions,
|
|
41
|
+
expected_trust_name,
|
|
42
|
+
is_streamed_file,
|
|
43
|
+
earliest_start_date,
|
|
44
|
+
)?;
|
|
45
|
+
|
|
46
|
+
let mut workbook = open_workbook_auto(&xls_path).map_err(|e| {
|
|
47
|
+
magnus_err(
|
|
48
|
+
ruby,
|
|
49
|
+
e,
|
|
50
|
+
format!("could not open workbook: {}", xls_path).as_str(),
|
|
51
|
+
)
|
|
52
|
+
})?;
|
|
53
|
+
|
|
54
|
+
let range = workbook
|
|
55
|
+
.worksheet_range_at(0)
|
|
56
|
+
.ok_or_else(|| {
|
|
57
|
+
magnus::Error::new(
|
|
58
|
+
ruby.exception_standard_error(),
|
|
59
|
+
"no worksheet found in xls".to_string(),
|
|
60
|
+
)
|
|
61
|
+
})
|
|
36
62
|
.and_then(|r| r.map_err(|e| magnus_err(ruby, e, "could not read worksheet range")))?;
|
|
37
63
|
|
|
38
|
-
let headers = range.headers().
|
|
64
|
+
let headers = range.headers().ok_or_else(|| {
|
|
65
|
+
magnus::Error::new(
|
|
66
|
+
ruby.exception_standard_error(),
|
|
67
|
+
"no headers found in xls".to_string(),
|
|
68
|
+
)
|
|
69
|
+
})?;
|
|
39
70
|
let headers_list: Vec<String> = headers.iter().map(|h| h.to_string()).collect();
|
|
40
71
|
|
|
41
72
|
if let Some(value) =
|
|
42
|
-
check_mandatory_headers(ruby, &headers_list, &mandatory_headers, "csv")
|
|
73
|
+
check_mandatory_headers(ruby, &headers_list, &config.mandatory_headers, "csv")
|
|
74
|
+
{
|
|
75
|
+
return value;
|
|
76
|
+
}
|
|
43
77
|
|
|
44
|
-
let
|
|
45
|
-
|
|
78
|
+
let csv_out_file = File::create(&target_path).map_err(|e| {
|
|
79
|
+
magnus_err(
|
|
80
|
+
ruby,
|
|
81
|
+
e,
|
|
82
|
+
format!("could not create csv file: {}", target_path).as_str(),
|
|
83
|
+
)
|
|
84
|
+
})?;
|
|
46
85
|
let mut dest = BufWriter::new(csv_out_file);
|
|
47
86
|
|
|
48
|
-
write_csv(ruby, &mut dest, &range,
|
|
87
|
+
write_csv(ruby, &mut dest, &range, config, headers_list)
|
|
49
88
|
}
|
|
50
89
|
|
|
51
|
-
fn write_csv<W: Write>(
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
let
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
let
|
|
67
|
-
let
|
|
68
|
-
|
|
69
|
-
|
|
90
|
+
fn write_csv<W: Write>(
|
|
91
|
+
ruby: &Ruby,
|
|
92
|
+
dest: &mut W,
|
|
93
|
+
range: &Range<Data>,
|
|
94
|
+
config: ProcessingConfig,
|
|
95
|
+
headers_list: Vec<String>,
|
|
96
|
+
) -> magnus::error::Result<()> {
|
|
97
|
+
let n = config.mandatory_headers.len() - 1;
|
|
98
|
+
let header_map: HashMap<String, usize> = config
|
|
99
|
+
.mandatory_headers
|
|
100
|
+
.iter()
|
|
101
|
+
.enumerate()
|
|
102
|
+
.map(|(i, h)| (h.to_string(), i))
|
|
103
|
+
.collect();
|
|
104
|
+
|
|
105
|
+
let header_config = HeaderConfig::from_header_map(&header_map, ruby)?;
|
|
106
|
+
let filters = RowFilters::new(
|
|
107
|
+
config.exclusions,
|
|
108
|
+
config.status_exclusions,
|
|
109
|
+
config.earliest_start_date,
|
|
110
|
+
);
|
|
111
|
+
let trust_validator = TrustValidator::new(config.expected_trust_name, config.is_streamed_file);
|
|
112
|
+
|
|
113
|
+
let mandatory_rows =
|
|
114
|
+
get_mandatory_records(ruby, range, &headers_list, &config.mandatory_headers)?;
|
|
70
115
|
|
|
71
116
|
for (ri, r) in mandatory_rows.into_iter().enumerate() {
|
|
72
|
-
|
|
117
|
+
if filters.should_skip(
|
|
118
|
+
&r,
|
|
119
|
+
header_config.request_id,
|
|
120
|
+
header_config.status,
|
|
121
|
+
header_config.date,
|
|
122
|
+
) {
|
|
123
|
+
continue;
|
|
124
|
+
}
|
|
73
125
|
|
|
74
|
-
if
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
126
|
+
if r.get(header_config.date)
|
|
127
|
+
.is_none_or(|d| matches!(d, &Data::Empty))
|
|
128
|
+
{
|
|
129
|
+
return Err(magnus::Error::new(
|
|
130
|
+
ruby.exception_standard_error(),
|
|
131
|
+
format!("Date value is not present in row: {}", ri),
|
|
132
|
+
));
|
|
81
133
|
}
|
|
82
134
|
|
|
83
|
-
if
|
|
84
|
-
|
|
135
|
+
if ri > 0 {
|
|
136
|
+
if let Some(trust_data) = r.get(header_config.trust_name) {
|
|
137
|
+
trust_validator.validate(ruby, &trust_data.to_string())?;
|
|
138
|
+
}
|
|
85
139
|
}
|
|
86
140
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
141
|
+
let mut date_value = Utc::now().naive_utc();
|
|
142
|
+
|
|
143
|
+
for (i, c) in config.mandatory_headers.iter().enumerate() {
|
|
144
|
+
let column_index = *header_map.get(c).ok_or_else(|| missing_header(ruby, c))?;
|
|
145
|
+
let c = r.get(column_index).ok_or_else(|| missing_value(ruby, c))?;
|
|
90
146
|
|
|
91
147
|
match *c {
|
|
92
148
|
Data::Empty => Ok(()),
|
|
93
|
-
Data::String(ref s) | Data::DurationIso(ref s) =>
|
|
94
|
-
handle_commas(dest, s)
|
|
95
|
-
}
|
|
149
|
+
Data::String(ref s) | Data::DurationIso(ref s) => handle_commas(dest, s),
|
|
96
150
|
Data::Float(ref f) => write!(dest, "{}", f),
|
|
97
151
|
Data::DateTimeIso(ref s) => {
|
|
98
|
-
|
|
99
|
-
let normalized_s = if s.contains('.') {
|
|
100
|
-
let parts: Vec<&str> = s.split('.').collect();
|
|
101
|
-
format!("{}.{}", parts[0], &parts[1][..std::cmp::min(parts[1].len(), 6)]) // Keep up to 6 fractional seconds
|
|
102
|
-
} else {
|
|
103
|
-
s.to_string()
|
|
104
|
-
};
|
|
105
|
-
|
|
106
|
-
// Attempt to parse the normalized string as a full datetime
|
|
107
|
-
let mut current = NaiveDateTime::parse_from_str(&normalized_s, "%Y-%m-%dT%H:%M:%S%.f")
|
|
108
|
-
.or_else(|_| {
|
|
109
|
-
// If parsing as datetime fails, try parsing as date-only
|
|
110
|
-
NaiveDateTime::parse_from_str(&format!("{}T00:00:00", normalized_s), "%Y-%m-%dT%H:%M:%S%.f")
|
|
111
|
-
})
|
|
112
|
-
.or_else(|_| {
|
|
113
|
-
// If parsing as time-only fails, try parsing as time-only
|
|
114
|
-
NaiveDateTime::parse_from_str(&format!("1970-01-01T{}", normalized_s), "%Y-%m-%dT%H:%M:%S%.f")
|
|
115
|
-
})
|
|
116
|
-
.map_err(|_| to_datetime_error(ruby, s, ri, "Date or Time"))?;
|
|
117
|
-
|
|
118
|
-
// Apply the same logic as for Data::DateTime
|
|
119
|
-
if i == *date {
|
|
120
|
-
date_value = current;
|
|
121
|
-
} else if i == *start || i == *end || i == *actual_start || i == *actual_end {
|
|
122
|
-
current = transform_time_to_datetime(date_value, current);
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
// Round up to the next second if we have any fractional seconds
|
|
126
|
-
let adjusted_time = if current.nanosecond() > 0 {
|
|
127
|
-
current + chrono::Duration::seconds(1) - chrono::Duration::nanoseconds(current.nanosecond() as i64)
|
|
128
|
-
} else {
|
|
129
|
-
current
|
|
130
|
-
};
|
|
131
|
-
|
|
132
|
-
// Format the output to ensure consistent precision
|
|
133
|
-
let formatted_output = adjusted_time.format("%Y-%m-%d %H:%M:%S").to_string();
|
|
134
|
-
write!(dest, "{}", formatted_output)
|
|
152
|
+
handle_datetime_iso(ruby, dest, s, ri, i, &header_config, &mut date_value)
|
|
135
153
|
}
|
|
136
154
|
Data::DateTime(ref d) => {
|
|
137
|
-
|
|
138
|
-
if i == *date {
|
|
139
|
-
date_value = current;
|
|
140
|
-
} else if i == *start || i == *end || i == *actual_start || i == *actual_end {
|
|
141
|
-
current = transform_time_to_datetime(date_value, current);
|
|
142
|
-
}
|
|
143
|
-
write!(dest, "{}", current)
|
|
155
|
+
handle_datetime(dest, *d, ri, i, &header_config, &mut date_value)
|
|
144
156
|
}
|
|
145
157
|
Data::Int(ref i) => write!(dest, "{}", i),
|
|
146
158
|
Data::Error(ref e) => write!(dest, "{:?}", e),
|
|
147
159
|
Data::Bool(ref b) => write!(dest, "{}", b),
|
|
148
|
-
}.map_err(|e| magnus_err(ruby, e, format!("error writing xls row: {}, column: {}", ri, i).as_str()))?;
|
|
149
|
-
if i != n {
|
|
150
|
-
write!(dest, ",").map_err(|e| magnus_err(ruby, e, format!("error writing csv comma for row: {}, column: {}", ri, i).as_str()))?;
|
|
151
160
|
}
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
}
|
|
161
|
+
.map_err(|e| {
|
|
162
|
+
magnus_err(
|
|
163
|
+
ruby,
|
|
164
|
+
e,
|
|
165
|
+
format!("error writing xls row: {}, column: {}", ri, i).as_str(),
|
|
166
|
+
)
|
|
167
|
+
})?;
|
|
157
168
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
169
|
+
if i != n {
|
|
170
|
+
write!(dest, ",").map_err(|e| {
|
|
171
|
+
magnus_err(
|
|
172
|
+
ruby,
|
|
173
|
+
e,
|
|
174
|
+
format!("error writing csv comma for row: {}, column: {}", ri, i).as_str(),
|
|
175
|
+
)
|
|
176
|
+
})?;
|
|
177
|
+
}
|
|
164
178
|
}
|
|
179
|
+
write!(dest, "\r\n").map_err(|e| {
|
|
180
|
+
magnus_err(
|
|
181
|
+
ruby,
|
|
182
|
+
e,
|
|
183
|
+
format!("error writing end of line for row: {}", ri).as_str(),
|
|
184
|
+
)
|
|
185
|
+
})?;
|
|
165
186
|
}
|
|
166
187
|
Ok(())
|
|
167
188
|
}
|
|
168
189
|
|
|
169
|
-
fn
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
190
|
+
fn handle_datetime_iso<W: Write>(
|
|
191
|
+
_ruby: &Ruby,
|
|
192
|
+
dest: &mut W,
|
|
193
|
+
s: &str,
|
|
194
|
+
ri: usize,
|
|
195
|
+
i: usize,
|
|
196
|
+
header_config: &HeaderConfig,
|
|
197
|
+
date_value: &mut NaiveDateTime,
|
|
198
|
+
) -> std::io::Result<()> {
|
|
199
|
+
let normalized_s = if s.contains('.') {
|
|
200
|
+
let parts: Vec<&str> = s.split('.').collect();
|
|
201
|
+
format!(
|
|
202
|
+
"{}.{}",
|
|
203
|
+
parts[0],
|
|
204
|
+
&parts[1][..std::cmp::min(parts[1].len(), 6)]
|
|
205
|
+
)
|
|
206
|
+
} else {
|
|
207
|
+
s.to_string()
|
|
208
|
+
};
|
|
209
|
+
|
|
210
|
+
let mut current = NaiveDateTime::parse_from_str(&normalized_s, "%Y-%m-%dT%H:%M:%S%.f")
|
|
211
|
+
.or_else(|_| {
|
|
212
|
+
NaiveDateTime::parse_from_str(
|
|
213
|
+
&format!("{}T00:00:00", normalized_s),
|
|
214
|
+
"%Y-%m-%dT%H:%M:%S%.f",
|
|
215
|
+
)
|
|
216
|
+
})
|
|
217
|
+
.or_else(|_| {
|
|
218
|
+
NaiveDateTime::parse_from_str(
|
|
219
|
+
&format!("1970-01-01T{}", normalized_s),
|
|
220
|
+
"%Y-%m-%dT%H:%M:%S%.f",
|
|
221
|
+
)
|
|
222
|
+
})
|
|
223
|
+
.map_err(|_| {
|
|
224
|
+
std::io::Error::new(
|
|
225
|
+
std::io::ErrorKind::InvalidData,
|
|
226
|
+
format!(
|
|
227
|
+
"Could not parse datetime '{}', row: {}, col: Date or Time",
|
|
228
|
+
s, ri
|
|
229
|
+
),
|
|
230
|
+
)
|
|
231
|
+
})?;
|
|
232
|
+
|
|
233
|
+
if header_config.is_date_column(i) {
|
|
234
|
+
*date_value = current;
|
|
235
|
+
} else if header_config.is_time_column(i) {
|
|
236
|
+
current = DateTimeProcessor::new(*date_value).combine_datetime_parts(current);
|
|
185
237
|
}
|
|
186
238
|
|
|
187
|
-
|
|
188
|
-
|
|
239
|
+
let adjusted_time = if current.nanosecond() > 0 {
|
|
240
|
+
current + chrono::Duration::seconds(1)
|
|
241
|
+
- chrono::Duration::nanoseconds(current.nanosecond() as i64)
|
|
242
|
+
} else {
|
|
243
|
+
current
|
|
244
|
+
};
|
|
189
245
|
|
|
190
|
-
|
|
191
|
-
r[*date] == &Data::Empty
|
|
246
|
+
write!(dest, "{}", adjusted_time.format("%Y-%m-%d %H:%M:%S"))
|
|
192
247
|
}
|
|
193
248
|
|
|
194
|
-
fn
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
249
|
+
fn handle_datetime<W: Write>(
|
|
250
|
+
dest: &mut W,
|
|
251
|
+
d: ExcelDateTime,
|
|
252
|
+
_ri: usize,
|
|
253
|
+
i: usize,
|
|
254
|
+
header_config: &HeaderConfig,
|
|
255
|
+
date_value: &mut NaiveDateTime,
|
|
256
|
+
) -> std::io::Result<()> {
|
|
257
|
+
let mut current = d.as_datetime().ok_or_else(|| {
|
|
258
|
+
std::io::Error::new(
|
|
259
|
+
std::io::ErrorKind::InvalidData,
|
|
260
|
+
format!(
|
|
261
|
+
"Could not parse datetime '{:?}', row: {}, col: Date",
|
|
262
|
+
d, _ri
|
|
263
|
+
),
|
|
264
|
+
)
|
|
265
|
+
})?;
|
|
266
|
+
|
|
267
|
+
if header_config.is_date_column(i) {
|
|
268
|
+
*date_value = current;
|
|
269
|
+
} else if header_config.is_time_column(i) {
|
|
270
|
+
current = DateTimeProcessor::new(*date_value).combine_datetime_parts(current);
|
|
203
271
|
}
|
|
204
272
|
|
|
205
|
-
|
|
206
|
-
exclusions.contains(&value.to_string())
|
|
207
|
-
}
|
|
208
|
-
|
|
209
|
-
fn skip_excluded_status_rows(status: &Option<&usize>, r: &Vec<&Data>, exclusions: &Vec<String>) -> bool {
|
|
210
|
-
status
|
|
211
|
-
.map(|index| exclusions.contains(&r[*index].to_string()))
|
|
212
|
-
.unwrap_or(false)
|
|
213
|
-
}
|
|
214
|
-
|
|
215
|
-
fn skip_empty_rows(r: &Vec<&Data>) -> bool {
|
|
216
|
-
r.into_iter().all(|c| c == &&Data::Empty)
|
|
273
|
+
write!(dest, "{}", current)
|
|
217
274
|
}
|
|
218
275
|
|
|
219
|
-
fn
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
276
|
+
fn get_mandatory_records<'a>(
|
|
277
|
+
_ruby: &Ruby,
|
|
278
|
+
range: &'a Range<Data>,
|
|
279
|
+
csv_header_list: &[String],
|
|
280
|
+
mandatory_headers_list: &[String],
|
|
281
|
+
) -> magnus::error::Result<Vec<Vec<&'a Data>>> {
|
|
282
|
+
let inverse_header_map: HashMap<usize, String> = csv_header_list
|
|
283
|
+
.iter()
|
|
284
|
+
.enumerate()
|
|
285
|
+
.map(|(i, h)| (i, h.to_string()))
|
|
286
|
+
.collect();
|
|
287
|
+
|
|
288
|
+
range
|
|
289
|
+
.rows()
|
|
290
|
+
.map(|row| {
|
|
291
|
+
let mut columns: Vec<MandatoryColumn<&Data>> = row
|
|
292
|
+
.iter()
|
|
293
|
+
.enumerate()
|
|
294
|
+
.filter_map(|(i, column_value)| {
|
|
295
|
+
inverse_header_map.get(&i).and_then(|column_name| {
|
|
296
|
+
if mandatory_headers_list.contains(column_name) {
|
|
297
|
+
index_of_header_in_mandatory_list(
|
|
298
|
+
mandatory_headers_list.to_vec(),
|
|
299
|
+
column_name.to_string(),
|
|
300
|
+
)
|
|
301
|
+
.map(|index| MandatoryColumn::new(column_value, index))
|
|
302
|
+
} else {
|
|
303
|
+
None
|
|
304
|
+
}
|
|
305
|
+
})
|
|
306
|
+
})
|
|
307
|
+
.collect();
|
|
308
|
+
|
|
309
|
+
columns.sort_by_key(|c| c.index);
|
|
310
|
+
|
|
311
|
+
Ok(columns.into_iter().map(|c| c.value).collect())
|
|
312
|
+
})
|
|
313
|
+
.collect()
|
|
246
314
|
}
|
|
247
315
|
|
|
248
316
|
fn handle_commas<W: Write>(dest: &mut W, s: &str) -> std::io::Result<()> {
|
|
249
|
-
|
|
250
|
-
|
|
317
|
+
let cleaned = s
|
|
318
|
+
.chars()
|
|
319
|
+
.map(|c| match c {
|
|
320
|
+
'\n' => ' ',
|
|
321
|
+
'\r' | '"' => '\0',
|
|
322
|
+
_ => c,
|
|
323
|
+
})
|
|
324
|
+
.filter(|&c| c != '\0')
|
|
325
|
+
.collect::<String>();
|
|
326
|
+
|
|
327
|
+
if s.contains(',') {
|
|
328
|
+
write!(dest, "{:?}", cleaned.trim_end())
|
|
251
329
|
} else {
|
|
252
|
-
write!(dest, "{}",
|
|
253
|
-
}
|
|
254
|
-
}
|
|
255
|
-
|
|
256
|
-
fn clean_strings(s: &str) -> String {
|
|
257
|
-
s.replace("\n", " ")
|
|
258
|
-
.replace("\r", "")
|
|
259
|
-
.replace("\"", "")
|
|
260
|
-
}
|
|
261
|
-
|
|
262
|
-
struct XlsMandatoryColumn<'a> {
|
|
263
|
-
value: &'a Data,
|
|
264
|
-
index: usize,
|
|
265
|
-
}
|
|
266
|
-
|
|
267
|
-
impl<'a> XlsMandatoryColumn<'a> {
|
|
268
|
-
fn new(value: &'a Data, index: usize) -> Self {
|
|
269
|
-
XlsMandatoryColumn { value, index }
|
|
330
|
+
write!(dest, "{}", cleaned.trim_end())
|
|
270
331
|
}
|
|
271
332
|
}
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
data/lib/csv_utils/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: patchwork_csv_utils
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.24
|
|
5
5
|
platform: x86_64-darwin
|
|
6
6
|
authors:
|
|
7
7
|
- kingsley.hendrickse
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2025-
|
|
11
|
+
date: 2025-11-14 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: Deduplication of CSV files and XLS to CSV conversion.
|
|
14
14
|
email:
|
|
@@ -32,6 +32,11 @@ files:
|
|
|
32
32
|
- ext/csv_utils/src/utils/csv.rs
|
|
33
33
|
- ext/csv_utils/src/utils/dedup.rs
|
|
34
34
|
- ext/csv_utils/src/utils/mod.rs
|
|
35
|
+
- ext/csv_utils/src/utils/shared/datetime.rs
|
|
36
|
+
- ext/csv_utils/src/utils/shared/filters.rs
|
|
37
|
+
- ext/csv_utils/src/utils/shared/mod.rs
|
|
38
|
+
- ext/csv_utils/src/utils/shared/types.rs
|
|
39
|
+
- ext/csv_utils/src/utils/shared/validation.rs
|
|
35
40
|
- ext/csv_utils/src/utils/xls.rs
|
|
36
41
|
- lib/csv_utils.rb
|
|
37
42
|
- lib/csv_utils/2.7/csv_utils.bundle
|
|
@@ -63,7 +68,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
63
68
|
- !ruby/object:Gem::Version
|
|
64
69
|
version: '0'
|
|
65
70
|
requirements: []
|
|
66
|
-
rubygems_version: 3.
|
|
71
|
+
rubygems_version: 3.5.23
|
|
67
72
|
signing_key:
|
|
68
73
|
specification_version: 4
|
|
69
74
|
summary: Fast CSV utils
|