patchwork_csv_utils 0.1.13-arm64-darwin → 0.1.15-arm64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +2 -2
- data/ext/csv_utils/src/lib.rs +2 -2
- data/ext/csv_utils/src/utils/csv.rs +12 -2
- data/ext/csv_utils/src/utils/xls.rs +15 -6
- data/lib/csv_utils/2.7/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.0/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.1/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.2/csv_utils.bundle +0 -0
- data/lib/csv_utils/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 14f80b793a82a72259a5109f10fe4819209d5678b0dfb9e9625f1dcbe369032c
|
4
|
+
data.tar.gz: 88eb4c82a01f9aefed47a4004b0ee3aab5c06c797f13037782379d2652aabb97
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ab3d39f423189c11331d585572f6a574d2e77a6b66f67172c4ab82b1c0185b4a251a1cee58e4cbbee361d5a8651a9bf08b7a7d9867cc1e45f47d2e06cd1a8a3c
|
7
|
+
data.tar.gz: b20cfb0ac285189a58f879b4ded074bac5886c780afb7bef983809e8689e9ee24d13f923ec633094e150be528d84702ba3585e28f8951974b4495727c6efb4d7
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -14,8 +14,8 @@ gem install patchwork_csv_utils
|
|
14
14
|
```irb
|
15
15
|
require 'csv_utils'
|
16
16
|
CsvUtils.dedup('file1.csv', 'file2.csv', 'output.csv', ['mandatory_headers'])
|
17
|
-
CsvUtils.to_csv('file1.xls', 'output_file1.csv', ['request_ids_to_skip'], ['mandatory_headers'])
|
18
|
-
CsvUtils.transform_csv('file1.xls', 'output_file1.csv', ['request_ids_to_skip'], ['mandatory_headers'])
|
17
|
+
CsvUtils.to_csv('file1.xls', 'output_file1.csv', ['request_ids_to_skip'], ['mandatory_headers'], ['status_exclusions'])
|
18
|
+
CsvUtils.transform_csv('file1.xls', 'output_file1.csv', ['request_ids_to_skip'], ['mandatory_headers'], ['status_exclusions'])
|
19
19
|
```
|
20
20
|
|
21
21
|
## Release
|
data/ext/csv_utils/src/lib.rs
CHANGED
@@ -9,7 +9,7 @@ pub mod utils;
|
|
9
9
|
fn init() -> Result<(), magnus::Error> {
|
10
10
|
let module = define_module("CsvUtils")?;
|
11
11
|
module.define_singleton_method("dedup", function!(dedup, 4))?;
|
12
|
-
module.define_singleton_method("to_csv", function!(to_csv, 4))?;
|
13
|
-
module.define_singleton_method("transform_csv", function!(transform_csv, 4))?;
|
12
|
+
module.define_singleton_method("to_csv", function!(to_csv, 5))?;
|
13
|
+
module.define_singleton_method("transform_csv", function!(transform_csv, 5))?;
|
14
14
|
Ok(())
|
15
15
|
}
|
@@ -1,6 +1,6 @@
|
|
1
1
|
use std::collections::HashMap;
|
2
2
|
use std::fs::File;
|
3
|
-
|
3
|
+
use calamine::Data;
|
4
4
|
use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Utc};
|
5
5
|
use csv::{Reader, StringRecord, Writer};
|
6
6
|
use magnus::{Error, RArray, Ruby};
|
@@ -9,12 +9,14 @@ use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error,
|
|
9
9
|
|
10
10
|
pub fn transform_csv(ruby: &Ruby, csv_path: String,
|
11
11
|
target_path: String, exclusions: RArray,
|
12
|
-
mandatory_headers: RArray,
|
12
|
+
mandatory_headers: RArray,
|
13
|
+
status_exclusions: RArray) -> magnus::error::Result<()> {
|
13
14
|
if !csv_path.has_extension(&["csv"]) {
|
14
15
|
return Err(Error::new(ruby.exception_standard_error(), "csv_path must be a csv file".to_string()));
|
15
16
|
}
|
16
17
|
|
17
18
|
let exclusions = RArray::to_vec(exclusions)?;
|
19
|
+
let status_exclusions = RArray::to_vec(status_exclusions)?;
|
18
20
|
let mandatory_headers: Vec<String> = RArray::to_vec(mandatory_headers)?;
|
19
21
|
|
20
22
|
let csv_file = File::open(csv_path).map_err(|e| magnus_err(ruby, e, "csv_path"))?;
|
@@ -38,12 +40,14 @@ pub fn transform_csv(ruby: &Ruby, csv_path: String,
|
|
38
40
|
let end = header_map.get("End").ok_or(missing_header(ruby, "End"))?;
|
39
41
|
let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
|
40
42
|
let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
|
43
|
+
let status = header_map.get("Status");
|
41
44
|
|
42
45
|
let mandatory_records = get_mandatory_records(&ruby, &mut csv, &headers_list, &mandatory_headers)?;
|
43
46
|
|
44
47
|
for (ri, record) in mandatory_records.iter().enumerate() {
|
45
48
|
|
46
49
|
if skip_excluded_rows(request_id, &record, &exclusions) { continue; }
|
50
|
+
if skip_excluded_status_rows(&status, &record, &status_exclusions) { continue; }
|
47
51
|
if has_empty_row_skip(&record) { continue; }
|
48
52
|
if has_empty_first_col_skip_row(&record) { continue; }
|
49
53
|
|
@@ -124,6 +128,12 @@ fn skip_excluded_rows(request_id: &usize, r: &StringRecord, exclusions: &Vec<Str
|
|
124
128
|
exclusions.contains(&value.to_string())
|
125
129
|
}
|
126
130
|
|
131
|
+
fn skip_excluded_status_rows(status: &Option<&usize>, r: &StringRecord, exclusions: &Vec<String>) -> bool {
|
132
|
+
status
|
133
|
+
.map(|index| exclusions.contains(&r[*index].to_string()))
|
134
|
+
.unwrap_or(false)
|
135
|
+
}
|
136
|
+
|
127
137
|
fn string_to_datetime(s: &str) -> Option<NaiveDateTime> {
|
128
138
|
let maybe_correct = correct_datetime(s);
|
129
139
|
if maybe_correct.is_some() { return maybe_correct; }
|
@@ -11,7 +11,8 @@ use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error,
|
|
11
11
|
pub fn to_csv(ruby: &Ruby, xls_path: String,
|
12
12
|
target_path: String,
|
13
13
|
exclusions: RArray,
|
14
|
-
mandatory_headers: RArray
|
14
|
+
mandatory_headers: RArray,
|
15
|
+
status_exclusions: RArray,
|
15
16
|
) -> magnus::error::Result<()> {
|
16
17
|
if !xls_path.has_extension(&["xls"]) {
|
17
18
|
return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls file".to_string()));
|
@@ -19,6 +20,7 @@ pub fn to_csv(ruby: &Ruby, xls_path: String,
|
|
19
20
|
|
20
21
|
let exclusions = RArray::to_vec(exclusions)?;
|
21
22
|
let mandatory_headers: Vec<String> = RArray::to_vec(mandatory_headers)?;
|
23
|
+
let status_exclusions = RArray::to_vec(status_exclusions)?;
|
22
24
|
|
23
25
|
let mut workbook: Xls<_> = open_workbook(xls_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not open xls: {}", xls_path).as_str()))?;
|
24
26
|
let range = workbook.worksheet_range_at(0)
|
@@ -26,7 +28,7 @@ pub fn to_csv(ruby: &Ruby, xls_path: String,
|
|
26
28
|
.and_then(|r| r.map_err(|e| magnus_err(ruby, e, "could not read worksheet range")))?;
|
27
29
|
|
28
30
|
let headers = range.headers().ok_or(magnus::Error::new(ruby.exception_standard_error(), "no headers found in xls".to_string()))?;
|
29
|
-
let headers_list
|
31
|
+
let headers_list: Vec<String> = headers.iter().map(|h| h.to_string()).collect();
|
30
32
|
|
31
33
|
if let Some(value) =
|
32
34
|
check_mandatory_headers(ruby, &headers_list, &mandatory_headers, "csv") { return value; }
|
@@ -35,13 +37,14 @@ pub fn to_csv(ruby: &Ruby, xls_path: String,
|
|
35
37
|
let csv_out_file = File::create(target_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not create csv file: {}", target_path).as_str()))?;
|
36
38
|
let mut dest = BufWriter::new(csv_out_file);
|
37
39
|
|
38
|
-
write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers, headers_list)
|
40
|
+
write_csv(ruby, &mut dest, &range, header_map, exclusions, mandatory_headers, headers_list, status_exclusions)
|
39
41
|
}
|
40
42
|
|
41
43
|
fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
42
44
|
header_map: HashMap<String, usize>, exclusions: Vec<String>,
|
43
45
|
mandatory_headers: Vec<String>,
|
44
|
-
headers_list: Vec<String
|
46
|
+
headers_list: Vec<String>,
|
47
|
+
status_exclusions: Vec<String>) -> magnus::error::Result<()> {
|
45
48
|
let n = mandatory_headers.len() - 1;
|
46
49
|
let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
|
47
50
|
let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
|
@@ -49,6 +52,7 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
|
49
52
|
let end = header_map.get("End").ok_or(missing_header(ruby, "End"))?;
|
50
53
|
let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
|
51
54
|
let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
|
55
|
+
let status = header_map.get("Status");
|
52
56
|
|
53
57
|
let mandatory_rows = get_mandatory_records(ruby, range, &headers_list, &mandatory_headers)?;
|
54
58
|
|
@@ -56,6 +60,7 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
|
56
60
|
let mut date_value = Utc::now().naive_utc();
|
57
61
|
|
58
62
|
if skip_excluded_rows(&request_id, &r, &exclusions) { continue; }
|
63
|
+
if skip_excluded_status_rows(&status, &r, &status_exclusions) { continue; }
|
59
64
|
if skip_empty_rows(&r) { continue; }
|
60
65
|
if skip_rows_with_no_request_id(&request_id, &r) { continue; }
|
61
66
|
if date_value_is_not_present(&date, &r) {
|
@@ -63,7 +68,6 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>,
|
|
63
68
|
}
|
64
69
|
|
65
70
|
for (i, c) in mandatory_headers.iter().enumerate() {
|
66
|
-
|
67
71
|
let column_index = header_map.get(c).ok_or(missing_header(ruby, c))?;
|
68
72
|
let c = r.get(*column_index).ok_or(missing_value(ruby, c))?;
|
69
73
|
|
@@ -125,6 +129,12 @@ fn skip_excluded_rows(request_id: &usize, r: &Vec<&Data>, exclusions: &Vec<Strin
|
|
125
129
|
exclusions.contains(&value.to_string())
|
126
130
|
}
|
127
131
|
|
132
|
+
fn skip_excluded_status_rows(status: &Option<&usize>, r: &Vec<&Data>, exclusions: &Vec<String>) -> bool {
|
133
|
+
status
|
134
|
+
.map(|index| exclusions.contains(&r[*index].to_string()))
|
135
|
+
.unwrap_or(false)
|
136
|
+
}
|
137
|
+
|
128
138
|
fn skip_empty_rows(r: &Vec<&Data>) -> bool {
|
129
139
|
r.into_iter().all(|c| c == &&Data::Empty)
|
130
140
|
}
|
@@ -149,7 +159,6 @@ fn clean_strings(s: &str) -> String {
|
|
149
159
|
s.replace("\n", " ")
|
150
160
|
.replace("\r", " ")
|
151
161
|
.replace("\"", "")
|
152
|
-
.replace("'", "")
|
153
162
|
}
|
154
163
|
|
155
164
|
struct XlsMandatoryColumn<'a> {
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/csv_utils/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: patchwork_csv_utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.13
|
4
|
+
version: 0.1.15
|
5
5
|
platform: arm64-darwin
|
6
6
|
authors:
|
7
7
|
- kingsley.hendrickse
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-10-31 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Deduplication of CSV files and XLS to CSV conversion.
|
14
14
|
email:
|