patchwork_csv_utils 0.1.4-x86_64-darwin → 0.1.6-x86_64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/ext/csv_utils/src/lib.rs +1 -1
- data/ext/csv_utils/src/utils/dedup.rs +22 -0
- data/ext/csv_utils/src/utils/xls.rs +21 -4
- data/lib/csv_utils/2.7/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.0/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.1/csv_utils.bundle +0 -0
- data/lib/csv_utils/3.2/csv_utils.bundle +0 -0
- data/lib/csv_utils/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dc9127c25a2fb4b4a0f99bd3730a4a82975fb249637f4c69ff31e0f5a68288de
|
4
|
+
data.tar.gz: 0b493f77942386540b3fc244fdfcdc853ce8dac8ae41df3fa37c06bff1777d50
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 23a5632d5cfbaf9eca4397bff6199a34cd52c902a07640e490633260c94c122a8b5fc567c09ad3264e03eb31c5e5db2d676b1efe66ec322d7a6a3ddb4e7017f5
|
7
|
+
data.tar.gz: 656ca8052002ab45dadad145b141de4255ec72a36c9c4d975adfd1e9071e7ac3675647837492b0261e6385fc5159af3e02cf9af92aa1551fdd25efc05bd02589
|
data/Gemfile.lock
CHANGED
data/ext/csv_utils/src/lib.rs
CHANGED
@@ -8,6 +8,6 @@ pub mod utils;
|
|
8
8
|
fn init() -> Result<(), magnus::Error> {
|
9
9
|
let module = define_module("CsvUtils")?;
|
10
10
|
module.define_singleton_method("dedup", function!(dedup, 3))?;
|
11
|
-
module.define_singleton_method("to_csv", function!(to_csv, 2))?;
|
11
|
+
module.define_singleton_method("to_csv", function!(to_csv, 3))?;
|
12
12
|
Ok(())
|
13
13
|
}
|
data/ext/csv_utils/src/utils/dedup.rs
CHANGED
@@ -33,12 +33,20 @@ pub fn dedup(ruby: &Ruby, previous_csv_path: String, new_csv_path: String, targe
|
|
33
33
|
let mut previous_records = vec![];
|
34
34
|
for previous_record in previous_csv.records() {
|
35
35
|
let previous_record = previous_record.map_err(|e| magnus_err(ruby, e, "previous_record"))?;
|
36
|
+
|
37
|
+
if has_empty_row_skip(&previous_record) { continue; }
|
38
|
+
if has_empty_first_col_skip_row(&previous_record) { continue; }
|
39
|
+
|
36
40
|
let previous_record = previous_record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
|
37
41
|
previous_records.push(previous_record)
|
38
42
|
}
|
39
43
|
|
40
44
|
for new_record in new_csv.records() {
|
41
45
|
let new_record = new_record.map_err(|e| magnus_err(ruby, e, "new_record"))?;
|
46
|
+
|
47
|
+
if has_empty_row_skip(&new_record) { continue; }
|
48
|
+
if has_empty_first_col_skip_row(&new_record) { continue; }
|
49
|
+
|
42
50
|
let new_record = new_record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
|
43
51
|
if !previous_records.contains(&new_record) {
|
44
52
|
wtr.write_byte_record(new_record.as_byte_record()).unwrap();
|
@@ -50,4 +58,18 @@ pub fn dedup(ruby: &Ruby, previous_csv_path: String, new_csv_path: String, targe
|
|
50
58
|
Ok(())
|
51
59
|
}
|
52
60
|
|
61
|
+
fn has_empty_first_col_skip_row(previous_record: &StringRecord) -> bool {
|
62
|
+
if previous_record[0].is_empty() {
|
63
|
+
return true;
|
64
|
+
}
|
65
|
+
false
|
66
|
+
}
|
67
|
+
|
68
|
+
fn has_empty_row_skip(record: &StringRecord) -> bool {
|
69
|
+
if record.iter().all(|r| r.is_empty()) {
|
70
|
+
return true;
|
71
|
+
}
|
72
|
+
false
|
73
|
+
}
|
74
|
+
|
53
75
|
|
data/ext/csv_utils/src/utils/xls.rs
CHANGED
@@ -4,11 +4,17 @@ use std::io::{BufWriter, Write};
|
|
4
4
|
|
5
5
|
use calamine::{Data, open_workbook, Range, Reader, Xls};
|
6
6
|
use chrono::{NaiveDateTime, Utc};
|
7
|
-
use magnus::Ruby;
|
7
|
+
use magnus::{RArray, Ruby};
|
8
8
|
|
9
9
|
use crate::utils::{FileExtension, magnus_err};
|
10
10
|
|
11
|
-
pub fn to_csv(ruby: &Ruby, xls_path: String, target_path: String) -> magnus::error::Result<()> {
|
11
|
+
pub fn to_csv(ruby: &Ruby, xls_path: String, target_path: String, exclusions: RArray) -> magnus::error::Result<()> {
|
12
|
+
let exclusions = RArray::to_vec(exclusions)?;
|
13
|
+
|
14
|
+
println!("xls_path: {:?}", xls_path);
|
15
|
+
println!("target_path: {:?}", target_path);
|
16
|
+
println!("exclusions: {:?}", exclusions);
|
17
|
+
|
12
18
|
if !xls_path.has_extension(&["xls"]) {
|
13
19
|
return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls file".to_string()));
|
14
20
|
}
|
@@ -23,14 +29,15 @@ pub fn to_csv(ruby: &Ruby, xls_path: String, target_path: String) -> magnus::err
|
|
23
29
|
let csv_out_file = File::create(target_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not create csv file: {}", target_path).as_str()))?;
|
24
30
|
let mut dest = BufWriter::new(csv_out_file);
|
25
31
|
|
26
|
-
write_csv(ruby, &mut dest, &range, header_map)
|
32
|
+
write_csv(ruby, &mut dest, &range, header_map, exclusions)
|
27
33
|
}
|
28
34
|
|
29
|
-
fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_map: HashMap<String, usize>) -> magnus::error::Result<()> {
|
35
|
+
fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_map: HashMap<String, usize>, exclusions: Vec<String>) -> magnus::error::Result<()> {
|
30
36
|
let n = range.get_size().1 - 1;
|
31
37
|
for (ri, r) in range.rows().enumerate() {
|
32
38
|
let mut date_value = Utc::now().naive_utc();
|
33
39
|
|
40
|
+
if skip_excluded_rows(&header_map, r, &exclusions) { continue; }
|
34
41
|
if skip_empty_rows(r) { continue; }
|
35
42
|
if skip_rows_with_no_request_id(&header_map, r) { continue; }
|
36
43
|
|
@@ -69,6 +76,16 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_ma
|
|
69
76
|
Ok(())
|
70
77
|
}
|
71
78
|
|
79
|
+
fn skip_excluded_rows(header_map: &HashMap<String, usize>, r: &[Data], exclusions: &Vec<String>) -> bool {
|
80
|
+
if let Some(request_id) = header_map.get("Request Id") {
|
81
|
+
let value = r[*request_id].to_string();
|
82
|
+
if exclusions.contains(&value) {
|
83
|
+
return true;
|
84
|
+
}
|
85
|
+
}
|
86
|
+
false
|
87
|
+
}
|
88
|
+
|
72
89
|
fn skip_empty_rows(r: &[Data]) -> bool {
|
73
90
|
if r.iter().all(|c| c == &Data::Empty) {
|
74
91
|
return true;
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/csv_utils/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: patchwork_csv_utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.6
|
5
5
|
platform: x86_64-darwin
|
6
6
|
authors:
|
7
7
|
- kingsley.hendrickse
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-08-
|
11
|
+
date: 2024-08-06 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Deduplication of CSV files and XLS to CSV conversion.
|
14
14
|
email:
|