patchwork_csv_utils 0.1.4-x86_64-linux → 0.1.6-x86_64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/ext/csv_utils/src/lib.rs +1 -1
- data/ext/csv_utils/src/utils/dedup.rs +22 -0
- data/ext/csv_utils/src/utils/xls.rs +21 -4
- data/lib/csv_utils/2.7/csv_utils.so +0 -0
- data/lib/csv_utils/3.0/csv_utils.so +0 -0
- data/lib/csv_utils/3.1/csv_utils.so +0 -0
- data/lib/csv_utils/3.2/csv_utils.so +0 -0
- data/lib/csv_utils/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 69855b96ed7f617719daa482f940ef230dd9368c15708539666d4a204a62674e
|
4
|
+
data.tar.gz: 4dee6d49af40934270beccc5e42ffdc7d90f240389b56b38a7898b91e5180b8f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 38925fa8dc2d19ab949fb3e9c3626d7ad1fbde105b0936884f58ccc8d4b849c323bfa092cc469edf00521f8cebed084a606504c76e1ee2c4b00943f38f97fb91
|
7
|
+
data.tar.gz: c1a1371b4063cc96573f2ba89ea5fba730400d27c86a62a20331bc9ad503501ead41a3ebacf447372e7f301d164f77aea8a22b5b9f70c081b1dcc2d72a4a63cc
|
data/Gemfile.lock
CHANGED
data/ext/csv_utils/src/lib.rs
CHANGED
@@ -8,6 +8,6 @@ pub mod utils;
|
|
8
8
|
fn init() -> Result<(), magnus::Error> {
|
9
9
|
let module = define_module("CsvUtils")?;
|
10
10
|
module.define_singleton_method("dedup", function!(dedup, 3))?;
|
11
|
-
module.define_singleton_method("to_csv", function!(to_csv, 2))?;
|
11
|
+
module.define_singleton_method("to_csv", function!(to_csv, 3))?;
|
12
12
|
Ok(())
|
13
13
|
}
|
data/ext/csv_utils/src/utils/dedup.rs
CHANGED
@@ -33,12 +33,20 @@ pub fn dedup(ruby: &Ruby, previous_csv_path: String, new_csv_path: String, targe
|
|
33
33
|
let mut previous_records = vec![];
|
34
34
|
for previous_record in previous_csv.records() {
|
35
35
|
let previous_record = previous_record.map_err(|e| magnus_err(ruby, e, "previous_record"))?;
|
36
|
+
|
37
|
+
if has_empty_row_skip(&previous_record) { continue; }
|
38
|
+
if has_empty_first_col_skip_row(&previous_record) { continue; }
|
39
|
+
|
36
40
|
let previous_record = previous_record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
|
37
41
|
previous_records.push(previous_record)
|
38
42
|
}
|
39
43
|
|
40
44
|
for new_record in new_csv.records() {
|
41
45
|
let new_record = new_record.map_err(|e| magnus_err(ruby, e, "new_record"))?;
|
46
|
+
|
47
|
+
if has_empty_row_skip(&new_record) { continue; }
|
48
|
+
if has_empty_first_col_skip_row(&new_record) { continue; }
|
49
|
+
|
42
50
|
let new_record = new_record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
|
43
51
|
if !previous_records.contains(&new_record) {
|
44
52
|
wtr.write_byte_record(new_record.as_byte_record()).unwrap();
|
@@ -50,4 +58,18 @@ pub fn dedup(ruby: &Ruby, previous_csv_path: String, new_csv_path: String, targe
|
|
50
58
|
Ok(())
|
51
59
|
}
|
52
60
|
|
61
|
+
fn has_empty_first_col_skip_row(previous_record: &StringRecord) -> bool {
|
62
|
+
if previous_record[0].is_empty() {
|
63
|
+
return true;
|
64
|
+
}
|
65
|
+
false
|
66
|
+
}
|
67
|
+
|
68
|
+
fn has_empty_row_skip(record: &StringRecord) -> bool {
|
69
|
+
if record.iter().all(|r| r.is_empty()) {
|
70
|
+
return true;
|
71
|
+
}
|
72
|
+
false
|
73
|
+
}
|
74
|
+
|
53
75
|
|
data/ext/csv_utils/src/utils/xls.rs
CHANGED
@@ -4,11 +4,17 @@ use std::io::{BufWriter, Write};
|
|
4
4
|
|
5
5
|
use calamine::{Data, open_workbook, Range, Reader, Xls};
|
6
6
|
use chrono::{NaiveDateTime, Utc};
|
7
|
-
use magnus::Ruby;
|
7
|
+
use magnus::{RArray, Ruby};
|
8
8
|
|
9
9
|
use crate::utils::{FileExtension, magnus_err};
|
10
10
|
|
11
|
-
pub fn to_csv(ruby: &Ruby, xls_path: String, target_path: String) -> magnus::error::Result<()> {
|
11
|
+
pub fn to_csv(ruby: &Ruby, xls_path: String, target_path: String, exclusions: RArray) -> magnus::error::Result<()> {
|
12
|
+
let exclusions = RArray::to_vec(exclusions)?;
|
13
|
+
|
14
|
+
println!("xls_path: {:?}", xls_path);
|
15
|
+
println!("target_path: {:?}", target_path);
|
16
|
+
println!("exclusions: {:?}", exclusions);
|
17
|
+
|
12
18
|
if !xls_path.has_extension(&["xls"]) {
|
13
19
|
return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls file".to_string()));
|
14
20
|
}
|
@@ -23,14 +29,15 @@ pub fn to_csv(ruby: &Ruby, xls_path: String, target_path: String) -> magnus::err
|
|
23
29
|
let csv_out_file = File::create(target_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not create csv file: {}", target_path).as_str()))?;
|
24
30
|
let mut dest = BufWriter::new(csv_out_file);
|
25
31
|
|
26
|
-
write_csv(ruby, &mut dest, &range, header_map)
|
32
|
+
write_csv(ruby, &mut dest, &range, header_map, exclusions)
|
27
33
|
}
|
28
34
|
|
29
|
-
fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_map: HashMap<String, usize>) -> magnus::error::Result<()> {
|
35
|
+
fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_map: HashMap<String, usize>, exclusions: Vec<String>) -> magnus::error::Result<()> {
|
30
36
|
let n = range.get_size().1 - 1;
|
31
37
|
for (ri, r) in range.rows().enumerate() {
|
32
38
|
let mut date_value = Utc::now().naive_utc();
|
33
39
|
|
40
|
+
if skip_excluded_rows(&header_map, r, &exclusions) { continue; }
|
34
41
|
if skip_empty_rows(r) { continue; }
|
35
42
|
if skip_rows_with_no_request_id(&header_map, r) { continue; }
|
36
43
|
|
@@ -69,6 +76,16 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_ma
|
|
69
76
|
Ok(())
|
70
77
|
}
|
71
78
|
|
79
|
+
fn skip_excluded_rows(header_map: &HashMap<String, usize>, r: &[Data], exclusions: &Vec<String>) -> bool {
|
80
|
+
if let Some(request_id) = header_map.get("Request Id") {
|
81
|
+
let value = r[*request_id].to_string();
|
82
|
+
if exclusions.contains(&value) {
|
83
|
+
return true;
|
84
|
+
}
|
85
|
+
}
|
86
|
+
false
|
87
|
+
}
|
88
|
+
|
72
89
|
fn skip_empty_rows(r: &[Data]) -> bool {
|
73
90
|
if r.iter().all(|c| c == &Data::Empty) {
|
74
91
|
return true;
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/csv_utils/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: patchwork_csv_utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.6
|
5
5
|
platform: x86_64-linux
|
6
6
|
authors:
|
7
7
|
- kingsley.hendrickse
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-08-
|
11
|
+
date: 2024-08-06 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Deduplication of CSV files and XLS to CSV conversion.
|
14
14
|
email:
|