patchwork_csv_utils 0.1.4-x86_64-linux → 0.1.6-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b9d62eb73016a46a3ec9caac2d43fd5db562a0856831c7f0e544e9c570e8d550
4
- data.tar.gz: 1262e0f45c2b8cfe896be3d4bd6f1074db61aa2b252049f5090cec5feba05c1a
3
+ metadata.gz: 69855b96ed7f617719daa482f940ef230dd9368c15708539666d4a204a62674e
4
+ data.tar.gz: 4dee6d49af40934270beccc5e42ffdc7d90f240389b56b38a7898b91e5180b8f
5
5
  SHA512:
6
- metadata.gz: 4fca3c59b6ff0b5cfd1dcc2ad0ae6efdf8005716980c0a1d111bff0190a4fae3c9bc6b560086416c794be89e3eab46ae6ffc34bdfc408a806c4ac07fe1660608
7
- data.tar.gz: 28454948d621cd2272b9a6790d735ce7e2d95da51202accb76a3d56f1a5b2b6bc9a274bf7473073bbc99fac25a64de818ed0a1b9839b94dc0893241ff0290c24
6
+ metadata.gz: 38925fa8dc2d19ab949fb3e9c3626d7ad1fbde105b0936884f58ccc8d4b849c323bfa092cc469edf00521f8cebed084a606504c76e1ee2c4b00943f38f97fb91
7
+ data.tar.gz: c1a1371b4063cc96573f2ba89ea5fba730400d27c86a62a20331bc9ad503501ead41a3ebacf447372e7f301d164f77aea8a22b5b9f70c081b1dcc2d72a4a63cc
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- patchwork_csv_utils (0.1.4)
4
+ patchwork_csv_utils (0.1.6)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -8,6 +8,6 @@ pub mod utils;
8
8
  fn init() -> Result<(), magnus::Error> {
9
9
  let module = define_module("CsvUtils")?;
10
10
  module.define_singleton_method("dedup", function!(dedup, 3))?;
11
- module.define_singleton_method("to_csv", function!(to_csv, 2))?;
11
+ module.define_singleton_method("to_csv", function!(to_csv, 3))?;
12
12
  Ok(())
13
13
  }
@@ -33,12 +33,20 @@ pub fn dedup(ruby: &Ruby, previous_csv_path: String, new_csv_path: String, targe
33
33
  let mut previous_records = vec![];
34
34
  for previous_record in previous_csv.records() {
35
35
  let previous_record = previous_record.map_err(|e| magnus_err(ruby, e, "previous_record"))?;
36
+
37
+ if has_empty_row_skip(&previous_record) { continue; }
38
+ if has_empty_first_col_skip_row(&previous_record) { continue; }
39
+
36
40
  let previous_record = previous_record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
37
41
  previous_records.push(previous_record)
38
42
  }
39
43
 
40
44
  for new_record in new_csv.records() {
41
45
  let new_record = new_record.map_err(|e| magnus_err(ruby, e, "new_record"))?;
46
+
47
+ if has_empty_row_skip(&new_record) { continue; }
48
+ if has_empty_first_col_skip_row(&new_record) { continue; }
49
+
42
50
  let new_record = new_record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
43
51
  if !previous_records.contains(&new_record) {
44
52
  wtr.write_byte_record(new_record.as_byte_record()).unwrap();
@@ -50,4 +58,18 @@ pub fn dedup(ruby: &Ruby, previous_csv_path: String, new_csv_path: String, targe
50
58
  Ok(())
51
59
  }
52
60
 
61
+ fn has_empty_first_col_skip_row(previous_record: &StringRecord) -> bool {
62
+ if previous_record[0].is_empty() {
63
+ return true;
64
+ }
65
+ false
66
+ }
67
+
68
+ fn has_empty_row_skip(record: &StringRecord) -> bool {
69
+ if record.iter().all(|r| r.is_empty()) {
70
+ return true;
71
+ }
72
+ false
73
+ }
74
+
53
75
 
@@ -4,11 +4,17 @@ use std::io::{BufWriter, Write};
4
4
 
5
5
  use calamine::{Data, open_workbook, Range, Reader, Xls};
6
6
  use chrono::{NaiveDateTime, Utc};
7
- use magnus::Ruby;
7
+ use magnus::{RArray, Ruby};
8
8
 
9
9
  use crate::utils::{FileExtension, magnus_err};
10
10
 
11
- pub fn to_csv(ruby: &Ruby, xls_path: String, target_path: String) -> magnus::error::Result<()> {
11
+ pub fn to_csv(ruby: &Ruby, xls_path: String, target_path: String, exclusions: RArray) -> magnus::error::Result<()> {
12
+ let exclusions = RArray::to_vec(exclusions)?;
13
+
14
+ println!("xls_path: {:?}", xls_path);
15
+ println!("target_path: {:?}", target_path);
16
+ println!("exclusions: {:?}", exclusions);
17
+
12
18
  if !xls_path.has_extension(&["xls"]) {
13
19
  return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls file".to_string()));
14
20
  }
@@ -23,14 +29,15 @@ pub fn to_csv(ruby: &Ruby, xls_path: String, target_path: String) -> magnus::err
23
29
  let csv_out_file = File::create(target_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not create csv file: {}", target_path).as_str()))?;
24
30
  let mut dest = BufWriter::new(csv_out_file);
25
31
 
26
- write_csv(ruby, &mut dest, &range, header_map)
32
+ write_csv(ruby, &mut dest, &range, header_map, exclusions)
27
33
  }
28
34
 
29
- fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_map: HashMap<String, usize>) -> magnus::error::Result<()> {
35
+ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_map: HashMap<String, usize>, exclusions: Vec<String>) -> magnus::error::Result<()> {
30
36
  let n = range.get_size().1 - 1;
31
37
  for (ri, r) in range.rows().enumerate() {
32
38
  let mut date_value = Utc::now().naive_utc();
33
39
 
40
+ if skip_excluded_rows(&header_map, r, &exclusions) { continue; }
34
41
  if skip_empty_rows(r) { continue; }
35
42
  if skip_rows_with_no_request_id(&header_map, r) { continue; }
36
43
 
@@ -69,6 +76,16 @@ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_ma
69
76
  Ok(())
70
77
  }
71
78
 
79
+ fn skip_excluded_rows(header_map: &HashMap<String, usize>, r: &[Data], exclusions: &Vec<String>) -> bool {
80
+ if let Some(request_id) = header_map.get("Request Id") {
81
+ let value = r[*request_id].to_string();
82
+ if exclusions.contains(&value) {
83
+ return true;
84
+ }
85
+ }
86
+ false
87
+ }
88
+
72
89
  fn skip_empty_rows(r: &[Data]) -> bool {
73
90
  if r.iter().all(|c| c == &Data::Empty) {
74
91
  return true;
Binary file
Binary file
Binary file
Binary file
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module CsvUtils
4
- VERSION = '0.1.4'
4
+ VERSION = '0.1.6'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: patchwork_csv_utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.6
5
5
  platform: x86_64-linux
6
6
  authors:
7
7
  - kingsley.hendrickse
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-08-01 00:00:00.000000000 Z
11
+ date: 2024-08-06 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Deduplication of CSV files and XLS to CSV conversion.
14
14
  email: