patchwork_csv_utils 0.1.10-aarch64-linux

data/Cargo.toml ADDED
@@ -0,0 +1,7 @@
+ # This Cargo.toml is here to let external tools (IDEs, etc.) know that this is
+ # a Rust project. Your extension's dependencies should be added to the Cargo.toml
+ # in the ext/ directory.
+
+ [workspace]
+ members = ["./ext/csv_utils"]
+ resolver = "2"
data/Gemfile ADDED
@@ -0,0 +1,14 @@
+ # frozen_string_literal: true
+
+ source 'https://rubygems.org'
+
+ # Specify your gem's dependencies in csv_utils.gemspec
+ gemspec
+
+ group :development do
+   gem 'rake', '~> 13.0'
+   gem 'rake-compiler'
+   gem 'rb_sys', '~> 0.9.98'
+   gem 'rspec', '~> 3.0'
+   gem 'rubocop', '~> 1.21'
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,70 @@
+ PATH
+   remote: .
+   specs:
+     patchwork_csv_utils (0.1.10)
+
+ GEM
+   remote: https://rubygems.org/
+   specs:
+     ast (2.4.2)
+     diff-lcs (1.5.1)
+     json (2.7.2)
+     language_server-protocol (3.17.0.3)
+     parallel (1.25.1)
+     parser (3.3.4.0)
+       ast (~> 2.4.1)
+       racc
+     racc (1.8.0)
+     rainbow (3.1.1)
+     rake (13.2.1)
+     rake-compiler (1.2.7)
+       rake
+     rb_sys (0.9.98)
+     regexp_parser (2.9.2)
+     rexml (3.3.2)
+       strscan
+     rspec (3.13.0)
+       rspec-core (~> 3.13.0)
+       rspec-expectations (~> 3.13.0)
+       rspec-mocks (~> 3.13.0)
+     rspec-core (3.13.0)
+       rspec-support (~> 3.13.0)
+     rspec-expectations (3.13.1)
+       diff-lcs (>= 1.2.0, < 2.0)
+       rspec-support (~> 3.13.0)
+     rspec-mocks (3.13.1)
+       diff-lcs (>= 1.2.0, < 2.0)
+       rspec-support (~> 3.13.0)
+     rspec-support (3.13.1)
+     rubocop (1.65.0)
+       json (~> 2.3)
+       language_server-protocol (>= 3.17.0)
+       parallel (~> 1.10)
+       parser (>= 3.3.0.2)
+       rainbow (>= 2.2.2, < 4.0)
+       regexp_parser (>= 2.4, < 3.0)
+       rexml (>= 3.2.5, < 4.0)
+       rubocop-ast (>= 1.31.1, < 2.0)
+       ruby-progressbar (~> 1.7)
+       unicode-display_width (>= 2.4.0, < 3.0)
+     rubocop-ast (1.31.3)
+       parser (>= 3.3.1.0)
+     ruby-progressbar (1.13.0)
+     strscan (3.1.0)
+     unicode-display_width (2.5.0)
+
+ PLATFORMS
+   arm64-darwin-22
+   arm64-darwin-23
+   x86_64-linux
+
+ DEPENDENCIES
+   patchwork_csv_utils!
+   rake (~> 13.0)
+   rake-compiler
+   rb_sys (~> 0.9.98)
+   rspec (~> 3.0)
+   rubocop (~> 1.21)
+
+ BUNDLED WITH
+    2.4.10
data/README.md ADDED
@@ -0,0 +1,33 @@
+ # CsvUtils
+
+ * dedup: Given two CSV files, this gem creates a third CSV file containing the rows of the new (second) CSV file that are not present in the previous (first) CSV file.
+ * xls to csv: Given an XLS file, this gem creates a CSV file with the specified name.
+
+ ## Installation
+
+ ```bash
+ gem install patchwork_csv_utils
+ ```
+
+ ## Usage
+
+ ```irb
+ require 'csv_utils'
+ CsvUtils.dedup('file1.csv', 'file2.csv', 'output.csv')
+ CsvUtils.to_csv('file1.xls', 'output_file1.csv', ['request_ids_to_skip'])
+ CsvUtils.transform_csv('file1.csv', 'output_file1.csv', ['request_ids_to_skip'])
+ ```
+
+ ## Release
+
+ * To release a new version, update the version number in `lib/csv_utils/version.rb`.
+ * Push the changes to GitHub and then create a tag with the version number.
+
+ ```bash
+ git tag -a v0.1.0 -m "v0.1.0"
+ git push origin --tags
+ ```
+
+ ## Contributing
+
+ Bug reports and pull requests are welcome on GitHub at http://github.com/patchworkhealth/csv_utils.
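For clarity on the usage shown in the README above: all three native methods surface failures as Ruby exceptions built from `StandardError` (wrong file extension, mismatched headers in `dedup`, or missing required columns), as the Rust sources further down show. A minimal defensive-usage sketch, with hypothetical file names and Request Ids:

```ruby
require 'csv_utils'

# Hypothetical file names; the array arguments list Request Id values to skip.
begin
  CsvUtils.dedup('previous.csv', 'new.csv', 'deduped.csv')
  CsvUtils.to_csv('timesheets.xls', 'timesheets.csv', ['REQ-123'])
  CsvUtils.transform_csv('timesheets.csv', 'normalised.csv', ['REQ-123'])
rescue StandardError => e
  # Raised for non-csv/xls inputs, mismatched headers in dedup, or missing
  # required columns such as "Request Id", "Date", "Start" and "End".
  warn "csv_utils failed: #{e.message}"
end
```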
data/Rakefile ADDED
@@ -0,0 +1,36 @@
+ # frozen_string_literal: true
+
+ require 'bundler/gem_tasks'
+ require 'rspec/core/rake_task'
+
+ RSpec::Core::RakeTask.new(:spec)
+
+ require 'rubocop/rake_task'
+
+ RuboCop::RakeTask.new
+
+ require 'rb_sys/extensiontask'
+
+ task build: :compile
+
+ spec = Bundler.load_gemspec('patchwork_csv_utils.gemspec')
+
+ Rake::ExtensionTask.new('csv_utils', spec) do |c|
+   c.lib_dir = 'lib/csv_utils'
+   c.cross_compile = true
+   c.cross_platform = %w[
+     aarch64-linux
+     arm64-darwin
+     x64-mingw-ucrt
+     x64-mingw32
+     x86_64-darwin
+     x86_64-linux
+     x86_64-linux-musl
+   ]
+ end
+
+ RbSys::ExtensionTask.new('csv_utils') do |ext|
+   ext.lib_dir = 'lib/csv_utils'
+ end
+
+ task default: %i[compile spec rubocop]
data/ext/csv_utils/Cargo.toml ADDED
@@ -0,0 +1,15 @@
+ [package]
+ name = "csv_utils"
+ version = "0.1.0"
+ edition = "2021"
+ authors = ["kingsley.hendrickse <kingsley.hendrickse@patchwork.health>"]
+ publish = false
+
+ [lib]
+ crate-type = ["cdylib"]
+
+ [dependencies]
+ magnus = { version = "0.7.1" }
+ csv = "1.3.0"
+ calamine = { version = "0.25.0", features = ["dates"] }
+ chrono = "0.4.38"
data/ext/csv_utils/extconf.rb ADDED
@@ -0,0 +1,6 @@
+ # frozen_string_literal: true
+
+ require 'mkmf'
+ require 'rb_sys/mkmf'
+
+ create_rust_makefile('csv_utils/csv_utils')
data/ext/csv_utils/src/lib.rs ADDED
@@ -0,0 +1,15 @@
+ use magnus::{define_module, function, prelude::*};
+ use crate::utils::csv::transform_csv;
+ use crate::utils::dedup::dedup;
+ use crate::utils::xls::to_csv;
+
+ pub mod utils;
+
+ #[magnus::init]
+ fn init() -> Result<(), magnus::Error> {
+     let module = define_module("CsvUtils")?;
+     module.define_singleton_method("dedup", function!(dedup, 3))?;
+     module.define_singleton_method("to_csv", function!(to_csv, 3))?;
+     module.define_singleton_method("transform_csv", function!(transform_csv, 3))?;
+     Ok(())
+ }
data/ext/csv_utils/src/utils/csv.rs ADDED
@@ -0,0 +1,113 @@
+ use std::collections::HashMap;
+ use std::fs::File;
+
+ use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Utc};
+ use csv::{StringRecord, Writer};
+ use magnus::{Error, RArray, Ruby};
+
+ use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error};
+
+ pub fn transform_csv(ruby: &Ruby, csv_path: String, target_path: String, exclusions: RArray) -> magnus::error::Result<()> {
+     if !csv_path.has_extension(&["csv"]) {
+         return Err(magnus::Error::new(ruby.exception_standard_error(), "csv_path must be a csv file".to_string()));
+     }
+
+     let exclusions = RArray::to_vec(exclusions)?;
+
+     let csv_file = File::open(csv_path).map_err(|e| magnus_err(ruby, e, "csv_path"))?;
+     let mut csv: csv::Reader<File> = csv::Reader::from_reader(csv_file);
+     let mut wtr = Writer::from_path(target_path).map_err(|e| magnus_err(ruby, e, "target_path"))?;
+     let headers = csv.headers().map_err(|e| magnus_err(ruby, e, "csv_path headers"))?;
+     let header_map: HashMap<String, usize> = headers.iter().enumerate().map(|(i, h)| (h.to_string(), i)).collect();
+     let inverse_header_map: HashMap<usize, String> = headers.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
+
+     wtr.write_byte_record(headers.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
+
+     let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
+     let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
+     let start = header_map.get("Start").ok_or(missing_header(ruby, "Start"))?;
+     let end = header_map.get("End").ok_or(missing_header(ruby, "End"))?;
+     let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
+     let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
+
+     for (ri, record) in csv.records().enumerate() {
+         let record = record.map_err(|e| magnus_err(ruby, e, "record"))?;
+
+         if skip_excluded_rows(request_id, &record, &exclusions) { continue; }
+         if has_empty_row_skip(&record) { continue; }
+         if has_empty_first_col_skip_row(&record) { continue; }
+
+         let mut date_value = Utc::now().naive_utc();
+
+         let record = record.iter().enumerate().map(|(i, c)| {
+             let c = c.trim_end();
+             if i == *date {
+                 let current = string_to_datetime(c).ok_or(to_datetime_error(ruby, c, ri, "Date"))?;
+                 date_value = current;
+                 Ok(current.to_string())
+             } else if i == *start || i == *end || i == *actual_start || i == *actual_end {
+                 if c.is_empty() { return Ok(c.to_string()); }
+                 let column_name = get_column_name(&inverse_header_map, &i);
+                 process_datetime(ruby, ri, date_value, c, &column_name)
+             } else {
+                 Ok(c.to_string())
+             }
+         }).collect::<Result<StringRecord, magnus::Error>>()?;
+
+         let record = record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
+         wtr.write_byte_record(record.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
+     }
+
+     wtr.flush().map_err(|e| magnus_err(ruby, e, "flush"))?;
+
+     Ok(())
+ }
+
+ fn process_datetime(ruby: &Ruby, ri: usize, date_value: NaiveDateTime, c: &str, column_name: &String) -> magnus::error::Result<String> {
+     let maybe_correct = correct_datetime(c);
+     if let Some(correct) = maybe_correct {
+         return Ok(correct.to_string());
+     }
+
+     let current_time = string_to_time(c).ok_or(to_datetime_error(ruby, c, ri, column_name))?;
+     let datetime = transform_time_to_datetime(date_value, current_time);
+     Ok(datetime.to_string())
+ }
+
+ fn get_column_name(inverse_header_map: &HashMap<usize, String>, i: &usize) -> String {
+     let unknown = "Unknown".to_string();
+     let column_name = inverse_header_map.get(&i).unwrap_or(&unknown);
+     column_name.to_string()
+ }
+
+ fn skip_excluded_rows(request_id: &usize, r: &StringRecord, exclusions: &Vec<String>) -> bool {
+     let value = r.get(*request_id).unwrap_or_default();
+     exclusions.contains(&value.to_string())
+ }
+
+ fn string_to_datetime(s: &str) -> Option<NaiveDateTime> {
+     let maybe_correct = correct_datetime(s);
+     if maybe_correct.is_some() { return maybe_correct; }
+
+     NaiveDate::parse_from_str(s, "%d-%b-%y").ok().map(|d| d.and_hms_opt(0, 0, 0)).flatten()
+ }
+
+ fn correct_datetime(s: &str) -> Option<NaiveDateTime> {
+     NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S").ok()
+ }
+
+ fn string_to_time(s: &str) -> Option<NaiveTime> {
+     NaiveTime::parse_from_str(s, "%H:%M").ok()
+ }
+
+ fn transform_time_to_datetime(t1: NaiveDateTime, t2: NaiveTime) -> NaiveDateTime {
+     NaiveDateTime::new(t1.date(), t2)
+ }
+
+ fn has_empty_first_col_skip_row(record: &StringRecord) -> bool {
+     record[0].is_empty()
+ }
+
+ fn has_empty_row_skip(record: &StringRecord) -> bool {
+     record.iter().all(|r| r.is_empty())
+ }
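A small illustration of what `transform_csv` above does to the date and time columns: the Date column is parsed as `%d-%b-%y` (or accepted as an already-normalised `%Y-%m-%d %H:%M:%S` value), and the Start, End, Actual Start and Actual End columns, given as `%H:%M`, are combined with that row's date. A sketch with hypothetical file names and data:

```ruby
require 'csv_utils'

# The input must contain all of the required headers used by transform_csv:
# Request Id, Date, Start, End, Actual Start, Actual End.
File.write('shifts.csv', <<~CSV)
  Request Id,Date,Start,End,Actual Start,Actual End
  REQ-1,01-Jul-24,09:00,17:00,09:05,17:02
  REQ-2,02-Jul-24,08:30,16:30,,
CSV

CsvUtils.transform_csv('shifts.csv', 'shifts_normalised.csv', [])
# Expected first data row in shifts_normalised.csv:
# REQ-1,2024-07-01 00:00:00,2024-07-01 09:00:00,2024-07-01 17:00:00,2024-07-01 09:05:00,2024-07-01 17:02:00
```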
data/ext/csv_utils/src/utils/dedup.rs ADDED
@@ -0,0 +1,69 @@
+ use std::fs::File;
+
+ use csv::{StringRecord, Writer};
+ use magnus::Ruby;
+
+ use crate::utils::{FileExtension, magnus_err};
+
+ pub fn dedup(ruby: &Ruby, previous_csv_path: String, new_csv_path: String, target_path: String) -> magnus::error::Result<()> {
+     if !previous_csv_path.has_extension(&["csv"]) {
+         return Err(magnus::Error::new(ruby.exception_standard_error(), "previous_csv_path must be a csv file".to_string()));
+     }
+     if !new_csv_path.has_extension(&["csv"]) {
+         return Err(magnus::Error::new(ruby.exception_standard_error(), "new_csv_path must be a csv file".to_string()));
+     }
+
+     let csv1 = File::open(previous_csv_path).map_err(|e| magnus_err(ruby, e, "previous_csv_path"))?;
+     let csv2 = File::open(new_csv_path).map_err(|e| magnus_err(ruby, e, "new_csv_path"))?;
+
+     let mut previous_csv: csv::Reader<File> = csv::Reader::from_reader(csv1);
+     let mut new_csv: csv::Reader<File> = csv::Reader::from_reader(csv2);
+
+     let mut wtr = Writer::from_path(target_path).map_err(|e| magnus_err(ruby, e, "target_path"))?;
+
+     let previous_headers = previous_csv.headers().map_err(|e| magnus_err(ruby, e, "previous_csv_path headers"))?;
+     let new_headers = new_csv.headers().map_err(|e| magnus_err(ruby, e, "new_csv_path headers"))?;
+
+     if previous_headers != new_headers {
+         return Err(magnus::Error::new(ruby.exception_standard_error(), "headers of both csv files must be the same".to_string()));
+     }
+
+     wtr.write_byte_record(previous_headers.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
+
+     let mut previous_records = vec![];
+     for previous_record in previous_csv.records() {
+         let previous_record = previous_record.map_err(|e| magnus_err(ruby, e, "previous_record"))?;
+
+         if has_empty_row_skip(&previous_record) { continue; }
+         if has_empty_first_col_skip_row(&previous_record) { continue; }
+
+         let previous_record = previous_record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
+         previous_records.push(previous_record)
+     }
+
+     for new_record in new_csv.records() {
+         let new_record = new_record.map_err(|e| magnus_err(ruby, e, "new_record"))?;
+
+         if has_empty_row_skip(&new_record) { continue; }
+         if has_empty_first_col_skip_row(&new_record) { continue; }
+
+         let new_record = new_record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
+         if !previous_records.contains(&new_record) {
+             wtr.write_byte_record(new_record.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
+         }
+     }
+
+     wtr.flush().map_err(|e| magnus_err(ruby, e, "flush"))?;
+
+     Ok(())
+ }
+
+ fn has_empty_first_col_skip_row(previous_record: &StringRecord) -> bool {
+     previous_record[0].is_empty()
+ }
+
+ fn has_empty_row_skip(record: &StringRecord) -> bool {
+     record.iter().all(|r| r.is_empty())
+ }
+
+
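To make the dedup semantics above concrete: both inputs must share identical headers, and the output keeps that header plus the rows of the new (second) file that do not appear in the previous (first) file, with trailing whitespace trimmed and blank rows dropped. A sketch with hypothetical data:

```ruby
require 'csv_utils'

# Hypothetical inputs: new.csv repeats one row from previous.csv and adds one.
File.write('previous.csv', "Request Id,Date\nREQ-1,01-Jul-24\n")
File.write('new.csv',      "Request Id,Date\nREQ-1,01-Jul-24\nREQ-2,02-Jul-24\n")

CsvUtils.dedup('previous.csv', 'new.csv', 'delta.csv')
# delta.csv now contains:
# Request Id,Date
# REQ-2,02-Jul-24
```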
data/ext/csv_utils/src/utils/mod.rs ADDED
@@ -0,0 +1,36 @@
+ use std::error::Error;
+ use std::ffi::OsStr;
+ use std::path::Path;
+ use magnus::Ruby;
+
+ pub mod csv;
+ pub mod dedup;
+ pub mod xls;
+
+ fn missing_header(ruby: &Ruby, header: &str) -> magnus::Error {
+     magnus::Error::new(ruby.exception_standard_error(), format!("Missing '{}' header", header))
+ }
+
+ fn magnus_err<E: Error>(ruby: &Ruby, e: E, msg: &str) -> magnus::Error {
+     magnus::Error::new(ruby.exception_standard_error(), format!("{}: {}", msg, e.to_string()))
+ }
+
+ fn to_datetime_error(ruby: &Ruby, value: &str, row: usize, col: &str) -> magnus::Error {
+     magnus::Error::new(ruby.exception_standard_error(), format!("Could not parse datetime '{}', row: {}, col: {}", value, row, col))
+ }
+
+ pub trait FileExtension {
+     fn has_extension<S: AsRef<str>>(&self, extensions: &[S]) -> bool;
+ }
+
+ impl<P: AsRef<Path>> FileExtension for P {
+     fn has_extension<S: AsRef<str>>(&self, extensions: &[S]) -> bool {
+         if let Some(ref extension) = self.as_ref().extension().and_then(OsStr::to_str) {
+             return extensions
+                 .iter()
+                 .any(|x| x.as_ref().eq_ignore_ascii_case(extension));
+         }
+
+         false
+     }
+ }
data/ext/csv_utils/src/utils/xls.rs ADDED
@@ -0,0 +1,114 @@
+ use std::collections::HashMap;
+ use std::fs::File;
+ use std::io::{BufWriter, Write};
+
+ use calamine::{Data, open_workbook, Range, Reader, Xls};
+ use chrono::{NaiveDateTime, Utc};
+ use magnus::{RArray, Ruby};
+
+ use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error};
+
+ pub fn to_csv(ruby: &Ruby, xls_path: String, target_path: String, exclusions: RArray) -> magnus::error::Result<()> {
+     if !xls_path.has_extension(&["xls"]) {
+         return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls file".to_string()));
+     }
+
+     let exclusions = RArray::to_vec(exclusions)?;
+
+     let mut workbook: Xls<_> = open_workbook(xls_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not open xls: {}", xls_path).as_str()))?;
+     let range = workbook.worksheet_range_at(0)
+         .ok_or(magnus::Error::new(ruby.exception_standard_error(), "no worksheet found in xls".to_string()))
+         .and_then(|r| r.map_err(|e| magnus_err(ruby, e, "could not read worksheet range")))?;
+
+     let headers = range.headers().ok_or(magnus::Error::new(ruby.exception_standard_error(), "no headers found in xls".to_string()))?;
+     let header_map: HashMap<String, usize> = headers.iter().enumerate().map(|(i, h)| (h.to_string(), i)).collect();
+     let csv_out_file = File::create(target_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not create csv file: {}", target_path).as_str()))?;
+     let mut dest = BufWriter::new(csv_out_file);
+
+     write_csv(ruby, &mut dest, &range, header_map, exclusions)
+ }
+
+ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_map: HashMap<String, usize>, exclusions: Vec<String>) -> magnus::error::Result<()> {
+     let n = range.get_size().1 - 1;
+
+     let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
+     let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
+     let start = header_map.get("Start").ok_or(missing_header(ruby, "Start"))?;
+     let end = header_map.get("End").ok_or(missing_header(ruby, "End"))?;
+     let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
+     let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
+
+     for (ri, r) in range.rows().enumerate() {
+         let mut date_value = Utc::now().naive_utc();
+
+         if skip_excluded_rows(&request_id, r, &exclusions) { continue; }
+         if skip_empty_rows(r) { continue; }
+         if skip_rows_with_no_request_id(&request_id, r) { continue; }
+         if date_value_is_not_present(&date, r) {
+             return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Date value is not present in row: {}", ri)));
+         }
+
+         for (i, c) in r.iter().enumerate() {
+             match *c {
+                 Data::Empty => Ok(()),
+                 Data::String(ref s) | Data::DateTimeIso(ref s) | Data::DurationIso(ref s) => {
+                     handle_commas(dest, s)
+                 }
+                 Data::Float(ref f) => write!(dest, "{}", f),
+                 Data::DateTime(ref d) => {
+                     let mut current = d.as_datetime().ok_or(to_datetime_error(ruby, &d.to_string(), ri, "Date"))?;
+                     if i == *date {
+                         date_value = current;
+                     } else if i == *start || i == *end || i == *actual_start || i == *actual_end {
+                         current = transform_time_to_datetime(date_value, current);
+                     }
+                     write!(dest, "{}", current)
+                 }
+                 Data::Int(ref i) => write!(dest, "{}", i),
+                 Data::Error(ref e) => write!(dest, "{:?}", e),
+                 Data::Bool(ref b) => write!(dest, "{}", b),
+             }.map_err(|e| magnus_err(ruby, e, format!("error writing xls row: {}, column: {}", ri, i).as_str()))?;
+             if i != n {
+                 write!(dest, ",").map_err(|e| magnus_err(ruby, e, format!("error writing csv comma for row: {}, column: {}", ri, i).as_str()))?;
+             }
+         }
+         write!(dest, "\r\n").map_err(|e| magnus_err(ruby, e, format!("error writing end of line for row: {}", ri).as_str()))?;
+     }
+     Ok(())
+ }
+
+ fn date_value_is_not_present(date: &usize, r: &[Data]) -> bool {
+     r[*date] == Data::Empty
+ }
+
+ fn skip_excluded_rows(request_id: &usize, r: &[Data], exclusions: &Vec<String>) -> bool {
+     let value = r[*request_id].to_string();
+     exclusions.contains(&value.to_string())
+ }
+
+ fn skip_empty_rows(r: &[Data]) -> bool {
+     r.iter().all(|c| c == &Data::Empty)
+ }
+
+ fn skip_rows_with_no_request_id(request_id: &usize, r: &[Data]) -> bool {
+     r[*request_id] == Data::Empty
+ }
+
+ fn transform_time_to_datetime(t1: NaiveDateTime, t2: NaiveDateTime) -> NaiveDateTime {
+     NaiveDateTime::new(t1.date(), t2.time())
+ }
+
+ fn handle_commas<W: Write>(dest: &mut W, s: &str) -> std::io::Result<()> {
+     if s.contains(",") {
+         write!(dest, "{:?}", clean_strings(s).trim_end())
+     } else {
+         write!(dest, "{}", clean_strings(s).trim_end())
+     }
+ }
+
+ fn clean_strings(s: &str) -> String {
+     s.replace("\n", " ")
+         .replace("\r", " ")
+         .replace("\"", "")
+         .replace("'", "")
+ }
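A note on the XLS conversion above: `to_csv` writes the first worksheet with CRLF row endings, wraps comma-containing cells in quotes, strips embedded quotes and newlines, expands date/time cells into full datetimes keyed off the row's Date column, and skips rows whose Request Id is empty or listed in the exclusions. A hypothetical call:

```ruby
require 'csv_utils'

# Hypothetical paths; the third argument lists Request Id values to drop.
CsvUtils.to_csv('timesheets.xls', 'timesheets.csv', ['REQ-123'])

# The output keeps the sheet's header row, so it can be post-processed with
# transform_csv or read back with Ruby's standard CSV library:
require 'csv'
rows = CSV.read('timesheets.csv', headers: true)
```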
data/lib/csv_utils/2.7/csv_utils.so ADDED
Binary file
data/lib/csv_utils/3.0/csv_utils.so ADDED
Binary file
data/lib/csv_utils/3.1/csv_utils.so ADDED
Binary file
data/lib/csv_utils/3.2/csv_utils.so ADDED
Binary file
data/lib/csv_utils/version.rb ADDED
@@ -0,0 +1,5 @@
+ # frozen_string_literal: true
+
+ module CsvUtils
+   VERSION = '0.1.10'
+ end
data/lib/csv_utils.rb ADDED
@@ -0,0 +1,14 @@
+ # frozen_string_literal: true
+
+ # load native extension
+ begin
+   ruby_version = /(\d+\.\d+)/.match(RUBY_VERSION)
+   require_relative "csv_utils/#{ruby_version}/csv_utils"
+ rescue LoadError
+   require_relative 'csv_utils/csv_utils'
+ end
+
+ require_relative 'csv_utils/version'
+
+ module CsvUtils
+ end
metadata ADDED
@@ -0,0 +1,69 @@
+ --- !ruby/object:Gem::Specification
+ name: patchwork_csv_utils
+ version: !ruby/object:Gem::Version
+   version: 0.1.10
+ platform: aarch64-linux
+ authors:
+ - kingsley.hendrickse
+ autorequire:
+ bindir: exe
+ cert_chain: []
+ date: 2024-08-09 00:00:00.000000000 Z
+ dependencies: []
+ description: Deduplication of CSV files and XLS to CSV conversion.
+ email:
+ - kingsley.hendrickse@patchwork.health
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - ".rspec"
+ - ".rubocop.yml"
+ - Cargo.lock
+ - Cargo.toml
+ - Gemfile
+ - Gemfile.lock
+ - README.md
+ - Rakefile
+ - ext/csv_utils/Cargo.toml
+ - ext/csv_utils/extconf.rb
+ - ext/csv_utils/src/lib.rs
+ - ext/csv_utils/src/utils/csv.rs
+ - ext/csv_utils/src/utils/dedup.rs
+ - ext/csv_utils/src/utils/mod.rs
+ - ext/csv_utils/src/utils/xls.rs
+ - lib/csv_utils.rb
+ - lib/csv_utils/2.7/csv_utils.so
+ - lib/csv_utils/3.0/csv_utils.so
+ - lib/csv_utils/3.1/csv_utils.so
+ - lib/csv_utils/3.2/csv_utils.so
+ - lib/csv_utils/version.rb
+ homepage: http://github.com/patchworkhealth/csv_utils
+ licenses: []
+ metadata:
+   homepage_uri: http://github.com/patchworkhealth/csv_utils
+   source_code_uri: http://github.com/patchworkhealth/csv_utils
+   rubygems_mfa_required: 'false'
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '2.7'
+   - - "<"
+     - !ruby/object:Gem::Version
+       version: 3.3.dev
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubygems_version: 3.4.4
+ signing_key:
+ specification_version: 4
+ summary: Fast CSV utils
+ test_files: []