patchwork_csv_utils 0.1.10-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Cargo.toml ADDED
@@ -0,0 +1,7 @@
1
+ # This Cargo.toml is here to let external tools (IDEs, etc.) know that this is
2
+ # a Rust project. Your extension's dependencies should be added to the Cargo.toml
3
+ # in the ext/ directory.
4
+
5
+ [workspace]
6
+ members = ["./ext/csv_utils"]
7
+ resolver = "2"
data/Gemfile ADDED
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+
5
+ # Specify your gem's dependencies in csv_utils.gemspec
6
+ gemspec
7
+
8
+ group :development do
9
+ gem 'rake', '~> 13.0'
10
+ gem 'rake-compiler'
11
+ gem 'rb_sys', '~> 0.9.98'
12
+ gem 'rspec', '~> 3.0'
13
+ gem 'rubocop', '~> 1.21'
14
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,70 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ patchwork_csv_utils (0.1.10)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ ast (2.4.2)
10
+ diff-lcs (1.5.1)
11
+ json (2.7.2)
12
+ language_server-protocol (3.17.0.3)
13
+ parallel (1.25.1)
14
+ parser (3.3.4.0)
15
+ ast (~> 2.4.1)
16
+ racc
17
+ racc (1.8.0)
18
+ rainbow (3.1.1)
19
+ rake (13.2.1)
20
+ rake-compiler (1.2.7)
21
+ rake
22
+ rb_sys (0.9.98)
23
+ regexp_parser (2.9.2)
24
+ rexml (3.3.2)
25
+ strscan
26
+ rspec (3.13.0)
27
+ rspec-core (~> 3.13.0)
28
+ rspec-expectations (~> 3.13.0)
29
+ rspec-mocks (~> 3.13.0)
30
+ rspec-core (3.13.0)
31
+ rspec-support (~> 3.13.0)
32
+ rspec-expectations (3.13.1)
33
+ diff-lcs (>= 1.2.0, < 2.0)
34
+ rspec-support (~> 3.13.0)
35
+ rspec-mocks (3.13.1)
36
+ diff-lcs (>= 1.2.0, < 2.0)
37
+ rspec-support (~> 3.13.0)
38
+ rspec-support (3.13.1)
39
+ rubocop (1.65.0)
40
+ json (~> 2.3)
41
+ language_server-protocol (>= 3.17.0)
42
+ parallel (~> 1.10)
43
+ parser (>= 3.3.0.2)
44
+ rainbow (>= 2.2.2, < 4.0)
45
+ regexp_parser (>= 2.4, < 3.0)
46
+ rexml (>= 3.2.5, < 4.0)
47
+ rubocop-ast (>= 1.31.1, < 2.0)
48
+ ruby-progressbar (~> 1.7)
49
+ unicode-display_width (>= 2.4.0, < 3.0)
50
+ rubocop-ast (1.31.3)
51
+ parser (>= 3.3.1.0)
52
+ ruby-progressbar (1.13.0)
53
+ strscan (3.1.0)
54
+ unicode-display_width (2.5.0)
55
+
56
+ PLATFORMS
57
+ arm64-darwin-22
58
+ arm64-darwin-23
59
+ x86_64-linux
60
+
61
+ DEPENDENCIES
62
+ patchwork_csv_utils!
63
+ rake (~> 13.0)
64
+ rake-compiler
65
+ rb_sys (~> 0.9.98)
66
+ rspec (~> 3.0)
67
+ rubocop (~> 1.21)
68
+
69
+ BUNDLED WITH
70
+ 2.4.10
data/README.md ADDED
@@ -0,0 +1,33 @@
1
+ # CsvUtils
2
+
3
+ * dedup: Given 2 CSV files with identical headers, this gem will create a third CSV file that contains the rows from the second (new) CSV file that are not present in the first (previous) CSV file.
4
+ * xls to csv: Given an XLS file, this gem will create a CSV file with the specified name.
5
+
6
+ ## Installation
7
+
8
+ ```bash
9
+ gem install patchwork_csv_utils
10
+ ```
11
+
12
+ ## Usage
13
+
14
+ ```irb
15
+ require 'csv_utils'
16
+ CsvUtils.dedup('file1.csv', 'file2.csv', 'output.csv')
17
+ CsvUtils.to_csv('file1.xls', 'output_file1.csv', ['request_ids_to_skip'])
18
+ CsvUtils.transform_csv('file1.csv', 'output_file1.csv', ['request_ids_to_skip'])
19
+ ```
20
+
21
+ ## Release
22
+
23
+ * to release a new version, update the version number in `lib/csv_utils/version.rb`
24
+ * push the changes to github and then create a tag with the version number
25
+
26
+ ```bash
27
+ git tag -a v0.1.0 -m "v0.1.0"
28
+ git push origin --tags
29
+ ```
30
+
31
+ ## Contributing
32
+
33
+ Bug reports and pull requests are welcome on GitHub at http://github.com/patchworkhealth/csv_utils.
data/Rakefile ADDED
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+ require 'rspec/core/rake_task'
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ require 'rubocop/rake_task'
9
+
10
+ RuboCop::RakeTask.new
11
+
12
+ require 'rb_sys/extensiontask'
13
+
14
+ task build: :compile
15
+
16
+ spec = Bundler.load_gemspec('patchwork_csv_utils.gemspec')
17
+
18
+ Rake::ExtensionTask.new('csv_utils', spec) do |c|
19
+ c.lib_dir = 'lib/csv_utils'
20
+ c.cross_compile = true
21
+ c.cross_platform = %w[
22
+ aarch64-linux
23
+ arm64-darwin
24
+ x64-mingw-ucrt
25
+ x64-mingw32
26
+ x86_64-darwin
27
+ x86_64-linux
28
+ x86_64-linux-musl
29
+ ]
30
+ end
31
+
32
+ RbSys::ExtensionTask.new('csv_utils') do |ext|
33
+ ext.lib_dir = 'lib/csv_utils'
34
+ end
35
+
36
+ task default: %i[compile spec rubocop]
@@ -0,0 +1,15 @@
1
+ [package]
2
+ name = "csv_utils"
3
+ version = "0.1.0"
4
+ edition = "2021"
5
+ authors = ["kingsley.hendrickse <kingsley.hendrickse@patchwork.health>"]
6
+ publish = false
7
+
8
+ [lib]
9
+ crate-type = ["cdylib"]
10
+
11
+ [dependencies]
12
+ magnus = { version = "0.7.1" }
13
+ csv = "1.3.0"
14
+ calamine = { version = "0.25.0", features = ["dates"] }
15
+ chrono = "0.4.38"
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mkmf'
4
+ require 'rb_sys/mkmf'
5
+
6
+ create_rust_makefile('csv_utils/csv_utils')
@@ -0,0 +1,15 @@
1
+ use magnus::{define_module, function, prelude::*};
2
+ use crate::utils::csv::transform_csv;
3
+ use crate::utils::dedup::dedup;
4
+ use crate::utils::xls::to_csv;
5
+
6
+ pub mod utils;
7
+
8
+ #[magnus::init]
9
+ fn init() -> Result<(), magnus::Error> {
10
+ let module = define_module("CsvUtils")?;
11
+ module.define_singleton_method("dedup", function!(dedup, 3))?;
12
+ module.define_singleton_method("to_csv", function!(to_csv, 3))?;
13
+ module.define_singleton_method("transform_csv", function!(transform_csv, 3))?;
14
+ Ok(())
15
+ }
@@ -0,0 +1,113 @@
1
+ use std::collections::HashMap;
2
+ use std::fs::File;
3
+
4
+ use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Utc};
5
+ use csv::{StringRecord, Writer};
6
+ use magnus::{Error, RArray, Ruby};
7
+
8
+ use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error};
9
+
10
+ pub fn transform_csv(ruby: &Ruby, csv_path: String, target_path: String, exclusions: RArray) -> magnus::error::Result<()> {
11
+ if !csv_path.has_extension(&["csv"]) {
12
+ return Err(magnus::Error::new(ruby.exception_standard_error(), "csv_path must be a csv file".to_string()));
13
+ }
14
+
15
+ let exclusions = RArray::to_vec(exclusions)?;
16
+
17
+ let csv_file = File::open(csv_path).map_err(|e| magnus_err(ruby, e, "csv_path"))?;
18
+ let mut csv: csv::Reader<File> = csv::Reader::from_reader(csv_file);
19
+ let mut wtr = Writer::from_path(target_path).map_err(|e| magnus_err(ruby, e, "target_path"))?;
20
+ let headers = csv.headers().map_err(|e| magnus_err(ruby, e, "csv_path headers"))?;
21
+ let header_map: HashMap<String, usize> = headers.iter().enumerate().map(|(i, h)| (h.to_string(), i)).collect();
22
+ let inverse_header_map: HashMap<usize, String> = headers.iter().enumerate().map(|(i, h)| (i, h.to_string())).collect();
23
+
24
+ wtr.write_byte_record(headers.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
25
+
26
+ let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
27
+ let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
28
+ let start = header_map.get("Start").ok_or(missing_header(ruby, "Start"))?;
29
+ let end = header_map.get("End").ok_or(missing_header(ruby, "End"))?;
30
+ let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
31
+ let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
32
+
33
+ for (ri, record) in csv.records().enumerate() {
34
+ let record = record.map_err(|e| magnus_err(ruby, e, "record"))?;
35
+
36
+ if skip_excluded_rows(request_id, &record, &exclusions) { continue; }
37
+ if has_empty_row_skip(&record) { continue; }
38
+ if has_empty_first_col_skip_row(&record) { continue; }
39
+
40
+ let mut date_value = Utc::now().naive_utc();
41
+
42
+ let record = record.iter().enumerate().map(|(i, c)| {
43
+ let c = c.trim_end();
44
+ if i == *date {
45
+ let current = string_to_datetime(c).ok_or(to_datetime_error(ruby, c, ri, "Date"))?;
46
+ date_value = current;
47
+ Ok(current.to_string())
48
+ } else if i == *start || i == *end || i == *actual_start || i == *actual_end {
49
+ if c.is_empty() { return Ok(c.to_string()); }
50
+ let column_name = get_column_name(&inverse_header_map, &i);
51
+ process_datetime(ruby, ri, date_value, c, &column_name)
52
+ } else {
53
+ Ok(c.to_string())
54
+ }
55
+ }).collect::<Result<StringRecord, magnus::Error>>()?;
56
+
57
+ let record = record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
58
+ wtr.write_byte_record(record.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
59
+ }
60
+
61
+ wtr.flush().map_err(|e| magnus_err(ruby, e, "flush"))?;
62
+
63
+ Ok(())
64
+ }
65
+
66
+ fn process_datetime(ruby: &Ruby, ri: usize, date_value: NaiveDateTime, c: &str, column_name: &String) -> magnus::error::Result<String> {
67
+ let maybe_correct = correct_datetime(c);
68
+ if let Some(correct) = maybe_correct {
69
+ return Ok(correct.to_string());
70
+ }
71
+
72
+ let current_time = string_to_time(c).ok_or(to_datetime_error(ruby, c, ri, column_name))?;
73
+ let datetime = transform_time_to_datetime(date_value, current_time);
74
+ Ok(datetime.to_string())
75
+ }
76
+
77
/// Returns the header text stored for column index `i`, or `"Unknown"` when
/// `inverse_header_map` has no entry for that index.
fn get_column_name(inverse_header_map: &HashMap<usize, String>, i: &usize) -> String {
    inverse_header_map
        .get(i)
        .cloned()
        // Build the fallback lazily so the found path performs a single allocation.
        .unwrap_or_else(|| "Unknown".to_string())
}
82
+
83
+ fn skip_excluded_rows(request_id: &usize, r: &StringRecord, exclusions: &Vec<String>) -> bool {
84
+ let value = r.get(*request_id).unwrap_or_default();
85
+ exclusions.contains(&value.to_string())
86
+ }
87
+
88
+ fn string_to_datetime(s: &str) -> Option<NaiveDateTime> {
89
+ let maybe_correct = correct_datetime(s);
90
+ if maybe_correct.is_some() { return maybe_correct; }
91
+
92
+ NaiveDate::parse_from_str(s, "%d-%b-%y").ok().map(|d| d.and_hms_opt(0, 0, 0)).flatten()
93
+ }
94
+
95
+ fn correct_datetime(s: &str) -> Option<NaiveDateTime> {
96
+ NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S").ok()
97
+ }
98
+
99
+ fn string_to_time(s: &str) -> Option<NaiveTime> {
100
+ NaiveTime::parse_from_str(s, "%H:%M").ok()
101
+ }
102
+
103
+ fn transform_time_to_datetime(t1: NaiveDateTime, t2: NaiveTime) -> NaiveDateTime {
104
+ NaiveDateTime::new(t1.date(), t2)
105
+ }
106
+
107
+ fn has_empty_first_col_skip_row(record: &StringRecord) -> bool {
108
+ record[0].is_empty()
109
+ }
110
+
111
+ fn has_empty_row_skip(record: &StringRecord) -> bool {
112
+ record.iter().all(|r| r.is_empty())
113
+ }
@@ -0,0 +1,69 @@
1
+ use std::fs::File;
2
+
3
+ use csv::{StringRecord, Writer};
4
+ use magnus::Ruby;
5
+
6
+ use crate::utils::{FileExtension, magnus_err};
7
+
8
+ pub fn dedup(ruby: &Ruby, previous_csv_path: String, new_csv_path: String, target_path: String) -> magnus::error::Result<()> {
9
+ if !previous_csv_path.has_extension(&["csv"]) {
10
+ return Err(magnus::Error::new(ruby.exception_standard_error(), "previous_csv_path must be a csv file".to_string()));
11
+ }
12
+ if !new_csv_path.has_extension(&["csv"]) {
13
+ return Err(magnus::Error::new(ruby.exception_standard_error(), "new_csv_path must be a csv file".to_string()));
14
+ }
15
+
16
+ let csv1 = File::open(previous_csv_path).map_err(|e| magnus_err(ruby, e, "previous_csv_path"))?;
17
+ let csv2 = File::open(new_csv_path).map_err(|e| magnus_err(ruby, e, "new_csv_path"))?;
18
+
19
+ let mut previous_csv: csv::Reader<File> = csv::Reader::from_reader(csv1);
20
+ let mut new_csv: csv::Reader<File> = csv::Reader::from_reader(csv2);
21
+
22
+ let mut wtr = Writer::from_path(target_path).map_err(|e| magnus_err(ruby, e, "target_path"))?;
23
+
24
+ let previous_headers = previous_csv.headers().map_err(|e| magnus_err(ruby, e, "previous_csv_path headers"))?;
25
+ let new_headers = new_csv.headers().map_err(|e| magnus_err(ruby, e, "new_csv_path headers"))?;
26
+
27
+ if previous_headers != new_headers {
28
+ return Err(magnus::Error::new(ruby.exception_standard_error(), "headers of both csv files must be the same".to_string()));
29
+ }
30
+
31
+ wtr.write_byte_record(previous_headers.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
32
+
33
+ let mut previous_records = vec![];
34
+ for previous_record in previous_csv.records() {
35
+ let previous_record = previous_record.map_err(|e| magnus_err(ruby, e, "previous_record"))?;
36
+
37
+ if has_empty_row_skip(&previous_record) { continue; }
38
+ if has_empty_first_col_skip_row(&previous_record) { continue; }
39
+
40
+ let previous_record = previous_record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
41
+ previous_records.push(previous_record)
42
+ }
43
+
44
+ for new_record in new_csv.records() {
45
+ let new_record = new_record.map_err(|e| magnus_err(ruby, e, "new_record"))?;
46
+
47
+ if has_empty_row_skip(&new_record) { continue; }
48
+ if has_empty_first_col_skip_row(&new_record) { continue; }
49
+
50
+ let new_record = new_record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
51
+ if !previous_records.contains(&new_record) {
52
+ wtr.write_byte_record(new_record.as_byte_record()).map_err(|e| magnus_err(ruby, e, "write_byte_record"))?;
53
+ }
54
+ }
55
+
56
+ wtr.flush().map_err(|e| magnus_err(ruby, e, "flush"))?;
57
+
58
+ Ok(())
59
+ }
60
+
61
+ fn has_empty_first_col_skip_row(previous_record: &StringRecord) -> bool {
62
+ previous_record[0].is_empty()
63
+ }
64
+
65
+ fn has_empty_row_skip(record: &StringRecord) -> bool {
66
+ record.iter().all(|r| r.is_empty())
67
+ }
68
+
69
+
@@ -0,0 +1,36 @@
1
+ use std::error::Error;
2
+ use std::ffi::OsStr;
3
+ use std::path::Path;
4
+ use magnus::Ruby;
5
+
6
+ pub mod csv;
7
+ pub mod dedup;
8
+ pub mod xls;
9
+
10
+ fn missing_header(ruby: &Ruby, header: &str) -> magnus::Error {
11
+ magnus::Error::new(ruby.exception_standard_error(), format!("Missing '{}' header", header))
12
+ }
13
+
14
+ fn magnus_err<E: Error>(ruby: &Ruby, e: E, msg: &str) -> magnus::Error {
15
+ magnus::Error::new(ruby.exception_standard_error(), format!("{}: {}", msg, e.to_string()))
16
+ }
17
+
18
+ fn to_datetime_error(ruby: &Ruby, value: &str, row: usize, col: &str) -> magnus::Error {
19
+ magnus::Error::new(ruby.exception_standard_error(), format!("Could not parse datetime '{}', row: {}, col: {}", value, row, col))
20
+ }
21
+
22
/// Extension check for any value that can be viewed as a filesystem path.
pub trait FileExtension {
    /// Returns `true` when the path's extension equals one of `extensions`,
    /// compared ASCII case-insensitively. Entries are given without a leading dot.
    fn has_extension<S: AsRef<str>>(&self, extensions: &[S]) -> bool;
}

impl<P: AsRef<Path>> FileExtension for P {
    fn has_extension<S: AsRef<str>>(&self, extensions: &[S]) -> bool {
        // A path with no extension, or with a non-UTF-8 extension, never matches.
        self.as_ref()
            .extension()
            .and_then(OsStr::to_str)
            .map_or(false, |extension| {
                extensions
                    .iter()
                    .any(|candidate| candidate.as_ref().eq_ignore_ascii_case(extension))
            })
    }
}
@@ -0,0 +1,114 @@
1
+ use std::collections::HashMap;
2
+ use std::fs::File;
3
+ use std::io::{BufWriter, Write};
4
+
5
+ use calamine::{Data, open_workbook, Range, Reader, Xls};
6
+ use chrono::{NaiveDateTime, Utc};
7
+ use magnus::{RArray, Ruby};
8
+
9
+ use crate::utils::{FileExtension, magnus_err, missing_header, to_datetime_error};
10
+
11
+ pub fn to_csv(ruby: &Ruby, xls_path: String, target_path: String, exclusions: RArray) -> magnus::error::Result<()> {
12
+ if !xls_path.has_extension(&["xls"]) {
13
+ return Err(magnus::Error::new(ruby.exception_standard_error(), "xls_path must be an xls file".to_string()));
14
+ }
15
+
16
+ let exclusions = RArray::to_vec(exclusions)?;
17
+
18
+ let mut workbook: Xls<_> = open_workbook(xls_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not open xls: {}", xls_path).as_str()))?;
19
+ let range = workbook.worksheet_range_at(0)
20
+ .ok_or(magnus::Error::new(ruby.exception_standard_error(), "no worksheet found in xls".to_string()))
21
+ .and_then(|r| r.map_err(|e| magnus_err(ruby, e, "could not read worksheet range")))?;
22
+
23
+ let headers = range.headers().ok_or(magnus::Error::new(ruby.exception_standard_error(), "no headers found in xls".to_string()))?;
24
+ let header_map: HashMap<String, usize> = headers.iter().enumerate().map(|(i, h)| (h.to_string(), i)).collect();
25
+ let csv_out_file = File::create(target_path.clone()).map_err(|e| magnus_err(ruby, e, format!("could not create csv file: {}", target_path).as_str()))?;
26
+ let mut dest = BufWriter::new(csv_out_file);
27
+
28
+ write_csv(ruby, &mut dest, &range, header_map, exclusions)
29
+ }
30
+
31
+ fn write_csv<W: Write>(ruby: &Ruby, dest: &mut W, range: &Range<Data>, header_map: HashMap<String, usize>, exclusions: Vec<String>) -> magnus::error::Result<()> {
32
+ let n = range.get_size().1 - 1;
33
+
34
+ let request_id = header_map.get("Request Id").ok_or(missing_header(ruby, "Request Id"))?;
35
+ let date = header_map.get("Date").ok_or(missing_header(ruby, "Date"))?;
36
+ let start = header_map.get("Start").ok_or(missing_header(ruby, "Start"))?;
37
+ let end = header_map.get("End").ok_or(missing_header(ruby, "End"))?;
38
+ let actual_start = header_map.get("Actual Start").ok_or(missing_header(ruby, "Actual Start"))?;
39
+ let actual_end = header_map.get("Actual End").ok_or(missing_header(ruby, "Actual End"))?;
40
+
41
+ for (ri, r) in range.rows().enumerate() {
42
+ let mut date_value = Utc::now().naive_utc();
43
+
44
+ if skip_excluded_rows(&request_id, r, &exclusions) { continue; }
45
+ if skip_empty_rows(r) { continue; }
46
+ if skip_rows_with_no_request_id(&request_id, r) { continue; }
47
+ if date_value_is_not_present(&date, r) {
48
+ return Err(magnus::Error::new(ruby.exception_standard_error(), format!("Date value is not present in row: {}", ri)));
49
+ }
50
+
51
+ for (i, c) in r.iter().enumerate() {
52
+ match *c {
53
+ Data::Empty => Ok(()),
54
+ Data::String(ref s) | Data::DateTimeIso(ref s) | Data::DurationIso(ref s) => {
55
+ handle_commas(dest, s)
56
+ }
57
+ Data::Float(ref f) => write!(dest, "{}", f),
58
+ Data::DateTime(ref d) => {
59
+ let mut current = d.as_datetime().ok_or(to_datetime_error(ruby, &d.to_string(), ri, "Date"))?;
60
+ if i == *date {
61
+ date_value = current;
62
+ } else if i == *start || i == *end || i == *actual_start || i == *actual_end {
63
+ current = transform_time_to_datetime(date_value, current);
64
+ }
65
+ write!(dest, "{}", current)
66
+ }
67
+ Data::Int(ref i) => write!(dest, "{}", i),
68
+ Data::Error(ref e) => write!(dest, "{:?}", e),
69
+ Data::Bool(ref b) => write!(dest, "{}", b),
70
+ }.map_err(|e| magnus_err(ruby, e, format!("error writing xls row: {}, column: {}", ri, i).as_str()))?;
71
+ if i != n {
72
+ write!(dest, ",").map_err(|e| magnus_err(ruby, e, format!("error writing csv comma for row: {}, column: {}", ri, i).as_str()))?;
73
+ }
74
+ }
75
+ write!(dest, "\r\n").map_err(|e| magnus_err(ruby, e, format!("error writing end of line for row: {}", ri).as_str()))?;
76
+ }
77
+ Ok(())
78
+ }
79
+
80
+ fn date_value_is_not_present(date: &usize, r: &[Data]) -> bool {
81
+ r[*date] == Data::Empty
82
+ }
83
+
84
+ fn skip_excluded_rows(request_id: &usize, r: &[Data], exclusions: &Vec<String>) -> bool {
85
+ let value = r[*request_id].to_string();
86
+ exclusions.contains(&value.to_string())
87
+ }
88
+
89
+ fn skip_empty_rows(r: &[Data]) -> bool {
90
+ r.iter().all(|c| c == &Data::Empty)
91
+ }
92
+
93
+ fn skip_rows_with_no_request_id(request_id: &usize, r: &[Data]) -> bool {
94
+ r[*request_id] == Data::Empty
95
+ }
96
+
97
+ fn transform_time_to_datetime(t1: NaiveDateTime, t2: NaiveDateTime) -> NaiveDateTime {
98
+ NaiveDateTime::new(t1.date(), t2.time())
99
+ }
100
+
101
/// Writes one text cell to `dest`.
///
/// The text is first passed through `clean_strings` and has trailing whitespace
/// removed. When the original text contains a comma the cell is wrapped in double
/// quotes so it stays a single CSV field; otherwise it is written bare.
fn handle_commas<W: Write>(dest: &mut W, s: &str) -> std::io::Result<()> {
    let cleaned = clean_strings(s);
    let cell = cleaned.trim_end();
    if s.contains(',') {
        // Quote explicitly instead of using `{:?}`: Debug formatting would also escape
        // backslashes, tabs and other characters, changing the cell's content. The
        // cleaned text contains no double quotes, so no further escaping is needed.
        write!(dest, "\"{}\"", cell)
    } else {
        write!(dest, "{}", cell)
    }
}

/// Returns `s` with line feeds and carriage returns replaced by a space each, and
/// with double and single quotes removed.
fn clean_strings(s: &str) -> String {
    s.chars()
        .filter(|c| !matches!(c, '"' | '\''))
        .map(|c| if matches!(c, '\n' | '\r') { ' ' } else { c })
        .collect()
}
Binary file
Binary file
Binary file
Binary file
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module CsvUtils
4
+ VERSION = '0.1.10'
5
+ end
data/lib/csv_utils.rb ADDED
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ # load native extension
4
+ begin
5
+ ruby_version = /(\d+\.\d+)/.match(RUBY_VERSION)
6
+ require_relative "csv_utils/#{ruby_version}/csv_utils"
7
+ rescue LoadError
8
+ require_relative 'csv_utils/csv_utils'
9
+ end
10
+
11
+ require_relative 'csv_utils/version'
12
+
13
+ module CsvUtils
14
+ end
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: patchwork_csv_utils
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.10
5
+ platform: aarch64-linux
6
+ authors:
7
+ - kingsley.hendrickse
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2024-08-09 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Deduplication of CSV files and XLS to CSV conversion.
14
+ email:
15
+ - kingsley.hendrickse@patchwork.health
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - ".rspec"
21
+ - ".rubocop.yml"
22
+ - Cargo.lock
23
+ - Cargo.toml
24
+ - Gemfile
25
+ - Gemfile.lock
26
+ - README.md
27
+ - Rakefile
28
+ - ext/csv_utils/Cargo.toml
29
+ - ext/csv_utils/extconf.rb
30
+ - ext/csv_utils/src/lib.rs
31
+ - ext/csv_utils/src/utils/csv.rs
32
+ - ext/csv_utils/src/utils/dedup.rs
33
+ - ext/csv_utils/src/utils/mod.rs
34
+ - ext/csv_utils/src/utils/xls.rs
35
+ - lib/csv_utils.rb
36
+ - lib/csv_utils/2.7/csv_utils.so
37
+ - lib/csv_utils/3.0/csv_utils.so
38
+ - lib/csv_utils/3.1/csv_utils.so
39
+ - lib/csv_utils/3.2/csv_utils.so
40
+ - lib/csv_utils/version.rb
41
+ homepage: http://github.com/patchworkhealth/csv_utils
42
+ licenses: []
43
+ metadata:
44
+ homepage_uri: http://github.com/patchworkhealth/csv_utils
45
+ source_code_uri: http://github.com/patchworkhealth/csv_utils
46
+ rubygems_mfa_required: 'false'
47
+ post_install_message:
48
+ rdoc_options: []
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '2.7'
56
+ - - "<"
57
+ - !ruby/object:Gem::Version
58
+ version: 3.3.dev
59
+ required_rubygems_version: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
64
+ requirements: []
65
+ rubygems_version: 3.4.4
66
+ signing_key:
67
+ specification_version: 4
68
+ summary: Fast CSV utils
69
+ test_files: []