dedup_csv 0.1.0-x86_64-darwin → 0.1.1-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 425c4ba7eab950712d066133101253bd2d952ee8236a7d0ca6b2d15d5901d5fb
4
- data.tar.gz: '012540959c43c1c523bd650cb9a1e975508e052f7cca40dda6b1c3d61e011810'
3
+ metadata.gz: 3e03bd04813149e45a6c2d685216693e16d2730984cfa75e34d66987147f2b46
4
+ data.tar.gz: a74999ebf0b6f41d8075dbe4a8bacefdca8b2e76ac63345c532c963ea4bc6556
5
5
  SHA512:
6
- metadata.gz: 8e3ee9ce74ab641d9a23e44f4b0fdfe721a1ca9773a767e7e070f668ef21252a1660dc2c47cc387c596b082db1a9799b74c2f4651a0d060f8c98bc8a0f9f73d1
7
- data.tar.gz: 9e163089404257110ed5ca905f8e18eebcd74dd693b0a6d5d376091178ad6c6d3c7821b6c66ea4be7eb222415df5ce0b5c60b8e9673bc91959f322c943ffad3d
6
+ metadata.gz: 0a40c41a6c56b0ea655c6f236572efb13a94d2b8201a7ffb7482d3de22a5f8ec084190e4eaf6cabcf7b7e68f36995b5ab5d61f930d856f4a757d97eccbb12072
7
+ data.tar.gz: 8743e384b60b21b8596794bda59d57b046693a05f7aeb337d502c47f89c5ccb93d6bc2e31a8e704beb73aa44871abbf3ca9786e5c1148b876958bf9a604bf26d
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- dedup_csv (0.1.0)
4
+ dedup_csv (0.1.1)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -67,4 +67,4 @@ DEPENDENCIES
67
67
  rubocop (~> 1.21)
68
68
 
69
69
  BUNDLED WITH
70
- 2.4.10
70
+ 2.4.4
data/README.md CHANGED
@@ -2,6 +2,19 @@
2
2
 
3
3
  Given 2 CSV files, this gem will create a third CSV file that contains rows from the first CSV file that are not present in the second CSV file.
4
4
 
5
+ ## Installation
6
+
7
+ ```bash
8
+ gem install dedup_csv
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ ```irb
14
+ require 'dedup_csv/3.2/dedup_csv'
15
+ DedupCsv.dedup('file1.csv', 'file2.csv', 'output.csv')
16
+ ```
17
+
5
18
  ## Contributing
6
19
 
7
20
  Bug reports and pull requests are welcome on GitHub at https://github.com/kingsleyh/dedup_csv.
data/Rakefile CHANGED
@@ -14,11 +14,11 @@ require 'rb_sys/extensiontask'
14
14
  task build: :compile
15
15
 
16
16
  spec = Bundler.load_gemspec('dedup_csv.gemspec')
17
- spec.requirements.clear
18
- spec.required_ruby_version = nil
19
- spec.required_rubygems_version = nil
20
- spec.extensions.clear
21
- spec.files -= Dir['ext/**/*']
17
+ # spec.requirements.clear
18
+ # spec.required_ruby_version = nil
19
+ # spec.required_rubygems_version = nil
20
+ # spec.extensions.clear
21
+ # spec.files -= Dir['ext/**/*']
22
22
 
23
23
  Rake::ExtensionTask.new('dedup_csv', spec) do |c|
24
24
  c.lib_dir = 'lib/dedup_csv'
@@ -0,0 +1,14 @@
1
+ [package]
2
+ name = "dedup_csv"
3
+ version = "0.1.0"
4
+ edition = "2021"
5
+ authors = ["kingsley.hendrickse <kingsley.hendrickse@patchwork.health>"]
6
+ publish = false
7
+
8
+ [lib]
9
+ crate-type = ["cdylib"]
10
+
11
+ [dependencies]
12
+ magnus = { version = "0.7.1" }
13
+ csv = "1.3.0"
14
+ eyre = "0.6.12"
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mkmf'
4
+ require 'rb_sys/mkmf'
5
+
6
+ create_rust_makefile('dedup_csv/dedup_csv')
@@ -0,0 +1,77 @@
1
+ use std::error::Error;
2
+ use std::ffi::OsStr;
3
+ use std::fs::File;
4
+ use std::path::Path;
5
+ use csv::{StringRecord, Writer};
6
+ use magnus::{define_module, function, prelude::*, Ruby};
7
+
8
+ fn dedup(ruby: &Ruby, previous_csv_path: String, new_csv_path: String, target_path: String) -> magnus::error::Result<()> {
9
+ if !previous_csv_path.has_extension(&["csv"]) {
10
+ return Err(magnus::Error::new(ruby.exception_exception(), "previous_csv_path must be a csv file".to_string()));
11
+ }
12
+ if !new_csv_path.has_extension(&["csv"]) {
13
+ return Err(magnus::Error::new(ruby.exception_exception(), "new_csv_path must be a csv file".to_string()));
14
+ }
15
+
16
+ let csv1 = File::open(previous_csv_path).map_err(|e| magnus_err(ruby, e, "previous_csv_path"))?;
17
+ let csv2 = File::open(new_csv_path).map_err(|e| magnus_err(ruby, e, "new_csv_path"))?;
18
+
19
+ let mut previous_csv: csv::Reader<File> = csv::Reader::from_reader(csv1);
20
+ let mut new_csv: csv::Reader<File> = csv::Reader::from_reader(csv2);
21
+
22
+ let mut wtr = Writer::from_path(target_path).map_err(|e| magnus_err(ruby, e, "target_path"))?;
23
+
24
+ let previous_headers = previous_csv.headers().map_err(|e| magnus_err(ruby, e, "previous_csv_path headers"))?;
25
+ let new_headers = new_csv.headers().map_err(|e| magnus_err(ruby, e, "new_csv_path headers"))?;
26
+
27
+ if previous_headers != new_headers {
28
+ return Err(magnus::Error::new(ruby.exception_exception(), "headers of both csv files must be the same".to_string()));
29
+ }
30
+
31
+ wtr.write_byte_record(previous_headers.as_byte_record()).unwrap();
32
+
33
+ let mut previous_records = vec![];
34
+ for previous_record in previous_csv.records() {
35
+ let previous_record = previous_record.map_err(|e| magnus_err(ruby, e, "previous_record"))?;
36
+ previous_records.push(previous_record)
37
+ }
38
+
39
+ for new_record in new_csv.records() {
40
+ let new_record = new_record.map_err(|e| magnus_err(ruby, e, "new_record"))?;
41
+ if !previous_records.contains(&new_record) {
42
+ let new_record = new_record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
43
+ wtr.write_byte_record(new_record.as_byte_record()).unwrap();
44
+ }
45
+ }
46
+
47
+ wtr.flush().unwrap();
48
+
49
+ Ok(())
50
+ }
51
+
52
+ fn magnus_err<E: Error>(ruby: &Ruby, e: E, msg: &str) -> magnus::Error {
53
+ magnus::Error::new(ruby.exception_exception(), format!("{}: {}", msg, e.to_string()))
54
+ }
55
+
56
+ #[magnus::init]
57
+ fn init() -> Result<(), magnus::Error> {
58
+ let module = define_module("DedupCsv")?;
59
+ module.define_singleton_method("dedup", function!(dedup, 3))?;
60
+ Ok(())
61
+ }
62
+
63
/// Extension-matching helper for anything that can be viewed as a [`Path`].
pub trait FileExtension {
    /// Returns `true` when the path's extension equals any entry of `extensions`,
    /// compared ASCII case-insensitively.
    ///
    /// Returns `false` when the path has no extension, when the extension is not valid
    /// UTF-8, or when `extensions` is empty.
    fn has_extension<S: AsRef<str>>(&self, extensions: &[S]) -> bool;
}

impl<P: AsRef<Path>> FileExtension for P {
    fn has_extension<S: AsRef<str>>(&self, extensions: &[S]) -> bool {
        match self.as_ref().extension().and_then(OsStr::to_str) {
            Some(actual) => extensions
                .iter()
                .any(|candidate| candidate.as_ref().eq_ignore_ascii_case(actual)),
            // No extension at all, or one that is not representable as UTF-8.
            None => false,
        }
    }
}
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module DedupCsv
4
- VERSION = '0.1.0'
4
+ VERSION = '0.1.1'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dedup_csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: x86_64-darwin
6
6
  authors:
7
7
  - kingsley.hendrickse
@@ -25,6 +25,9 @@ files:
25
25
  - Gemfile.lock
26
26
  - README.md
27
27
  - Rakefile
28
+ - ext/dedup_csv/Cargo.toml
29
+ - ext/dedup_csv/extconf.rb
30
+ - ext/dedup_csv/src/lib.rs
28
31
  - lib/dedup_csv.rb
29
32
  - lib/dedup_csv/2.7/dedup_csv.bundle
30
33
  - lib/dedup_csv/3.0/dedup_csv.bundle
@@ -53,7 +56,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
53
56
  requirements:
54
57
  - - ">="
55
58
  - !ruby/object:Gem::Version
56
- version: '0'
59
+ version: 3.0.0
57
60
  requirements: []
58
61
  rubygems_version: 3.4.4
59
62
  signing_key: