dedup_csv 0.1.0-x86_64-linux → 0.1.1-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 42de28c2a0451667a1520e37726a914fd07fd0d58a14b679fb6df1ba103a4b75
4
- data.tar.gz: b27e2cb7f69bb92d29c30fc832c245cdf67811fac111c924026aaf89648453c7
3
+ metadata.gz: 45c03b08fa55283d608e1de618ba4b2e2ab57cfa8ea2d5b66f3ad3c423aef642
4
+ data.tar.gz: f88169135fbe5ab18388300b7ca28e0492cde3e9a0e0da262c916b7b999f837b
5
5
  SHA512:
6
- metadata.gz: a5969581f0b618658ba0802b51161f023a9d9683b39ed5db81e38b36484df70bc2e36c3a8abfc26d0f62a23532033cca074879849dba9828dd21a990fe0bba0e
7
- data.tar.gz: 8ebe58c390a11c3d8a4e6926d28070a228c96d1137378c40db422ee575a5cff247545b02ae17700cf51aec591d4a78267d7c36c76c61ba4b3a8ccfa86fb167c4
6
+ metadata.gz: 16bddd21882ea9cc516ac45f479cbcdecd33641b441fa2c2b81999a11ba2723faa63c2f4858c5af5111f5695ba1cf134b238a3a88ed3f843d4b4997ca1e5b96e
7
+ data.tar.gz: b5418d9534185df0182a9d5a841efe2c46da0858a272724e1c1ad923295eb69a9a8f386825ee1449d0f21dd10527fd00f9508846a8891bca79dd8fb4575fc9b3
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- dedup_csv (0.1.0)
4
+ dedup_csv (0.1.1)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -67,4 +67,4 @@ DEPENDENCIES
67
67
  rubocop (~> 1.21)
68
68
 
69
69
  BUNDLED WITH
70
- 2.4.10
70
+ 2.4.4
data/README.md CHANGED
@@ -2,6 +2,19 @@
2
2
 
3
3
  Given 2 CSV files (a previous one and a new one), this gem will create a third CSV file that contains the rows from the second (new) CSV file that are not present in the first (previous) CSV file.
4
4
 
5
+ ## Installation
6
+
7
+ ```bash
8
+ gem install dedup_csv
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ ```irb
14
+ require 'dedup_csv/3.2/dedup_csv'
15
+ DedupCsv.dedup('file1.csv', 'file2.csv', 'output.csv')
16
+ ```
17
+
5
18
  ## Contributing
6
19
 
7
20
  Bug reports and pull requests are welcome on GitHub at https://github.com/kingsleyh/dedup_csv.
data/Rakefile CHANGED
@@ -14,11 +14,11 @@ require 'rb_sys/extensiontask'
14
14
  task build: :compile
15
15
 
16
16
  spec = Bundler.load_gemspec('dedup_csv.gemspec')
17
- spec.requirements.clear
18
- spec.required_ruby_version = nil
19
- spec.required_rubygems_version = nil
20
- spec.extensions.clear
21
- spec.files -= Dir['ext/**/*']
17
+ # spec.requirements.clear
18
+ # spec.required_ruby_version = nil
19
+ # spec.required_rubygems_version = nil
20
+ # spec.extensions.clear
21
+ # spec.files -= Dir['ext/**/*']
22
22
 
23
23
  Rake::ExtensionTask.new('dedup_csv', spec) do |c|
24
24
  c.lib_dir = 'lib/dedup_csv'
@@ -0,0 +1,14 @@
1
+ [package]
2
+ name = "dedup_csv"
3
+ version = "0.1.0"
4
+ edition = "2021"
5
+ authors = ["kingsley.hendrickse <kingsley.hendrickse@patchwork.health>"]
6
+ publish = false
7
+
8
+ [lib]
9
+ crate-type = ["cdylib"]
10
+
11
+ [dependencies]
12
+ magnus = { version = "0.7.1" }
13
+ csv = "1.3.0"
14
+ eyre = "0.6.12"
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mkmf'
4
+ require 'rb_sys/mkmf'
5
+
6
+ create_rust_makefile('dedup_csv/dedup_csv')
@@ -0,0 +1,77 @@
1
+ use std::error::Error;
2
+ use std::ffi::OsStr;
3
+ use std::fs::File;
4
+ use std::path::Path;
5
+ use csv::{StringRecord, Writer};
6
+ use magnus::{define_module, function, prelude::*, Ruby};
7
+
8
+ fn dedup(ruby: &Ruby, previous_csv_path: String, new_csv_path: String, target_path: String) -> magnus::error::Result<()> {
9
+ if !previous_csv_path.has_extension(&["csv"]) {
10
+ return Err(magnus::Error::new(ruby.exception_exception(), "previous_csv_path must be a csv file".to_string()));
11
+ }
12
+ if !new_csv_path.has_extension(&["csv"]) {
13
+ return Err(magnus::Error::new(ruby.exception_exception(), "new_csv_path must be a csv file".to_string()));
14
+ }
15
+
16
+ let csv1 = File::open(previous_csv_path).map_err(|e| magnus_err(ruby, e, "previous_csv_path"))?;
17
+ let csv2 = File::open(new_csv_path).map_err(|e| magnus_err(ruby, e, "new_csv_path"))?;
18
+
19
+ let mut previous_csv: csv::Reader<File> = csv::Reader::from_reader(csv1);
20
+ let mut new_csv: csv::Reader<File> = csv::Reader::from_reader(csv2);
21
+
22
+ let mut wtr = Writer::from_path(target_path).map_err(|e| magnus_err(ruby, e, "target_path"))?;
23
+
24
+ let previous_headers = previous_csv.headers().map_err(|e| magnus_err(ruby, e, "previous_csv_path headers"))?;
25
+ let new_headers = new_csv.headers().map_err(|e| magnus_err(ruby, e, "new_csv_path headers"))?;
26
+
27
+ if previous_headers != new_headers {
28
+ return Err(magnus::Error::new(ruby.exception_exception(), "headers of both csv files must be the same".to_string()));
29
+ }
30
+
31
+ wtr.write_byte_record(previous_headers.as_byte_record()).unwrap();
32
+
33
+ let mut previous_records = vec![];
34
+ for previous_record in previous_csv.records() {
35
+ let previous_record = previous_record.map_err(|e| magnus_err(ruby, e, "previous_record"))?;
36
+ previous_records.push(previous_record)
37
+ }
38
+
39
+ for new_record in new_csv.records() {
40
+ let new_record = new_record.map_err(|e| magnus_err(ruby, e, "new_record"))?;
41
+ if !previous_records.contains(&new_record) {
42
+ let new_record = new_record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
43
+ wtr.write_byte_record(new_record.as_byte_record()).unwrap();
44
+ }
45
+ }
46
+
47
+ wtr.flush().unwrap();
48
+
49
+ Ok(())
50
+ }
51
+
52
+ fn magnus_err<E: Error>(ruby: &Ruby, e: E, msg: &str) -> magnus::Error {
53
+ magnus::Error::new(ruby.exception_exception(), format!("{}: {}", msg, e.to_string()))
54
+ }
55
+
56
+ #[magnus::init]
57
+ fn init() -> Result<(), magnus::Error> {
58
+ let module = define_module("DedupCsv")?;
59
+ module.define_singleton_method("dedup", function!(dedup, 3))?;
60
+ Ok(())
61
+ }
62
+
63
/// Extension test for any path-like value.
pub trait FileExtension {
    /// Returns `true` when the path's extension equals one of `extensions`, ignoring
    /// ASCII case. Returns `false` when the path has no extension or the extension is
    /// not valid UTF-8.
    fn has_extension<S: AsRef<str>>(&self, extensions: &[S]) -> bool;
}

impl<P: AsRef<Path>> FileExtension for P {
    fn has_extension<S: AsRef<str>>(&self, extensions: &[S]) -> bool {
        match self.as_ref().extension().and_then(OsStr::to_str) {
            Some(actual) => extensions
                .iter()
                .any(|candidate| candidate.as_ref().eq_ignore_ascii_case(actual)),
            None => false,
        }
    }
}
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module DedupCsv
4
- VERSION = '0.1.0'
4
+ VERSION = '0.1.1'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dedup_csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: x86_64-linux
6
6
  authors:
7
7
  - kingsley.hendrickse
@@ -25,6 +25,9 @@ files:
25
25
  - Gemfile.lock
26
26
  - README.md
27
27
  - Rakefile
28
+ - ext/dedup_csv/Cargo.toml
29
+ - ext/dedup_csv/extconf.rb
30
+ - ext/dedup_csv/src/lib.rs
28
31
  - lib/dedup_csv.rb
29
32
  - lib/dedup_csv/2.7/dedup_csv.so
30
33
  - lib/dedup_csv/3.0/dedup_csv.so
@@ -53,7 +56,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
53
56
  requirements:
54
57
  - - ">="
55
58
  - !ruby/object:Gem::Version
56
- version: '0'
59
+ version: 3.0.0
57
60
  requirements: []
58
61
  rubygems_version: 3.4.4
59
62
  signing_key: