dedup_csv 0.1.0-x86_64-linux → 0.1.2-x86_64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/Gemfile.lock +1 -1
- data/README.md +13 -0
- data/Rakefile +5 -5
- data/ext/dedup_csv/Cargo.toml +14 -0
- data/ext/dedup_csv/extconf.rb +6 -0
- data/ext/dedup_csv/src/lib.rs +77 -0
- data/lib/dedup_csv/version.rb +1 -1
- data/lib/dedup_csv.rb +8 -3
- metadata +10 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 399d31ac2650c0c79b87f474840f82e0eb85dab268b574a4edd7f5a4a451c30c
|
4
|
+
data.tar.gz: 6198e1a1cae8565395432d95222d89177c62f09e8edec07f0ea45c982d49fee9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 64b7a354fb1a33c78040271c253559cb4a53a60d0854d24e1f73e97215abc9b7b3041e5a6f6cf23f4f5e57248154b3fb28ddc012d2dd3606e78921b9fd852fcb
|
7
|
+
data.tar.gz: '0579cd7b80eced5d733264c779f51f4563f71eabf17aea280dd7aa18d2331db9682cf0bfda38e5195cb3ba6147b89b0fccf1a2f464ca5a032a3fbb0c00981acd'
|
data/.rubocop.yml
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -2,6 +2,19 @@
|
|
2
2
|
|
3
3
|
Given 2 CSV files, this gem will create a third CSV file that contains rows from the second CSV file that are not present in the first CSV file.
|
4
4
|
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
```bash
|
8
|
+
gem install dedup_csv
|
9
|
+
```
|
10
|
+
|
11
|
+
## Usage
|
12
|
+
|
13
|
+
```irb
|
14
|
+
require 'dedup_csv'
|
15
|
+
DedupCsv.dedup('file1.csv', 'file2.csv', 'output.csv')
|
16
|
+
```
|
17
|
+
|
5
18
|
## Contributing
|
6
19
|
|
7
20
|
Bug reports and pull requests are welcome on GitHub at https://github.com/kingsleyh/dedup_csv.
|
data/Rakefile
CHANGED
@@ -14,11 +14,11 @@ require 'rb_sys/extensiontask'
|
|
14
14
|
task build: :compile
|
15
15
|
|
16
16
|
spec = Bundler.load_gemspec('dedup_csv.gemspec')
|
17
|
-
spec.requirements.clear
|
18
|
-
spec.required_ruby_version = nil
|
19
|
-
spec.required_rubygems_version = nil
|
20
|
-
spec.extensions.clear
|
21
|
-
spec.files -= Dir['ext/**/*']
|
17
|
+
# spec.requirements.clear
|
18
|
+
# spec.required_ruby_version = nil
|
19
|
+
# spec.required_rubygems_version = nil
|
20
|
+
# spec.extensions.clear
|
21
|
+
# spec.files -= Dir['ext/**/*']
|
22
22
|
|
23
23
|
Rake::ExtensionTask.new('dedup_csv', spec) do |c|
|
24
24
|
c.lib_dir = 'lib/dedup_csv'
|
@@ -0,0 +1,14 @@
|
|
1
|
+
[package]
|
2
|
+
name = "dedup_csv"
|
3
|
+
version = "0.1.0"
|
4
|
+
edition = "2021"
|
5
|
+
authors = ["kingsley.hendrickse <kingsley.hendrickse@patchwork.health>"]
|
6
|
+
publish = false
|
7
|
+
|
8
|
+
[lib]
|
9
|
+
crate-type = ["cdylib"]
|
10
|
+
|
11
|
+
[dependencies]
|
12
|
+
magnus = { version = "0.7.1" }
|
13
|
+
csv = "1.3.0"
|
14
|
+
eyre = "0.6.12"
|
@@ -0,0 +1,77 @@
|
|
1
|
+
use std::error::Error;
|
2
|
+
use std::ffi::OsStr;
|
3
|
+
use std::fs::File;
|
4
|
+
use std::path::Path;
|
5
|
+
use csv::{StringRecord, Writer};
|
6
|
+
use magnus::{define_module, function, prelude::*, Ruby};
|
7
|
+
|
8
|
+
fn dedup(ruby: &Ruby, previous_csv_path: String, new_csv_path: String, target_path: String) -> magnus::error::Result<()> {
|
9
|
+
if !previous_csv_path.has_extension(&["csv"]) {
|
10
|
+
return Err(magnus::Error::new(ruby.exception_exception(), "previous_csv_path must be a csv file".to_string()));
|
11
|
+
}
|
12
|
+
if !new_csv_path.has_extension(&["csv"]) {
|
13
|
+
return Err(magnus::Error::new(ruby.exception_exception(), "new_csv_path must be a csv file".to_string()));
|
14
|
+
}
|
15
|
+
|
16
|
+
let csv1 = File::open(previous_csv_path).map_err(|e| magnus_err(ruby, e, "previous_csv_path"))?;
|
17
|
+
let csv2 = File::open(new_csv_path).map_err(|e| magnus_err(ruby, e, "new_csv_path"))?;
|
18
|
+
|
19
|
+
let mut previous_csv: csv::Reader<File> = csv::Reader::from_reader(csv1);
|
20
|
+
let mut new_csv: csv::Reader<File> = csv::Reader::from_reader(csv2);
|
21
|
+
|
22
|
+
let mut wtr = Writer::from_path(target_path).map_err(|e| magnus_err(ruby, e, "target_path"))?;
|
23
|
+
|
24
|
+
let previous_headers = previous_csv.headers().map_err(|e| magnus_err(ruby, e, "previous_csv_path headers"))?;
|
25
|
+
let new_headers = new_csv.headers().map_err(|e| magnus_err(ruby, e, "new_csv_path headers"))?;
|
26
|
+
|
27
|
+
if previous_headers != new_headers {
|
28
|
+
return Err(magnus::Error::new(ruby.exception_exception(), "headers of both csv files must be the same".to_string()));
|
29
|
+
}
|
30
|
+
|
31
|
+
wtr.write_byte_record(previous_headers.as_byte_record()).unwrap();
|
32
|
+
|
33
|
+
let mut previous_records = vec![];
|
34
|
+
for previous_record in previous_csv.records() {
|
35
|
+
let previous_record = previous_record.map_err(|e| magnus_err(ruby, e, "previous_record"))?;
|
36
|
+
previous_records.push(previous_record)
|
37
|
+
}
|
38
|
+
|
39
|
+
for new_record in new_csv.records() {
|
40
|
+
let new_record = new_record.map_err(|e| magnus_err(ruby, e, "new_record"))?;
|
41
|
+
if !previous_records.contains(&new_record) {
|
42
|
+
let new_record = new_record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
|
43
|
+
wtr.write_byte_record(new_record.as_byte_record()).unwrap();
|
44
|
+
}
|
45
|
+
}
|
46
|
+
|
47
|
+
wtr.flush().unwrap();
|
48
|
+
|
49
|
+
Ok(())
|
50
|
+
}
|
51
|
+
|
52
|
+
fn magnus_err<E: Error>(ruby: &Ruby, e: E, msg: &str) -> magnus::Error {
|
53
|
+
magnus::Error::new(ruby.exception_exception(), format!("{}: {}", msg, e.to_string()))
|
54
|
+
}
|
55
|
+
|
56
|
+
#[magnus::init]
|
57
|
+
fn init() -> Result<(), magnus::Error> {
|
58
|
+
let module = define_module("DedupCsv")?;
|
59
|
+
module.define_singleton_method("dedup", function!(dedup, 3))?;
|
60
|
+
Ok(())
|
61
|
+
}
|
62
|
+
|
63
|
+
/// Extension check for anything that can be viewed as a filesystem path.
pub trait FileExtension {
    /// Returns `true` when the path's extension equals one of `extensions`, ignoring ASCII case.
    ///
    /// Returns `false` when the path has no extension or the extension is not valid UTF-8.
    fn has_extension<S: AsRef<str>>(&self, extensions: &[S]) -> bool;
}

impl<P: AsRef<Path>> FileExtension for P {
    fn has_extension<S: AsRef<str>>(&self, extensions: &[S]) -> bool {
        self.as_ref()
            .extension()
            .and_then(OsStr::to_str)
            .map_or(false, |actual| {
                extensions
                    .iter()
                    .any(|candidate| candidate.as_ref().eq_ignore_ascii_case(actual))
            })
    }
}
|
data/lib/dedup_csv/version.rb
CHANGED
data/lib/dedup_csv.rb
CHANGED
@@ -1,9 +1,14 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
# load native extension
|
4
|
+
# Load the native extension. The packaged binaries live in a per-Ruby-version
# directory (lib/dedup_csv/<major.minor>/dedup_csv.so), so the versioned path
# must include the dedup_csv/ directory. If no binary exists for the running
# Ruby, fall back to lib/dedup_csv/dedup_csv.
begin
  ruby_version = RUBY_VERSION[/\d+\.\d+/]
  require_relative "dedup_csv/#{ruby_version}/dedup_csv"
rescue LoadError
  require_relative 'dedup_csv/dedup_csv'
end
|
10
|
+
|
3
11
|
require_relative 'dedup_csv/version'
|
4
|
-
require_relative 'dedup_csv/dedup_csv'
|
5
12
|
|
6
13
|
module DedupCsv
|
7
|
-
class Error < StandardError; end
|
8
|
-
# Your code goes here...
|
9
14
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dedup_csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.2
|
5
5
|
platform: x86_64-linux
|
6
6
|
authors:
|
7
7
|
- kingsley.hendrickse
|
@@ -10,7 +10,8 @@ bindir: exe
|
|
10
10
|
cert_chain: []
|
11
11
|
date: 2024-07-18 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
|
-
description: Given 2
|
13
|
+
description: Given 2 CSV files, remove all rows from the second CSV that are present
|
14
|
+
in the first CSV.
|
14
15
|
email:
|
15
16
|
- kingsley.hendrickse@patchwork.health
|
16
17
|
executables: []
|
@@ -25,6 +26,9 @@ files:
|
|
25
26
|
- Gemfile.lock
|
26
27
|
- README.md
|
27
28
|
- Rakefile
|
29
|
+
- ext/dedup_csv/Cargo.toml
|
30
|
+
- ext/dedup_csv/extconf.rb
|
31
|
+
- ext/dedup_csv/src/lib.rs
|
28
32
|
- lib/dedup_csv.rb
|
29
33
|
- lib/dedup_csv/2.7/dedup_csv.so
|
30
34
|
- lib/dedup_csv/3.0/dedup_csv.so
|
@@ -32,10 +36,11 @@ files:
|
|
32
36
|
- lib/dedup_csv/3.2/dedup_csv.so
|
33
37
|
- lib/dedup_csv/version.rb
|
34
38
|
- sig/dedup_csv.rbs
|
35
|
-
homepage: http://github.com
|
39
|
+
homepage: http://github.com/kingsleyh/dedup_csv
|
36
40
|
licenses: []
|
37
41
|
metadata:
|
38
|
-
homepage_uri: http://github.com
|
42
|
+
homepage_uri: http://github.com/kingsleyh/dedup_csv
|
43
|
+
source_code_uri: http://github.com/kingsleyh/dedup_csv
|
39
44
|
rubygems_mfa_required: 'true'
|
40
45
|
post_install_message:
|
41
46
|
rdoc_options: []
|
@@ -53,7 +58,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
53
58
|
requirements:
|
54
59
|
- - ">="
|
55
60
|
- !ruby/object:Gem::Version
|
56
|
-
version:
|
61
|
+
version: 3.0.0
|
57
62
|
requirements: []
|
58
63
|
rubygems_version: 3.4.4
|
59
64
|
signing_key:
|