dedup_csv 0.1.0-x86_64-darwin → 0.1.1-x86_64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/README.md +13 -0
- data/Rakefile +5 -5
- data/ext/dedup_csv/Cargo.toml +14 -0
- data/ext/dedup_csv/extconf.rb +6 -0
- data/ext/dedup_csv/src/lib.rs +77 -0
- data/lib/dedup_csv/version.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3e03bd04813149e45a6c2d685216693e16d2730984cfa75e34d66987147f2b46
|
4
|
+
data.tar.gz: a74999ebf0b6f41d8075dbe4a8bacefdca8b2e76ac63345c532c963ea4bc6556
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0a40c41a6c56b0ea655c6f236572efb13a94d2b8201a7ffb7482d3de22a5f8ec084190e4eaf6cabcf7b7e68f36995b5ab5d61f930d856f4a757d97eccbb12072
|
7
|
+
data.tar.gz: 8743e384b60b21b8596794bda59d57b046693a05f7aeb337d502c47f89c5ccb93d6bc2e31a8e704beb73aa44871abbf3ca9786e5c1148b876958bf9a604bf26d
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -2,6 +2,19 @@
|
|
2
2
|
|
3
3
|
Given 2 CSV files, this gem will create a third CSV file that contains the rows from the second CSV file that are not present in the first CSV file.
|
4
4
|
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
```bash
|
8
|
+
gem install dedup_csv
|
9
|
+
```
|
10
|
+
|
11
|
+
## Usage
|
12
|
+
|
13
|
+
```irb
|
14
|
+
require 'dedup_csv/3.2/dedup_csv'
|
15
|
+
DedupCsv.dedup('file1.csv', 'file2.csv', 'output.csv')
|
16
|
+
```
|
17
|
+
|
5
18
|
## Contributing
|
6
19
|
|
7
20
|
Bug reports and pull requests are welcome on GitHub at https://github.com/kingsleyh/dedup_csv.
|
data/Rakefile
CHANGED
@@ -14,11 +14,11 @@ require 'rb_sys/extensiontask'
|
|
14
14
|
task build: :compile
|
15
15
|
|
16
16
|
spec = Bundler.load_gemspec('dedup_csv.gemspec')
|
17
|
-
spec.requirements.clear
|
18
|
-
spec.required_ruby_version = nil
|
19
|
-
spec.required_rubygems_version = nil
|
20
|
-
spec.extensions.clear
|
21
|
-
spec.files -= Dir['ext/**/*']
|
17
|
+
# spec.requirements.clear
|
18
|
+
# spec.required_ruby_version = nil
|
19
|
+
# spec.required_rubygems_version = nil
|
20
|
+
# spec.extensions.clear
|
21
|
+
# spec.files -= Dir['ext/**/*']
|
22
22
|
|
23
23
|
Rake::ExtensionTask.new('dedup_csv', spec) do |c|
|
24
24
|
c.lib_dir = 'lib/dedup_csv'
|
@@ -0,0 +1,14 @@
|
|
1
|
+
[package]
|
2
|
+
name = "dedup_csv"
|
3
|
+
version = "0.1.0"
|
4
|
+
edition = "2021"
|
5
|
+
authors = ["kingsley.hendrickse <kingsley.hendrickse@patchwork.health>"]
|
6
|
+
publish = false
|
7
|
+
|
8
|
+
[lib]
|
9
|
+
crate-type = ["cdylib"]
|
10
|
+
|
11
|
+
[dependencies]
|
12
|
+
magnus = { version = "0.7.1" }
|
13
|
+
csv = "1.3.0"
|
14
|
+
eyre = "0.6.12"
|
@@ -0,0 +1,77 @@
|
|
1
|
+
use std::error::Error;
|
2
|
+
use std::ffi::OsStr;
|
3
|
+
use std::fs::File;
|
4
|
+
use std::path::Path;
|
5
|
+
use csv::{StringRecord, Writer};
|
6
|
+
use magnus::{define_module, function, prelude::*, Ruby};
|
7
|
+
|
8
|
+
fn dedup(ruby: &Ruby, previous_csv_path: String, new_csv_path: String, target_path: String) -> magnus::error::Result<()> {
|
9
|
+
if !previous_csv_path.has_extension(&["csv"]) {
|
10
|
+
return Err(magnus::Error::new(ruby.exception_exception(), "previous_csv_path must be a csv file".to_string()));
|
11
|
+
}
|
12
|
+
if !new_csv_path.has_extension(&["csv"]) {
|
13
|
+
return Err(magnus::Error::new(ruby.exception_exception(), "new_csv_path must be a csv file".to_string()));
|
14
|
+
}
|
15
|
+
|
16
|
+
let csv1 = File::open(previous_csv_path).map_err(|e| magnus_err(ruby, e, "previous_csv_path"))?;
|
17
|
+
let csv2 = File::open(new_csv_path).map_err(|e| magnus_err(ruby, e, "new_csv_path"))?;
|
18
|
+
|
19
|
+
let mut previous_csv: csv::Reader<File> = csv::Reader::from_reader(csv1);
|
20
|
+
let mut new_csv: csv::Reader<File> = csv::Reader::from_reader(csv2);
|
21
|
+
|
22
|
+
let mut wtr = Writer::from_path(target_path).map_err(|e| magnus_err(ruby, e, "target_path"))?;
|
23
|
+
|
24
|
+
let previous_headers = previous_csv.headers().map_err(|e| magnus_err(ruby, e, "previous_csv_path headers"))?;
|
25
|
+
let new_headers = new_csv.headers().map_err(|e| magnus_err(ruby, e, "new_csv_path headers"))?;
|
26
|
+
|
27
|
+
if previous_headers != new_headers {
|
28
|
+
return Err(magnus::Error::new(ruby.exception_exception(), "headers of both csv files must be the same".to_string()));
|
29
|
+
}
|
30
|
+
|
31
|
+
wtr.write_byte_record(previous_headers.as_byte_record()).unwrap();
|
32
|
+
|
33
|
+
let mut previous_records = vec![];
|
34
|
+
for previous_record in previous_csv.records() {
|
35
|
+
let previous_record = previous_record.map_err(|e| magnus_err(ruby, e, "previous_record"))?;
|
36
|
+
previous_records.push(previous_record)
|
37
|
+
}
|
38
|
+
|
39
|
+
for new_record in new_csv.records() {
|
40
|
+
let new_record = new_record.map_err(|e| magnus_err(ruby, e, "new_record"))?;
|
41
|
+
if !previous_records.contains(&new_record) {
|
42
|
+
let new_record = new_record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
|
43
|
+
wtr.write_byte_record(new_record.as_byte_record()).unwrap();
|
44
|
+
}
|
45
|
+
}
|
46
|
+
|
47
|
+
wtr.flush().unwrap();
|
48
|
+
|
49
|
+
Ok(())
|
50
|
+
}
|
51
|
+
|
52
|
+
fn magnus_err<E: Error>(ruby: &Ruby, e: E, msg: &str) -> magnus::Error {
|
53
|
+
magnus::Error::new(ruby.exception_exception(), format!("{}: {}", msg, e.to_string()))
|
54
|
+
}
|
55
|
+
|
56
|
+
#[magnus::init]
|
57
|
+
fn init() -> Result<(), magnus::Error> {
|
58
|
+
let module = define_module("DedupCsv")?;
|
59
|
+
module.define_singleton_method("dedup", function!(dedup, 3))?;
|
60
|
+
Ok(())
|
61
|
+
}
|
62
|
+
|
63
|
+
/// Extension check for anything that can be viewed as a path.
pub trait FileExtension {
    /// Returns `true` when the path's extension equals one of `extensions`,
    /// ignoring ASCII case. Returns `false` when the path has no extension or the
    /// extension is not valid UTF-8.
    fn has_extension<S: AsRef<str>>(&self, extensions: &[S]) -> bool;
}

impl<P: AsRef<Path>> FileExtension for P {
    fn has_extension<S: AsRef<str>>(&self, extensions: &[S]) -> bool {
        self.as_ref()
            .extension()
            .and_then(OsStr::to_str)
            .map_or(false, |actual| {
                extensions
                    .iter()
                    .any(|candidate| candidate.as_ref().eq_ignore_ascii_case(actual))
            })
    }
}
|
data/lib/dedup_csv/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dedup_csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: x86_64-darwin
|
6
6
|
authors:
|
7
7
|
- kingsley.hendrickse
|
@@ -25,6 +25,9 @@ files:
|
|
25
25
|
- Gemfile.lock
|
26
26
|
- README.md
|
27
27
|
- Rakefile
|
28
|
+
- ext/dedup_csv/Cargo.toml
|
29
|
+
- ext/dedup_csv/extconf.rb
|
30
|
+
- ext/dedup_csv/src/lib.rs
|
28
31
|
- lib/dedup_csv.rb
|
29
32
|
- lib/dedup_csv/2.7/dedup_csv.bundle
|
30
33
|
- lib/dedup_csv/3.0/dedup_csv.bundle
|
@@ -53,7 +56,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
53
56
|
requirements:
|
54
57
|
- - ">="
|
55
58
|
- !ruby/object:Gem::Version
|
56
|
-
version:
|
59
|
+
version: 3.0.0
|
57
60
|
requirements: []
|
58
61
|
rubygems_version: 3.4.4
|
59
62
|
signing_key:
|